@article {2663, title = {Adaptively-Tuned Particle Swarm Optimization with Application to Spatial Design}, journal = {Stat}, volume = {6}, year = {2017}, pages = {145{\textendash}159}, abstract = {Particle swarm optimization (PSO) algorithms are a class of heuristic optimization algorithms that are attractive for complex optimization problems. We propose using PSO to solve spatial design problems, e.g., choosing new locations to add to an existing monitoring network. Additionally, we introduce two new classes of PSO algorithms that perform well in a wide variety of circumstances, called adaptively tuned PSO and adaptively tuned bare bones PSO. To illustrate these algorithms, we apply them to a common spatial design problem: choosing new locations to add to an existing monitoring network. Specifically, we consider a network in the Houston, TX, area for monitoring ambient ozone levels, which have been linked to out-of-hospital cardiac arrest rates. Published 2017. This article has been contributed to by US Government employees and their work is in the public domain in the USA.}, doi = {10.1002/sta4.142}, url = {http://onlinelibrary.wiley.com/doi/10.1002/sta4.142/abstract}, author = {Simpson, M. and Wikle, C.K. and Holan, S.H.} } @article {2664, title = {Bayesian Hierarchical Multi-Population Multistate Jolly-Seber Models with Covariates: Application to the Pallid Sturgeon Population Assessment Program}, journal = {Journal of the American Statistical Association}, volume = {112}, year = {2017}, pages = {471-483}, abstract = {Estimating abundance for multiple populations is of fundamental importance to many ecological monitoring programs. Equally important is quantifying the spatial distribution and characterizing the migratory behavior of target populations within the study domain. To achieve these goals, we propose a Bayesian hierarchical multi-population multistate Jolly{\textendash}Seber model that incorporates covariates. The model is proposed using a state-space framework and has several distinct advantages. First, multiple populations within the same study area can be modeled simultaneously. As a consequence, it is possible to achieve improved parameter estimation by {\textquotedblleft}borrowing strength{\textquotedblright} across different populations. In many cases, such as our motivating example involving endangered species, this borrowing of strength is crucial, as there is relatively less information for one of the populations under consideration. Second, in addition to accommodating covariate information, we develop a computationally efficient Markov chain Monte Carlo algorithm that requires no tuning. Importantly, the model we propose allows us to draw inference on each population as well as on multiple populations simultaneously. Finally, we demonstrate the effectiveness of our method through a motivating example of estimating the spatial distribution and migration of hatchery and wild populations of the endangered pallid sturgeon (Scaphirhynchus albus), using data from the Pallid Sturgeon Population Assessment Program on the Lower Missouri River. Supplementary materials for this article are available online.}, doi = {10.1080/01621459.2016.1211531}, url = {http://www.tandfonline.com/doi/abs/10.1080/01621459.2016.1211531}, author = {Wu, G.
and Holan, S.H.} } @article {2658, title = {The Cepstral Model for Multivariate Time Series: The Vector Exponential Model}, journal = {Statistica Sinica}, volume = {27}, year = {2017}, pages = {23-42}, abstract = {Vector autoregressive (VAR) models have become a staple in the analysis of multivariate time series and are formulated in the time domain as difference equations, with an implied covariance structure. In many contexts, it is desirable to work with a stable, or at least stationary, representation. To fit such models, one must impose restrictions on the coefficient matrices to ensure that certain determinants are nonzero, which, except in special cases, may prove burdensome. To circumvent these difficulties, we propose a flexible frequency domain model expressed in terms of the spectral density matrix. Specifically, this paper treats the modeling of covariance stationary vector-valued (i.e., multivariate) time series via an extension of the exponential model for the spectrum of a scalar time series. We discuss the modeling advantages of the vector exponential model and its computational facets, such as how to obtain Wold coefficients from given cepstral coefficients. Finally, we demonstrate the utility of our approach through simulation as well as two illustrative data examples focusing on multi-step ahead forecasting and estimation of squared coherence.}, keywords = {Autocovariance matrix, Bayesian estimation, Cepstral, Coherence, Spectral density matrix, stochastic search variable selection, Wold coefficients}, doi = {10.5705/ss.202014.0024}, url = {http://www3.stat.sinica.edu.tw/statistica/J27N1/J27N12/J27N12.html}, author = {Holan, S.H. and McElroy, T.S. and Wu, G.} } @techreport {2655, title = {Computationally Efficient Multivariate Spatio-Temporal Models for High-Dimensional Count-Valued Data (with Discussion)}, number = {1512.07273}, year = {2017}, abstract = {We introduce a Bayesian approach for multivariate spatio-temporal prediction for high-dimensional count-valued data. Our primary interest is when there are possibly millions of data points referenced over different variables, geographic regions, and times. This problem requires extensive methodological advancements, as jointly modeling correlated data of this size leads to the so-called "big n problem." The computational complexity of prediction in this setting is further exacerbated by acknowledging that count-valued data are naturally non-Gaussian. Thus, we develop a new computationally efficient distribution theory for this setting. In particular, we introduce a multivariate log-gamma distribution and provide substantial theoretical development including: results regarding conditional distributions, marginal distributions, an asymptotic relationship with the multivariate normal distribution, and full-conditional distributions for a Gibbs sampler. To incorporate dependence between variables, regions, and time points, a multivariate spatio-temporal mixed effects model (MSTM) is used. The results in this manuscript are extremely general, and can be used for data that exhibit fewer sources of dependency than what we consider (e.g., multivariate, spatial-only, or spatio-temporal-only data). Hence, the implications of our modeling framework may have a large impact on the general problem of jointly modeling correlated count-valued data. We show the effectiveness of our approach through a simulation study.
Additionally, we demonstrate our proposed methodology with an important application analyzing data obtained from the Longitudinal Employer-Household Dynamics (LEHD) program, which is administered by the U.S. Census Bureau.}, keywords = {Aggregation, American Community Survey, Bayesian hierarchical model, Big Data, Longitudinal Employer-Household Dynamics (LEHD) program, Markov chain Monte Carlo, Non-Gaussian, Quarterly Workforce Indicators}, url = {https://arxiv.org/abs/1512.07273}, author = {Bradley, J.R. and Holan, S.H. and Wikle, C.K.} } @article {Hu2017-nm, title = {Dirichlet Process Mixture Models for Modeling and Generating Synthetic Versions of Nested Categorical Data}, journal = {Bayesian Analysis}, year = {2017}, month = {24 January 2017}, abstract = {We present a Bayesian model for estimating the joint distribution of multivariate categorical data when units are nested within groups. Such data arise frequently in social science settings, for example, people living in households. The model assumes that (i) each group is a member of a group-level latent class, and (ii) each unit is a member of a unit-level latent class nested within its group-level latent class. This structure allows the model to capture dependence among units in the same group. It also facilitates simultaneous modeling of variables at both group and unit levels. We develop a version of the model that assigns zero probability to groups and units with physically impossible combinations of variables. We apply the model to estimate multivariate relationships in a subset of the American Community Survey. Using the estimated model, we generate synthetic household data that could be disseminated as redacted public use files. Supplementary materials (Hu et al., 2017) for this article are available online.}, doi = {10.1214/16-BA1047}, url = {http://projecteuclid.org/euclid.ba/1485227030}, author = {Hu, Jingchen and Reiter, Jerome P. and Wang, Quanli} } @techreport {handle:1813:52650, title = {Effects of a Government-Academic Partnership: Has the NSF-Census Bureau Research Network Helped Secure the Future of the Federal Statistical System?}, number = {1813:52650}, year = {2017}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {
The National Science Foundation-Census Bureau Research Network (NCRN) was established in 2011 to create interdisciplinary research nodes on methodological questions of interest and significance to the broader research community and to the Federal Statistical System (FSS), particularly the Census Bureau. The activities to date have covered both fundamental and applied statistical research and have focused at least in part on the training of current and future generations of researchers in skills of relevance to surveys and alternative measurement of economic units, households, and persons. This paper discusses some of the key research findings of the eight nodes, organized into six topics: (1) Improving census and survey data collection methods; (2) Using alternative sources of data; (3) Protecting privacy and confidentiality by improving disclosure avoidance; (4) Using spatial and spatio-temporal statistical modeling to improve estimates; (5) Assessing data cost and quality tradeoffs; and (6) Combining information from multiple sources. It also reports on collaborations across nodes and with federal agencies, new software developed, and educational activities and outcomes. The paper concludes with an evaluation of the ability of the FSS to apply the NCRN{\textquoteright}s research outcomes and suggests some next steps, as well as the implications of this research-network model for future federal government renewal initiatives. This paper began as a May 8, 2015 presentation to the National Academies of Sciences{\textquoteright} Committee on National Statistics by two of the principal investigators of the National Science Foundation-Census Bureau Research Network (NCRN) {\textendash} John Abowd and the late Steve Fienberg (Carnegie Mellon University). The authors acknowledge the contributions of the other principal investigators of the NCRN who are not co-authors of the paper (William Block, William Eddy, Alan Karr, Charles Manski, Nicholas Nagle, and Rebecca Nugent), the co-principal investigators, and the comments of Patrick Cantwell, Constance Citro, Adam Eck, Brian Harris-Kojetin, and Eloise Parker. We note with sorrow the deaths of Stephen Fienberg and Allan McCutcheon, two of the original NCRN principal investigators. The principal investigators also wish to acknowledge Cheryl Eavey{\textquoteright}s sterling grant administration on behalf of the NSF. The conclusions reached in this paper are not the responsibility of the National Science Foundation (NSF), the Census Bureau, or any of the institutions to which the authors belong.
}, url = {http://hdl.handle.net/1813/52650}, author = {Weinberg, Daniel and Abowd, John M. and Belli, Robert F. and Cressie, Noel and Folch, David C. and Holan, Scott H. and Levenstein, Margaret C. and Olson, Kristen M. and Reiter, Jerome P. and Shapiro, Matthew D. and Smyth, Jolene and Soh, Leen-Kiat and Spencer, Bruce and Spielman, Seth E. and Vilhuber, Lars and Wikle, Christopher} } @article {2657, title = {Regionalization of Multiscale Spatial Processes using a Criterion for Spatial Aggregation Error}, journal = {Journal of the Royal Statistical Society -- Series B}, year = {2017}, abstract = {The modifiable areal unit problem and the ecological fallacy are known problems that occur when modeling multiscale spatial processes. We investigate how these forms of spatial aggregation error can guide a regionalization over a spatial domain of interest. By "regionalization" we mean a specification of geographies that define the spatial support for areal data. This topic has been studied vigorously by geographers, but has been given less attention by spatial statisticians. Thus, we propose a criterion for spatial aggregation error (CAGE), which we minimize to obtain an optimal regionalization. To define CAGE we draw a connection between spatial aggregation error and a new multiscale representation of the Karhunen-Loeve (K-L) expansion. This relationship between CAGE and the multiscale K-L expansion leads to illuminating theoretical developments including: connections between spatial aggregation error, squared prediction error, spatial variance, and a novel extension of Obled-Creutin eigenfunctions. The effectiveness of our approach is demonstrated through an analysis of two datasets, one using the American Community Survey and one related to environmental ocean winds.}, keywords = {American Community Survey, empirical orthogonal functions, MAUP, Reduced rank, Spatial basis functions, Survey data}, url = {https://arxiv.org/abs/1502.01974}, author = {Bradley, J.R. and Wikle, C.K. and Holan, S.H.} } @article {2541, title = {Utility Cost of Formal Privacy for Releasing National Employer-Employee Statistics}, journal = {Proceedings of the 2017 ACM International Conference on Management of Data}, year = {2017}, abstract = {National statistical agencies around the world publish tabular summaries based on combined employer-employee (ER-EE) data. The privacy of both individuals and business establishments that feature in these data is protected by law in most countries. These data are currently released using a variety of statistical disclosure limitation (SDL) techniques that do not reveal the exact characteristics of particular employers and employees, but lack provable privacy guarantees limiting inferential disclosures. In this work, we present novel algorithms for releasing tabular summaries of linked ER-EE data with formal, provable guarantees of privacy. We show that state-of-the-art differentially private algorithms add too much noise for the output to be useful. Instead, we identify the privacy requirements mandated by current interpretations of the relevant laws, and formalize them using the Pufferfish framework. We then develop new privacy definitions that are customized to ER-EE data and satisfy the statutory privacy requirements. We implement the experiments in this paper on production data gathered by the U.S. Census Bureau.
An empirical evaluation of utility for these data shows that for reasonable values of the privacy-loss parameter ε ≥ 1, the additive error introduced by our provably private algorithms is comparable to, and in some cases better than, the error introduced by existing SDL techniques that have no provable privacy guarantees. For some complex queries currently published, however, our algorithms do not have utility comparable to the existing traditional SDL algorithms. Those queries are fodder for future research.}, isbn = {978-1-4503-4197-4}, doi = {10.1145/3035918.3035940}, url = {http://dl.acm.org/citation.cfm?doid=3035918.3035940}, author = {Samuel Haney and Ashwin Machanavajjhala and John M. Abowd and Matthew Graham and Mark Kutzbach} } @techreport {handle:1813:49652, title = {Utility Cost of Formal Privacy for Releasing National Employer-Employee Statistics}, number = {1813:49652}, year = {2017}, institution = {Cornell University}, type = {Preprint}, abstract = {National statistical agencies around the world publish tabular summaries based on combined employer-employee (ER-EE) data. The privacy of both individuals and business establishments that feature in these data is protected by law in most countries. These data are currently released using a variety of statistical disclosure limitation (SDL) techniques that do not reveal the exact characteristics of particular employers and employees, but lack provable privacy guarantees limiting inferential disclosures. In this work, we present novel algorithms for releasing tabular summaries of linked ER-EE data with formal, provable guarantees of privacy. We show that state-of-the-art differentially private algorithms add too much noise for the output to be useful. Instead, we identify the privacy requirements mandated by current interpretations of the relevant laws, and formalize them using the Pufferfish framework. We then develop new privacy definitions that are customized to ER-EE data and satisfy the statutory privacy requirements. We implement the experiments in this paper on production data gathered by the U.S. Census Bureau. An empirical evaluation of utility for these data shows that for reasonable values of the privacy-loss parameter ε ≥ 1, the additive error introduced by our provably private algorithms is comparable to, and in some cases better than, the error introduced by existing SDL techniques that have no provable privacy guarantees. For some complex queries currently published, however, our algorithms do not have utility comparable to the existing traditional SDL algorithms. Those queries are fodder for future research.}, url = {http://hdl.handle.net/1813/49652}, author = {Haney, Samuel and Machanavajjhala, Ashwin and Abowd, John M. and Graham, Matthew and Kutzbach, Mark and Vilhuber, Lars} } @article {doi:10.1080/01621459.2015.1105807, title = {A Bayesian Approach to Graphical Record Linkage and Deduplication}, journal = {Journal of the American Statistical Association}, volume = {111}, number = {516}, year = {2016}, pages = {1660-1672}, abstract = {We propose an unsupervised approach for linking records across arbitrarily many files, while simultaneously detecting duplicate records within files.
Our key innovation involves the representation of the pattern of links between records as a bipartite graph, in which records are directly linked to latent true individuals, and only indirectly linked to other records. This flexible representation of the linkage structure naturally allows us to estimate the attributes of the unique observable people in the population, calculate transitive linkage probabilities across records (and represent this visually), and propagate the uncertainty of record linkage into later analyses. Our method makes it particularly easy to integrate record linkage with post-processing procedures such as logistic regression, capture{\textendash}recapture, etc. Our linkage structure lends itself to an efficient, linear-time, hybrid Markov chain Monte Carlo algorithm, which overcomes many obstacles encountered by previous record linkage approaches, despite the high-dimensional parameter space. We illustrate our method using longitudinal data from the National Long Term Care Survey and with data from the Italian Survey on Household and Wealth, where we assess the accuracy of our method and show it to be better in terms of error rates and empirical scalability than other approaches in the literature. Supplementary materials for this article are available online.}, doi = {10.1080/01621459.2015.1105807}, url = {http://dx.doi.org/10.1080/01621459.2015.1105807}, author = {Rebecca C. Steorts and Rob Hall and Stephen E. Fienberg} } @article {2665, title = {Bayesian Hierarchical Models with Conjugate Full-Conditional Distributions for Dependent Data from the Natural Exponential Family}, journal = {Journal of the American Statistical Association - T\&M.}, year = {2016}, abstract = {We introduce a Bayesian approach for analyzing (possibly) high-dimensional dependent data that are distributed according to a member from the natural exponential family of distributions. This problem requires extensive methodological advancements, as jointly modeling high-dimensional dependent data leads to the so-called "big n problem." The computational complexity of the "big n problem" is further exacerbated when allowing for non-Gaussian data models, as is the case here. Thus, we develop new computationally efficient distribution theory for this setting. In particular, we introduce something we call the "conjugate multivariate distribution," which is motivated by the univariate distribution introduced in Diaconis and Ylvisaker (1979). Furthermore, we provide substantial theoretical and methodological development including: results regarding conditional distributions, an asymptotic relationship with the multivariate normal distribution, conjugate prior distributions, and full-conditional distributions for a Gibbs sampler. The results in this manuscript are extremely general, and can be adapted to many different settings. We demonstrate the proposed methodology through simulated examples and analyses based on estimates obtained from the US Census Bureau{\textquoteright}s American Community Survey (ACS).}, url = {https://arxiv.org/abs/1701.07506}, author = {Bradley, J.R. and Holan, S.H. and Wikle, C.K.} } @article {si:reiter:hillygus16, title = {Bayesian latent pattern mixture models for handling attrition in panel studies with refreshment samples}, journal = {Annals of Applied Statistics}, volume = {10}, year = {2016}, pages = {118{\textendash}143}, doi = {10.1214/15-AOAS876}, url = {http://projecteuclid.org/euclid.aoas/1458909910}, author = {Y. Si and J. P. Reiter and D. S.
Hillygus} } @article {2668, title = {Bayesian Lattice Filters for Time-Varying Autoregression and Time-Frequency Analysis}, journal = {Bayesian Analysis}, year = {2016}, pages = {977-1003}, abstract = {Modeling nonstationary processes is of paramount importance to many scientific disciplines including environmental science, ecology, and finance, among others. Consequently, flexible methodology that provides accurate estimation across a wide range of processes is a subject of ongoing interest. We propose a novel approach to model-based time-frequency estimation using time-varying autoregressive models. In this context, we take a fully Bayesian approach and allow both the autoregressive coefficients and innovation variance to vary over time. Importantly, our estimation method uses the lattice filter and is cast within the partial autocorrelation domain. The marginal posterior distributions are of standard form and, as a convenient by-product of our estimation method, our approach avoids undesirable matrix inversions. As such, estimation is extremely computationally efficient and stable. To illustrate the effectiveness of our approach, we conduct a comprehensive simulation study that compares our method with other competing methods and find that, in most cases, our approach performs better in terms of average squared error between the estimated and true time-varying spectral density. Lastly, we demonstrate our methodology through three modeling applications; namely, insect communication signals, environmental data (wind components), and macroeconomic data (US gross domestic product (GDP) and consumption).}, url = {https://arxiv.org/abs/1408.2757}, author = {Yang, W.H. and Holan, S.H. and Wikle, C.K.} } @article {hahn:murray:mano, title = {A Bayesian Partial Identification Approach to Inferring the Prevalence of Accounting Misconduct}, journal = {Journal of the American Statistical Association}, volume = {111}, year = {2016}, pages = {14{\textendash}26}, abstract = {This article describes the use of flexible Bayesian regression models for estimating a partially identified probability function. Our approach permits efficient sensitivity analysis concerning the posterior impact of priors on the partially identified component of the regression model. The new methodology is illustrated on an important problem where only partially observed data are available{\textemdash}inferring the prevalence of accounting misconduct among publicly traded U.S. businesses. Supplementary materials for this article are available online.}, doi = {10.1080/01621459.2015.1084307}, url = {http://www.tandfonline.com/doi/full/10.1080/01621459.2015.1084307}, author = {P. R. Hahn and J. S. Murray and I. Manolopoulou} } @article {2666, title = {Bayesian Spatial Change of Support for Count-Valued Survey Data with Application to the American Community Survey}, journal = {Journal of the American Statistical Association}, year = {2016}, pages = {472-487}, abstract = {We introduce Bayesian spatial change of support methodology for count-valued survey data with known survey variances. Our proposed methodology is motivated by the American Community Survey (ACS), an ongoing survey administered by the U.S. Census Bureau that provides timely information on several key demographic variables. Specifically, the ACS produces 1-year, 3-year, and 5-year "period-estimates," and corresponding margins of error, for published demographic and socio-economic variables recorded over predefined geographies within the United States.
Despite the availability of these predefined geographies, it is often of interest to data users to specify customized user-defined spatial supports. In particular, it is useful to estimate demographic variables defined on "new" spatial supports in "real-time." This problem is known as spatial change of support (COS), which is typically performed under the assumption that the data follows a Gaussian distribution. However, count-valued survey data is naturally non-Gaussian and, hence, we consider modeling these data using a Poisson distribution. Additionally, survey data are often accompanied by estimates of error, which we incorporate into our analysis. We interpret Poisson count-valued data in small areas as an aggregation of events from a spatial point process. This approach provides us with the flexibility necessary to allow ACS users to consider a variety of spatial supports in "real-time." We demonstrate the effectiveness of our approach through a simulated example as well as through an analysis using public-use ACS data.}, url = {https://arxiv.org/abs/1405.7227}, author = {Bradley, J.R. and Wikle, C.K. and Holan, S.H.} } @article {2670, title = {Computation of the Autocovariances for Time Series with Multiple Long-Range Persistencies}, journal = {Computational Statistics and Data Analysis}, year = {2016}, pages = {44-56}, abstract = {Gegenbauer processes allow for flexible and convenient modeling of time series data with multiple spectral peaks, where the qualitative description of these peaks is via the concept of cyclical long-range dependence. The Gegenbauer class is extensive, including ARFIMA, seasonal ARFIMA, and GARMA processes as special cases. Model estimation is challenging for Gegenbauer processes when multiple zeros and poles occur in the spectral density, because the autocovariance function is laborious to compute. The method of splitting{\textemdash}essentially computing autocovariances by convolving long memory and short memory dynamics{\textemdash}is only tractable when a single long memory pole exists. An additive decomposition of the spectrum into a sum of spectra is proposed, where each summand has a single singularity, so that a computationally efficient splitting method can be applied to each term and then aggregated. This approach differs from handling all the poles in the spectral density at once, via an analysis of truncation error. The proposed technique allows for fast estimation of time series with multiple long-range dependences, which is illustrated numerically and through several case studies.}, url = {http://www.sciencedirect.com/science/article/pii/S0167947316300202}, author = {McElroy, T.S. and Holan, S.H.} } @article {2667, title = {Generating Partially Synthetic Geocoded Public Use Data with Decreased Disclosure Risk Using Differential Smoothing}, journal = {Journal of the Royal Statistical Society - Series A}, year = {2016}, abstract = {When collecting geocoded confidential data with the intent to disseminate, agencies often resort to altering the geographies prior to making data publicly available due to data privacy obligations. An alternative to releasing aggregated and/or perturbed data is to release multiply-imputed synthetic data, where sensitive values are replaced with draws from statistical models designed to capture important distributional features in the collected data.
One issue that has received relatively little attention, however, is how to handle spatially outlying observations in the collected data, as common spatial models often have a tendency to overfit these observations. The goal of this work is to bring this issue to the forefront and propose a solution, which we refer to as "differential smoothing." After implementing our method on simulated data, highlighting the effectiveness of our approach under various scenarios, we illustrate the framework using data consisting of sale prices of homes in San Francisco.}, url = {https://arxiv.org/abs/1507.05529}, author = {Quick, H. and Holan, S.H. and Wikle, C.K.} } @article {2669, title = {Multivariate Spatio-Temporal Survey Fusion with Application to the American Community Survey and Local Area Unemployment Statistics}, journal = {Stat}, year = {2016}, pages = {224-233}, abstract = {There are often multiple surveys available that estimate and report related demographic variables of interest that are referenced over space and/or time. Not all surveys produce the same information, and thus, combining these surveys typically leads to higher quality estimates. That is, not every survey has the same level of precision, nor do all surveys provide estimates of the same variables. In addition, various surveys often produce estimates with incomplete spatio-temporal coverage. By combining surveys using a Bayesian approach, we can account for different margins of error and leverage dependencies to produce estimates of every variable considered at every spatial location and every time point. Specifically, our strategy is to use a hierarchical modelling approach, where the first stage of the model incorporates the margin of error associated with each survey. Then, in a lower stage of the hierarchical model, the multivariate spatio-temporal mixed effects model is used to incorporate multivariate spatio-temporal dependencies of the processes of interest. We adopt a fully Bayesian approach for combining surveys; that is, given all of the available surveys, the conditional distributions of the latent processes of interest are used for statistical inference. To demonstrate our proposed methodology, we jointly analyze period estimates from the US Census Bureau{\textquoteright}s American Community Survey, and estimates obtained from the Bureau of Labor Statistics Local Area Unemployment Statistics program. Copyright {\textcopyright} 2016 John Wiley \& Sons, Ltd.}, url = {http://onlinelibrary.wiley.com/doi/10.1002/sta4.120/full}, author = {Bradley, J.R. and Holan, S.H. and Wikle, C.K.} } @article {1866, title = {Accounting for nonignorable unit nonresponse and attrition in panel studies with refreshment samples}, journal = {Journal of Survey Statistics and Methodology}, volume = {3}, year = {2015}, pages = {265-295}, chapter = {265}, abstract = {Panel surveys typically suffer from attrition, which can lead to biased inference when basing analysis only on cases that complete all waves of the panel. Unfortunately, panel data alone cannot inform the extent of the bias from the attrition, so that analysts using the panel data alone must make strong and untestable assumptions about the missing data mechanism. Many panel studies also include refreshment samples, which are data collected from a random sample of new individuals during some later wave of the panel.
Refreshment samples offer information that can be utilized to correct for biases induced by nonignorable attrition while reducing reliance on strong assumptions about the attrition process. To date, these bias correction methods have not dealt with two key practical issues in panel studies: unit nonresponse in the initial wave of the panel and in the refreshment sample itself. As we illustrate, nonignorable unit nonresponse can significantly compromise the analyst{\textquoteright}s ability to use the refreshment samples for attrition bias correction. Thus, it is crucial for analysts to assess how sensitive their inferences{\textemdash}corrected for panel attrition{\textemdash}are to different assumptions about the nature of the unit nonresponse. We present an approach that facilitates such sensitivity analyses, both for suspected nonignorable unit nonresponse in the initial wave and in the refreshment sample. We illustrate the approach using simulation studies and an analysis of data from the 2007-2008 Associated Press/Yahoo News election panel study.}, doi = {10.1093/jssam/smv007}, url = {http://jssam.oxfordjournals.org/content/3/3/265.abstract}, author = {Schifeling, T. and Cheng, C. and Hillygus, D. S. and Reiter, J. P.} } @article {1739, title = {Bayesian Analysis of Spatially-Dependent Functional Responses with Spatially-Dependent Multi-Dimensional Functional Predictors}, journal = {Statistica Sinica}, volume = {25}, year = {2015}, chapter = {205-223}, doi = {10.5705/ss.2013.245w}, url = {http://www3.stat.sinica.edu.tw/preprint/SS-13-245w_Preprint.pdf}, author = {Yang, W. H. and Wikle, C.K. and Holan, S.H. and Sudduth, K. and Meyers, D.B.} } @article {1741, title = {Bayesian Binomial Mixture Models for Estimating Abundance in Ecological Monitoring Studies}, journal = {Annals of Applied Statistics}, volume = {9}, year = {2015}, pages = {1-26}, doi = {10.1214/14-AOAS801}, url = {http://projecteuclid.org/euclid.aoas/1430226082}, author = {Wu, G. and Holan, S.H. and Nilon, C.H. and Wikle, C.K.} } @article {2126, title = {Bayesian Latent Pattern Mixture Models for Handling Attrition in Panel Studies With Refreshment Samples}, journal = {ArXiv}, year = {2015}, month = {09/2015}, abstract = {Many panel studies collect refreshment samples---new, randomly sampled respondents who complete the questionnaire at the same time as a subsequent wave of the panel. With appropriate modeling, these samples can be leveraged to correct inferences for biases caused by non-ignorable attrition. We present such a model when the panel includes many categorical survey variables. The model relies on a Bayesian latent pattern mixture model, in which an indicator for attrition and the survey variables are modeled jointly via a latent class model. We allow the multinomial probabilities within classes to depend on the attrition indicator, which offers additional flexibility over standard applications of latent class models. We present results of simulation studies that illustrate the benefits of this flexibility. We apply the model to correct attrition bias in an analysis of data from the 2007-2008 Associated Press/Yahoo News election panel study.}, keywords = {Categorical, Dirichlet process, Multiple imputation, Non-ignorable, Panel attrition, Refreshment sample}, url = {http://arxiv.org/abs/1509.02124}, author = {Yajuan Si and Jerome P. Reiter and D.
Sunshine Hillygus} } @article {2015arXiv:1408.2757, title = {Bayesian Lattice Filters for Time-Varying Autoregression and Time-Frequency Analysis}, journal = {ArXiv}, year = {2015}, abstract = {Modeling nonstationary processes is of paramount importance to many scientific disciplines including environmental science, ecology, and finance, among others. Consequently, flexible methodology that provides accurate estimation across a wide range of processes is a subject of ongoing interest. We propose a novel approach to model-based time-frequency estimation using time-varying autoregressive models. In this context, we take a fully Bayesian approach and allow both the autoregressive coefficients and innovation variance to vary over time. Importantly, our estimation method uses the lattice filter and is cast within the partial autocorrelation domain. The marginal posterior distributions are of standard form and, as a convenient by-product of our estimation method, our approach avoids undesirable matrix inversions. As such, estimation is extremely computationally efficient and stable. To illustrate the effectiveness of our approach, we conduct a comprehensive simulation study that compares our method with other competing methods and find that, in most cases, our approach performs better in terms of average squared error between the estimated and true time-varying spectral density. Lastly, we demonstrate our methodology through three modeling applications; namely, insect communication signals, environmental data (wind components), and macroeconomic data (US gross domestic product (GDP) and consumption).}, url = {http://arxiv.org/abs/1408.2757}, author = {Yang, W.~H. and Holan, S.~H. and Wikle, C.K.} } @article {2221, title = {Bayesian Lattice Filters for Time-Varying Autoregression and Time{\textendash}Frequency Analysis}, journal = {Bayesian Analysis}, year = {2015}, month = {10/2015}, pages = {27}, abstract = {Modeling nonstationary processes is of paramount importance to many scientific disciplines including environmental science, ecology, and finance, among others. Consequently, flexible methodology that provides accurate estimation across a wide range of processes is a subject of ongoing interest. We propose a novel approach to model-based time{\textendash}frequency estimation using time-varying autoregressive models. In this context, we take a fully Bayesian approach and allow both the autoregressive coefficients and innovation variance to vary over time. Importantly, our estimation method uses the lattice filter and is cast within the partial autocorrelation domain. The marginal posterior distributions are of standard form and, as a convenient by-product of our estimation method, our approach avoids undesirable matrix inversions. As such, estimation is extremely computationally efficient and stable. To illustrate the effectiveness of our approach, we conduct a comprehensive simulation study that compares our method with other competing methods and find that, in most cases, our approach performs better in terms of average squared error between the estimated and true time-varying spectral density.
Lastly, we demonstrate our methodology through three modeling applications; namely, insect communication signals, environmental data (wind components), and macroeconomic data (US gross domestic product (GDP) and consumption).}, keywords = {locally stationary, model selection, nonstationary partial autocorrelation, piecewise stationary, sequential estimation, time-varying spectral density}, doi = {10.1214/15-BA978}, url = {http://projecteuclid.org/euclid.ba/1445263834}, author = {Yang, W.~H. and Holan, Scott H. and Wikle, Christopher K.} } @article {2039, title = {Bayesian Marked Point Process Modeling for Generating Fully Synthetic Public Use Data with Point-Referenced Geography}, journal = {Spatial Statistics}, volume = {14}, year = {2015}, month = {08/2015}, pages = {439-451}, doi = {10.1016/j.spasta.2015.07.008}, url = {http://www.sciencedirect.com/science/article/pii/S2211675315000718}, author = {Quick, Harrison and Holan, Scott H. and Wikle, Christopher K. and Reiter, Jerome P.} } @article {2015arXiv:1407.7795, title = {Bayesian Marked Point Process Modeling for Generating Fully Synthetic Public Use Data with Point-Referenced Geography}, journal = {ArXiv}, year = {2015}, abstract = {Many data stewards collect confidential data that include fine geography. When sharing these data with others, data stewards strive to disseminate data that are informative for a wide range of spatial and non-spatial analyses while simultaneously protecting the confidentiality of data subjects{\textquoteright} identities and attributes. Typically, data stewards meet this challenge by coarsening the resolution of the released geography and, as needed, perturbing the confidential attributes. When done with high intensity, these redaction strategies can result in released data with poor analytic quality. We propose an alternative dissemination approach based on fully synthetic data. We generate data using marked point process models that can maintain both the statistical properties and the spatial dependence structure of the confidential data. We illustrate the approach using data consisting of mortality records from Durham, North Carolina.}, url = {http://arxiv.org/abs/1407.7795}, author = {Quick, H. and Holan, S.~H. and Wikle, C.~K. and Reiter, J.~P.} } @article {2088, title = {Bayesian Semiparametric Hierarchical Empirical Likelihood Spatial Models}, journal = {Journal of Statistical Planning and Inference}, volume = {165}, year = {2015}, month = {10/2015}, pages = {78-90}, issn = {0378-3758}, doi = {10.1016/j.jspi.2015.04.002}, author = {Porter, A.T. and Holan, S.H. and Wikle, C.K.} } @article {2204, title = {Bayesian Spatial Change of Support for Count-Valued Survey Data with Application to the American Community Survey}, journal = {Journal of the American Statistical Association}, year = {2015}, month = {12/2015}, abstract = {We introduce Bayesian spatial change of support methodology for count-valued survey data with known survey variances. Our proposed methodology is motivated by the American Community Survey (ACS), an ongoing survey administered by the U.S. Census Bureau that provides timely information on several key demographic variables. Specifically, the ACS produces 1-year, 3-year, and 5-year {\textquotedblleft}period-estimates,{\textquotedblright} and corresponding margins of error, for published demographic and socio-economic variables recorded over predefined geographies within the United States.
Despite the availability of these predefined geographies, it is often of interest to data users to specify customized user-defined spatial supports. In particular, it is useful to estimate demographic variables defined on {\textquotedblleft}new{\textquotedblright} spatial supports in {\textquotedblleft}real-time.{\textquotedblright} This problem is known as spatial change of support (COS), which is typically performed under the assumption that the data follows a Gaussian distribution. However, count-valued survey data is naturally non-Gaussian and, hence, we consider modeling these data using a Poisson distribution. Additionally, survey data are often accompanied by estimates of error, which we incorporate into our analysis. We interpret Poisson count-valued data in small areas as an aggregation of events from a spatial point process. This approach provides us with the flexibility necessary to allow ACS users to consider a variety of spatial supports in {\textquotedblleft}real-time.{\textquotedblright} We show the effectiveness of our approach through a simulated example as well as through an analysis using public-use ACS data.}, doi = {10.1080/01621459.2015.1117471}, url = {http://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1117471}, author = {Bradley, Jonathan and Wikle, C.K. and Holan, S.~H.} } @article {2219, title = {Bayesian Spatial Change of Support for Count-Valued Survey Data with Application to the American Community Survey}, journal = {Journal of the American Statistical Association}, year = {2015}, month = {12/2015}, abstract = {We introduce Bayesian spatial change of support methodology for count-valued survey data with known survey variances. Our proposed methodology is motivated by the American Community Survey (ACS), an ongoing survey administered by the U.S. Census Bureau that provides timely information on several key demographic variables. Specifically, the ACS produces 1-year, 3-year, and 5-year {\textquotedblleft}period-estimates,{\textquotedblright} and corresponding margins of error, for published demographic and socio-economic variables recorded over predefined geographies within the United States. Despite the availability of these predefined geographies, it is often of interest to data users to specify customized user-defined spatial supports. In particular, it is useful to estimate demographic variables defined on {\textquotedblleft}new{\textquotedblright} spatial supports in {\textquotedblleft}real-time.{\textquotedblright} This problem is known as spatial change of support (COS), which is typically performed under the assumption that the data follows a Gaussian distribution. However, count-valued survey data is naturally non-Gaussian and, hence, we consider modeling these data using a Poisson distribution. Additionally, survey data are often accompanied by estimates of error, which we incorporate into our analysis. We interpret Poisson count-valued data in small areas as an aggregation of events from a spatial point process.
This approach provides us with the flexibility necessary to allow ACS users to consider a variety of spatial supports in {\textquotedblleft}real-time.{\textquotedblright} We show the effectiveness of our approach through a simulated example as well as through an analysis using public-use ACS data.}, keywords = {Aggregation, American Community Survey, Bayesian hierarchical model, Givens angle prior, Markov chain Monte Carlo, Multiscale model, Non-Gaussian}, doi = {10.1080/01621459.2015.1117471}, url = {http://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1117471}, author = {Bradley, Jonathan R. and Wikle, Christopher K. and Holan, Scott H.} } @article {2015arXiv:1405.7227, title = {Bayesian Spatial Change of Support for Count-Valued Survey Data}, journal = {ArXiv}, year = {2015}, abstract = {We introduce Bayesian spatial change of support methodology for count-valued survey data with known survey variances. Our proposed methodology is motivated by the American Community Survey (ACS), an ongoing survey administered by the U.S. Census Bureau that provides timely information on several key demographic variables. Specifically, the ACS produces 1-year, 3-year, and 5-year "period-estimates," and corresponding margins of error, for published demographic and socio-economic variables recorded over predefined geographies within the United States. Despite the availability of these predefined geographies, it is often of interest to data users to specify customized user-defined spatial supports. In particular, it is useful to estimate demographic variables defined on "new" spatial supports in "real-time." This problem is known as spatial change of support (COS), which is typically performed under the assumption that the data follows a Gaussian distribution. However, count-valued survey data is naturally non-Gaussian and, hence, we consider modeling these data using a Poisson distribution. Additionally, survey data are often accompanied by estimates of error, which we incorporate into our analysis. We interpret Poisson count-valued data in small areas as an aggregation of events from a spatial point process. This approach provides us with the flexibility necessary to allow ACS users to consider a variety of spatial supports in "real-time." We demonstrate the effectiveness of our approach through a simulated example as well as through an analysis using public-use ACS data.}, url = {http://arxiv.org/abs/1405.7227}, author = {Bradley, J.~R. and Wikle, C.K. and Holan, S.~H.} } @article {1883, title = {Comment on {\textquotedblleft}Semiparametric Bayesian Density Estimation with Disparate Data Sources: A Meta-Analysis of Global Childhood Undernutrition{\textquotedblright} by Finucane, M. M., Paciorek, C. J., Stevens, G. A., and Ezzati, M.}, journal = {Journal of the American Statistical Association}, year = {2015}, author = {Wikle, C.K. and Holan, S.H.} } @article {2040, title = {Dirichlet Process Mixture Models for Nested Categorical Data}, journal = {ArXiv}, year = {2015}, abstract = {We present a Bayesian model for estimating the joint distribution of multivariate categorical data when units are nested within groups. Such data arise frequently in social science settings, for example, people living in households. The model assumes that (i) each group is a member of a group-level latent class, and (ii) each unit is a member of a unit-level latent class nested within its group-level latent class. This structure allows the model to capture dependence among units in the same group.
It also facilitates simultaneous modeling of variables at both group and unit levels. We develop a version of the model that assigns zero probability to groups and units with physically impossible combinations of variables. We apply the model to estimate multivariate relationships in a subset of the American Community Survey. Using the estimated model, we generate synthetic household data that could be disseminated as redacted public use files with high analytic validity and low disclosure risks. Supplementary materials for this article are available online.}, url = {http://arxiv.org/pdf/1412.2282v3.pdf}, author = {Hu, J. and Reiter, J.P. and Wang, Q.} } @mastersthesis {2032, title = {Dirichlet Process Mixture Models for Nested Categorical Data (Ph.D. Thesis)}, year = {2015}, school = {Duke University}, type = {Ph.D.}, url = {http://dukespace.lib.duke.edu/dspace/handle/10161/9933}, author = {Hu, J.} } @mastersthesis {2270, title = {Four Essays in Unemployment, Wage Dynamics and Subjective Expectations}, year = {2015}, school = {University of Michigan}, type = {Ph.D.}, address = {Ann Arbor, MI}, abstract = {This dissertation contains four essays on unemployment differences between skill groups, on the effect of non-employment on wages and measurement error, and on subjective expectations of Americans about mortality and the stock market. Chapter 1 tests how much of the unemployment rate differences between education groups can be explained by occupational differences in labor adjustment costs. The educational gap in unemployment is substantial. Recent empirical studies found that the largest component of labor adjustment costs is adaptation costs: newly hired workers need a few months to get up to speed and reach full productivity. The chapter evaluates the effect of adaptation costs on unemployment using a calibrated search and matching model. Chapter 2 tests how short periods of non-employment affect survey reports of annual earnings. Non-employment has strong and non-standard effects on response error in earnings. Persons tend to report the permanent component of their earnings accurately, but transitory shocks are underreported. Transitory shocks due to career interruptions are very large, taking up several months of lost earnings, on average, and people only report 60-85\% of these earnings losses. The resulting measurement error is non-standard: it has a positive mean, it is right-skewed, and the bias correlates with predictors of turnover. Chapter 3 proposes and tests a model, the modal response hypothesis, to explain patterns in mortality expectations of Americans. The model is a mathematical expression of the idea that survey responses of 0\%, 50\%, or 100\% to probability questions indicate a high level of uncertainty about the relevant probability. The chapter shows that subjective survival expectations in 2002 line up very well with realized mortality of the HRS respondents between 2002 and 2010, and our model performs better than typically used models in the literature of subjective probabilities.
Chapter 4 analyzes the impact of the stock market crash of 2008 on households{\textquoteright} expectations about the returns on the stock market index: the population average of expectations, the average uncertainty, and the cross-sectional heterogeneity in expectations from March 2008 to February 2009.}, keywords = {measurement error, subjective expectations, unemployment}, url = {http://hdl.handle.net/2027.42/113598}, author = {Hudomiet, Peter} } @techreport {2417, title = {The role of occupation specific adaptation costs in explaining the educational gap in unemployment}, year = {2015}, type = {Mimeo}, url = {https://sites.google.com/site/phudomiet/Hudomiet-JobMarketPaper.pdf?attredirects=0}, author = {Hudomiet, Peter} } @inbook {2092, title = {Hierarchical models for uncertainty quantification: An overview}, booktitle = {Handbook of Uncertainty Quantification}, year = {2015}, publisher = {Springer}, organization = {Springer}, isbn = {978-3-319-12384-4}, author = {Wikle, C.K.}, editor = {Ghanem, R. and Higdon, D. and Owhadi, H.} } @inbook {WikleHooten2015, title = {Hierarchical Agent-Based Spatio-Temporal Dynamic Models for Discrete Valued Data}, booktitle = {Handbook of Discrete-Valued Time Series}, year = {2015}, publisher = {Chapman and Hall/CRC Press}, organization = {Chapman and Hall/CRC Press}, chapter = {Hierarchical Agent-Based Spatio-Temporal Dynamic Models for Discrete Valued Data}, address = {Boca Raton, FL}, isbn = {9781466577732}, url = {http://www.crcpress.com/product/isbn/9781466577732}, author = {Wikle, C.K. and Hooten, M.B.}, editor = {Davis, R. and Holan, S. and Lund, R. and Ravishanker, N.} } @inbook {HolanWikle, title = {Hierarchical Dynamic Generalized Linear Mixed Models for Discrete-Valued Spatio-Temporal Data}, booktitle = {Handbook of Discrete-Valued Time Series}, year = {2015}, note = {to appear in "Handbook of Discrete-Valued Time Series"}, publisher = {Chapman and Hall/CRC Press}, organization = {Chapman and Hall/CRC Press}, address = {Boca Raton, FL}, isbn = {9781466577732}, url = {http://www.crcpress.com/product/isbn/9781466577732}, author = {Holan, S.H. and Wikle, C.K.}, editor = {Davis, R. and Holan, S. and Lund, R. and Ravishanker, N.} } @inbook {1879, title = {Hierarchical Dynamic Generalized Linear Mixed Models for Discrete-Valued Spatio-Temporal Data}, booktitle = {Handbook of Discrete-Valued Time Series}, year = {2015}, author = {Holan, S.H. and Wikle, C.K.} } @inbook {2093, title = {Hierarchical Spatial Models}, booktitle = {Encyclopedia of Geographical Information Science}, year = {2015}, publisher = {Springer}, organization = {Springer}, author = {Arab, A. and Hooten, M.B. and Wikle, C.K.} } @inbook {Lund, title = {Long Memory Discrete-Valued Time Series}, booktitle = {Handbook of Discrete-Valued Time Series}, year = {2015}, publisher = {Chapman and Hall}, organization = {Chapman and Hall}, chapter = {Long Memory Discrete-Valued Time Series}, url = {http://www.crcpress.com/product/isbn/9781466577732}, author = {Lund, R. and Holan, S.H. and Livsey, J.} } @article {1882, title = {Multiscale Analysis of Survey Data: Recent Developments and Exciting Prospects}, journal = {Statistics Views}, year = {2015}, author = {Bradley, J.R. and Wikle, C.K.
and Holan, S.H.} } @article {2089, title = {Multivariate Spatial Hierarchical Bayesian Empirical Likelihood Methods for Small Area Estimation}, journal = {Stat}, volume = {4}, year = {2015}, month = {05/2015}, pages = {108-116}, issn = {2049-1573}, doi = {10.1002/sta4.81}, url = {http://dx.doi.org/10.1002/sta4.81}, author = {Porter, A.T. and Holan, S.H. and Wikle, C.K.} } @article {2015arXiv:1503.00982, title = {Multivariate Spatio-Temporal Models for High-Dimensional Areal Data with Application to Longitudinal Employer-Household Dynamics}, journal = {ArXiv}, year = {2015}, abstract = {Many data sources report related variables of interest that are also referenced over geographic regions and time; however, there are relatively few general statistical methods that one can readily use that incorporate these multivariate spatio-temporal dependencies. Additionally, many multivariate spatio-temporal areal datasets are extremely high-dimensional, which leads to practical issues when formulating statistical models. For example, we analyze Quarterly Workforce Indicators (QWI) published by the US Census Bureau{\textquoteright}s Longitudinal Employer-Household Dynamics (LEHD) program. QWIs are available by different variables, regions, and time points, resulting in millions of tabulations. Despite their already expansive coverage, by adopting a fully Bayesian framework, the scope of the QWIs can be extended to provide estimates of missing values along with associated measures of uncertainty. Motivated by the LEHD, and other applications in federal statistics, we introduce the multivariate spatio-temporal mixed effects model (MSTM), which can be used to efficiently model high-dimensional multivariate spatio-temporal areal datasets. The proposed MSTM extends the notion of Moran{\textquoteright}s I basis functions to the multivariate spatio-temporal setting. This extension leads to several methodological contributions including extremely effective dimension reduction, a dynamic linear model for multivariate spatio-temporal areal processes, and the reduction of a high-dimensional parameter space using a novel parameter model.}, url = {http://arxiv.org/abs/1503.00982}, author = {Bradley, J.~R. and Holan, S.~H. and Wikle, C.K.} } @article {2169, title = {Multivariate Spatio-Temporal Models for High-Dimensional Areal Data with Application to Longitudinal Employer-Household Dynamics}, journal = {Annals of Applied Statistics}, volume = {9}, year = {2015}, month = {03/2015}, abstract = {Many data sources report related variables of interest that are also referenced over geographic regions and time; however, there are relatively few general statistical methods that one can readily use that incorporate these multivariate spatio-temporal dependencies. Additionally, many multivariate spatio-temporal areal datasets are extremely high-dimensional, which leads to practical issues when formulating statistical models. For example, we analyze Quarterly Workforce Indicators (QWI) published by the US Census Bureau{\textquoteright}s Longitudinal Employer-Household Dynamics (LEHD) program. QWIs are available by different variables, regions, and time points, resulting in millions of tabulations. Despite their already expansive coverage, by adopting a fully Bayesian framework, the scope of the QWIs can be extended to provide estimates of missing values along with associated measures of uncertainty.
Motivated by the LEHD, and other applications in federal statistics, we introduce the multivariate spatio-temporal mixed effects model (MSTM), which can be used to efficiently model high-dimensional multivariate spatio-temporal areal datasets. The proposed MSTM extends the notion of Moran{\textquoteright}s I basis functions to the multivariate spatio-temporal setting. This extension leads to several methodological contributions including extremely effective dimension reduction, a dynamic linear model for multivariate spatio-temporal areal processes, and the reduction of a high-dimensional parameter space using a novel parameter model.}, doi = {10.1214/15-AOAS862}, author = {Bradley, J.R. and Holan, S.H. and Wikle, C.K.} } @techreport {handle:1813:40176, title = {NCRN Meeting Spring 2015: Models for Multiscale Spatially-Referenced Count Data}, number = {1813:40176}, year = {2015}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Spring 2015: Models for Multiscale Spatially-Referenced Count Data Holan, Scott; Bradley, Jonathan R.; Wikle, Christopher K. Presentation at the NCRN Meeting Spring 2015}, url = {http://hdl.handle.net/1813/40176}, author = {Holan, Scott and Bradley, Jonathan R. and Wikle, Christopher K.} } @techreport {handle:1813:40177, title = {NCRN Meeting Spring 2015: Regionalization of Multiscale Spatial Processes Using a Criterion for Spatial Aggregation Error}, number = {1813:40177}, year = {2015}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Spring 2015: Regionalization of Multiscale Spatial Processes Using a Criterion for Spatial Aggregation Error Wikle, Christopher K.; Bradley, Jonathan; Holan, Scott Develop and implement a statistical criterion to diagnose spatial aggregation error that can facilitate the choice of regionalizations of spatial data. Presentation at NCRN Meeting Spring 2015}, url = {http://hdl.handle.net/1813/40177}, author = {Wikle, Christopher K. and Bradley, Jonathan and Holan, Scott} } @techreport {handle:1813:40179, title = {NCRN Meeting Spring 2015: Training Undergraduates, Graduate Students, Postdocs, and Federal Agencies: Methodology, Data, and Science for Federal Statistics}, number = {1813:40179}, year = {2015}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Spring 2015: Training Undergraduates, Graduate Students, Postdocs, and Federal Agencies: Methodology, Data, and Science for Federal Statistics Cressie, Noel; Holan, Scott H.; Wikle, Christopher K. Presentation at the NCRN Spring 2015 Meeting}, url = {http://hdl.handle.net/1813/40179}, author = {Cressie, Noel and Holan, Scott H. and Wikle, Christopher K.} } @article {http://arxiv.org/abs/1508.03758, title = {Nonparametric Bayesian models with focused clustering for mixed ordinal and nominal data}, journal = {ArXiv}, year = {2015}, publisher = {arXiv}, abstract = {Dirichlet process mixtures can be useful models of multivariate categorical data and effective tools for multiple imputation of missing categorical values. In some contexts, however, these models can fit certain variables well at the expense of others in ways beyond the analyst{\textquoteright}s control. For example, when the data include some variables with non-trivial amounts of missing values, the mixture model may fit the marginal distributions of the nearly and fully complete variables at the expense of the variables with high fractions of missing data. 
Motivated by this setting, we present a Dirichlet process mixture model for mixed ordinal and nominal data that allows analysts to split variables into two groups: focus variables and remainder variables. The model uses three sets of clusters, one set for ordinal focus variables, one for nominal focus variables, and one for all remainder variables. The model uses a multivariate ordered probit specification for the ordinal variables and independent multinomial kernels for the nominal variables. The three sets of clusters are linked using an infinite tensor factorization prior, as well as via dependence of the means of the latent continuous focus variables on the remainder variables. This effectively specifies a rich, complex model for the focus variables and a simpler model for remainder variables, yet still potentially captures associations among the variables. In the multiple imputation context, focus variables include key variables with high rates of missing values, and remainder variables include variables without much missing data. Using simulations, we illustrate advantages and limitations of using focused clustering compared to mixture models that do not distinguish variables. We apply the model to handle missing values in an analysis of the 2012 American National Election Study.}, url = {http://arxiv.org/abs/1508.03758}, author = {DeYoreo, Maria and Reiter, J. P. and Hillygus, D. S.} } @article {deyoreo:reiter:hillygus, title = {Nonparametric Bayesian models with focused clustering for mixed ordinal and nominal data}, journal = {Bayesian Analysis}, year = {2015}, month = {08/2015}, abstract = {Dirichlet process mixtures can be useful models of multivariate categorical data and effective tools for multiple imputation of missing categorical values. In some contexts, however, these models can fit certain variables well at the expense of others in ways beyond the analyst{\textquoteright}s control. For example, when the data include some variables with non-trivial amounts of missing values, the mixture model may fit the marginal distributions of the nearly and fully complete variables at the expense of the variables with high fractions of missing data. Motivated by this setting, we present a Dirichlet process mixture model for mixed ordinal and nominal data that allows analysts to split variables into two groups: focus variables and remainder variables. The model uses three sets of clusters, one set for ordinal focus variables, one for nominal focus variables, and one for all remainder variables. The model uses a multivariate ordered probit specification for the ordinal variables and independent multinomial kernels for the nominal variables. The three sets of clusters are linked using an infinite tensor factorization prior, as well as via dependence of the means of the latent continuous focus variables on the remainder variables. This effectively specifies a rich, complex model for the focus variables and a simpler model for remainder variables, yet still potentially captures associations among the variables. In the multiple imputation context, focus variables include key variables with high rates of missing values, and remainder variables include variables without much missing data. Using simulations, we illustrate advantages and limitations of using focused clustering compared to mixture models that do not distinguish variables. We apply the model to handle missing values in an analysis of the 2012 American National Election Study.}, doi = {10.1214/16-BA1020}, author = {M. 
De Yoreo and J. P. Reiter and D. S. Hillygus} } @article {1737, title = {A nonparametric, multiple imputation-based method for the retrospective integration of data sets}, journal = {Multivariate Behavioral Research}, volume = {50}, year = {2015}, pages = {383-397}, chapter = {383}, doi = {10.1080/00273171.2015.1022641}, url = {http://www.tandfonline.com/doi/full/10.1080/00273171.2015.1022641}, author = {M.M. Carrig and D. Manrique-Vallier and K. Ranby and J.P. Reiter and R. Hoyle} } @article {2015arXiv:1502.01974, title = {Regionalization of Multiscale Spatial Processes using a Criterion for Spatial Aggregation Error}, journal = {ArXiv}, year = {2015}, abstract = {The modifiable areal unit problem and the ecological fallacy are known problems that occur when modeling multiscale spatial processes. We investigate how these forms of spatial aggregation error can guide a regionalization over a spatial domain of interest. By "regionalization" we mean a specification of geographies that define the spatial support for areal data. This topic has been studied vigorously by geographers, but has been given less attention by spatial statisticians. Thus, we propose a criterion for spatial aggregation error (CAGE), which we minimize to obtain an optimal regionalization. To define CAGE we draw a connection between spatial aggregation error and a new multiscale representation of the Karhunen-Loeve (K-L) expansion. This relationship between CAGE and the multiscale K-L expansion leads to illuminating theoretical developments including: connections between spatial aggregation error, squared prediction error, spatial variance, and a novel extension of Obled-Creutin eigenfunctions. The effectiveness of our approach is demonstrated through an analysis of two datasets, one using the American Community Survey and one related to environmental ocean winds.}, url = {http://arxiv.org/abs/1502.01974}, author = {Bradley, J.~R. and Wikle, C.K. and Holan, S.~H.} } @article {1575, title = {Semi-parametric selection models for potentially non-ignorable attrition in panel studies with refreshment samples}, journal = {Political Analysis}, volume = {23}, year = {2015}, pages = {92-112}, chapter = {92}, url = {http://pan.oxfordjournals.org/cgi/reprint/mpu009?\%20ijkey=joX8eSl6gyIlQKP\&keytype=ref}, author = {Y. Si and J.P. Reiter and D.S. Hillygus} } @article {1742, title = {Small Area Estimation via Multivariate Fay-Herriot Models With Latent Spatial Dependence}, journal = {Australian \& New Zealand Journal of Statistics}, volume = {57}, year = {2015}, pages = {15-29}, url = {http://arxiv.org/abs/1310.7211}, author = {Porter, A.T. and Wikle, C.K. and Holan, S.H.} } @article {STA4:STA494, title = {Spatio-temporal change of support with application to American Community Survey multi-year period estimates}, journal = {Stat}, volume = {4}, year = {2015}, month = {10/2015}, pages = {255{\textendash}270}, abstract = {We present hierarchical Bayesian methodology to perform spatio-temporal change of support (COS) for survey data with Gaussian sampling errors. This methodology is motivated by the American Community Survey (ACS), which is an ongoing survey administered by the US Census Bureau that provides timely information on several key demographic variables. The ACS has published 1-year, 3-year, and 5-year period estimates, and margins of error, for demographic and socio-economic variables recorded over predefined geographies. 
The spatio-temporal COS methodology considered here provides data users with a way to estimate ACS variables on customized geographies and time periods while accounting for sampling errors. Additionally, 3-year ACS period estimates are to be discontinued, and this methodology can provide predictions of ACS variables for 3-year periods given the available period estimates. The methodology is based on a spatio-temporal mixed-effects model with a low-dimensional spatio-temporal basis function representation, which provides multi-resolution estimates through basis function aggregation in space and time. This methodology includes a novel parameterization that uses a target dynamical process and recently proposed parsimonious Moran{\textquoteright}s I propagator structures. Our approach is demonstrated through two applications using public-use ACS estimates and is shown to produce good predictions on a hold-out set of 3-year period estimates. Copyright {\textcopyright} 2015 John Wiley \& Sons, Ltd.}, keywords = {Bayesian, change-of-support, dynamical, hierarchical models, mixed-effects model, Moran{\textquoteright}s I, multi-year period estimate}, issn = {2049-1573}, doi = {10.1002/sta4.94}, url = {http://dx.doi.org/10.1002/sta4.94}, author = {Bradley, Jonathan R. and Wikle, Christopher K. and Holan, Scott H.} } @booklet {Holan2014b, title = {An Approach for Identifying and Predicting Economic Recessions in Real-Time Using Time-Frequency Functional Models, Seminar on Bayesian Inference in Econometrics and Statistics (SBIES)}, year = {2014}, month = {May}, author = {Holan, S.H.} } @conference {Holan2014, title = {An Approach for Identifying and Predicting Economic Recessions in Real-Time Using Time-Frequency Functional Models}, booktitle = {Joint Statistical Meetings 2014}, year = {2014}, month = {August}, publisher = {Joint Statistical Meetings}, organization = {Joint Statistical Meetings}, address = {Boston, MA}, url = {http://www.amstat.org/meetings/jsm/2014/onlineprogram/AbstractDetails.cfm?abstractid=310841}, author = {Holan, S.H.} } @article {McElroy2014, title = {Asymptotic Theory of Cepstral Random Fields}, journal = {Annals of Statistics}, volume = {42}, year = {2014}, pages = {64-86}, doi = {10.1214/13-AOS1180}, url = {http://arxiv.org/pdf/1112.1977v4.pdf}, author = {McElroy, T. and Holan, S.} } @booklet {Holan2014d, title = {A Bayesian Approach to Estimating Agricultural Yield Based on Multiple Repeated Surveys}, year = {2014}, month = {March}, author = {Holan, S.H.} } @conference {Holan2014a, title = {Bayesian Dynamic Time-Frequency Estimation}, booktitle = {Twelfth World Meeting of ISBA}, year = {2014}, month = {July}, publisher = {ISBA}, organization = {ISBA}, address = {Cancun, Mexico}, author = {Holan, S.H.} } @techreport {HolanMcElroyWu2014, title = {The Cepstral Model for Multivariate Time Series: The Vector Exponential Model}, number = {1406.0801}, year = {2014}, institution = {arXiv}, type = {preprint}, abstract = {Vector autoregressive (VAR) models have become a staple in the analysis of multivariate time series and are formulated in the time domain as difference equations, with an implied covariance structure. In many contexts, it is desirable to work with a stable, or at least stationary, representation. To fit such models, one must impose restrictions on the coefficient matrices to ensure that certain determinants are nonzero; which, except in special cases, may prove burdensome. To circumvent these difficulties, we propose a flexible frequency domain model expressed in terms of the spectral density matrix. Specifically, this paper treats the modeling of covariance stationary vector-valued (i.e., multivariate) time series via an extension of the exponential model for the spectrum of a scalar time series. We discuss the modeling advantages of the vector exponential model and its computational facets, such as how to obtain Wold coefficients from given cepstral coefficients. Finally, we demonstrate the utility of our approach through simulation as well as two illustrative data examples focusing on multi-step ahead forecasting and estimation of squared coherence.
}, url = {http://arxiv.org/abs/1406.0801}, author = {Holan, S.H. and McElroy, T.S. and Wu, G.} } @article {spielman2014coevolution, title = {The Co-Evolution of Residential Segregation and the Built Environment at the Turn of the 20th Century: a Schelling Model}, journal = {Transactions in GIS}, volume = {18}, number = {1}, year = {2014}, pages = {25-45}, doi = {10.1111/tgis.12014}, url = {http://onlinelibrary.wiley.com/enhanced/doi/10.1111/tgis.12014/}, author = {Spielman, S. E. and Harrison, P.} } @inbook {1576, title = {Disclosure risk evaluation for fully synthetic data}, booktitle = {Privacy in Statistical Databases}, volume = {8744}, year = {2014}, pages = {185-199}, publisher = {Springer}, organization = {Springer}, address = {Heidelberg}, author = {J. Hu and J.P. Reiter and Q. Wang} } @conference {HolanMcElroy2014, title = {Fast Estimation of Time Series with Multiple Long-Range Persistencies}, booktitle = {ASA Proceedings of the Joint Statistical Meetings}, year = {2014}, publisher = {American Statistical Association}, organization = {American Statistical Association}, address = {Alexandria, VA}, author = {McElroy, T.S. and Holan, S.H.} } @techreport {2054, title = {Flexible prior specification for partially identified nonlinear regression with binary responses}, number = {1407.8430}, year = {2014}, institution = {arXiv}, abstract = {This paper adapts tree-based Bayesian regression models for estimating a partially identified probability function. In doing so, ideas from the recent literature on Bayesian partial identification are applied within a sophisticated applied regression context. Our approach permits efficient sensitivity analysis concerning the posterior impact of priors over the partially identified component of the regression model. The new methodology is illustrated on an important problem where we only have partially observed data -- inferring the prevalence of accounting misconduct among publicly traded U.S. businesses.}, url = {https://arxiv.org/abs/1407.8430v1}, author = {P. R. Hahn and J. S. Murray and I. 
Manolopoulou} } @techreport {handle:1813:37750, title = {NCRN Meeting Fall 2014: Bayesian Marked Point Process Modeling for Generating Fully Synthetic Public Use Data with Point-Referenced Geography}, number = {1813:37750}, year = {2014}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Fall 2014: Bayesian Marked Point Process Modeling for Generating Fully Synthetic Public Use Data with Point-Referenced Geography Quick, Harrison; Holan, Scott; Wikle, Christopher; Reiter, Jerry Presentation from NCRN Fall 2014 meeting}, url = {http://hdl.handle.net/1813/37750}, author = {Quick, Harrison and Holan, Scott and Wikle, Christopher and Reiter, Jerry} } @techreport {handle:1813:37749, title = {NCRN Meeting Fall 2014: Mixed Effects Modeling for Multivariate-Spatio-Temporal Areal Data}, number = {1813:37749}, year = {2014}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Fall 2014: Mixed Effects Modeling for Multivariate-Spatio-Temporal Areal Data Bradley, Jonathan; Holan, Scott; Wikle, Christopher Presentation from NCRN Fall 2014 meeting}, url = {http://hdl.handle.net/1813/37749}, author = {Bradley, Jonathan and Holan, Scott and Wikle, Christopher} } @techreport {handle:1813:36394, title = {NCRN Meeting Spring 2014: Metadata Standards \& Technology Development for the NSF Survey of Earned Doctorates}, number = {1813:36394}, year = {2014}, institution = {NCRN Coordinating Office}, type = {Preprint}, abstract = {NCRN Meeting Spring 2014: Metadata Standards \& Technology Development for the NSF Survey of Earned Doctorates Noonan, Kimberly; Heus, Pascal; Mulcahy, Tim Presentation from NCRN Spring 2014 meeting}, url = {http://hdl.handle.net/1813/36394}, author = {Noonan, Kimberly and Heus, Pascal and Mulcahy, Tim} } @article {1799, title = {NewsViews: An Automated Pipeline for Creating Custom Geovisualizations for News}, year = {2014}, abstract = {Interactive visualizations add rich, data-based context to online news articles. Geographic maps are currently the most prevalent form of these visualizations. Unfortunately, designers capable of producing high-quality, customized geovisualizations are scarce. We present NewsViews, a novel automated news visualization system that generates interactive, annotated maps without requiring professional designers. NewsViews{\textquoteright} maps support trend identification and data comparisons relevant to a given news article. The NewsViews system leverages text mining to identify key concepts and locations discussed in articles (as well as potential annotations), an extensive repository of {\textquotedblleft}found{\textquotedblright} databases, and techniques adapted from cartography to identify and create visually {\textquotedblleft}interesting{\textquotedblright} thematic maps. In this work, we develop and evaluate key criteria in automatic, annotated map generation and experimentally validate the key features for successful representations (e.g., relevance to context, variable selection, "interestingness" of representation and annotation quality).}, doi = {10.1145/2556288.2557228}, url = {http://cond.org/newsviews.html}, author = {Gao, T. and Hullman, J. and Adar, E. and Hecht, B. and Diakopoulos, N.} } @inbook {2411, title = {The Rise of Incarceration Among the Poor with Mental Illnesses: How Neoliberal Policies Contribute}, booktitle = {The Routledge Handbook of Poverty in the United States}, year = {2014}, publisher = {Routledge}, organization = {Routledge}, author = {Camp, J. 
and Haymes, S. and Haymes, M. V. d. and Miller, R.J.} } @conference {ste:hal:fie:2014, title = {SMERED: A Bayesian Approach to Graphical Record Linkage and De-duplication}, booktitle = {AISTATS 2014 Proceedings, JMLR}, volume = {33}, year = {2014}, pages = {922{\textendash}930}, publisher = {W\&CP}, organization = {W\&CP}, author = {Steorts, R. and Hall, R. and Fienberg, S. E.} } @booklet {Holan2014e, title = {Spatial Fay-Herriot Models for Small Area Estimation With Functional Covariates}, year = {2014}, month = {January}, author = {Holan, S.H.} } @article {Porter2014a, title = {Spatial Fay-Herriot Models for Small Area Estimation with Functional Covariates}, journal = {Spatial Statistics}, volume = {10}, year = {2014}, pages = {27-42}, url = {http://arxiv.org/pdf/1303.6668v3.pdf}, author = {Porter, A.T. and Holan, S.H. and Wikle, C.K. and Cressie, N.} } @article {1797, title = {Toward healthy balance sheets: Savings accounts as a gateway for young adults{\textquoteright} asset diversification and accumulation}, journal = {The St. Louis Federal Reserve Bulletin}, year = {2014}, url = {http://research.stlouisfed.org/publications/review/2014/q4/friedline.pdf}, author = {Friedline, T. and Johnson, P. and Hughes, R.} } @techreport {2418, title = {Twitter, Big Data, and Jobs Numbers}, year = {2014}, type = {online}, url = {http://www.lsa.umich.edu/lsa/ci.twitterbigdataandjobsnumbers_ci.detail}, author = {Hudomiet, Peter} } @article {Hu13, title = {Are independent parameter draws necessary for multiple imputation?}, journal = {The American Statistician}, volume = {67}, year = {2013}, pages = {143-149}, doi = {10.1080/00031305.2013.821953}, url = {http://www.tandfonline.com/doi/full/10.1080/00031305.2013.821953}, author = {Hu, J. and Mitra, R. and Reiter, J.P.} } @booklet {Holan2013f, title = {A Bayesian Approach to Estimating Agricultural Yield Based on Multiple Repeated Surveys, Institute of Public Policy and the Truman School of Public Affairs}, year = {2013}, month = {March}, author = {Holan, S.H.} } @techreport {2653, title = {A Bayesian Approach to Graphical Record Linkage and De-duplication}, number = {1312.4645}, year = {2013}, abstract = {We propose an unsupervised approach for linking records across arbitrarily many files, while simultaneously detecting duplicate records within files. Our key innovation involves the representation of the pattern of links between records as a bipartite graph, in which records are directly linked to latent true individuals, and only indirectly linked to other records. This flexible representation of the linkage structure naturally allows us to estimate the attributes of the unique observable people in the population, calculate transitive linkage probabilities across records (and represent this visually), and propagate the uncertainty of record linkage into later analyses. Our method makes it particularly easy to integrate record linkage with post-processing procedures such as logistic regression, capture{\textendash}recapture, etc. Our linkage structure lends itself to an efficient, linear-time, hybrid Markov chain Monte Carlo algorithm, which overcomes many obstacles encountered by previous record linkage approaches, despite the high-dimensional parameter space. 
We illustrate our method using longitudinal data from the National Long Term Care Survey and with data from the Italian Survey on Household Income and Wealth, where we assess the accuracy of our method and show it to be better in terms of error rates and empirical scalability than other approaches in the literature. Supplementary materials for this article are available online.}, url = {https://arxiv.org/abs/1312.4645}, author = {Steorts, Rebecca C. and Hall, Rob and Fienberg, Stephen E.} } @conference {Spielman2013, title = {The Co-Evolution of Residential Segregation and the Built Environment at the Turn of the 20th Century: A Schelling Model}, booktitle = {Transactions in GIS}, year = {2013}, doi = {10.1111/tgis.12014}, author = {S.E. Spielman and Patrick Harrison} } @article {Holan2014c, title = {Ecological Prediction With Nonlinear Multivariate Time-Frequency Functional Data Models}, journal = {Journal of Agricultural, Biological, and Environmental Statistics}, volume = {18}, year = {2013}, pages = {450-474}, doi = {10.1007/s13253-013-0142-1}, url = {http://link.springer.com/article/10.1007/s13253-013-0142-1}, author = {Yang, W.H. and Wikle, C.K. and Holan, S.H. and Wildhaber, M.L.} } @article {rom:hof:acq:2013, title = {Empirical Analysis of Data Breach Litigation}, journal = {Journal of Empirical Legal Studies}, volume = {11}, number = {1}, year = {2013}, pages = {74{\textendash}104}, author = {Romanosky, A. and Hoffman, D. and Acquisti, A.} } @article {deng2013, title = {Handling Attrition in Longitudinal Studies: The Case for Refreshment Samples}, journal = {Statistical Science}, volume = {28}, year = {2013}, month = {05/2013}, pages = {238{\textendash}256}, chapter = {238}, abstract = {Panel studies typically suffer from attrition, which reduces sample size and can result in biased inferences. It is impossible to know whether or not the attrition causes bias from the observed panel data alone. Refreshment samples{\textemdash}new, randomly sampled respondents given the questionnaire at the same time as a subsequent wave of the panel{\textemdash}offer information that can be used to diagnose and adjust for bias due to attrition. We review and bolster the case for the use of refreshment samples in panel studies. We include examples of both a fully Bayesian approach for analyzing the concatenated panel and refreshment data, and a multiple imputation approach for analyzing only the original panel. For the latter, we document a positive bias in the usual multiple imputation variance estimator. We present models appropriate for three waves and two refreshment samples, including nonterminal attrition. We illustrate the three-wave analysis using the 2007{\textendash}2008 Associated Press{\textendash}Yahoo! News Election Poll.}, doi = {10.1214/13-STS414}, url = {http://dx.doi.org/10.1214/13-STS414}, author = {Deng, Yiting and Hillygus, D. Sunshine and Reiter, Jerome P. and Si, Yajuan and Zheng, Siyu} } @article {Wikle2013d, title = {Hierarchical Bayesian Spatio-Temporal Conway-Maxwell Poisson Models with Dynamic Dispersion}, journal = {Journal of Agricultural, Biological, and Environmental Statistics}, volume = {18}, year = {2013}, pages = {335-356}, doi = {10.1007/s13253-013-0141-2}, url = {http://link.springer.com/article/10.1007/s13253-013-0141-2}, author = {Wu, G. and Holan, S.H. 
and Wikle, C.K.} } @article {Wikle2013, title = {Hierarchical Spatio-Temporal Models and Survey Research}, journal = {Statistics Views}, year = {2013}, month = {May}, url = {http://www.statisticsviews.com/details/feature/4730991/Hierarchical-Spatio-Temporal-Models-and-Survey-Research.html}, author = {Wikle, C. and Holan, S. and Cressie, N.} } @booklet {Cressie2013, title = {How can survey estimates of small areas be improved by leveraging social-media data?}, journal = {The Survey Statistician}, number = {68}, year = {2013}, month = {July}, url = {http://isi.cbs.nl/iass/N68.pdf}, author = {Cressie, N. and Holan, S. and Wikle, C.} } @booklet {Holan2013b, title = {Recent Advances in Spatial Methods for Federal Surveys}, year = {2013}, month = {September}, author = {Holan, S.H.} } @inbook {Holan2013, title = {Semiparametric Dynamic Design of Monitoring Networks for Non-Gaussian Spatio-Temporal Data}, booktitle = {Spatio-temporal Design: Advances in Efficient Data Acquisition}, year = {2013}, pages = {269-284}, publisher = {Wiley}, organization = {Wiley}, keywords = {semiparametric dynamic design for non-Gaussian spatio-temporal data}, isbn = {9780470974292}, doi = {10.1002/9781118441862}, author = {Holan, S. and Wikle, C.}, editor = {Jorge Mateu and Werner Muller} } @article {YuvalNardi2012, title = {Achieving both valid and secure logistic regression analysis on aggregated data from different private sources}, journal = {Journal of Privacy and Confidentiality}, volume = {4}, year = {2012}, pages = {189}, author = {Yuval Nardi and Robert Hall and Stephen E. Fienberg} } @article {Holan2012, title = {An Approach for Identifying and Predicting Economic Recessions in Real-Time Using Time-Frequency Functional Models}, journal = {Applied Stochastic Models in Business and Industry}, volume = {28}, year = {2012}, month = {12/2012}, pages = {485-499}, keywords = {Bayesian model averaging, business cycles, empirical orthogonal functions, functional data, MIDAS, spectrogram, stochastic search variable selection}, doi = {10.1002/asmb.1954}, url = {http://onlinelibrary.wiley.com/doi/10.1002/asmb.1954/full}, author = {Holan, S. and Yang, W. and Matteson, D. and Wikle, C.K.} } @booklet {McElroy2012, title = {Asymptotic Theory of Cepstral Random Fields}, year = {2012}, note = {arXiv preprint arXiv:1112.1977}, publisher = {University of Missouri}, author = {McElroy, T. and Holan, S.} } @techreport {handle:1813:34461, title = {Asymptotic Theory of Cepstral Random Fields}, number = {1813:34461}, year = {2012}, institution = {University of Missouri}, type = {Preprint}, abstract = {Asymptotic Theory of Cepstral Random Fields McElroy, T.S.; Holan, S.H. Random fields play a central role in the analysis of spatially correlated data and, as a result, have a significant impact on a broad array of scientific applications. Given the importance of this topic, there has been a substantial amount of research devoted to this area. However, the cepstral random field model remains largely underdeveloped outside the engineering literature. We provide a comprehensive treatment of the asymptotic theory for two-dimensional random field models. In particular, we provide recursive formulas that connect the spatial cepstral coefficients to an equivalent moving-average random field, which facilitates easy computation of the necessary autocovariance matrix. 
Additionally, we establish asymptotic consistency results for Bayesian, maximum likelihood, and quasi-maximum likelihood estimation of random field parameters and regression parameters. Further, in both the maximum and quasi-maximum likelihood frameworks, we derive the asymptotic distribution of our estimator. The theoretical results are presented generally and are of independent interest, pertaining to a wide class of random field models. The results for the cepstral model facilitate model-building: because the cepstral coefficients are unconstrained in practice, numerical optimization is greatly simplified, and we are always guaranteed a positive definite covariance matrix. We show that inference for individual coefficients is possible, and one can refine models in a disciplined manner. Finally, our results are illustrated through simulation and the analysis of straw yield data in an agricultural field experiment.}, url = {http://hdl.handle.net/1813/34461}, author = {McElroy, T.S. and Holan, S.H.} } @article {Wang2012, title = {Bayesian Multi-Regime Smooth Transition Regression with Ordered Categorical Variables}, journal = {Computational Statistics and Data Analysis}, volume = {56}, year = {2012}, month = {December}, pages = {4165-4179}, doi = {10.1016/j.csda.2012.04.018}, url = {http://dx.doi.org/10.1016/j.csda.2012.04.018}, author = {Wang, J. and Holan, S.} } @booklet {Holan2012f, title = {Bayesian Multiscale Multiple Imputation With Implications to Data Confidentiality}, year = {2012}, note = {Texas A\&M University, January 2012; Duke University (Hosted by Duke Node), February 2012; Rice University, March 2012; Clemson University, April 2012}, author = {Holan, S.H.} } @conference {hal:ste:fie:2012, title = {Bayesian Parametric and Nonparametric Inference for Multiple Record Linkage}, booktitle = {Modern Nonparametric Methods in Machine Learning Workshop}, year = {2012}, publisher = {NIPS}, organization = {NIPS}, url = {http://www.stat.cmu.edu/NCRN/PUBLIC/files/beka_nips_finalsub4.pdf}, author = {Hall, R. and Steorts, R. and Fienberg, S. E.} } @conference {Holan2012e, title = {Flexible Spectral Models for Multivariate Time Series}, booktitle = {Joint Statistical Meetings 2012}, year = {2012}, month = {August}, author = {Holan, S.H.} } @article {Holan2012b, title = {Rejoinder: An approach for identifying and predicting economic recessions in real time using time-frequency functional models}, journal = {Applied Stochastic Models in Business and Industry}, volume = {28}, year = {2012}, pages = {504-505}, doi = {10.1002/asmb.1955}, url = {http://onlinelibrary.wiley.com/doi/10.1002/asmb.1955/full}, author = {Holan, S. and Yang, W. and Matteson, D. and Wikle, C.} } @inbook {Holan2012a, title = {Semiparametric Dynamic Design of Monitoring Networks for Non-Gaussian Spatio-Temporal Data}, booktitle = {Spatio-temporal Design: Advances in Efficient Data Acquisition}, year = {2012}, pages = {269-284}, publisher = {Wiley}, organization = {Wiley}, address = {Chichester, UK}, doi = {10.1002/9781118441862.ch12}, url = {http://onlinelibrary.wiley.com/doi/10.1002/9781118441862.ch12/summary}, author = {Holan, S. 
and Wikle, C.K.}, editor = {Jorge Mateu and Werner Muller} } @conference {RobertHall2012, title = {Valid Statistical Inference on Automatically Matched Files}, booktitle = {Privacy in Statistical Databases}, year = {2012}, pages = {131{\textendash}142}, publisher = {Springer}, organization = {Springer}, doi = {10.1007/978-3-642-33627-0_11}, author = {Robert Hall and Stephen E. Fienberg}, editor = {Josep Domingo-Ferrer and Ilenia Tinnirello} } @conference {sad:hal:fie:2011, title = {Approaches to Multiple Record Linkage}, booktitle = {Proceedings of the 58th World Statistical Congress}, year = {2011}, pages = {1064{\textendash}1071}, publisher = {International Statistical Institute}, organization = {International Statistical Institute}, address = {Dublin}, url = {http://2011.isiproceedings.org/papers/450092.pdf}, author = {Sadinle, M. and Hall, R. and Fienberg, S. E.} } @article {Fienberg2011a, title = {Secure multiparty linear regression based on homomorphic encryption}, journal = {Journal of Official Statistics}, volume = {27}, year = {2011}, pages = {669}, author = {Robert Hall and Stephen E. Fienberg and Yuval Nardi} }