@techreport{handle:1813:52650,
  title = {Effects of a Government-Academic Partnership: Has the NSF-Census Bureau Research Network Helped Secure the Future of the Federal Statistical System?},
  number = {1813:52650},
  year = {2017},
  institution = {NCRN Coordinating Office},
  type = {Preprint},
  abstract = {

  The National Science Foundation-Census Bureau Research Network (NCRN) was established in 2011 to create interdisciplinary research nodes on methodological questions of interest and significance to the broader research community and to the Federal Statistical System (FSS), particularly the Census Bureau. The activities to date have covered both fundamental and applied statistical research and have focused at least in part on the training of current and future generations of researchers in skills of relevance to surveys and alternative measurement of economic units, households, and persons. This paper discusses some of the key research findings of the eight nodes, organized into six topics: (1) Improving census and survey data collection methods; (2) Using alternative sources of data; (3) Protecting privacy and confidentiality by improving disclosure avoidance; (4) Using spatial and spatio-temporal statistical modeling to improve estimates; (5) Assessing data cost and quality tradeoffs; and (6) Combining information from multiple sources. It also reports on collaborations across nodes and with federal agencies, new software developed, and educational activities and outcomes. The paper concludes with an evaluation of the ability of the FSS to apply the NCRN{\textquoteright}s research outcomes and suggests some next steps, as well as the implications of this research-network model for future federal government renewal initiatives. This paper began as a May 8, 2015 presentation to the National Academies of Sciences{\textquoteright} Committee on National Statistics by two of the principal investigators of the National Science Foundation-Census Bureau Research Network (NCRN) {\textendash} John Abowd and the late Steve Fienberg (Carnegie Mellon University). The authors acknowledge the contributions of the other principal investigators of the NCRN who are not co-authors of the paper (William Block, William Eddy, Alan Karr, Charles Manski, Nicholas Nagle, and Rebecca Nugent), the co-principal investigators, and the comments of Patrick Cantwell, Constance Citro, Adam Eck, Brian Harris-Kojetin, and Eloise Parker. We note with sorrow the deaths of Stephen Fienberg and Allan McCutcheon, two of the original NCRN principal investigators. The principal investigators also wish to acknowledge Cheryl Eavey{\textquoteright}s sterling grant administration on behalf of the NSF. The conclusions reached in this paper are not the responsibility of the National Science Foundation (NSF), the Census Bureau, or any of the institutions to which the authors belong.

  },
  url = {http://hdl.handle.net/1813/52650},
  author = {Weinberg, Daniel and Abowd, John M. and Belli, Robert F. and Cressie, Noel and Folch, David C. and Holan, Scott H. and Levenstein, Margaret C. and Olson, Kristen M. and Reiter, Jerome P. and Shapiro, Matthew D. and Smyth, Jolene and Soh, Leen-Kiat and Spencer, Bruce and Spielman, Seth E. and Vilhuber, Lars and Wikle, Christopher}
}

@article{2243,
  title = {Releasing synthetic magnitude micro data constrained to fixed marginal totals},
  journal = {Statistical Journal of the International Association for Official Statistics},
  volume = {32},
  year = {2016},
  month = {02/2016},
  pages = {93--108},
  chapter = {93},
  abstract = {We present approaches to generating synthetic microdata for multivariate data that take on non-negative integer values, such as magnitude data in economic surveys. The basic idea is to estimate a mixture of Poisson distributions to describe the multivariate distribution, and release draws from the posterior predictive distribution of the model. We develop approaches that guarantee the synthetic data sum to marginal totals computed from the original data, as well as approaches that do not enforce this equality. For both cases, we present methods for assessing disclosure risks inherent in releasing synthetic magnitude microdata. We illustrate the methodology using economic data from a survey of manufacturing establishments.},
  keywords = {confidential, disclosure, establishment, mixture, Poisson, risk},
  doi = {10.3233/SJI-160959},
  url = {http://content.iospress.com/download/statistical-journal-of-the-iaos/sji959},
  author = {Wei, Lan and Reiter, Jerome P.}
}

@article{2239,
  title = {Synthetic establishment microdata around the world},
  journal = {Statistical Journal of the International Association for Official Statistics},
  volume = {32},
  year = {2016},
  pages = {65--68},
  chapter = {65},
  abstract = {In contrast to the many public-use microdata samples available for individual and household data from many statistical agencies around the world, there are virtually no establishment or firm microdata available. In large part, this difficulty in providing access to business microdata is due to the skewed and sparse distributions that characterize business data. Synthetic data are simulated data generated from statistical models. We organized sessions at the 2015 World Statistical Congress and the 2015 Joint Statistical Meetings, highlighting work on synthetic \emph{establishment} microdata. This overview situates those papers, published in this issue, within the broader literature.},
  keywords = {business data, confidentiality, differential privacy, international comparison, multiple imputation, synthetic},
  doi = {10.3233/SJI-160964},
  url = {http://content.iospress.com/download/statistical-journal-of-the-iaos/sji964},
  author = {Vilhuber, Lars and Abowd, John M. and Reiter, Jerome P.}
}

@article{2039,
  title = {Bayesian Marked Point Process Modeling for Generating Fully Synthetic Public Use Data with Point-Referenced Geography},
  journal = {Spatial Statistics},
  volume = {14},
  year = {2015},
  month = {08/2015},
  pages = {439--451},
  doi = {10.1016/j.spasta.2015.07.008},
  url = {http://www.sciencedirect.com/science/article/pii/S2211675315000718},
  author = {Quick, Harrison and Holan, Scott H. and Wikle, Christopher K. and Reiter, Jerome P.}
}

@techreport{handle:1813:42340,
  title = {Synthetic Establishment Microdata Around the World},
  number = {1813:42340},
  year = {2015},
  institution = {Cornell University},
  type = {Preprint},
  abstract = {In contrast to the many public-use microdata samples available for individual and household data from many statistical agencies around the world, there are virtually no establishment or firm microdata available. In large part, this difficulty in providing access to business microdata is due to the skewed and sparse distributions that characterize business data. Synthetic data are simulated data generated from statistical models. We organized sessions at the 2015 World Statistical Congress and the 2015 Joint Statistical Meetings, highlighting work on synthetic establishment microdata. This overview situates those papers, published in this issue, within the broader literature.},
  url = {http://hdl.handle.net/1813/42340},
  author = {Vilhuber, Lars and Abowd, John M. and Reiter, Jerome P.}
}

@article{deng2013,
  title = {Handling Attrition in Longitudinal Studies: The Case for Refreshment Samples},
  journal = {Statistical Science},
  volume = {28},
  year = {2013},
  month = {05/2013},
  pages = {238--256},
  chapter = {238},
  abstract = {Panel studies typically suffer from attrition, which reduces sample size and can result in biased inferences. It is impossible to know whether or not the attrition causes bias from the observed panel data alone. Refreshment samples{\textemdash}new, randomly sampled respondents given the questionnaire at the same time as a subsequent wave of the panel{\textemdash}offer information that can be used to diagnose and adjust for bias due to attrition. We review and bolster the case for the use of refreshment samples in panel studies. We include examples of both a fully Bayesian approach for analyzing the concatenated panel and refreshment data, and a multiple imputation approach for analyzing only the original panel. For the latter, we document a positive bias in the usual multiple imputation variance estimator. We present models appropriate for three waves and two refreshment samples, including nonterminal attrition. We illustrate the three-wave analysis using the 2007{\textendash}2008 Associated Press{\textendash}Yahoo! News Election Poll.},
  doi = {10.1214/13-STS414},
  url = {http://dx.doi.org/10.1214/13-STS414},
  author = {Deng, Yiting and Hillygus, D. Sunshine and Reiter, Jerome P. and Si, Yajuan and Zheng, Siyu}
}