% Entry 631: bioRxiv preprint introducing the First 1,000 Days (1kD) Project.
% Text outside an entry is ignored by BibTeX, so these notes are safe here.
% NOTE(review): the hyphen in "Project-Collecting" may be an em dash or colon
% flattened by the exporter -- confirm against the preprint before changing it.
@article{631,
  author   = {Raviv, Hadas and Hasenfratz, Liat and Gousios, Kira and Faryna, Marian and Beaty, Ricky and Johnson, Dean and Chen, Berlin and Altenhof, Aja and Ryan, Brooke and Greenberg, Chandra and Hong, Zhuoqiao and Assayag, Gal and Tsyhanov, Arkadii and Malakhov, Valery and Rosenwein, Tal and Raviv, Ofri and Lew-Williams, Casey and Hasson, Uri},
  title    = {The {First 1,000 Days} ({1kD}) Project-Collecting and Analyzing an Ultra-Dense Naturalistic Dataset of Human Baby Development},
  abstract = {Human development unfolds in continuous, multimodal environments across seconds, days, and years, yet most developmental datasets capture sparse, context-limited samples of everyday life. We introduce the First 1,000 Days (1kD) Project, an initiative designed to collect ultra-dense, longitudinal, child-centered data that capture developmental trajectories within their full ecological context. Fifteen U.S. homes with 17 infants were recorded 12--14 hours per day over a median of 944 days, yielding {\textasciitilde}1.18 million hours of raw audiovisual data. We present an end-to-end framework for large-scale longitudinal naturalistic measurement and a scalable analysis pipeline of the collected data. In a case study, we describe how we utilized our pipeline to isolate child-centered speech, resulting in the collection of 2,000 to 6,000 hours of transcribed speech for each infant. We demonstrate that dense sampling within the home environment reveals a stable, household-specific lexical structure, which sparse sampling methods consistently fail to capture. The 1kD project offers a blueprint for teams aiming to collect and analyze natural behavior at scale in real-world settings.},
  journal  = {bioRxiv},
  year     = {2026},
  month    = mar,
}