<!-- BibBase JavaScript embed: the remote script renders the publication list inline
     (jsonp=1 makes it document.write-compatible, so do not add defer/async). -->
<script src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fpratikmhatre%2F5933976&wmode=opaque&jsonp=1"></script>
<?php
// Server-side BibBase embed: fetch the pre-rendered publication list and inline it.
$url = "https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fpratikmhatre%2F5933976&wmode=opaque";
$contents = file_get_contents($url);
if ($contents === false) {
    // file_get_contents() returns false on failure (network error, HTTP error);
    // emit a visible fallback link instead of silently rendering nothing.
    echo '<p>Unable to load the publication list. <a href="' . htmlspecialchars($url) . '">View it on BibBase</a>.</p>';
} else {
    // echo, not print_r: $contents is a plain string (print_r is meant for arrays/objects).
    echo $contents;
}
?>
<!-- iframe fallback embed: a title attribute is required so assistive technology
     can identify the frame's content. -->
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fpratikmhatre%2F5933976&wmode=opaque" title="BibBase publication list"></iframe>
For more details, see the documentation.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
@misc{farahi_simulation-based_2026,
title = {Simulation-{Based} {Inference} via {Regression} {Projection} and {Batched} {Discrepancies}},
url = {http://arxiv.org/abs/2602.03613},
doi = {10.48550/arXiv.2602.03613},
abstract = {We analyze a lightweight simulation-based inference method that infers simulator parameters using only a regression-based projection of the observed data. After fitting a surrogate linear regression once, the procedure simulates small batches at proposed parameter values and assigns kernel weights based on the resulting batch residual discrepancy, producing a self-normalized pseudo-posterior that is simple, parallelizable, and requires access only to the fitted regression coefficients rather than raw observations. We formalize the construction as an importance-sampling approximation to a population target that averages over simulator randomness, prove consistency as the number of parameter draws grows, and establish stability to estimating the surrogate regression from finite samples. We then characterize asymptotic concentration as batch size increases and bandwidth shrinks, showing that the pseudo-posterior concentrates on an identified set determined by the chosen projection, thereby clarifying when the method yields point versus set identification. Experiments in a tractable nonlinear model and a cosmological calibration task using the DREAMS simulation suite illustrate the computational advantages of regression-based projections and the identifiability limitations that arise from low-information summaries.},
language = {en},
urldate = {2026-02-17},
publisher = {arXiv},
author = {Farahi, Arya and Rose, Jonah and Torrey, Paul},
month = feb,
year = {2026},
note = {arXiv:2602.03613 [stat]},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Statistics - Methodology},
}
@misc{islam_cosmo3dflow_2026,
title = {{Cosmo3DFlow}: {Wavelet} {Flow} {Matching} for {Spatial}-to-{Spectral} {Compression} in {Reconstructing} the {Early} {Universe}},
shorttitle = {{Cosmo3DFlow}},
url = {http://arxiv.org/abs/2602.10172},
doi = {10.48550/arXiv.2602.10172},
abstract = {Reconstructing the early Universe from the evolved present-day Universe is a challenging and computationally demanding problem in modern astrophysics. We devise a novel generative framework, Cosmo3DFlow, designed to address dimensionality and sparsity, the critical bottlenecks inherent in current state-of-the-art methods for cosmological inference. By integrating 3D Discrete Wavelet Transform (DWT) with flow matching, we effectively represent high-dimensional cosmological structures. The Wavelet Transform addresses the “void problem” by translating spatial emptiness into spectral sparsity. It decouples high-frequency details from low-frequency structures through spatial compression, and wavelet-space velocity fields facilitate stable ordinary differential equation (ODE) solvers with large step sizes. Using large-scale cosmological N-body simulations, at 128³ resolution, we achieve up to 50× faster sampling than diffusion models, combining a 10× reduction in integration steps with lower per-step computational cost from wavelet compression. Our results enable initial conditions to be sampled in seconds, compared to minutes for previous methods.},
language = {en},
urldate = {2026-02-17},
publisher = {arXiv},
author = {Islam, Md Khairul and Xia, Zeyu and Goudjil, Ryan and Wang, Jialu and Farahi, Arya and Fox, Judy},
month = feb,
year = {2026},
note = {arXiv:2602.10172 [astro-ph]},
keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Artificial Intelligence},
}
@misc{chen_how_2026,
title = {How {Mergers} and {Flybys} {Shape} {Azimuthal} {Age} {Patterns} in {Spiral} {Galaxies}},
abstract = {Spiral structures are one of the most common features in galaxies, yet their origins and evolution remain debated. Stellar age distributions offer crucial insights into galaxy evolution and star formation, though environmental effects can obscure the intrinsic age patterns. Using the Auriga cosmological gravo-magnetohydrodynamical zoom-in simulations, we investigate the azimuthal age distribution of young stars ({\textless} 2 Gyr) in a sample of five Milky Way-mass spiral galaxies over the past 5 Gyr. We quantify the age gradients across spiral arms using the mean age offset (Δ𝜏) and the non-overlap fraction ( 𝑓non−overlap). We further analyse the impact of mergers and fly-by events on the age gradients. Our results show that Auriga spiral galaxies generally feature younger stars in their leading edges compared to the trailing edges, with a typical Δ𝜏 between 30 and 80 Myr. However, gas-rich interactions can disrupt this age offset, resulting in similar age distributions on each side of the spiral arms. In three snapshots, we observe similar mean ages on both sides of spiral arms but differing age distribution broadness, coinciding with satellite interactions crossing the host galaxy’s disc plane. Our simulation data suggest that the typical azimuthal age variation recovers within ∼600 Myr after galaxy interactions. This work highlights the transient role of environmental interactions in shaping spiral arm age patterns.},
language = {en},
publisher = {Oxford University Press on behalf of Royal Astronomical Society},
author = {Chen, Qian-Hui and Garcia, Alex M and Li, Zefeng and Grasha, Kathryn and Wisnioski, Emily and Torrey, Paul and Remus, Rhea-Silvia and Kimmig, Lucas C and Battisti, Andrew J and Buder, Sven},
year = {2026},
keywords = {Explainable},
}
@misc{huang_tuning_2026,
title = {Tuning the {Implicit} {Regularizer} of {Masked} {Diffusion} {Language} {Models}: {Enhancing} {Generalization} via {Insights} from \$k\$-{Parity}},
shorttitle = {Tuning the {Implicit} {Regularizer} of {Masked} {Diffusion} {Language} {Models}},
url = {http://arxiv.org/abs/2601.22450},
doi = {10.48550/arXiv.2601.22450},
abstract = {Masked Diffusion Language Models have recently emerged as a powerful generative paradigm, yet their generalization properties remain understudied compared to their auto-regressive counterparts. In this work, we investigate these properties within the setting of the k-parity problem (computing the XOR sum of k relevant bits), where neural networks typically exhibit grokking—a prolonged plateau of chance-level performance followed by sudden generalization. We theoretically decompose the Masked Diffusion (MD) objective into a Signal regime which drives feature learning, and a Noise regime which serves as an implicit regularizer. By training nanoGPT using MD objective on the k-parity problem, we demonstrate that MD objective fundamentally alters the learning landscape, enabling rapid and simultaneous generalization without experiencing grokking. Furthermore, we leverage our theoretical insights to optimize the distribution of the mask probability in the MD objective. Our method significantly improves perplexity for 50M-parameter models and achieves superior results across both pre-training from scratch and supervised fine-tuning. Specifically, we observe performance gains peaking at 8.8\% and 5.8\%, respectively, on 8B-parameter models, confirming the scalability and effectiveness of our framework in large-scale masked diffusion language model regimes.},
language = {en},
urldate = {2026-02-06},
publisher = {arXiv},
author = {Huang, Jianhao and Mirzasoleiman, Baharan},
month = jan,
year = {2026},
note = {arXiv:2601.22450 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning},
}
@techreport{collaboration_opportunities_2026,
title = {Opportunities in {AI}/{ML} for the {Rubin} {LSST} {Dark} {Energy} {Science} {Collaboration}},
url = {http://arxiv.org/abs/2601.14235},
doi = {10.5281/zenodo.18319953},
abstract = {The Vera C. Rubin Observatory's Legacy Survey of Space and Time (LSST) will produce unprecedented volumes of heterogeneous astronomical data (images, catalogs, and alerts) that challenge traditional analysis pipelines. The LSST Dark Energy Science Collaboration (DESC) aims to derive robust constraints on dark energy and dark matter from these data, requiring methods that are statistically powerful, scalable, and operationally reliable. Artificial intelligence and machine learning (AI/ML) are already embedded across DESC science workflows, from photometric redshifts and transient classification to weak lensing inference and cosmological simulations. Yet their utility for precision cosmology hinges on trustworthy uncertainty quantification, robustness to covariate shift and model misspecification, and reproducible integration within scientific pipelines. This white paper surveys the current landscape of AI/ML across DESC's primary cosmological probes and cross-cutting analyses, revealing that the same core methodologies and fundamental challenges recur across disparate science cases. Since progress on these cross-cutting challenges would benefit multiple probes simultaneously, we identify key methodological research priorities, including Bayesian inference at scale, physics-informed methods, validation frameworks, and active learning for discovery. With an eye on emerging techniques, we also explore the potential of the latest foundation model methodologies and LLM-driven agentic AI systems to reshape DESC workflows, provided their deployment is coupled with rigorous evaluation and governance. Finally, we discuss critical software, computing, data infrastructure, and human capital requirements for the successful deployment of these new methodologies, and consider associated risks and opportunities for broader coordination with external actors.},
language = {en},
urldate = {2026-02-06},
author = {Collaboration, LSST Dark Energy Science and Aubourg, Eric and Avestruz, Camille and Becker, Matthew R. and Biswas, Biswajit and Biswas, Rahul and Bolliet, Boris and Bolton, Adam S. and Bom, Clecio R. and Bonnet-Guerrini, Raphaël and Boucaud, Alexandre and Campagne, Jean-Eric and Chang, Chihway and Ćiprijanović, Aleksandra and Cohen-Tanugi, Johann and Coughlin, Michael W. and Crenshaw, John Franklin and Cuevas-Tello, Juan C. and Vicente, Juan de and Digel, Seth W. and Dillmann, Steven and Romero, Mariano Javier de León Dominguez and Drlica-Wagner, Alex and Erickson, Sydney and Gagliano, Alexander T. and Georgiou, Christos and Ghosh, Aritra and Grayling, Matthew and Grishin, Kirill A. and Heavens, Alan and House, Lindsay R. and Ishak, Mustapha and Kabalan, Wassim and Kannawadi, Arun and Lanusse, François and Leonard, C. Danielle and Léget, Pierre-François and Lochner, Michelle and Mao, Yao-Yuan and Melchior, Peter and Merz, Grant and Millon, Martin and Möller, Anais and Narayan, Gautham and Omori, Yuuki and Peiris, Hiranya and Perreault-Levasseur, Laurence and Malagón, Andrés A. Plazas and Ramachandra, Nesar and Remy, Benjamin and Roucelle, Cécile and Ruiz-Zapatero, Jaime and Schuldt, Stefan and Sevilla-Noarbe, Ignacio and Shah, Ved G. and Starkenburg, Tjitske and Thorp, Stephen and Cipriano, Laura Toribio San and Tröster, Tilman and Trotta, Roberto and Venkatraman, Padma and Wasserman, Amanda and White, Tim and Zeghal, Justine and Zhang, Tianqing and Zhang, Yuanyuan},
month = jan,
year = {2026},
note = {arXiv:2601.14235 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@misc{islam_omnispectra_2026,
title = {{OmniSpectra}: {A} {Unified} {Foundation} {Model} for {Native} {Resolution} {Astronomical} {Spectra}},
shorttitle = {{OmniSpectra}},
url = {http://arxiv.org/abs/2601.15351},
doi = {10.48550/arXiv.2601.15351},
abstract = {We present OmniSpectra, the first native-resolution foundation model for astronomy spectra. Unlike traditional models, which are limited to fixed-length input sizes or configurations, OmniSpectra handles spectra of any length at their original size, without resampling or interpolation. Despite the large-scale spectroscopic data from diverse surveys fueling the rapid growth of astronomy, existing foundation models are limited to a fixed wavelength range and specific instruments. OmniSpectra is the first foundation model to learn simultaneously from multiple real-world spectra surveys with different configurations at a large scale. We achieve this by designing a novel architecture with adaptive patching across variable lengths, sinusoidal global wavelength encoding, local positional embeddings through depthwise convolution, and validity-aware self-attention masks. Allowing us to learn multi-scale spatial patterns while skipping attention for invalid patches. Even with a limited training example, OmniSpectra demonstrates excellent zero-shot generalization compared to methods tailored for specific tasks. This transfer learning capability makes this model the state-of-the-art across various astronomy tasks, including source classification, redshift estimation, and properties prediction for stars and galaxies. OmniSpectra reduces the need for training individual models for different tasks from scratch, establishing itself as the next-generation astronomy foundation model.},
language = {en},
urldate = {2026-02-06},
publisher = {arXiv},
author = {Islam, Md Khairul and Fox, Judy},
month = jan,
year = {2026},
note = {arXiv:2601.15351 [astro-ph]},
keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Artificial Intelligence},
}
@misc{xue_beyond_2026,
title = {Beyond {What} {Seems} {Necessary}: {Hidden} {Gains} from {Scaling} {Training}-{Time} {Reasoning} {Length} under {Outcome} {Supervision}},
shorttitle = {Beyond {What} {Seems} {Necessary}},
url = {http://arxiv.org/abs/2602.00927},
doi = {10.48550/arXiv.2602.00927},
abstract = {Training LLMs to think and reason for longer has become a key ingredient in building state-of-the-art models that can solve complex problems previously out of reach. Recent efforts pursue this in different ways, such as RL fine-tuning to elicit long CoT or scaling latent reasoning through architectural recurrence. This makes reasoning length an important scaling knob. In this work, we identify a novel phenomenon (both theoretically and experimentally): under outcome-only supervision, out-of-distribution (OOD) performance can continue improving as training-time reasoning length (e.g., the token budget in RL, or the loop count in looped Transformers) increases, even after in-distribution (ID) performance has saturated. This suggests that robustness may require a larger budget than ID validation alone would indicate. We provide theoretical explanations via two mechanisms: (i) self-iteration can induce a stronger inductive bias in the hypothesis class, reshaping ID-optimal solutions in ways that improve OOD generalization; and (ii) when shortcut solutions that work for ID samples but not for OOD samples persist in the hypothesis class, regularization can reduce the learned solution’s reliance on these shortcuts as the number of self-iterations increases. We complement the theory with empirical evidence from two realizations of scaling training-time reasoning length: increasing the number of loops in looped Transformers on a synthetic task, and increasing token budgets during RL fine-tuning of LLMs on mathematical reasoning.},
language = {en},
urldate = {2026-02-06},
publisher = {arXiv},
author = {Xue, Yihao and Zhang, Allan and Huang, Jianhao and Sahai, Amit and Mirzasoleiman, Baharan},
month = jan,
year = {2026},
note = {arXiv:2602.00927 [cs]},
keywords = {Computer Science - Machine Learning},
}
@misc{kumar_multiwavelength_2026,
title = {A multiwavelength view of the nearby {Calcium}-{Strong} {Transient} {SN} 2025coe in the {X}-{Ray}, {Near}-{Infrared}, and {Radio} {Wavebands}},
url = {http://arxiv.org/abs/2601.19018},
doi = {10.48550/arXiv.2601.19018},
abstract = {Calcium-strong transients (CaSTs) are a subclass of faint and rapidly evolving supernovae (SNe) that exhibit strong calcium features and notably weak oxygen features. The small but growing population of CaSTs exhibits some aspects similar to thermonuclear supernovae and others that are similar to massive star core-collapse events, leading to intriguing questions on the physical origins of CaSTs. SN 2025coe is one of the most nearby CaSTs discovered to date, and our coordinated multi-wavelength observations obtained days to weeks post-explosion reveal new insights on these enigmatic transients. With the most robust NIR spectroscopic time-series of a CaST collected to date, SN 2025coe shows spectral signatures characteristic of Type Ib SNe (SNe Ib, i.e. He-rich stripped-envelope SNe). SN 2025coe is the third X-ray detected CaST and our analysis of the Swift X-ray data suggest interaction with 0.12 ± 0.11 M⊙ of circumstellar material (CSM) extending to at least 2 × 1015cm (∼ 30, 000 R⊙), while our analysis of the 1-240 GHz radio non-detections gives an outer radius of that CSM of at most ∼ 4 × 1015 cm. This inferred nearby high-density CSM extending out to 3 ± 1 × 1015 cm is similar to that seen in the other two X-ray detected CaSTs, and its presence suggests that either intensive mass-loss or some polluting mechanism may be a common feature of this subclass. Our work also expands upon recent studies on the optical properties of SN 2025coe and explores our current understanding of different progenitor systems that could possibly produce CaSTs.},
language = {en},
urldate = {2026-02-06},
publisher = {arXiv},
author = {Kumar, Sahana and Baer-Way, Raphael and Ravi, Aravind P. and Modjaz, Maryam and Chandra, Poonam and Valenti, Stefano and Kwok, Lindsey A. and Tinyanont, Samaporn and Foley, Ryan J. and Howell, D. Andrew and Hiramatsu, Daichi and Andrews, Jennifer E. and Bostroem, K. Azalee and Christy, Collin and Franz, Noah and Hsu, Brian and Pearson, Jeniveve and Sand, David J. and Shrestha, Manisha and Smith, Nathan and Subrayan, Bhagya},
month = jan,
year = {2026},
note = {arXiv:2601.19018 [astro-ph]},
keywords = {Astrophysics - High Energy Astrophysical Phenomena},
}
@misc{vijaywargiya_inverse_2026,
title = {Inverse problems for history-enriched linear model reduction},
url = {http://arxiv.org/abs/2601.07101},
doi = {10.48550/arXiv.2601.07101},
abstract = {Standard projection-based model reduction for dynamical systems incurs closure error because it only accounts for instantaneous dependence on the resolved state. From the Mori-Zwanzig (MZ) perspective, projecting the full dynamics onto a low-dimensional resolved subspace induces additional noise and memory terms arising from the dynamics of the unresolved component in the orthogonal complement. The memory term makes the resolved dynamics explicitly history dependent. In this work, based on the MZ identity, we derive exact, history-enriched models for the resolved dynamics of linear driven dynamical systems and formulate inverse problems to learn model operators from discrete snapshot data via least-squares regression. We propose a greedy time-marching scheme to solve the inverse problems efficiently and analyze operator identifiability under full and partial observation data availability. For full observation data, we show that, under mild assumptions, the operators are identifiable even when the fullstate dynamics are governed by a general time-varying linear operator, whereas with partial observation data the inverse problem has a unique solution only when the full-state operator is time-invariant. To address the resulting non-uniqueness in the time-varying case, we introduce a time-smoothing Tikhonov regularization. Numerical results demonstrate that the operators can be faithfully reconstructed from both full and partial observation data and that the learned history-enriched MZ models yield accurate trajectories of the resolved state.},
language = {en},
urldate = {2026-01-13},
publisher = {arXiv},
author = {Vijaywargiya, Arjun and Biros, George},
month = jan,
year = {2026},
note = {arXiv:2601.07101 [math]},
keywords = {Accelerated, Mathematics - Dynamical Systems},
}
@misc{silvestrini_casco_2026,
title = {{CASCO}: {Cosmological} and {AStrophysical} parameters from {Cosmological} simulations and {Observations} {IV}. {Testing} warm dark matter cosmologies with galaxy scaling relations: {A} joint simulation-observation study using {DREAMS} simulations},
shorttitle = {{CASCO}},
url = {http://arxiv.org/abs/2601.07543},
doi = {10.48550/arXiv.2601.07543},
abstract = {Small-scale discrepancies in the standard Lamda cold dark matter paradigm have motivated the exploration of alternative dark matter (DM) models, such as warm dark matter (WDM). In our work, we investigate the constraining power of galaxy scaling relations on cosmological, astrophysical, and WDM parameters using a joint analysis of multiresolution hydrodynamic simulations and observational data. Our study is based on the DREAMS project and combines large-volume uniform-box simulations with high-resolution Milky Way (MW) zoom-in runs exploring a ΛWDM cosmology. To ensure consistency between the different simulation sets, we applied calibrations to account for resolution effects, which allowed us to better exploit the complementary strengths of the two suites. We compared the simulated relations, such as stellar size, DM mass, and fraction, within the stellar half-mass radius and the total-tostellar mass ratio with two complementary galaxy samples: the Spitzer Photometry and Accurate Rotation Curves catalog, providing resolved kinematics for nearby spirals, and the Local Volume Database catalog, which includes structural and dynamical measurements for dwarf galaxies in the Local Volume. By applying a bootstrap-based fitting procedure, we show that key cosmological parameters (Ωm, σ8) and supernova feedback strength can be recovered with good accuracy, particularly from the uniform-box simulations. Although the WDM particle mass remains unconstrained, the MW zoom-in simulations reveal subtle WDM-induced trends, especially at low stellar masses, in the scaling relations of both the DM mass and the total-to-stellar mass ratio within the stellar half-mass radius. Additionally, we find that the galaxy abundance as a function of total stellar mass shows a measurable dependence on WDM particle mass, with a suppression at log10 M∗/M⊙ {\textless}∼ 8 that appears separable from the impact of feedback, suggesting this observable is a valuable complementary probe. 
Our results highlight the importance of combining simulations at multiple resolutions with diverse observational catalogs to jointly constrain baryonic processes and DM properties. In particular, future low-mass galaxy surveys such as Euclid will play a crucial role in tightening the constraints on alternative DM scenarios through joint structural and statistical analyses. At the same time, higher-resolution simulations will be essential to fully capturing the small-scale features and improving the discriminatory power of such analyses, especially in the context of WDM.},
language = {en},
urldate = {2026-01-21},
publisher = {arXiv},
author = {Silvestrini, M. and Tortora, C. and Busillo, V. and Brooks, Alyson M. and Farahi, A. and Garcia, A. M. and Kallivayalil, N. and Napolitano, N. R. and Rose, J. C. and Torrey, P. and Villaescusa-Navarro, F. and Vogelsberger, M.},
month = jan,
year = {2026},
note = {arXiv:2601.07543 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{ravi_double-peaked_2026,
title = {The {Double}-{Peaked} {Calcium}-{Strong} {SN} 2025coe: {Progenitor} {Constraints} from {Early} {Interaction} and {Ejecta} {Asymmetries}},
shorttitle = {The {Double}-{Peaked} {Calcium}-{Strong} {SN} 2025coe},
url = {http://arxiv.org/abs/2601.00415},
doi = {10.48550/arXiv.2601.00415},
abstract = {Supernova (SN) 2025coe at a distance of ∼25 Mpc is the third-closest calcium-strong (CaST) transient. It was discovered at a large projected offset of ∼34 kpc from its potential host galaxy NGC 3277. Multiband photometry of SN 2025coe indicates the presence of two peaks at day ∼2 and day ∼11 after explosion. Modeling the bolometric light curve, we find that the first peak can be reproduced either by shock cooling of a compact envelope (R\_env ≈ 6–40 R⊙; M\_env ≈ 0.1–0.2 M⊙) or by interaction with close-in circumstellar material (CSM; R\_CSM ≲ 8 × 10¹⁴ cm), or a combination of both. The second peak is dominated by radioactive decay of ⁵⁶Ni (M\_ej ≈ 0.4–0.5 M⊙; M(⁵⁶Ni) ≈ 1.4 × 10⁻² M⊙).},
language = {en},
urldate = {2026-01-13},
publisher = {arXiv},
author = {Ravi, Aravind P. and Kumar, Sahana and Baer-Way, Raphael and Valenti, Stefano and Modjaz, Maryam and Baal, Bart F. A. van and Jerkstrand, Anders and Dong, Yize and Kwok, Lindsey A. and Pearson, Jeniveve and Sand, David J. and Hiramatsu, Daichi and Filippenko, Alexei V. and Andrews, Jennifer and Andrews, Moira and Arunachalam, Prasiddha and Bostroem, K. Azalee and Brink, Thomas G. and Christy, Collin and Chen, Liyang and Davis, Kyle W. and Esamdin, Ali and Farah, Joseph and Foley, Ryan J. and Hoang, Emily and Hosseinzadeh, Griffin and Howell, D. Andrew and Hsu, Brian and Huang, Ruifeng and Iskander, Abdusamatjan and Janzen, Daryl and Jha, Saurabh W. and Kaur, Ravjit and Lundquist, Michael J. and McCully, Curtis and Mehta, Darshana and Ni, Yuan Qi and Retamal, Nicolas Meza and Patra, Kishore C. and Ransome, Conor and Shrestha, Manisha and Smith, Nathan and Subrayan, Bhagya and Taggart, Kirsty and Wang, Xiaofeng and Wynn, Kathryn and Yang, Yi and Yan, Shengyu and Zheng, Weikang and Coe, Dan},
month = jan,
year = {2026},
note = {arXiv:2601.00415 [astro-ph]},
keywords = {Astrophysics - High Energy Astrophysical Phenomena, Explorable},
}
@misc{liu_who_2026,
title = {Who {Owns} {Creativity} and {Who} {Does} the {Work}? {Trade}-offs in {LLM}-{Supported} {Research} {Ideation}},
shorttitle = {Who {Owns} {Creativity} and {Who} {Does} the {Work}?},
url = {http://arxiv.org/abs/2601.12152},
doi = {10.48550/arXiv.2601.12152},
abstract = {LLM-based agents offer new potential to accelerate science and reshape research work. However, the quality of researcher contributions can vary significantly depending on human ability to steer agent behaviors. How can we best use these tools to augment scientific creativity without undermining aspects of contribution and ownership that drive research? To investigate this, we developed an agentic research ideation system integrating three roles—Ideator, Writer, and Evaluator—across three control levels—Low, Medium, and Intensive. Our mixed-methods study with 54 researchers suggests three key findings in how LLM-based agents reshape scientific creativity: 1) perceived creativity support does not simply increase linearly with greater control; 2) human effort shifts from ideating to verifying ideas; and 3) ownership becomes a negotiated outcome between human and AI. Our findings suggest that LLM agent design should emphasize researcher empowerment, fostering a sense of ownership over strong ideas rather than reducing researchers to operating an automated AI-driven process.},
language = {en},
urldate = {2026-01-21},
publisher = {arXiv},
author = {Liu, Houjiang and Choi, Yujin and Gautam, Sanjana and Jaffe, Gabriel and Rieh, Soo Young and Lease, Matthew},
month = jan,
year = {2026},
note = {arXiv:2601.12152 [cs]},
keywords = {Computer Science - Human-Computer Interaction, Explorable},
}
@misc{joseph_astrovisbench_2025,
title = {{AstroVisBench}: {A} {Code} {Benchmark} for {Scientific} {Computing} and {Visualization} in {Astronomy}},
shorttitle = {{AstroVisBench}},
url = {http://arxiv.org/abs/2505.20538},
doi = {10.48550/arXiv.2505.20538},
abstract = {Large Language Models (LLMs) are being explored for applications in scientific research, including their capabilities to synthesize literature, answer research questions, generate research ideas, and even conduct computational experiments. Ultimately, our goal is for these to help scientists derive novel scientific insights. In many areas of science, such insights often arise from processing and visualizing data to understand its patterns. However, evaluating whether an LLM-mediated scientific workflow produces outputs conveying the correct scientific insights is challenging to evaluate and has not been addressed in past work. We introduce ASTROVISBENCH, the first benchmark for both scientific computing and visualization in the astronomy domain. ASTROVISBENCH judges a language model’s ability to both (1) create astronomy-specific workflows to process and analyze data and (2) visualize the results of these workflows through complex plots. Our evaluation of visualizations uses a novel LLM-as-a-judge workflow, which is validated against annotation by five professional astronomers. Using ASTROVISBENCH we present an evaluation of state-of-the-art language models, showing a significant gap in their ability to engage in astronomy research as useful assistants. This evaluation provides a strong end-to-end evaluation for AI scientists that offers a path forward for the development of visualization-based workflows, which are central to a broad range of domains from physics to biology. We release the code and data for ASTROVISBENCH at astrovisbench.github.io.},
language = {en},
urldate = {2026-01-16},
publisher = {arXiv},
author = {Joseph, Sebastian Antony and Husain, Syed Murtaza and Offner, Stella S. R. and Juneau, Stéphanie and Torrey, Paul and Bolton, Adam S. and Farias, Juan P. and Gaffney, Niall and Durrett, Greg and Li, Junyi Jessy},
month = oct,
year = {2025},
note = {arXiv:2505.20538 [cs]},
keywords = {Accelerated, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Computation and Language, Computer Science - Machine Learning, Explorable},
}
@inproceedings{zhong_tool_2025,
address = {Clarion Hotel Trondheim Trondheim Norway},
title = {A {Tool} for {Generating} {Exceptional} {Behavior} {Tests} {With} {Large} {Language} {Models}},
isbn = {979-8-4007-1276-0},
url = {https://dl.acm.org/doi/10.1145/3696630.3728608},
doi = {10.1145/3696630.3728608},
abstract = {Exceptional behavior tests (EBTs) are crucial in software development for verifying that code correctly handles unwanted events and throws appropriate exceptions. However, prior research has shown that developers often prioritize testing “happy paths”, i.e., paths without unwanted events, over exceptional scenarios. We present exLong, a tool that automatically generates EBTs to address this gap. exLong leverages a large language model (LLM) fine-tuned from CodeLlama and incorporates reasoning about exception-throwing traces, conditional expressions that guard throw statements, and non-exceptional behavior tests that execute similar traces. Our demonstration video illustrates how exLong can effectively assist developers in creating comprehensive EBTs for their project (available at https://youtu.be/Jro8kMgplZk).},
language = {en},
urldate = {2025-08-28},
booktitle = {Proceedings of the 33rd {ACM} {International} {Conference} on the {Foundations} of {Software} {Engineering}},
publisher = {ACM},
author = {Zhong, Linghan and Yuan, Samuel and Zhang, Jiyang and Liu, Yu and Nie, Pengyu and Li, Junyi Jessy and Gligoric, Milos},
month = jun,
year = {2025},
keywords = {Explorable},
pages = {1193--1197},
}
@inproceedings{zhang_hack_2025,
address = {Coimbra, Portugal},
title = {{HACK}: {Homomorphic} {Acceleration} via {Compression} of the {Key}-{Value} {Cache} for {Disaggregated} {LLM} {Inference}},
url = {https://doi.org/10.1145/3718958.3750481},
doi = {10.1145/3718958.3750481},
abstract = {Disaggregated Large Language Model (LLM) inference decouples the compute-intensive prefill stage from the memory-intensive decode stage, allowing low-end, compute-focused GPUs for prefill and high-end, memory-rich GPUs for decode, which reduces cost while maintaining high throughput. However, transmitting Key-Value (KV) data between the two stages can be a bottleneck, especially for long prompts. Additionally, the computational overhead in the two stages is key for optimizing Job Completion Time (JCT), and KV data size can become prohibitive for long prompts and sequences. Existing KV quantization methods can alleviate transmission and memory bottlenecks, but they introduce significant dequantization overhead, exacerbating the computation time.},
language = {en},
author = {Zhang, Zeyu and Shen, Haiying and Vargaftik, Shay and Basat, Ran Ben and Mitzenmacher, Michael and Yu, Minlan},
month = sep,
year = {2025},
note = {https://github.com/pcl-projects/HACK},
keywords = {Explorable},
}
@misc{zhang_zack_2025,
title = {{ZACK}: {Zero}-{Overhead} {LLM} {Inference} {Acceleration} via {Dimensionality} {Compression} of the {Key}-{Value} {Cache}},
shorttitle = {{ZACK}},
url = {http://arxiv.org/abs/2408.04107},
doi = {10.48550/arXiv.2408.04107},
abstract = {In large-language models, memory constraints in the Key-Value Cache (KVC) pose a challenge during inference. In this work, we propose ZACK, the first KV dimensionality compression system that achieves zero-overhead compression and decompression and also reduces attention computation time. It complements and can be combined with eviction-based and quantization-based methods to further enhance KV compression. Moreover, ZACK employs adaptive compression, tailoring KV compression rates across heads and layers based on their contributions to inference to maximize overall compression while maintaining an accuracy loss constraint. Additionally, ZACK enhances the self-attention kernel to balance the uneven workloads caused by the adaptive compression approach to further reduce attention computation latency. Comprehensive experiments demonstrate that when combined with ZACK, state-of-the-art eviction-based and quantization-based methods for KV compression further reduce KV size by up to 68\%, Time-To-First-Token (TTFT) by up to 44\%, and Time-Between-Tokens (TBT) by up to 55\% and achieve up to 1.72× throughput under the same latency, while maintaining 99\% of the baseline accuracy. We open-sourced the code.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Zhang, Zeyu and Shen, Haiying},
month = feb,
year = {2025},
note = {arXiv:2408.04107 [cs]},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Machine Learning, Explorable},
}
@misc{yin_learning_2025,
title = {Learning {Composable} {Chains}-of-{Thought}},
url = {http://arxiv.org/abs/2505.22635},
doi = {10.48550/arXiv.2505.22635},
abstract = {A common approach for teaching large language models (LLMs) to reason is to train on chain-of-thought (CoT) traces of in-distribution reasoning problems, but such annotated data is costly to obtain for every problem of interest. We want reasoning models to generalize beyond their training distribution, and ideally to generalize compositionally: combine atomic reasoning skills to solve harder, unseen reasoning tasks. We take a step towards compositional generalization of reasoning skills when addressing a target compositional task that has no labeled CoT data. We find that simply training models on CoT data of atomic tasks leads to limited generalization, but minimally modifying CoT formats of constituent atomic tasks to be composable can lead to improvements. We can train "atomic CoT" models on the atomic tasks with Composable CoT data and combine them with multitask learning or model merging for better zero-shot performance on the target compositional task. Such a combined model can be further bootstrapped on a small amount of compositional data using rejection sampling fine-tuning (RFT). Results on string operations and natural language skill compositions show that training LLMs on Composable CoT outperforms multitask learning and continued fine-tuning baselines within a given training data budget.},
language = {en},
urldate = {2025-08-28},
publisher = {arXiv},
author = {Yin, Fangcong and Liu, Zeyu Leo and Leqi, Liu and Ye, Xi and Durrett, Greg},
month = may,
year = {2025},
note = {arXiv:2505.22635 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Explorable},
}
@misc{xue_lora_2025,
title = {{LoRA} is {All} {You} {Need} for {Safety} {Alignment} of {Reasoning} {LLMs}},
url = {http://arxiv.org/abs/2507.17075},
doi = {10.48550/arXiv.2507.17075},
abstract = {Reasoning LLMs have demonstrated remarkable breakthroughs in solving complex problems that were previously out of reach. To ensure LLMs do not assist with harmful requests, safety alignment fine-tuning is necessary in the post-training phase. However, safety alignment fine-tuning has recently been shown to significantly degrade reasoning abilities, a phenomenon known as the “Safety Tax”. In this work, we show that using LoRA for SFT on refusal datasets effectively aligns the model for safety without harming its reasoning capabilities. This is because restricting the safety weight updates to a low-rank space minimizes the interference with the reasoning weights. Our extensive experiments across four benchmarks covering math, science, and coding show that this approach produces highly safe LLMs—with safety levels comparable to full-model fine-tuning—without compromising their reasoning abilities. Our ablation studies further identify three key factors in LoRA: (1) rank-1 updates are sufficient to achieve the best reasoning and safety performance, (2) the up projection layers are the most critical modules, with LoRA applied to them alone achieving even better results, and (3) middle layers are more effective than early or late layers. Together, these findings show that strong safety and reasoning can be achieved at minimal computational cost when updates are applied in the right places. Additionally, we observe that LoRA induces weight updates with smaller overlap with the initial weights compared to full-model fine-tuning. Finally, while our attempts to further reduce this overlap yield only modest improvements on some tasks, they highlight the potential of developing methods that more reliably optimize the reasoning–safety tradeoff. Our source code is available at https://github.com/YihaoXue/lora-safety-reasoning.},
language = {en},
urldate = {2026-01-21},
publisher = {arXiv},
author = {Xue, Yihao and Mirzasoleiman, Baharan},
month = oct,
year = {2025},
note = {arXiv:2507.17075 [cs]},
keywords = {Computer Science - Artificial Intelligence, Explorable},
}
@misc{wadhwa_evalagent_2025,
title = {{EvalAgent}: {Discovering} {Implicit} {Evaluation} {Criteria} from the {Web}},
shorttitle = {{EvalAgent}},
url = {http://arxiv.org/abs/2504.15219},
doi = {10.48550/arXiv.2504.15219},
abstract = {Evaluation of language model outputs on structured writing tasks is typically conducted with a number of desirable criteria presented to human evaluators or large language models (LLMs). For instance, on a prompt like "Help me draft an academic talk on coffee intake vs research productivity", a model response may be evaluated for criteria like accuracy and coherence. However, high-quality responses should do more than just satisfy basic task requirements. An effective response to this query should include quintessential features of an academic talk, such as a compelling opening, clear research questions, and a takeaway. To help identify these implicit criteria, we introduce EvalAgent, a novel framework designed to automatically uncover nuanced and task-specific criteria. EvalAgent first mines expert-authored online guidance. It then uses this evidence to propose diverse, long-tail evaluation criteria that are grounded in reliable external sources. Our experiments demonstrate that the grounded criteria produced by EvalAgent are often implicit (not directly stated in the user's prompt), yet specific (high degree of lexical precision). Further, EvalAgent criteria are often not satisfied by initial responses but they are actionable, such that responses can be refined to satisfy them. Finally, we show that combining LLM-generated and EvalAgent criteria uncovers more human-valued criteria than using LLMs alone.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Wadhwa, Manya and Sprague, Zayne and Malaviya, Chaitanya and Laban, Philippe and Li, Junyi Jessy and Durrett, Greg},
month = apr,
year = {2025},
note = {arXiv:2504.15219 [cs]},
keywords = {Computer Science - Computation and Language, Explorable},
}
@inproceedings{vashistha_i-trustworthy_2025,
address = {Mai Khao, Thailand},
title = {I-trustworthy {Models}. {A} framework for trustworthiness evaluation of probabilistic classifiers},
volume = {258},
abstract = {As probabilistic models continue to permeate various facets of our society and contribute to scientific advancements, it becomes a necessity to go beyond traditional metrics such as predictive accuracy and error rates and assess their trustworthiness. Grounded in the competence-based theory of trust, this work formalizes I-trustworthy framework – a novel framework for assessing the trustworthiness of probabilistic classifiers for inference tasks by linking local calibration to trustworthiness. To assess I-trustworthiness, we use the local calibration error (LCE) and develop a method of hypothesis-testing. This method utilizes a kernel-based test statistic, Kernel Local Calibration Error (KLCE), to test local calibration of a probabilistic classifier. This study provides theoretical guarantees by offering convergence bounds for an unbiased estimator of KLCE. Additionally, we present a diagnostic tool designed to identify and measure biases in cases of miscalibration. The effectiveness of the proposed test statistic is demonstrated through its application to both simulated and real-world datasets. Finally, LCE of related recalibration methods is studied, and we provide evidence of insufficiency of existing methods to achieve I-trustworthiness.},
language = {en},
booktitle = {{PMLR}},
author = {Vashistha, Ritwik and Farahi, Arya},
year = {2025},
keywords = {Explainable},
}
@misc{trienes_behavioral_2025,
title = {Behavioral {Analysis} of {Information} {Salience} in {Large} {Language} {Models}},
url = {http://arxiv.org/abs/2502.14613},
doi = {10.48550/arXiv.2502.14613},
abstract = {Large Language Models (LLMs) excel at text summarization, a task that requires models to select content based on its importance. However, the exact notion of salience that LLMs have internalized remains unclear. To bridge this gap, we introduce an explainable framework to systematically derive and investigate information salience in LLMs through their summarization behavior. Using length-controlled summarization as a behavioral probe into the content selection process, and tracing the answerability of Questions Under Discussion throughout, we derive a proxy for how models prioritize information. Our experiments on 13 models across four datasets reveal that LLMs have a nuanced, hierarchical notion of salience, generally consistent across model families and sizes. While models show highly consistent behavior and hence salience patterns, this notion of salience cannot be accessed through introspection, and only weakly correlates with human perceptions of information salience.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Trienes, Jan and Schlötterer, Jörg and Li, Junyi Jessy and Seifert, Christin},
month = may,
year = {2025},
note = {arXiv:2502.14613 [cs]},
keywords = {Computer Science - Computation and Language, Explorable},
}
@misc{tang_chartmuseum_2025,
title = {{ChartMuseum}: {Testing} {Visual} {Reasoning} {Capabilities} of {Large} {Vision}-{Language} {Models}},
shorttitle = {{ChartMuseum}},
url = {http://arxiv.org/abs/2505.13444},
doi = {10.48550/arXiv.2505.13444},
abstract = {Chart understanding presents a unique challenge for large vision-language models (LVLMs), as it requires the integration of sophisticated textual and visual reasoning capabilities. However, current LVLMs exhibit a notable imbalance between these skills, falling short on visual reasoning that is difficult to perform in text. We conduct a case study using a synthetic dataset solvable only through visual reasoning and show that model performance degrades significantly with increasing visual complexity, while human performance remains robust. We then introduce CHARTMUSEUM, a new Chart Question Answering (QA) benchmark containing 1,162 expert-annotated questions spanning multiple reasoning types, curated from real-world charts across 184 sources, specifically built to evaluate complex visual and textual reasoning. Unlike prior chart understanding benchmarks—where frontier models perform similarly and near saturation—our benchmark exposes a substantial gap between model and human performance, while effectively differentiating model capabilities: although humans achieve 93\% accuracy, the best-performing model Gemini-2.5-Pro attains only 63.0\%, and the leading open-source LVLM Qwen2.5-VL-72B-Instruct achieves only 38.5\%. Moreover, on questions requiring primarily visual reasoning, all models experience a 35\%-55\% performance drop from text-reasoning-heavy question performance. Lastly, our qualitative error analysis reveals specific categories of visual reasoning that are challenging for current LVLMs.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Tang, Liyan and Kim, Grace and Zhao, Xinyu and Durrett, Greg and Lake, Thom and Ding, Wenxuan and Yin, Fangcong and Singhal, Prasann and Wadhwa, Manya and Liu, Zeyu Leo and Sprague, Zayne and Namuduri, Ramya and Hu, Bodun and Rodriguez, Juan Diego and Peng, Puyuan},
month = may,
year = {2025},
note = {arXiv:2505.13444 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Computer Vision and Pattern Recognition, Explorable},
}
@inproceedings{tairin_revisiting_2025,
address = {Tokyo, Japan},
title = {Revisiting the {Straggling} {Problem} in {GPU}-based {Distributed} {Deep} {Learning} {Training}},
abstract = {The straggler problem has been extensively studied in CPU-based distributed deep learning (DL) training but has not received significant attention in homogeneous GPU-based distributed training, possibly because GPUs do not typically become bottlenecks in this scenario. In this paper, we conduct experiment measurements and find that the straggler problems persist in this scenario, primarily stemming from communication hurdles, compounded by computation delays, and stragglers substantially inflate resource consumption and training time by ∼50\%. Existing straggler mitigation methods do not directly address the communication stragglers in this scenario, and they suffer from drawbacks such as prolonged latency in straggler removal, high resource consumption, or compromised training accuracy. To tackle these limitations, based on the insights derived from thorough measurements, we propose a Straggler-aware Time and Resource Efficient distributed DL Training system (STRET). STRET is tailored for both homogeneous and heterogeneous GPU-based distributed training, encompassing both the parameter server (PS) and all-reduce architectures. It creates a hybrid architecture that connects a straggler to a non-straggler possessing high communication bandwidth with it to reduce communication delay. If this method fails to eliminate stragglers, it runs two complementary methods in sequence to remove the stragglers. First, it further reduces communication overhead by withholding reporting gradients when the accuracy increment is marginal. Second, it conducts one-time batch size tuning to reduce iteration time. Real experimental results on TensorFlow show that STRET can reduce up to 56\% and 41\% training time and save up to 94\% and 96\% resources in the heterogeneous and homogeneous scenarios, respectively, compared to state-of-the-art approaches while preserving accuracy.},
language = {en},
booktitle = {Proceedings of the 34th {International} {Conference} on {Computer} {Communications} and {Networks} ({ICCCN} 2025)},
author = {Tairin, Suraiya and Zhang, Zeyu and Shen, Haiying},
month = aug,
year = {2025},
note = {Code: https://github.com/pcl-projects/STRET},
keywords = {Explorable},
}
@misc{staylor_combining_2025,
title = {Combining {Serverless} and {High}-{Performance} {Computing} {Paradigms} to support {ML} {Data}-{Intensive} {Applications}},
url = {http://arxiv.org/abs/2511.12185},
doi = {10.48550/arXiv.2511.12185},
abstract = {Data is found everywhere, from health and human infrastructure to the surge of sensors and the proliferation of internet-connected devices. To meet this challenge, the data engineering field has expanded significantly in recent years in both research and industry. Traditionally, data engineering, Machine Learning, and AI workloads have been run on large clusters within data center environments, requiring substantial investment in hardware and maintenance. With the rise of the public cloud, it is now possible to run large applications across nodes without owning or maintaining hardware. Serverless functions such as AWS Lambda provide horizontal scaling and precise billing without the hassle of managing traditional cloud infrastructure. However, when processing large datasets, users often rely on external storage options that are significantly slower than direct communication typical of HPC clusters. We introduce Cylon, a high-performance distributed data frame solution that has shown promising results for data processing using Python. We describe how we took inspiration from the FMI library and designed a serverless communicator to tackle communication and performance issues associated with serverless functions. With our design, we demonstrate that the performance of AWS Lambda falls below one percent of strong scaling experiments compared to serverful AWS (EC2) and HPCs based on implementing direct communication via NAT Traversal TCP Hole Punching.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Staylor, Mills and Sarker, Arup Kumar and Laszewski, Gregor von and Fox, Geoffrey and Cheng, Yue and Fox, Judy},
month = dec,
year = {2025},
note = {arXiv:2511.12185 [cs]},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Explainable},
}
@article{staylor_scalable_2025,
title = {Scalable cosmic {AI} inference using cloud serverless computing},
issn = {1094-3420, 1741-2846},
url = {https://journals.sagepub.com/doi/10.1177/10943420251399942},
doi = {10.1177/10943420251399942},
abstract = {Large-scale astronomical image data processing and prediction are essential for astronomers, providing crucial insights into celestial objects, the universe’s history, and its evolution. While modern deep learning models offer high predictive accuracy, they often demand substantial computational resources, making them resource-intensive and limiting accessibility. We introduce the Cloud-based Astronomy Inference (CAI) framework to address these challenges. This scalable solution integrates pre-trained foundation models with serverless cloud infrastructure through a Function-as-a-Service (FaaS). CAI enables efficient and scalable inference on astronomical images without extensive hardware. Using a foundation model for redshift prediction as a case study, our extensive experiments cover user devices, HPC (High-Performance Computing) servers, and Cloud. Using redshift prediction with the AstroMAE model demonstrated CAI’s scalability and efficiency, achieving inference on a 12.6 GB dataset in only 28 seconds compared to 140.8 seconds on HPC GPUs and 1793 seconds on HPC CPUs. CAI also achieved significantly higher throughput, reaching 18.04 billion bits per second (bps), and maintained near-constant inference times as data sizes increased, all at minimal computational cost (under \$5 per experiment). We also process large-scale data up to 1 TB to show CAI’s effectiveness at scale. CAI thus provides a highly scalable, accessible, and cost-effective inference solution for the astronomy community. The code is accessible at https://github.com/UVA-MLSys/AI-for-Astronomy.},
language = {en},
urldate = {2025-12-10},
journal = {The International Journal of High Performance Computing Applications},
author = {Staylor, Mills and Dolatpour Fathkouhi, Amirreza and Islam, Md Khairul and O’Hara, Kaleigh and Goudjil, Ryan Ghiles and Fox, Geoffrey and Fox, Judy},
month = nov,
year = {2025},
keywords = {Explainable},
pages = {10943420251399942},
}
@misc{sprague_skillfactory_2025,
title = {{SkillFactory}: {Self}-{Distillation} {For} {Learning} {Cognitive} {Behaviors}},
shorttitle = {{SkillFactory}},
url = {http://arxiv.org/abs/2512.04072},
doi = {10.48550/arXiv.2512.04072},
abstract = {Reasoning models leveraging long chains of thought employ various cognitive skills, such as verification of their answers, backtracking, retrying by an alternate method, and more. Previous work has shown that when a base language model exhibits these skills, training that model further with reinforcement learning (RL) can learn to leverage them. How can we get models to leverage skills that aren’t exhibited by base models? Our work, SkillFactory, is a method for fine-tuning models to roughly learn these skills during a supervised fine-tuning (SFT) stage prior to RL. Our approach does not rely on distillation from a stronger model, but instead uses samples from the model itself, rearranged to provide training data in the format of those skills. These “silver” SFT traces may be imperfect, but are nevertheless effective for priming a model to acquire skills during RL. Our evaluation shows that (1) starting from SkillFactory SFT initialization helps a model to generalize to harder variants of a task post-RL, despite lower performance pre-RL; (2) cognitive skills are indeed used by the model; (3) RLed SkillFactory models are more robust to regression on out-of-domain tasks than RLed base models. Our work suggests that inductive biases learned prior to RL help models learn robust cognitive skill use.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Sprague, Zayne and Lu, Jack and Wadhwa, Manya and Keh, Sedrick and Ren, Mengye and Durrett, Greg},
month = dec,
year = {2025},
note = {arXiv:2512.04072 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Explorable},
}
@article{sivasankaran_agn_2025,
title = {{AGN} feedback in merging galaxies with a {SMUGGLE} multiphase {ISM}},
volume = {545},
url = {https://doi.org/10.1093/mnras/staf2044},
abstract = {We study fast nuclear winds driven by Active Galactic Nucleus (AGN) feedback in merging galaxies using high-resolution hydrodynamics simulations. We use Stars and MUltiphase Gas in GaLaxiEs (SMUGGLE) to explicitly model the multiphase interstellar medium (ISM) and employ sub-grid dynamical friction for massive black holes (BHs). Furthermore, we use a super-Lagrangian refinement scheme to resolve AGN feedback coupling to the ISM at ∼ 10 − 100 pc scales. By comparison between merging and isolated galaxies, with and without AGN feedback, we identify trends in the complex interplay between dynamics, BH fueling and feedback, and star formation and feedback. We consider three galaxy types: Milky Way analogs, Sbc-type galaxies, and Small Magellanic Cloud (SMC) analogs. The synergy between AGN feedback and merger dynamics is strongest in the Milky Way-like mergers, where the AGN winds are energetically dominant and entrain more gas when the initially thin disks become thick and amorphous during the merger. In contrast, the merger of thicker, vigorously star-forming Sbc galaxies is not strongly impacted by AGN feedback until star formation declines in the post-merger phase. Finally, while the sub-grid dynamical friction prescription effectively retains BHs in galactic nuclei during more massive mergers, the clumpy multiphase ISM induces significant wandering of low-mass BHs ({\textless} $10^{5} M_\odot$) in the shallow potentials of the SMC-like galaxies. These low-mass BHs wander at distances ≳ 2 kpc from the galactic center, yielding negligible BH accretion and feedback. This has implications for LISA event rates and presents a further challenge to understanding the rapid growth of $z \sim 7-10$ quasars discovered by JWST.},
language = {en},
number = {3},
journal = {Monthly Notices of the Royal Astronomical Society},
author = {Sivasankaran, Aneesh and Blecha, Laura and Torrey, Paul and Kelley, Luke Zoltan and Bhowmick, Aklant and Vogelsberger, Mark and Hernquist, Lars and Marinacci, Federico and Sales, Laura V},
month = nov,
year = {2025},
keywords = {Explainable},
}
@misc{shen_econoserve_2025,
title = {{EconoServe}: {Maximizing} {Multi}-{Resource} {Utilization} with {SLO} {Guarantees} in {LLM} {Serving}},
shorttitle = {{EconoServe}},
url = {http://arxiv.org/abs/2411.06364},
doi = {10.48550/arXiv.2411.06364},
abstract = {As Large Language Models (LLMs) continue to grow, reducing costs and alleviating GPU demands has become increasingly critical. However, existing schedulers primarily target either GPU compute or Key-Value Cache (KVC) utilization, failing to fully optimize both GPU compute and KVC usage during each iteration or guarantee timely KVC allocations when needed. To address these challenges, we conducted a trace-based experimental analysis and made insightful observations, leading to the design of a system called ECONOSERVE. ECONOSERVE maximizes multi-resource utilization while ensuring service-level objective (SLO) guarantees in LLM serving. To enable adding prompts to a batch to maximize GPU utilization in each iteration, ECONOSERVE maintains separate waiting queues for prompt processing tasks (PTs) and generation tasks (GTs). It batches GTs with the same predicted response lengths (RL) to save scheduling time and allocates KVC space for the predicted RL to avoid KVC allocation failures. It further has a novel KVC pipelining method, allowing sharing allocated but unused KVC space to enhance KVC utilization. In addition, it prioritizes queued requests that occupy more KVC to release KVC earlier and satisfy request service-level-objective (SLO). Experimental results demonstrate that ECONOSERVE increases throughput by up to 4× with the same level of latency, generates up to 91\% lower job completion time and up to 91\% higher SLO satisfaction ratio compared to vLLM. It also reduces the number of GPUs used in DistServe by up to 78\% while maintaining the same level of goodput.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Shen, Haiying and Sen, Tanmoy},
month = mar,
year = {2025},
note = {arXiv:2411.06364 [cs]},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Explorable},
}
@misc{schroeder_late-time_2025,
title = {A {Late}-time {Radio} {Search} for {Highly} {Off}-axis {Jets} from {PTF} {Broad}-lined {Ic} {Supernovae} in {GRB}-like {Host} {Galaxy} {Environments}},
url = {http://arxiv.org/abs/2507.15928},
doi = {10.48550/arXiv.2507.15928},
abstract = {Hydrogen/Helium-poor stripped-envelope core-collapse supernovae with broad lines (SNe Ic-bl) almost always accompany the nearby (z {\textless} 0.3) jetted relativistic explosions known as long duration gamma-ray bursts (GRBs). However, the majority of SNe Ic-bl have no detected GRB counterpart. At least some of these SNe should harbor off-axis jets, whose afterglow may become detectable at late times, particularly at radio wavelengths. Here, we present Karl G. Jansky Very Large Array radio observations (rest frame times of ∼ 3–4 × $10^{3}$ days post SN discovery) of a sample of 14 SNe Ic-bl discovered by the Palomar Transient Factory (PTF) that have been demonstrated to originate from the same host environments as the SNe Ic-bl associated with nearby GRBs. Of the 14 SNe, we identify three that are radio detected, one of which (PTF10tqv, z = 0.0795) is consistent with an off-axis jet with energy similar to classical GRBs (≳ $10^{50.5}$ erg). Using recently developed synchrotron radiation code, we find that for our 11 non-detections, which are among the deepest limits obtained for Ic-bl, we rule out an off-axis jet with an energy of ≳ $10^{51}$ erg in circumburst densities of ≳ $10^{-1}$ cm$^{-3}$. We predict that well-spaced monitoring of newly discovered SNe Ic-bl from ∼ 10 days to ∼ 10 years (rest frame) to luminosities of ∼ $10^{27}$ erg s$^{-1}$ Hz$^{-1}$ will constrain the existence of highly off-axis jets (≳ 60◦) with classical GRB energies. The VLA Sky Survey will probe jets that are ≲ 60◦ off-axis, whereas the Deep Synoptic Array 2000 will probe jets out to ∼ 90◦ off-axis, demonstrating the importance of utilizing radio surveys to supplement targeted observations.},
language = {en},
urldate = {2025-08-28},
publisher = {arXiv},
author = {Schroeder, Genevieve and Ho, Anna Y. Q. and Dastidar, Ranadeep G. and Modjaz, Maryam and Corsi, Alessandra and Duffell, Paul C.},
month = jul,
year = {2025},
note = {arXiv:2507.15928 [astro-ph]},
keywords = {Astrophysics - High Energy Astrophysical Phenomena, Explorable},
}
@article{ruys_scalable_2025,
title = {Scalable {KNN} {Graph} {Construction} for {Heterogeneous} {Architectures}},
volume = {12},
issn = {2329-4949, 2329-4957},
url = {https://dl.acm.org/doi/10.1145/3733610},
doi = {10.1145/3733610},
abstract = {Constructing k-nearest neighbor (kNN) graphs is a fundamental component in many machine learning and scientific computing applications. Despite its prevalence, efficiently building all-nearest-neighbor graphs at scale on distributed heterogeneous HPC systems remains challenging, especially for large sparse non-integer datasets. We introduce optimizations for algorithms based on forests of random projection trees. Our novel GPU kernels for batched, within leaf, exact searches achieve 1.18× speedup over sparse reference kernels with less peak memory, and up to 19× speedup over CPU for memory-intensive problems. Our library, PyRKNN, implements distributed randomized projection forests for approximate kNN search. Optimizations to reduce and hide communication overhead allow us to achieve 5× speedup, in per iteration performance, relative to GOFMM (another projection tree, MPI-based kNN library), for a 64M 128d dataset on 1,024 processes. On a single-node we achieve speedup over FAISS-GPU for dense datasets and up to 10× speedup over CPU-only libraries. PyRKNN uniquely supports distributed memory kNN graph construction for both dense and sparse coordinates on CPU and GPU accelerators.},
language = {en},
number = {3},
urldate = {2025-08-28},
journal = {ACM Transactions on Parallel Computing},
author = {Ruys, William and Ghafouri, Ali and Chen, Chao and Biros, George},
month = sep,
year = {2025},
keywords = {Accelerated},
pages = {1--35},
}
@article{rose_introducing_2025,
title = {Introducing the {DREAMS} {Project}: {DaRk} {mattEr} and {Astrophysics} with {Machine} {Learning} and {Simulations}},
volume = {982},
issn = {0004-637X, 1538-4357},
shorttitle = {Introducing the {DREAMS} {Project}},
url = {https://iopscience.iop.org/article/10.3847/1538-4357/adb8e5},
doi = {10.3847/1538-4357/adb8e5},
abstract = {
We introduce the DaRk mattEr and Astrophysics with Machine learning and Simulations (DREAMS) project, an innovative approach to understanding the astrophysical implications of alternative dark matter (DM) models and their effects on galaxy formation and evolution. The DREAMS project will ultimately comprise thousands of cosmological hydrodynamic simulations that simultaneously vary over DM physics, astrophysics, and cosmology in modeling a range of systems—from galaxy clusters to ultra-faint satellites. Such extensive simulation suites can provide adequate training sets for machine-learning-based analyses. This paper introduces two new cosmological hydrodynamical suites of warm dark matter (WDM), each comprising 1024 simulations generated using the
arepo
code. One suite consists of uniform-box simulations covering a
(25 h$^{-1}$ Mpc)$^{3}$
volume, while the other consists of Milky Way zoom-ins with sufficient resolution to capture the properties of classical satellites. For each simulation, the WDM particle mass is varied along with the initial density field and several parameters controlling the strength of baryonic feedback within the IllustrisTNG model. We provide two examples, separately utilizing emulators and convolutional neural networks, to demonstrate how such simulation suites can be used to disentangle the effects of DM and baryonic physics on galactic properties. The DREAMS project can be extended further to include different DM models, galaxy formation physics, and astrophysical targets. In this way, it will provide an unparalleled opportunity to characterize uncertainties on predictions for small-scale observables, leading to robust predictions for testing the particle physics nature of DM on these scales.},
language = {en},
number = {2},
urldate = {2025-05-28},
journal = {The Astrophysical Journal},
author = {Rose, Jonah C. and Torrey, Paul and Farahi, Arya and Kallivayalil, Nitya and Muñoz, Julian B. and Garcia, Alex M. and Villaescusa-Navarro, Francisco and Lisanti, Mariangela and Nguyen, Tri and Roy, Sandip and Kollmann, Kassidy E. and Vogelsberger, Mark and Cyr-Racine, Francis-Yan and Medvedev, Mikhail V. and Genel, Shy and Anglés-Alcázar, Daniel and Wang, Bonny Y. and Costanza, Belén and O’Neil, Stephanie and Roche, Cian and Karmakar, Soumyodipta and Low, Ryan and Lin, Shurui and Mostow, Olivia and Cruz, Akaxia and Caputo, Andrea and Necib, Lina and Teyssier, Romain and Dalcanton, Julianne J. and Spergel, David},
month = apr,
year = {2025},
keywords = {Explainable},
pages = {68},
}
@misc{rose_dreams_2025,
title = {The {DREAMS} {Project}: {Disentangling} the {Impact} of {Halo}-to-{Halo} {Variance} and {Baryonic} {Feedback} on {Milky} {Way} {Satellite} {Galaxies}},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {The {DREAMS} {Project}},
url = {https://arxiv.org/abs/2512.02095},
doi = {10.48550/ARXIV.2512.02095},
abstract = {We analyze the properties of satellite galaxies around 1,024 Milky Way-mass hosts from the DREAMS Project, simulated within a ΛCDM cosmology. Utilizing the TNG galaxy-formation model, the DREAMS simulations incorporate both baryonic physics and cosmological uncertainties for a large sample of galaxies with diverse environments and formation histories. We investigate the relative impact of the physical uncertainty from the galaxy-formation model on predicted satellite properties using four metrics: the satellite stellar mass function, radial distribution, inner slope of dark matter density profile, and stellar half-light radius. We compare these predictions to observations from the SAGA Survey and the DREAMS N-body simulations and find that uncertainties from baryonic physics modeling are subdominant to the scatter arising from halo-to-halo variance. Where baryonic modeling does affect satellites, the supernova wind energy has the largest effect on the satellite properties that we investigate. Specifically, increased supernova wind energy suppresses the stellar mass of satellites and results in more extended stellar half-light radii. The adopted wind speed has only a minor impact, and other astrophysical and cosmological parameters show no measurable effect. Our findings highlight the robustness of satellite properties against uncertainties in baryonic physics modeling.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Rose, Jonah C. and Lisanti, Mariangela and Torrey, Paul and Villaescusa-Navarro, Francisco and Garcia, Alex M. and Farahi, Arya and Filion, Carrie and Brooks, Alyson M. and Kallivayalil, Nitya and Kollmann, Kassidy E. and Lilie, Ethan and Li, Jiaxuan and Mostow, Olivia and Cruz, Akaxia and Nguyen, Tri and Roy, Sandip and Pace, Andrew B. and Ahvazi, Niusha and O'Neil, Stephanie and Shen, Xuejian and Cyr-Racine, Francis-Yan and Price-Whelan, Adrian M. and Geha, Marla and Necib, Lina and Vogelsberger, Mark and Muñoz, Julian B. and Dalcanton, Julianne J.},
year = {2025},
note = {Version Number: 1},
keywords = {Astrophysics of Galaxies (astro-ph.GA), Explainable, FOS: Physical sciences},
}
@misc{rose_dreams_2025-1,
title = {The {DREAMS} {Project}: {A} {New} {Suite} of 1,024 {Simulations} to {Contextualize} the {Milky} {Way} and {Assess} {Physics} {Uncertainties}},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {The {DREAMS} {Project}},
url = {https://arxiv.org/abs/2512.00148},
doi = {10.48550/ARXIV.2512.00148},
abstract = {We introduce a new suite of 1,024 cosmological and hydrodynamical zoom-in simulations of Milky Way-mass halos, run with Cold Dark Matter, as part of the DREAMS Project. Each simulation in the suite has a unique set of initial conditions and combination of cosmological and astrophysical parameters. The suite is designed to quantify theoretical uncertainties from halo-to-halo variance, as well as stellar and black hole feedback. We develop a novel weighting scheme that prioritizes regions of the input parameter space, yielding galaxies consistent with the observed present-day stellar mass–halo mass relation. The resulting galaxy population exhibits a wide diversity in structural properties that encompasses those of the actual Milky Way, providing a powerful statistical sample for galactic archaeology. To demonstrate the suite’s scientific utility, we investigate the connection between a galaxy’s merger history, focusing on Gaia-Sausage-Enceladus (GSE) analogs, and its present-day properties. We find that galaxies with a GSE analog have lower star formation rates, more compact disks, and more spherical stellar halos. Crucially, significant halo-to-halo scatter remains, demonstrating that matching more than the most significant events in the Milky Way’s past is necessary to recover its present-day properties. Our results highlight the necessity for large statistical samples to disentangle the stochastic nature of galaxy formation and robustly model the Milky Way’s unique history.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Rose, Jonah C. and Lisanti, Mariangela and Torrey, Paul and Villaescusa-Navarro, Francisco and Garcia, Alex M. and Farahi, Arya and Filion, Carrie and Brooks, Alyson M. and Kallivayalil, Nitya and Kollmann, Kassidy E. and Lilie, Ethan and Wang, Bonny Y. and Cruz, Akaxia and Roy, Sandip and Pace, Andrew B. and Ahvazi, Niusha and O'Neil, Stephanie and Roche, Cian and Shen, Xuejian and Vogelsberger, Mark},
year = {2025},
note = {Version Number: 1},
keywords = {Astrophysics of Galaxies (astro-ph.GA), Explainable, FOS: Physical sciences},
}
@article{reizinger_occams_2025,
title = {Occam’s {Razor} for {SSL}: {Memory}-{Efficient} {Parametric} {Instance} {Discrimination}},
volume = {12},
abstract = {Self-supervised learning (SSL) is the prevalent paradigm for representation learning often relying on pairwise similarity between multiple augmented views of each example. Numerous learning methods with various complexities such as gradient stopping, negative sampling, projectors, additional regularization terms, were introduced in the past years. These methods can be effective, but they require careful hyperparameter tuning, have increased computational and memory requirements and struggle with latent dimensionality collapse. Furthermore, complexities such as gradient stopping make them hard to analyse theoretically and confound the essential components of SSL. We introduce a simple parametric instance discrimination method, called Datum IndEx as its Target (DIET). DIET has a single computational branch, without explicit negative sampling, gradient stopping or other hyperparameters. We empirically demonstrate that DIET (1) can be implemented in a memory-efficient way; (2) achieves competitive performance with state-of-the-art SSL methods on small-scale datasets; and (3) is robust to hyperparameters such as batch size. We uncover tight connections to Spectral Contrastive Learning in the lazy training regime, leading to practical insights about the role of feature normalization. Compared to SimCLR or VICReg, DIET also has higher-rank embeddings on CIFAR100 and TinyImageNet, suggesting that DIET captures more latent information.},
language = {en},
journal = {Transactions on Machine Learning Research},
author = {Reizinger, Patrik and Bizeul, Alice and Juhos, Attila and Ibrahim, Mark and Klindt, David and Balestriero, Randall and Brendel, Wieland and Mirzasoleiman, Baharan},
year = {2025},
keywords = {Explorable},
}
@article{qi_star_2025,
title = {Star {Formation} {Rates}, {Metallicities}, and {Stellar} {Masses} on {Kiloparsec} {Scales} in {TNG50}},
volume = {993},
issn = {0004-637X, 1538-4357},
url = {https://iopscience.iop.org/article/10.3847/1538-4357/ae0622},
doi = {10.3847/1538-4357/ae0622},
abstract = {Integral field units have extended our knowledge of galactic properties to kiloparsec (or, sometimes, even smaller) patches of galaxies. These scales are where the physics driving galaxy evolution (feedback, chemical enrichment, etc.) take place. Quantifying the spatially resolved properties of galaxies, both observationally and theoretically, is therefore critical to our understanding of galaxy evolution. To this end, we investigate spatially resolved scaling relations within galaxies of M$_{\star}$ {\textgreater} $10^{9.0}$ at z = 0 in IllustrisTNG. We examine both the resolved star formation main sequence (rSFMS) and the resolved mass–metallicity relation (rMZR) using 1 kpc × 1 kpc maps. We find that the rSFMS in IllustrisTNG is well described by a power law but is significantly shallower than the observed rSFMS. However, the disagreement between the rSFMS of IllustrisTNG and observations is likely driven by an overestimation of AGN feedback in IllustrisTNG for the higher-mass hosts. Conversely, the rMZR for IllustrisTNG has very good agreement with observations. Furthermore, we argue that the rSFMS is an indirect result of the Schmidt–Kennicutt law and local gas relation, which are both independent of host galaxy properties. Finally, we expand upon a localized leaky-box model to study the evolution of idealized spaxels and find that it provides a good description of these resolved relations. The degree of agreement, however, between idealized spaxels and simulated spaxels depends on the “net” outflow rate for the spaxel, and the IllustrisTNG scaling relations indicate a preference for a low net outflow rate.},
language = {en},
number = {1},
urldate = {2025-11-11},
journal = {The Astrophysical Journal},
author = {Qi, Jia and Garcia, Alex M. and Robinson, Davis and Torrey, Paul and Moreno, Jorge and Green, Kara N. and Evans, Aaron S. and Hemler, Z. S. and Hernquist, Lars and Ellison, Sara L.},
month = nov,
year = {2025},
keywords = {Explainable},
pages = {32},
}
@article{poletti_modeling_2025,
title = {Modeling turbulent and self-gravitating fluids with {Fourier} neural operators},
volume = {3},
issn = {2770-9019},
url = {https://pubs.aip.org/aml/article/3/2/026118/3350987/Modeling-turbulent-and-self-gravitating-fluids},
doi = {10.1063/5.0263607},
abstract = {Neural Operators (NOs) are a leading method for surrogate modeling of partial differential equations. Unlike traditional neural networks, which approximate individual functions, NOs learn the mappings between function spaces. While NOs have been predominantly tested on simplified 1D and 2D problems, such as those explored in prior works, these studies fail to address the complexities of more realistic, highdimensional, and high-dynamic range systems. Moreover, many real-world applications involve incomplete or noisy data, which has not been adequately explored in current NO literature. In this work, we present a novel application of NOs to astrophysical data, which involves high-dynamic range projections into an observational space. We train Fourier NO (FNO) models to predict the evolution of incomplete observational proxies with density variations spanning four orders of magnitude. We demonstrate that FNOs can predict the effects of unobserved dynamical variables. Our work lays the groundwork for future studies that forecast direct astronomical observables.},
language = {en},
number = {2},
urldate = {2025-08-12},
journal = {APL Machine Learning},
author = {Poletti, Keith and Offner, Stella S. R. and Ward, Rachel A.},
month = jun,
year = {2025},
keywords = {Accelerated},
pages = {026118},
}
@article{payerne_weak_2025,
title = {Weak lensing mass-richness relation of {redMaPPer} clusters in {LSST} {DESC} {DC2} simulations},
volume = {700},
copyright = {https://creativecommons.org/licenses/by/4.0},
issn = {0004-6361, 1432-0746},
url = {https://www.aanda.org/10.1051/0004-6361/202554107},
doi = {10.1051/0004-6361/202554107},
abstract = {Context. Cluster scaling relations are key ingredients in cluster abundance-based cosmological studies. In optical cluster cosmology, where clusters are detected through their richness, cluster-weak gravitational lensing has proven to be a powerful tool to constrain the cluster mass-richness relation. This work is conducted as part of the Dark Energy Science Collaboration (DESC), which aims to analyze the Legacy Survey of Space and Time (LSST) of the Vera C. Rubin Observatory, starting in 2026.},
language = {en},
urldate = {2025-11-11},
journal = {Astronomy \& Astrophysics},
author = {Payerne, Constantin and Zhang, Zhuowen and Aguena, Michel and Combet, Céline and Guillemin, Thibault and Ricci, Marina and Amouroux, Nathan and Avestruz, Camille and Barroso, Eduardo J. and Farahi, Arya and Kovacs, Eve and Murray, Calum and Rau, Markus M. and Rykoff, Eli S. and Schmidt, Samuel J.},
month = aug,
year = {2025},
keywords = {Explainable},
pages = {A34},
}
@misc{payerne_analysis_2025,
title = {Analysis of the weak lensing mass-richness relation of {redMaPPer} clusters in the {LSST} {DESC} {DC2} simulations},
url = {http://arxiv.org/abs/2502.08444},
doi = {10.48550/arXiv.2502.08444},
abstract = {Context. Cluster scaling relations are key ingredients in cluster abundance-based cosmological studies. In optical cluster cosmology, where clusters are detected through their richness, cluster-weak gravitational lensing has proven to be a powerful tool to constrain the cluster mass-richness relation. This work is conducted as part of the Dark Energy Science Collaboration (DESC), which aims to analyze the Legacy Survey of Space and Time (LSST) of Vera C. Rubin Observatory, starting in 2026.},
language = {en},
urldate = {2025-03-31},
publisher = {arXiv},
author = {Payerne, Constantin and Zhang, Zhuowen and Aguena, Michel and Combet, Céline and Guillemin, Thibault and Farahi, Arya and Ricci, Marina and Amouroux, Nathan and Avestruz, Camille and Barroso, Eduardo J. and Kovacs, Eve and Murray, Calum and Rau, Markus M. and Rykoff, Eli S. and Schmidt, Samuel J.},
month = feb,
year = {2025},
note = {arXiv:2502.08444 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable},
}
@misc{offner_life_2025,
title = {The {Life} and {Times} of {Star}-{Forming} {Cores}: an {Analysis} of {Dense} {Gas} in the {STARFORGE} {Simulations}},
shorttitle = {The {Life} and {Times} of {Star}-{Forming} {Cores}},
url = {http://arxiv.org/abs/2502.15057},
doi = {10.48550/arXiv.2502.15057},
abstract = {Dense gas in molecular clouds is an important signature of ongoing and future star formation. We identify and track dense cores in the starforge simulations, following the core evolution from birth through dispersal by stellar feedback for typical Milky Way cloud conditions. Only ∼8\% of cores host protostars, and most disperse before forming stars. The median starless and protostellar core lifetimes are ∼ 0.5−0.6 Myr and ∼ 0.8−1.1 Myr, respectively, where the protostellar phase lasts ∼ 0.1$^{+0.1}_{-0.05}$ Myr.},
language = {en},
urldate = {2025-03-31},
publisher = {arXiv},
author = {Offner, Stella S. R. and Taylor, Josh and Grudic, Michael Y.},
month = feb,
year = {2025},
note = {arXiv:2502.15057 [astro-ph]},
keywords = {Accelerated, Astrophysics - Astrophysics of Galaxies, Astrophysics - Solar and Stellar Astrophysics},
}
@misc{naharas_data_2025,
title = {Data {Selection} for {Fine}-tuning {Vision} {Language} {Models} via {Cross} {Modal} {Alignment} {Trajectories}},
url = {http://arxiv.org/abs/2510.01454},
doi = {10.48550/arXiv.2510.01454},
abstract = {Data-efficient learning aims to eliminate redundancy in large training datasets by training models on smaller subsets of the most informative examples. While data selection has been extensively explored for vision models and large language models (LLMs), it remains underexplored for Large Vision-Language Models (LVLMs). Notably, none of existing methods can outperform random selection at different subset sizes. In this work, we propose the first principled method for data-efficient instruction tuning of LVLMs. We prove that examples with similar cross-modal attention matrices during instruction tuning have similar gradients. Thus, they influence model parameters in a similar manner and convey the same information to the model during training. Building on this insight, we propose XMAS, which clusters examples based on the trajectories of the top singular values of their attention matrices obtained from fine-tuning a small proxy LVLM. By sampling a balanced subset from these clusters, XMAS effectively removes redundancy in large-scale LVLM training data. Extensive experiments show that XMAS can discard 50\% of the LLaVA-665k dataset and 85\% of the Vision-Flan dataset while fully preserving performance of LLaVA-1.5-7B on 10 downstream benchmarks and speeding up its training by 1.2×. This is 30\% more data reduction compared to the best baseline for LLaVA-665k. The project’s website can be found at https://bigml-cs-ucla.github.io/XMAS-project-page/.},
language = {en},
urldate = {2026-01-21},
publisher = {arXiv},
author = {Naharas, Nilay and Nguyen, Dang and Bulut, Nesihan and Bateni, Mohammadhossein and Mirrokni, Vahab and Mirzasoleiman, Baharan},
month = oct,
year = {2025},
note = {arXiv:2510.01454 [cs]},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Explorable},
}
@misc{mostow_how_2025,
title = {How {Many} {Bursts} {Does} it {Take} to {Form} a {Core} at the {Center} of a {Galaxy}?},
url = {http://arxiv.org/abs/2412.09566},
doi = {10.48550/arXiv.2412.09566},
abstract = {We present a novel method for systematically assessing the impact of central potential fluctuations associated with bursty outflows on the structure of dark matter halos for classical and ultra-faint dwarf galaxies. Specifically, we use dark-matter-only simulations augmented with a manually-added massive particle that modifies the central potential and approximately accounts for a centrally-concentrated baryonic component. This approach enables precise control over the magnitude, frequency, and timing of rapid outflow events. We demonstrate that this method can reproduce the established result of core formation for systems that undergo multiple episodes of bursty outflows. In contrast, we also find that equivalent models that involve only a single (or small number of) burst episodes do not form cores with the same efficacy. This is important because many UFDs in the Local Universe are observed to have tightly constrained star formation histories that are best described by a single, early burst of star formation. Using a suite of cosmological, zoom-in simulations, we identify the regimes in which single bursts can and cannot form a cored density profile. Our results suggest that it may be difficult to form cores in UFD-mass systems with a single, early burst regardless of its magnitude.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Mostow, Olivia and Torrey, Paul and Rose, Jonah C. and Garcia, Alex M. and Ahvazi, Niusha and Lisanti, Mariangela and Kallivayalil, Nitya},
month = oct,
year = {2025},
note = {arXiv:2412.09566 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{low_structure_2025,
title = {Structure {Formation} under {Inelastic} {Two}-{Component} {Dark} {Matter}: {Halo} {Statistics} and {Matter} {Power} {Spectra} in the {High}-\$z\$ {Universe}},
shorttitle = {Structure {Formation} under {Inelastic} {Two}-{Component} {Dark} {Matter}},
url = {http://arxiv.org/abs/2503.05881},
doi = {10.48550/arXiv.2503.05881},
abstract = {We present hydrodynamic simulations of a flavour-mixed two-component dark matter (2cDM) model that utilize IllustrisTNG baryonic physics. The model parameters are explored for two sets of power laws of the velocity-dependent cross sections, favoured on the basis of previous studies. The model is shown to suppress the formation of structures at scales k ≳ 102 h Mpc−1 up to 40\% compared to cold dark matter (CDM) at redshifts z ∼ 5 − 2. We compare our results to structure enhancement and suppression due to cosmological and astrophysical parameters presented in the literature and find that 2cDM effects remain relevant at galactic and subgalactic scales. The results indicate the robustness of the role of nongravitational dark matter interactions in structure formation and the absence of putative degeneracies introduced by baryonic feedback at high z. The predictions made can be further tested with future Ly-α forest observations.},
language = {en},
urldate = {2025-03-31},
publisher = {arXiv},
author = {Low, Ryan and Adhikari, Rakshak and Rose, Jonah C. and O'Neil, Stephanie and Medvedev, Mikhail V. and Torrey, Paul and Vogelsberger, Mark},
month = mar,
year = {2025},
note = {arXiv:2503.05881 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable},
}
@misc{liu_propmend_2025,
title = {{PropMEND}: {Hypernetworks} for {Knowledge} {Propagation} in {LLMs}},
shorttitle = {{PropMEND}},
url = {http://arxiv.org/abs/2506.08920},
doi = {10.48550/arXiv.2506.08920},
abstract = {Knowledge editing techniques for large language models (LLMs) can inject knowledge that is later reproducible verbatim, but they fall short on propagating that knowledge: models cannot answer questions that require reasoning with the injected knowledge. We present a hypernetwork-based approach for knowledge propagation, named PropMEND, where we meta-learn how to modify gradients of a language modeling loss to encourage injected information to propagate. Our approach extends the meta-objective of MEND [29] so that gradient updates on knowledge are transformed to enable answering multi-hop questions involving that knowledge. We show improved performance on the RippleEdit dataset, showing almost 2× accuracy on challenging multi-hop questions whose answers are not explicitly stated in the injected fact. We further introduce a new dataset, Controlled RippleEdit, to evaluate the generalization of our hypernetwork, testing knowledge propagation along relations and entities unseen during hypernetwork training. PropMEND still outperforms existing approaches in unseen entity-relation pairs, yet the performance gap decreases substantially, suggesting future work in propagating knowledge to a wide range of relations.},
language = {en},
urldate = {2025-08-28},
publisher = {arXiv},
author = {Liu, Zeyu Leo and Durrett, Greg and Choi, Eunsol},
month = jun,
year = {2025},
note = {arXiv:2506.08920 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning, Explorable},
}
@misc{lilie_dreams_2025,
title = {The {DREAMS} {Project}: {Disentangling} the {Impact} of {Halo}-to-{Halo} {Variance} and {Baryonic} {Feedback} on {Milky} {Way} {Dark} {Matter} {Speed} {Distributions}},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {The {DREAMS} {Project}},
url = {https://arxiv.org/abs/2512.04157},
doi = {10.48550/ARXIV.2512.04157},
abstract = {Direct detection experiments require information about the local dark matter speed distribution to produce constraints on dark matter candidates, or infer their properties in the event of a discovery. In this paper, we analyze how the uncertainty in the dark matter speed distribution near the Sun is affected by baryonic feedback, halo-to-halo variance, and halo mass. To do so, we harness the statistical power of the new DREAMS Cold Dark Matter simulation suite, which is comprised of 1024 zoom-in Milky Way-mass halos with varied initial conditions as well as cosmological and astrophysical parameters. Applying a normalizing flows emulator to these simulations, we find that the uncertainty in the local DM speed distribution is dominated by halo-to-halo variance and, to a lesser extent, uncertainty in host halo mass. Uncertainties in supernova and black hole feedback (from the IllustrisTNG model in this case) are negligible in comparison. Using the DREAMS suite, we present a state-of-the-art prediction for the DM speed distribution in the Milky Way. Although the Standard Halo Model is contained within the uncertainty of this prediction, individual galaxies may have distributions that differ from it. Lastly, we apply our DREAMS results to the XENON1T experiment and demonstrate that the astrophysical uncertainties are comparable to the experimental ones, solidifying previous results in the literature obtained with a smaller sample of simulated Milky Way-mass halos.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Lilie, Ethan and Rose, Jonah C. and Lisanti, Mariangela and Garcia, Alex M. and Torrey, Paul and Kollmann, Kassidy E. and Li, Jiaxuan and Mostow, Olivia and Wang, Bonny Y. and O'Neil, Stephanie and Shen, Xuejian and Brooks, Alyson M. and Farahi, Arya and Kallivayalil, Nitya and Necib, Lina and Pace, Andrew B. and Vogelsberger, Mark},
year = {2025},
note = {Version Number: 1},
keywords = {Astrophysics of Galaxies (astro-ph.GA), Cosmology and Nongalactic Astrophysics (astro-ph.CO), Explainable, FOS: Physical sciences, High Energy Physics - Phenomenology (hep-ph)},
}
@article{libanore_effective_2025,
title = {Effective model for line intensity mapping: {Auto}- and cross-power spectra in the cosmic dawn and reionization},
volume = {112},
issn = {2470-0010, 2470-0029},
shorttitle = {Effective model for line intensity mapping},
url = {https://link.aps.org/doi/10.1103/xq1r-zh51},
doi = {10.1103/xq1r-zh51},
language = {en},
number = {8},
urldate = {2025-11-11},
journal = {Physical Review D},
author = {Libanore, Sarah and Muñoz, Julian B. and Kovetz, Ely D.},
month = oct,
year = {2025},
keywords = {Explainable},
pages = {083552},
}
@misc{libanore_new_2025,
title = {A {New} {Boundary} {Condition} on {Reionization}},
url = {http://arxiv.org/abs/2509.08886},
doi = {10.48550/arXiv.2509.08886},
abstract = {The epoch of reionization (EoR) marks the last phase transition of hydrogen in our Universe, as it evolves from cold and neutral to hot and ionized in the intergalactic medium (IGM). While its endpoint and duration can be estimated from current observations, albeit with large uncertainties, there is no known avenue to constrain its onset. We propose a novel method based on the Pearson cross-correlation coefficient between 21-cm brightness temperature maps and line-intensity maps tracing star-formation (e.g., OIII, CO, CII). This real-space estimator evolves from negative to positive as X-ray heating progresses, and saturates prior to the EoR. We predict a sharp turnover from saturation during the earliest EoR stages, when the IGM ionized fraction reaches \${\textbackslash}bar\{x\}\_\{{\textbackslash}rm HII\}{\textbackslash}sim 1{\textbackslash}\%-10{\textbackslash}\%\$. We show that in standard scenarios, where the IGM heating precedes reionization, the turnover is a clear, model-robust signature. Its detection will provide a unique observational anchor for the EoR onset, complementing existing probes and tightening constraints on early galaxy formation models.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Libanore, Sarah and Kovetz, Ely D. and Muñoz, Julian B. and Sklansky, Yonatan and Thélie, Emilie},
month = sep,
year = {2025},
note = {arXiv:2509.08886 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable},
}
@misc{leisher_linking_2025,
title = {Linking {Warm} {Dark} {Matter} to {Merger} {Tree} {Histories} via {Deep} {Learning} {Networks}},
url = {http://arxiv.org/abs/2511.05367},
doi = {10.48550/arXiv.2511.05367},
abstract = {Dark matter (DM) halos form hierarchically in the Universe through a series of merger events. Cosmological simulations can represent this series of mergers as a graph-like “tree” structure. Previous work has shown these merger trees are sensitive to cosmology simulation parameters, but as DM structures, the outstanding question of their sensitivity to DM models remains unanswered. In this work, we investigate the feasibility of deep learning methods trained on merger trees to infer Warm Dark Matter (WDM) particles masses from the DREAMS simulation suite. We organize the merger trees from 1,024 zoom-in simulations into graphs with nodes representing the merger history of galaxies and edges denoting hereditary links. We vary the complexity of the node features included in the graphs ranging from a single node feature up through an array of several galactic properties (e.g., halo mass, star formation rate, etc.). We train a Graph Neural Network (GNN) to predict the WDM mass using the graph representation of the merger tree as input. We find that the GNN can predict the mass of the WDM particle (R2 from 0.07 to 0.95), with success depending on the graph complexity and node features. We extend the same methods to supernovae and active galactic nuclei feedback parameters ASN1, ASN2, and AAGN, successfully inferring the supernovae parameters. The GNN can even infer the WDM mass from merger tree histories without any node features, indicating that the structure of merger trees alone inherits information about the cosmological parameters of the simulations from which they form.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Leisher, Ilem and Torrey, Paul and Garcia, Alex M. and Rose, Jonah C. and Villaescusa-Navarro, Francisco and Lubberts, Zachary and Farahi, Arya and O'Neil, Stephanie and Shen, Xuejian and Mostow, Olivia and Kallivayalil, Nitya and Zimmerman, Dhruv and Narayanan, Desika and Vogelsberger, Mark},
month = nov,
year = {2025},
note = {arXiv:2511.05367 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{lazare_first_2025,
title = {First galaxy ultraviolet luminosity function limits on dark matter-proton scattering},
url = {http://arxiv.org/abs/2510.10757},
doi = {10.48550/arXiv.2510.10757},
abstract = {Scattering between dark matter (DM) and protons leads to suppressed small-scale fluctuations, with implications for a variety of cosmological observables. In this work, we search for evidence of DM-proton scattering with an interaction cross section \$σ{\textbackslash}!={\textbackslash}!σ\_0 ({\textbackslash}frac\{v\}\{c\}){\textasciicircum}n\$ for \$n=0,2\$ and \$4\$, corresponding e.g. to velocity-independent contact interactions from heavy mediators, velocity-dependent pseudoscalar-mediated scattering, and higher-order dipole interactions, respectively, using high-redshift (\$z {\textbackslash}sim4-10\$) ultraviolet galaxy luminosity functions (UVLFs) observed by Hubble Space Telescope (HST). We employ an adjusted implementation of GALLUMI combined with the modified Boltzmann solver CLASS DMeff that accounts for interacting DM, and incorporate UVLF data from both blank and lensed HST fields, alongside Planck CMB data and the Pantheon supernova catalog in a Bayesian analysis framework to set constraints on \$σ\_0\$. Our results show that including lensed UVLF data, which probe fainter galaxies than the blank HST fields and thus smaller scales, leads to a substantial improvement in the constraints on \$σ\_0\$ for \$n{\textgreater}0\$, surpassing existing bounds from Milky-Way (MW) satellite abundance and CMB anisotropies. For \$m\_χ = 1{\textbackslash},{\textbackslash}rm MeV \$, for example, we set the upper bounds at \$8.3{\textbackslash}times 10{\textasciicircum}\{-26\} {\textbackslash}, {\textbackslash}rm cm{\textasciicircum}2\$ for \$n=2\$ and \$1.2{\textbackslash}times 10{\textasciicircum}\{-22\} {\textbackslash}, {\textbackslash}rm cm{\textasciicircum}2\$ for \$n=4\$. For \$n=0\$, our bound is within an order of magnitude of those from the Lyman-\$α\$ forest and MW satellites.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Lazare, Hovav and Kovetz, Ely D. and Boddy, Kimberly K. and Munoz, Julian B.},
month = oct,
year = {2025},
note = {arXiv:2510.10757 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable, High Energy Physics - Phenomenology},
}
@misc{lau_baryon_2025,
title = {Baryon {Pasting} the {Uchuu} {Lightcone} {Simulation}},
url = {http://arxiv.org/abs/2411.00108},
doi = {10.3847/1538-4357/ada940},
abstract = {We present the Baryon Pasted (BP) X-ray and thermal Sunyaev-Zel’dovich (tSZ) maps derived from the half-sky Uchuu Lightcone simulation. These BP-Uchuu maps are constructed using more than 75 million dark matter halos with masses 𝑀500𝑐 ≥ 1013 𝑀⊙ within the redshift range 0 ≤ 𝑧 ≤ 2. A distinctive feature of our BP-Uchuu Lightcone maps is their capability to assess the influence of both extrinsic and intrinsic scatter caused by triaxial gaseous halos and internal gas characteristics, respectively, at the map level. We show that triaxial gas drives substantial scatter in X-ray luminosities of clusters and groups, accounting for nearly half of the total scatter in core-excised measurements. Additionally, scatter in the thermal pressure and gas density profiles of halos enhances the X-ray and SZ power spectra, leading to biases in cosmological parameter estimates. These findings are statistically robust due to the extensive sky coverage and large halo sample in the BP-Uchuu maps. The BP-Uchuu maps are publicly available online via Globus.},
language = {en},
urldate = {2025-03-31},
author = {Lau, Erwin T. and Nagai, Daisuke and Farahi, Arya and Ishiyama, Tomoaki and Miyatake, Hironao and Osato, Ken and Shirasaki, Masato},
month = feb,
year = {2025},
note = {arXiv:2411.00108 [astro-ph]},
keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable},
}
@article{kho_signatures_2025,
title = {Signatures of {Black} {Hole} {Seeding} on the \textit{{M}}$_{\textrm{•}}$ – \textit{σ} {Relation}: {Predictions} from the {BRAHMA} {Simulations}},
volume = {994},
issn = {0004-637X, 1538-4357},
shorttitle = {Signatures of {Black} {Hole} {Seeding} on the \textit{{M}}$_{\textrm{•}}$ – \textit{σ} {Relation}},
url = {https://iopscience.iop.org/article/10.3847/1538-4357/ae0e1e},
doi = {10.3847/1538-4357/ae0e1e},
abstract = {The James Webb Space Telescope has identified a large population of supermassive (106–108 M⊙) black holes (BHs) in the early Universe (z ∼ 4–7). Current measurements suggest that many of these BHs exhibit higher BH-to-stellar mass ratios than local populations, opening a new window into the earliest stages of BH–galaxy coevolution and offering the potential to place tight constraints on BH seeding and growth in the early Universe. In this work, we use the BRAHMA simulations to investigate the impact of BH seeding on the M•–σ relation. These simulations adopt heavy ∼105 M⊙ seeds and systematically varied BH seeding models, resulting in distinct predictions for seed abundances. We find that different seed models lead to different normalizations of the M•–σ relation at higher redshifts (z {\textgreater} 2) across all σ, and at low redshift for systems with low σ (50 km s−1 ≲ σ ≲ 80 km s−1). The most lenient seed model also shows negligible evolution in the M•–σ relation across redshift, while more restrictive models have substantially lower normalization on the M•–σ relation for high σ (∼100 km s−1) at high redshifts, and evolve upward toward the local relation. We demonstrate that, while an evolving M*–σ relation mitigates changes in the M•–σ relation, any M•–σ evolution is a direct consequence of merger-dominated BH growth in low mass galaxies (≲109 M⊙) and accretion-dominated BH growth in high-mass (≳109 M⊙) galaxies. Furthermore, the scatter in the M•–σ relation is larger for the more restrictive models due to the inability of many BHs to grow significantly beyond their seed mass.},
language = {en},
number = {2},
urldate = {2025-12-18},
journal = {The Astrophysical Journal},
author = {Kho, Jonathan and Bhowmick, Aklant Kumar and Torrey, Paul and Garcia, Alex M. and Ahvazi, Niusha and Blecha, Laura and Vogelsberger, Mark},
month = dec,
year = {2025},
keywords = {Explainable},
pages = {172},
}
@article{kavoussi_impact_2025,
title = {Impact of follicle size before luteal progesterone supplementation on clinical outcomes of modified natural cycle single frozen embryo transfer},
volume = {6},
issn = {26663341},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2666334124001442},
doi = {10.1016/j.xfre.2024.12.001},
abstract = {Objective: To determine whether follicle size at midcycle transvaginal sonography imaging before luteal progesterone supplementation predicts modified natural cycle single frozen embryo transfer (mNC-SFET) outcomes. Design: Retrospective chart review. Subjects: Frozen embryo transfer charts were reviewed. After inclusion and exclusion criteria were applied, data were abstracted from cases of mNC-SFET (n = 115). Exposure: For group A, lead follicle measuring {\textless}16 mm on day of trigger or peak +ovulation predictor kit (n = 50), and for group B, lead follicle measuring ≥16 mm on day of trigger or peak +ovulation predictor kit (n = 65). Main Outcome Measures: Follicle size analyzed as possible predictor of primary outcome ongoing pregnancy rate (OPR) as well as secondary outcomes implantation rate (IR), clinical pregnancy rate (CPR), and spontaneous abortion (SAB) rate via bivariate associations and multivariate logistic regression analyses.
Results: Bivariate analyses showed no differences between groups in OPR (A, 48.0\%, 24/50, and B, 44.6\%, 29/65), IR (A, 64.0\%, 32/50, and B, 61.5\%, 40/65), CPR (A, 58.0\%, 29/50, and B, 52.3\%, 34/65), and SAB rates (A, 25.0\%, 8/32, and B, 27.5\%, 11/40). Multivariate analysis to investigate potential confounding between lead follicle size and outcomes of interest showed no difference in the primary and secondary outcomes. Furthermore, multivariate analyses using lead follicle size as a continuous variable showed no difference in outcomes.
Conclusion: In normo-ovulatory women undergoing mNC-SFET with natural endometrial preparation with human chorionic gonadotropin trigger or luteinizing hormone surge to time frozen embryo transfer, lead follicle size before luteal phase supplementation does not impact clinical outcomes such as IR, CPR, SAB rate, or OPR. (F S Rep® 2025;6:47–51. ©2024 by American Society for Reproductive Medicine.)},
language = {en},
number = {1},
urldate = {2025-11-11},
journal = {F\&S Reports},
author = {Kavoussi, Shahryar K. and Chen, Shu-Hung and Farzaneh, Negar and Farahi, Arya and Mehrabani-Farsi, Romtin and Aston, Kenneth I. and Chen, Justin and Kavoussi, Parviz K.},
month = mar,
year = {2025},
keywords = {Explainable},
pages = {47--51},
}
@article{joshi_mm-gen_2025,
title = {{MM}-{Gen}: {Principled} and {Generalizable} {Data} {Curation} for {Enhancing} {Task} {Performance} in {VLMs}},
language = {en},
journal = {Journal of Data-centric Machine Learning Research},
author = {Joshi, Siddharth and Nushi, Besmira and Balachandran, Vidhisha and Chandrasekaran, Varun and Vineet, Vibhav and Joshi, Neel and Mirzasoleiman, Baharan},
month = sep,
year = {2025},
keywords = {Explorable},
}
@misc{javanmard_understanding_2025,
title = {Understanding the {Role} of {Training} {Data} in {Test}-{Time} {Scaling}},
url = {http://arxiv.org/abs/2510.03605},
doi = {10.48550/arXiv.2510.03605},
abstract = {Test-time scaling improves the reasoning capabilities of large language models (LLMs) by allocating extra compute to generate longer Chains-of-Thoughts (CoTs). This enables models to tackle more complex problem by breaking them down into additional steps, backtracking, and correcting mistakes. Despite its strong performance–demonstrated by OpenAI’s o1 and DeepSeek R1, the conditions in the training data under which long CoTs emerge, and when such long CoTs improve the performance, remain unclear. In this paper, we study the performance of test-time scaling for transformers trained on an in-context weight prediction task for linear regression. Our analysis provides a theoretical explanation for several intriguing observations: First, at any fixed test error, increasing test-time compute allows us to reduce the number of in-context examples (context length) in training prompts. Second, if the skills required to solve a downstream task are not sufficiently present in the training data, increasing test-time compute can harm performance. Finally, we characterize task hardness via the smallest eigenvalue of its feature covariance matrix and show that training on a diverse, relevant, and hard set of tasks results in best performance for test-time scaling. We confirm our findings with experiments on large, nonlinear transformer architectures.},
language = {en},
urldate = {2026-01-21},
publisher = {arXiv},
author = {Javanmard, Adel and Mirzasoleiman, Baharan and Mirrokni, Vahab},
month = oct,
year = {2025},
note = {arXiv:2510.03605 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Explorable, Statistics - Machine Learning},
}
@misc{jacobs_efficient_2025,
title = {Efficient and {Stable} {Multi}-{Dimensional} {Kolmogorov}-{Smirnov} {Distance}},
url = {http://arxiv.org/abs/2504.11299},
doi = {10.48550/arXiv.2504.11299},
abstract = {We revisit extending the Kolmogorov-Smirnov distance between probability distributions to the multidimensional setting, and make new arguments about the proper way to approach this generalization. Our proposed formulation maximizes the difference over orthogonal dominating rectangular ranges (d-sided rectangles in Rd), and is an integral probability metric. We also prove that the distance between a distribution and a sample from the distribution converges to 0 as the sample size grows, and bound this rate. Moreover we show that one can, up to this same approximation error, compute the distance efficiently in 4 or fewer dimensions; specifically the runtime is near-linear in the size of the sample needed for that error. With this, we derive a δ-precision two-sample hypothesis test using this distance. Finally, we show these metric and approximation properties do not hold for other popular variants.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Jacobs, Peter Matthew and Namjoo, Foad and Phillips, Jeff M.},
month = apr,
year = {2025},
note = {arXiv:2504.11299 [stat]},
keywords = {Computer Science - Computational Geometry, Computer Science - Machine Learning, Observable, Statistics - Computation},
}
@misc{hsu_instance-level_2025,
title = {Instance-level {Performance} {Prediction} for {Long}-form {Generation} {Tasks}},
url = {http://arxiv.org/abs/2509.07309},
doi = {10.48550/arXiv.2509.07309},
abstract = {We motivate and share a new benchmark for instance-level performance prediction of long-form generation tasks having multifaceted, fine-grained quality metrics. Our task-, model- and metric-agnostic formulation predicts continuous evaluation metric scores given only black-box model inputs and outputs. Beyond predicting point estimates of metric scores, the benchmark also requires inferring prediction intervals to quantify uncertainty around point estimates. Evaluation spans 11 long-form datasets/tasks with multiple LLMs, baselines, and metrics per task. We show that scores can be effectively predicted across long-form generation tasks using as few as 16 training examples. Overall, we introduce a novel and useful task, a valuable benchmark to drive progress, and baselines ready for practical adoption today.},
language = {en},
urldate = {2025-10-02},
publisher = {arXiv},
author = {Hsu, Chi-Yang and Braylan, Alexander and Su, Yiheng and Alonso, Omar and Lease, Matthew},
month = sep,
year = {2025},
note = {arXiv:2509.07309 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Explorable},
}
@article{goubert_environmental_2025,
title = {Environmental versus intrinsic quenching at cosmic noon: predictions from cosmological hydrodynamical simulations for {VLT}-{MOONRISE}},
volume = {543},
copyright = {https://creativecommons.org/licenses/by/4.0/},
issn = {0035-8711, 1365-2966},
shorttitle = {Environmental versus intrinsic quenching at cosmic noon},
url = {https://academic.oup.com/mnras/article/543/3/2006/8255889},
doi = {10.1093/mnras/staf1554},
abstract = {We present an investigation into the quenching of simulated galaxies across cosmic time, honing in on the role played by both intrinsic and environmental mechanisms at different epochs. In anticipation of VLT-MOONRISE, Very Large Telescope MOONS (Multi-Object Optical and Near-infrared Spectrograph) Redshift-Intensive Survey Experiment, the first wide-field spectroscopic galaxy survey to target cosmic noon, this work provides clear predictions to compare to the future observations. We investigate the quenching of centrals, high-mass satellites, and low-mass satellites from two cosmological hydrodynamical simulations: Illustris The Next Generation and Evolution and Assembly of GaLaxies and their Environment. Satellites are split according to bespoke mass thresholds, designed to separate environmental and intrinsic quenching mechanisms. To determine the best parameter for predicting quiescence, we apply a Random Forest classification analysis for each galaxy class at each epoch. The Random Forest classification determines supermassive black hole mass as the best predictor of quiescence in centrals and high-mass satellites. Alternatively, the quenching of low-mass satellites is best predicted by group halo mass, at all epochs. Additionally, we investigate the evolution in the dependence of the quenched fraction with various parameters, revealing a more complex picture. There is strong evidence for the rejuvenation of star formation from z = 2 to z = 0 in EAGLE, but not in IllustrisTNG. The starkest discrepancy between simulations rests in the mass threshold analysis. While IllustrisTNG predicts the existence of environmentally quenched satellites visible within the survey limits of MOONRISE, EAGLE does not. Hence, MOONRISE will provide critical data that is needed to evaluate current models, and constrain future models, of quenching processes.},
language = {en},
number = {3},
urldate = {2025-11-11},
journal = {Monthly Notices of the Royal Astronomical Society},
author = {Goubert, Paul H and Bluck, Asa F L and Piotrowska, Joanna M and Torrey, Paul and Maiolino, Roberto and Franco, Thomas Pinto and Casimiro, Camilo and Cea, Nicolas},
month = oct,
year = {2025},
keywords = {Explainable},
pages = {2006--2034},
}
@misc{garcia_metallicity_2025,
title = {Metallicity {Gradients} in {Modern} {Cosmological} {Simulations} {II}: {The} {Role} of {Bursty} {Versus} {Smooth} {Feedback} at {High}-{Redshift}},
shorttitle = {Metallicity {Gradients} in {Modern} {Cosmological} {Simulations} {II}},
url = {http://arxiv.org/abs/2510.26877},
doi = {10.48550/arXiv.2510.26877},
abstract = {The distribution of gas-phase metals within galaxies encodes the impact of stellar feedback on galactic evolution. At high-redshift, when galaxies are rapidly assembling, feedback-driven outflows and turbulence can strongly reshape radial metallicity gradients. In this work, we use the FIRE-2, SPICE, Thesan and Thesan Zoom cosmological simulations – spanning a range of stellar feedback from bursty (time-variable) to smooth (steady) – to investigate how these feedback modes shape gas-phase metallicity gradients at 3 {\textless} z ≲ 11. Across all models, we find that galaxies with bursty feedback (FIRE-2, SPICE Bursty, and Thesan Zoom) develop systematically flatter (factors of ∼ 2 − 10) metallicity gradients than those with smooth feedback (SPICE Smooth and Thesan Box), particularly at stellar masses M⋆ {\textgreater} 109 M⊙. These results demonstrate that bursty stellar feedback provides sufficient turbulence to prevent strong negative gradients from forming, while smooth stellar feedback does not generically allow for efficient radial redistribution of metals thereby keeping gradients steep. Finally, we compare with recent observations, finding that the majority – but, notably, not all – of the observed gradients may favor a bursty stellar feedback scenario. In all, these results highlight the utility of high-resolution observations of gas-phase metallicity at high-redshift as a key discriminator of these qualitatively different feedback types.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Garcia, Alex M. and Torrey, Paul and Bhagwat, Aniket and Shen, Xuejian and Vogelsberger, Mark and McClymont, William and Nagarajan-Swenson, Jaya and Ridolfo, Sophia G. and Zhu, Peixin and Zimmerman, Dhruv T. and Zier, Oliver and Biddle, Sarah and Sarkar, Arnab and Chakraborty, Priyanka and Wright, Ruby J. and Grasha, Kathryn and Costa, Tiago and Keating, Laura and Kannan, Rahul and Smith, Aaron and Garaldi, Enrico and Puchwein, Ewald and Ciardi, Benedetta and Hernquist, Lars and Kewley, Lisa J.},
month = nov,
year = {2025},
note = {arXiv:2510.26877 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{garcia_dreams_2025,
title = {The {DREAMS} {Project}: {Disentangling} the {Impact} of {Halo}-to-{Halo} {Variance} and {Baryonic} {Feedback} on {Milky} {Way} {Dark} {Matter} {Density} {Profiles}},
copyright = {arXiv.org perpetual, non-exclusive license},
shorttitle = {The {DREAMS} {Project}},
url = {https://arxiv.org/abs/2512.03132},
doi = {10.48550/ARXIV.2512.03132},
abstract = {Astrophysical searches for dark matter in the Milky Way require a reliable model for its density distribution, which in turn depends on the influence of baryonic feedback on the Galaxy. In this work, we utilize a new suite of Milky Way-mass halos from the DREAMS Project, simulated with Cold Dark Matter (CDM), to quantify the influence of baryon feedback and intrinsic halo-to-halo variance on dark matter density profiles. Our suite of 1024 halos varies over supernova and black hole feedback parameters from the IllustrisTNG model, as well as variations in two cosmological parameters. We find that Milky Way-mass dark matter density profiles in the IllustrisTNG model are largely insensitive to astrophysics and cosmology variations, with the dominant source of scatter instead arising from halo-to-halo variance. However, most of the (comparatively minor) feedback-driven variations come from the changes to supernova prescriptions. By comparing to dark matter-only simulations, we find that the strongest supernova wind energies are so effective at preventing galaxy formation that the halos are nearly entirely collisionless dark matter. Finally, regardless of physics variation, all the DREAMS halos are roughly consistent with a halo contracting adiabatically from the presence of baryons, unlike models that have bursty stellar feedback. This work represents a step toward assessing the robustness of Milky Way dark matter profiles, with direct implications for dark matter searches where systematic uncertainty in the density profile remains a major challenge.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Garcia, Alex M. and Rose, Jonah C. and Torrey, Paul and Caputo, Andrea and Lisanti, Mariangela and Pace, Andrew B. and Liu, Hongwan and Hussein, Abdelaziz and Liu, Haozhe and Villaescusa-Navarro, Francisco and Barry, John and Leisher, Ilem and Costanza, Belén and Kho, Jonathan and Lilie, Ethan and Li, Jiaxuan and Ahvazi, Niusha and Bhowmick, Aklant and Nguyen, Tri and O'Neil, Stephanie and Ou, Xiaowei and Shen, Xuejian and Farahi, Arya and Kallivayalil, Nitya and Necib, Lina and Vogelsberger, Mark},
year = {2025},
note = {Version Number: 1},
keywords = {Astrophysics of Galaxies (astro-ph.GA), Explainable, FOS: Physical sciences},
}
@misc{ferguson_future_2025,
title = {The {Future} of {Artificial} {Intelligence} and the {Mathematical} and {Physical} {Sciences} ({AI}+{MPS})},
url = {http://arxiv.org/abs/2509.02661},
doi = {10.48550/arXiv.2509.02661},
abstract = {This community paper developed out of the NSF Workshop on the Future of Artificial Intelligence (AI) and the Mathematical and Physics Sciences (MPS), which was held in March 2025 with the goal of understanding how the MPS domains (Astronomy, Chemistry, Materials Research, Mathematical Sciences, and Physics) can best capitalize on, and contribute to, the future of AI. We present here a summary and snapshot of the MPS community’s perspective, as of Spring/Summer 2025, in a rapidly developing field. The link between AI and MPS is becoming increasingly inextricable; now is a crucial moment to strengthen the link between AI and Science by pursuing a strategy that proactively and thoughtfully leverages the potential of AI for scientific discovery and optimizes opportunities to impact the development of AI by applying concepts from fundamental science. To achieve this, we propose activities and strategic priorities that: (1) enable AI+MPS research in both directions; (2) build up an interdisciplinary community of AI+MPS researchers; and (3) foster education and workforce development in AI for MPS researchers and students. We conclude with a summary of suggested priorities for funding agencies, educational institutions, and individual researchers to help position the MPS community to be a leader in, and take full advantage of, the transformative potential of AI+MPS.},
language = {en},
urldate = {2025-09-17},
publisher = {arXiv},
author = {Ferguson, Andrew and LaFleur, Marisa and Ruthotto, Lars and Thaler, Jesse and Ting, Yuan-Sen and Tiwary, Pratyush and Villar, Soledad and Alves, E. Paulo and Avigad, Jeremy and Billinge, Simon and Bilodeau, Camille and Brown, Keith and Candes, Emmanuel and Chattopadhyay, Arghya and Cheng, Bingqing and Clausen, Jonathan and Coley, Connor and Connolly, Andrew and Daum, Fred and Dong, Sijia and Du, Chrisy Xiyu and Dvorkin, Cora and Fanelli, Cristiano and Ford, Eric B. and Frutos, Luis Manuel and Trillos, Nicolás García and Garraffo, Cecilia and Ghrist, Robert and Gomez-Bombarelli, Rafael and Guadagni, Gianluca and Guggilam, Sreelekha and Gukov, Sergei and Gutiérrez, Juan B. and Habib, Salman and Hachmann, Johannes and Hanin, Boris and Harris, Philip and Holland, Murray and Holm, Elizabeth and Huang, Hsin-Yuan and Hsu, Shih-Chieh and Jackson, Nick and Isayev, Olexandr and Ji, Heng and Katsaggelos, Aggelos and Kepner, Jeremy and Kevrekidis, Yannis and Kuchera, Michelle and Kutz, J. Nathan and Lalic, Branislava and Lee, Ann and LeBlanc, Matt and Lim, Josiah and Lindsey, Rebecca and Liu, Yongmin and Lu, Peter Y. and Malik, Sudhir and Mandic, Vuk and Manian, Vidya and Mazi, Emeka P. and Mehta, Pankaj and Melchior, Peter and Ménard, Brice and Ngadiuba, Jennifer and Offner, Stella and Olivetti, Elsa and Ong, Shyue Ping and Rackauckas, Christopher and Rigollet, Philippe and Risko, Chad and Romero, Philip and Rotskoff, Grant and Savoie, Brett and Seljak, Uros and Shih, David and Shiu, Gary and Shlyakhtenko, Dima and Silverstein, Eva and Sparks, Taylor and Strohmer, Thomas and Stubbs, Christopher and Thomas, Stephen and Vaikuntanathan, Suriyanarayanan and Vidal, Rene and Villaescusa-Navarro, Francisco and Voth, Gregory and Wandelt, Benjamin and Ward, Rachel and Weber, Melanie and Wechsler, Risa and Whitelam, Stephen and Wiest, Olaf and Williams, Mike and Yang, Zhuoran and Yingling, Yaroslava G. 
and Yu, Bin and Yue, Shuwen and Zabludoff, Ann and Zhao, Huimin and Zhang, Tong},
month = sep,
year = {2025},
note = {arXiv:2509.02661 [cs]},
keywords = {Accelerated, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Condensed Matter - Materials Science, Physics - Data Analysis, Statistics and Probability},
}
@misc{costanza_sensitivity_2025,
title = {On the sensitivity of different galaxy properties to warm dark matter},
url = {http://arxiv.org/abs/2510.05037},
doi = {10.3847/1538-4357/ae0e6c},
abstract = {We study the impact of warm dark matter (WDM) particle mass on galaxy properties using 1,024 state-of-the-art cosmological hydrodynamical simulations from the DREAMS project. We begin by using a Multilayer Perceptron (MLP) coupled with a normalizing flow to explore global statistical descriptors of galaxy populations, such as the mean, standard deviation, and histograms of 14 galaxy properties. We find that subhalo gas mass is the most informative feature for constraining the WDM mass, achieving a determination coefficient of R2 = 0.9. We employ symbolic regression to extract simple, interpretable relations with the WDM particle mass. Finally, we adopt a more localized approach by selecting individual dark matter halos and using a Graph Neural Network (GNN) with a normalizing flow to infer the WDM mass, incorporating subhalo properties as node features and global simulation statistics as graph-level features. The GNN approach yields only a residual improvement over MLP models based solely on global features, indicating that most of the predictive power resides in the global descriptors, with only marginal gains from halo-level information.},
language = {en},
urldate = {2025-10-09},
author = {Costanza, Belén and Wang, Bonny Y. and Villaescusa-Navarro, Francisco and Garcia, Alex M. and Rose, Jonah C. and Vogelsberger, Mark and Torrey, Paul and Farahi, Arya and Shen, Xuejian and Leisher, Ilem},
month = oct,
year = {2025},
note = {arXiv:2510.05037 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable},
}
@article{cao_association_2025,
title = {Association between optically identified galaxy clusters and the underlying dark matter halos},
language = {en},
journal = {PHYS. REV. D},
author = {Cao, Shulei and Wu, Hao-Yi and Costanzi, Matteo and Farahi, Arya and Grandis, Sebastian and Weinberg, David H and Evrard, August E and Rozo, Eduardo and Salcedo, Andres N and To, Chun-Hao and Yang, Lei and Zhou, Conghao},
year = {2025},
keywords = {Explainable},
}
@misc{blank_general_2025,
title = {The {General} {Expiration} {Streaming} {Model}: {Diameter}, \$k\$-{Center}, {Counting}, {Sampling}, and {Friends}},
copyright = {arXiv.org perpetual, non-exclusive license},
shorttitle = {The {General} {Expiration} {Streaming} {Model}},
url = {https://arxiv.org/abs/2509.07587},
doi = {10.48550/ARXIV.2509.07587},
abstract = {An important thread in the study of data-stream algorithms focuses on settings where stream items are active only for a limited time. We introduce a new expiration model, where each item arrives with its own expiration time. The special case where items expire in the order that they arrive, which we call consistent expirations, contains the classical sliding-window model of Datar, Gionis, Indyk, and Motwani [SICOMP 2002] and its timestamp-based variant of Braverman and Ostrovsky [FOCS 2007].},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Blank, Lotte and Cabello, Sergio and Hajiaghayi, MohammadTaghi and Krauthgamer, Robert and Mahabadi, Sepideh and Nusser, André and Phillips, Jeff M. and Sauer, Jonas},
year = {2025},
note = {Version Number: 1},
keywords = {Data Structures and Algorithms (cs.DS), FOS: Computer and information sciences, Observable},
}
@misc{blamart_uv_2025,
title = {{UV} {Luminosity} {Functions} from {HST} and {JWST}: {A} {Possible} {Resolution} to the {High}-{Redshift} {Galaxy} {Abundance} {Puzzle} and {Implications} for {Cosmic} {Strings}},
shorttitle = {{UV} {Luminosity} {Functions} from {HST} and {JWST}},
url = {http://arxiv.org/abs/2512.09980},
doi = {10.48550/arXiv.2512.09980},
abstract = {Recent observations of high redshift galaxies by the James Webb Space Telescope suggest the presence of a bright population of galaxies that is more abundant than predicted by most galaxy formation models. These observations have led to a rethinking of these models, and numerous astrophysical and cosmological solutions have been proposed, including cosmic strings, topological defects that may be remnants of a specific phase transition in the very early moments of the Universe. In this paper, we integrate cosmic strings, a source of nonlinear and non-Gaussian perturbations, into the semi analytical code Zeus21, allowing us to efficiently predict the ultraviolet luminosity function (UVLF). We conduct a precise study of parameter degeneracies between star-formation astrophysics and cosmic-string phenomenology. Our results suggest that cosmic strings can boost the early-galaxy abundance enough to explain the measured UVLFs from the James Webb and Hubble Space Telescopes from redshift z = 4 to z = 17 without modifying the star-formation physics. In addition, we set a new upper bound on the string tension of \$Gμ{\textbackslash}lessapprox 10{\textasciicircum}\{-8\}\$ (\$95{\textbackslash}\%\$ credibility), improving upon previous limits from the cosmic microwave background. Although with current data there is some level of model and prior dependence to this limit, it suggests that UVLFs are a promising avenue for future observational constraints on cosmic-string physics.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Blamart, Mattéo and Liu, Adrian and Brandenberger, Robert and Muñoz, Julian B. and Cyr, Bryce},
month = dec,
year = {2025},
note = {arXiv:2512.09980 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Astrophysics - Cosmology and Nongalactic Astrophysics, Explainable, General Relativity and Quantum Cosmology, High Energy Physics - Phenomenology, High Energy Physics - Theory},
}
@misc{bhowmick_heavy_2025,
title = {Heavy seeds and the first black holes: {Insights} from the {BRAHMA} simulations},
shorttitle = {Heavy seeds and the first black holes},
url = {http://arxiv.org/abs/2510.01322},
doi = {10.48550/arXiv.2510.01322},
abstract = {From the luminous quasars at z ∼ 6 to the recent z ∼ 9–11 AGNs revealed by JWST, observations of the earliest black hole (BH) populations can provide unique constraints on BH formation and growth models. We use the BRAHMA simulations with constrained initial conditions to investigate BH assembly in extreme overdense regions. The simulations implement heavy ∼ 104–105 M⊙ seeds forming in dense, metal-poor gas exposed to sufficient Lyman–Werner flux. With gas accretion modeled via Bondi–Hoyle formalism and BH dynamics and mergers with a subgrid dynamical friction scheme, we isolate the impact of seeding, dynamics, accretion, and feedback on early BH growth. With fiducial stellar and AGN feedback inherited from IllustrisTNG, accretion is strongly suppressed at z ≳ 9, leaving mergers as the dominant growth channel. Gas accretion dominates at z ≲ 9, where permissive models (superEddington or low radiative efficiency) build ∼ 109 M⊙ BHs powering quasars by z ∼ 6, while stricter IllustrisTNG-based prescriptions yield much lower BH masses (∼ 106–108 M⊙). Our seed models strongly affect merger-driven growth at z ≳ 9: only the most lenient models (with ∼ 105 M⊙ seeds) produce enough BH mergers to reach ≳ 106 M⊙ by z ∼ 10, consistent with current estimates for GNz11. Our dynamical friction model gives low merger efficiencies, hindering the buildup of ≳ 107 M⊙ BHs by z ∼ 9–10, as currently inferred for GHZ9, UHZ1, and CAPERS-LRD-z9. If the BH-to-stellar mass ratios of these sources are indeed as extreme as currently inferred, they would require either very short BH merger timescales or reduced AGN thermal feedback. Weaker stellar feedback boosts both star formation and BH accretion and cannot raise these ratios.},
language = {en},
urldate = {2025-10-09},
publisher = {arXiv},
author = {Bhowmick, Aklant K. and Blecha, Laura and Torrey, Paul and Kelley, Luke Zoltan and Natarajan, Priyamvada and Somerville, Rachel S. and Weinberger, Rainer and Garcia, Alex M. and Hernquist, Lars and Di Matteo, Tiziana and Kho, Jonathan and Vogelsberger, Mark},
month = oct,
year = {2025},
note = {arXiv:2510.01322 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{bhowmick_dynamics_2025,
title = {Dynamics of low-mass black hole seeds in the {BRAHMA} simulations using subgrid-dynamical friction: {Impact} on merger-driven black hole growth in the high redshift {Universe}},
shorttitle = {Dynamics of low-mass black hole seeds in the {BRAHMA} simulations using subgrid-dynamical friction},
url = {http://arxiv.org/abs/2506.09184},
doi = {10.48550/arXiv.2506.09184},
abstract = {We analyze the dynamics of low-mass black hole (BH) seeds in the high-redshift (z ≳ 5) Universe using a suite of [4.5 Mpc]3 and [9 Mpc]3 BRAHMA cosmological hydrodynamic simulations. The simulations form seeds with mass Mseed = 2.2 × 103 M⊙ in halos that exceed critical thresholds of dense \& metal-poor gas mass (5−150 Mseed) and the halo mass (1000−10000 Mseed). While the initial BRAHMA boxes pinned the BHs to the halo centers, here we implement a sub-grid dynamical friction (DF) model. We also compare simulations where the BH is allowed to wander without the added DF. We investigate the spatial and velocity offsets of BHs in their host subhalos, as well as BH merger rates. We find that subgrid DF is crucial to ensure that a significant fraction of BHs effectively sink to halo centers by z ∼ 5, thereby enabling them to get gravitationally bound and merge with other BHs at separations close to the spatial resolution (∼ 0.2 − 0.4 kpc) of the simulation. For the BHs that merge, the associated merger time scales lag between ∼ 100 − 1000 Myr after their host halos merge. Compared to predictions using BH repositioning, the overall z ≳ 5 BH merger rates under subgrid DF decrease by a factor of ∼ 4 − 10. Under subgrid DF, the different seed models predict merger rates between ∼ 100 − 1000 events per year at z ≳ 5. These mergers dominate early BH growth, assembling BHs up to ∼ 104 − 105 M⊙ by z ∼ 5, wherein ≲ 2 \% of their mass is assembled via gas accretion. Our results highlight the promise for constraining seeding mechanisms using gravitational waves from future facilities such as the Laser Interferometer Space Antenna.},
language = {en},
urldate = {2025-11-11},
publisher = {arXiv},
author = {Bhowmick, Aklant K. and Blecha, Laura and Kelley, Luke Z. and Sivasankaran, Aneesh and Torrey, Paul and Weinberger, Rainer and Chen, Nianyi and Vogelsberger, Mark and Hernquist, Lars and Natarajan, Priyamvada},
month = jun,
year = {2025},
note = {arXiv:2506.09184 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Explainable},
}
@misc{baer-way_first_2025,
title = {The first radio view of a type {Ibn} supernova in {SN} 2023fyq: {Understanding} the mass-loss history in the last decade before the explosion},
shorttitle = {The first radio view of a type {Ibn} supernova in {SN} 2023fyq},
url = {http://arxiv.org/abs/2509.07080},
doi = {10.48550/arXiv.2509.07080},
abstract = {Supernovae that interact with hydrogen-poor, helium-rich circumstellar material (CSM), known as Type Ibn supernovae (SNe Ibn), present a unique opportunity to probe mass-loss processes in massive stars. In this work, we report the first radio detection of a SN Ibn, SN 2023fyq, and characterize the mass-loss history of its stellar progenitor using the radio and X-ray observations obtained over 18 months post-explosion. We find that the radio emission from 58–185 days is best modeled by synchrotron radiation attenuated by free-free absorption from a CSM of density ∼ 10−18 g/cm3 (∼ 106ρISM) at a radius of 1016 cm, corresponding to a mass-loss rate of ∼ 4 × 10−3 M⊙ yr−1 (for a wind velocity of 1700 km/s from optical spectroscopy) from 0.7 to 3 years before the explosion. This timescale is consistent with the time frame over which pre-explosion optical outbursts were observed. However, our late-time observations at 525 days post-explosion yield non-detections, and the 3σ upper limits (along with an X-ray non-detection) allow us to infer lower-density CSM at 2 × 1016 cm with ˙M {\textless} 2.5 × 10−3 M⊙ yr−1. These results suggest a shell-like CSM from at most 4 × 1015 to 2 × 1016 cm (∼ 105R⊙) with an elevated CSM density (0.004 M⊙ yr−1) that is roughly consistent with predictions from a merger model for this object. Future radio observations of a larger sample of SNe Ibn will provide key details on the extent and density of their helium-rich CSM.},
language = {en},
urldate = {2025-09-18},
publisher = {arXiv},
author = {Baer-Way, Raphael and Nayana, A. J. and Jacobson-Galan, Wynn and Chandra, Poonam and Modjaz, Maryam and Wu, Samantha C. and Tsuna, Daichi and Margutti, Raffaella and Chornock, Ryan and Pellegrino, Craig and Dong, Yize and Drout, Maria R. and Kilpatrick, Charles D. and Milisavljevic, Dan and Patnaude, Daniel and Stauffer, Candice},
month = sep,
year = {2025},
note = {arXiv:2509.07080 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Astrophysics - High Energy Astrophysical Phenomena, Astrophysics - Solar and Stellar Astrophysics, Explorable},
}
@article{anderson_robust_2025,
title = {Robust {High}-{Dimensional} {Mean} {Estimation} {With} {Low} {Data} {Size}, an {Empirical} {Study}},
volume = {02},
abstract = {Robust statistics aims to compute quantities to represent data where a fraction of it may be arbitrarily corrupted. The most essential statistic is the mean, and in recent years, there has been a flurry of theoretical advancement for efficiently estimating the mean in high dimensions on corrupted data. While several algorithms have been proposed that achieve near-optimal error, they all rely on large data size requirements as a function of dimension. In this paper, we perform an extensive experimentation over various mean estimation techniques where data size might not meet this requirement due to the high-dimensional setting.},
language = {en},
journal = {Transactions on Machine Learning Research},
author = {Anderson, Cullen and Phillips, Jeff M.},
month = feb,
year = {2025},
keywords = {Observable},
}
@misc{andersen_app_2025,
title = {{APP}: {Accelerated} {Path} {Patching} with {Task}-{Specific} {Pruning}},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {{APP}},
url = {https://arxiv.org/abs/2511.05442},
doi = {10.48550/ARXIV.2511.05442},
abstract = {Circuit discovery is a key step in many mechanistic interpretability pipelines. Current methods, such as Path Patching, are computationally expensive and have limited in-depth circuit analysis for smaller models. In this study, we propose Accelerated Path Patching (APP), a hybrid approach leveraging our novel contrastive attention head pruning method to drastically reduce the search space of circuit discovery methods. Our Contrastive-FLAP pruning algorithm uses techniques from causal mediation analysis to assign higher pruning scores to task-specific attention heads, leading to higher performing sparse models compared to traditional pruning techniques. Although Contrastive-FLAP is successful at preserving task-specific heads that existing pruning algorithms remove at low sparsity ratios, the circuits found by Contrastive-FLAP alone are too large to satisfy the minimality constraint required in circuit analysis. APP first applies Contrastive-FLAP to reduce the search space required for circuit discovery algorithms by, on average, 56{\textbackslash}\%. Next, APP applies traditional Path Patching on the remaining attention heads, leading to a speed up of 59.63{\textbackslash}\%-93.27{\textbackslash}\% compared to Path Patching applied to the dense model. Despite the substantial computational saving that APP provides, circuits obtained from APP exhibit substantial overlap and similar performance to previously established Path Patching circuits.},
language = {en},
urldate = {2025-12-18},
publisher = {arXiv},
author = {Andersen, Frauke and Rudman, William and Zhang, Ruochen and Eickhoff, Carsten},
year = {2025},
note = {Version Number: 1},
keywords = {68Uxx, Artificial Intelligence (cs.AI), Computation and Language (cs.CL), Explainable, FOS: Computer and information sciences, I.2.7; I.2.6; I.2.m, Machine Learning (cs.LG)},
}
@article{aljamal_mass_2025,
title = {Mass {Proxy} {Quality} of {Massive} {Halo} {Properties} in the {IllustrisTNG} and {FLAMINGO} {Simulations}: {I}. {Hot} {Gas}},
volume = {544},
url = {https://doi.org/10.1093/mnras/staf1665},
abstract = {We examine scale and redshift dependence of mass-property relations (MPRs) for five hot gas properties of two large group- and cluster-scale halo samples realized by the IllustrisTNG, TNG-Cluster and FLAMINGO cosmological hydrodynamical simulations. For intrinsic properties of i) hot gas mass (𝑀gas), ii) spectroscopic-like temperature (𝑇sl), iii) soft-band X-ray luminosity (𝐿X), and iv) X-ray (𝑌X) and v) Sunyaev-Zel’dovich (𝑌SZ) thermal energies, we use MPR parameters to infer mass proxy quality (MPQ) — the implied scatter in total halo mass conditioned on a property — for halos with 𝑀500c ≥ 1013 M⊙ at redshifts, 𝑧 ∈ \{0, 0.5, 1, 2\}. We find: (1) in general, scaling relation slopes and covariance display moderate to strong dependence on halo mass, with redshift dependence secondary; (2) for halos with 𝑀500c {\textgreater} 1014 M⊙, scalings of 𝑀gas and 𝑌SZ simplify toward self-similar slope and constant intrinsic scatter (5 and 10\%, respectively) nearly independent of scale, making both measures ideal for cluster finding and characterization to 𝑧 = 2; (3) halo mass-conditioned likelihoods of hot gas mass and thermal energy closely follow a log-normal form; (4) despite normalization differences up to 0.4 dex between the two simulations, higher-order scaling features such as slopes and property covariance show much better agreement. Slopes show appreciable redshift dependence at the group scale, while redshift dependence of the scatter is exhibited by low mass FLAMINGO halos only; (5) property correlations are largely consistent between the simulations, with values that mainly agree with existing empirical measurements. We close with a literature survey placing our MPR slopes and intrinsic scatter estimates into community context.},
language = {en},
number = {1},
journal = {Monthly Notices of the Royal Astronomical Society},
publisher = {Oxford University Press on behalf of Royal Astronomical Society},
author = {Aljamal, Eddie and Evrard, August E and Farahi, Arya and Pillepich, Annalisa and Nelson, Dylan and Schaye, Joop and Schaller, Matthieu and Braspenning, Joey},
month = oct,
year = {2025},
keywords = {Explainable},
pages = {67--94},
}
@misc{shi_argumentative_2025,
title = {Argumentative {Experience}: {Reducing} {Confirmation} {Bias} on {Controversial} {Issues} through {LLM}-{Generated} {Multi}-{Persona} {Debates}},
doi = {10.1145/nnnnnnn.nnnnnnn},
language = {en},
publisher = {ACM},
author = {Shi, Li and Liu, Houjiang and Wong, Yian and Mujumdar, Utkarsh and Zhang, Dan and Gwizdka, Jacek and Lease, Matthew},
month = may,
year = {2025},
keywords = {Explorable},
}
@misc{yang_smalltolarge_2024,
title = {{SmallToLarge} ({S2L}): {Scalable} {Data} {Selection} for {Fine}-tuning {Large} {Language} {Models} by {Summarizing} {Training} {Trajectories} of {Small} {Models}},
shorttitle = {{SmallToLarge} ({S2L})},
url = {http://arxiv.org/abs/2403.07384},
doi = {10.48550/arXiv.2403.07384},
abstract = {Despite the effectiveness of data selection for large language models (LLMs) during pretraining and instruction fine-tuning phases, improving data efficiency in supervised fine-tuning (SFT) for specialized domains poses significant challenges due to the complexity of fine-tuning data. To bridge this gap, we introduce an effective and scalable data selection method for SFT, SmallToLarge (S2L), which leverages training trajectories from small models to guide the data selection for larger models. We demonstrate through extensive experiments that S2L significantly improves data efficiency in SFT for mathematical problem-solving, reducing the training data to just 11\% of the original MathInstruct dataset (Yue et al., 2023) to match full dataset performance while outperforming state-of-the-art data selection algorithms by an average of 4.7\% across 6 in- and out-domain evaluation datasets. Remarkably, selecting only 50K data for SFT, S2L achieves a 32.7\% accuracy on the most challenging MATH (Hendrycks et al., 2021) benchmark, improving Phi-2 (Li et al., 2023b) by 16.6\%. In clinical text summarization on the MIMIC-III dataset (Johnson et al., 2016), S2L again outperforms training on the full dataset using only 50\% of the data. Notably, S2L can perform data selection using a reference model 40x smaller than the target model, proportionally reducing the cost of data selection.},
language = {en},
urldate = {2025-05-25},
publisher = {arXiv},
author = {Yang, Yu and Mishra, Siddhartha and Chiang, Jeffrey N. and Mirzasoleiman, Baharan},
month = dec,
year = {2024},
note = {arXiv:2403.07384 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning, Explorable},
}
@inproceedings{nguyen_changing_2024,
address = {Vancouver Convention Center},
title = {Changing the {Training} {Data} {Distribution} to {Reduce} {Simplicity} {Bias} {Improves} {In}-distribution {Generalization}},
abstract = {Can we modify the training data distribution to encourage the underlying optimization method toward finding solutions with superior generalization performance on in-distribution data? In this work, we approach this question for the first time by comparing the inductive bias of gradient descent (GD) with that of sharpness-aware minimization (SAM). By studying a two-layer CNN, we rigorously prove that SAM learns different features more uniformly, particularly in early epochs. That is, SAM is less susceptible to simplicity bias compared to GD. We also show that examples containing features that are learned early are separable from the rest based on the model’s output. Based on this observation, we propose a method that (i) clusters examples based on the network output early in training, (ii) identifies a cluster of examples with similar network output, and (iii) upsamples the rest of examples only once to alleviate the simplicity bias. We show empirically that USEFUL effectively improves the generalization performance on the original data distribution when training with various gradient methods, including (S)GD and SAM. Notably, we demonstrate that our method can be combined with SAM variants and existing data augmentation strategies to achieve, to the best of our knowledge, state-of-the-art performance for training ResNet18 on CIFAR10, STL10, CINIC10, Tiny-ImageNet; ResNet34 on CIFAR100; and VGG19 and DenseNet121 on CIFAR10.},
language = {en},
author = {Nguyen, Tuan Hai Dang and Haddad, Paymon and Gan, Eric and Mirzasoleiman, Baharan},
year = {2024},
keywords = {Explorable},
}
@misc{nguyen_how_2024,
title = {How {DREAMS} are made: {Emulating} {Satellite} {Galaxy} and {Subhalo} {Populations} with {Diffusion} {Models} and {Point} {Clouds}},
shorttitle = {How {DREAMS} are made},
url = {http://arxiv.org/abs/2409.02980},
doi = {10.48550/arXiv.2409.02980},
abstract = {The connection between galaxies and their host dark matter (DM) halos is critical to our understanding of cosmology, galaxy formation, and DM physics. To maximize the return of upcoming cosmological surveys, we need an accurate way to model this complex relationship. Many techniques have been developed to model this connection, from Halo Occupation Distribution (HOD) to empirical and semi-analytic models to hydrodynamic. Hydrodynamic simulations can incorporate more detailed astrophysical processes but are computationally expensive; HODs, on the other hand, are computationally cheap but have limited accuracy. In this work, we present NeHOD, a generative framework based on variational diffusion model and Transformer, for painting galaxies/subhalos on top of DM with an accuracy of hydrodynamic simulations but at a computational cost similar to HOD. By modeling galaxies/subhalos as point clouds, instead of binning or voxelization, we can resolve small spatial scales down to the resolution of the simulations. For each halo, NeHOD predicts the positions, velocities, masses, and concentrations of its central and satellite galaxies. We train NeHOD on the TNG-Warm DM suite of the DREAMS project, which consists of 1024 high-resolution zoom-in hydrodynamic simulations of Milky Way-mass halos with varying warm DM mass and astrophysical parameters. We show that our model captures the complex relationships between subhalo properties as a function of the simulation parameters, including the mass functions, stellar-halo mass relations, concentration-mass relations, and spatial clustering. Our method can be used for a large variety of downstream applications, from galaxy clustering to strong lensing studies.},
language = {en},
urldate = {2025-05-28},
publisher = {arXiv},
author = {Nguyen, Tri and Villaescusa-Navarro, Francisco and Mishra-Sharma, Siddharth and Cuesta-Lazaro, Carolina and Torrey, Paul and Farahi, Arya and Garcia, Alex M. and Rose, Jonah C. and O'Neil, Stephanie and Vogelsberger, Mark and Shen, Xuejian and Roche, Cian and Anglés-Alcázar, Daniel and Kallivayalil, Nitya and Muñoz, Julian B. and Cyr-Racine, Francis-Yan and Roy, Sandip and Necib, Lina and Kollmann, Kassidy E.},
month = sep,
year = {2024},
note = {arXiv:2409.02980 [astro-ph]},
keywords = {Astrophysics - Astrophysics of Galaxies, Astrophysics - Cosmology and Nongalactic Astrophysics, Computer Science - Machine Learning, Explainable},
}