2025 (7)

The Value of Sensory Information to a Robot. Krishna, A., Hu, E. S., & Jayaraman, D. ICLR, 2025.

@article{krishna2025vosi,
  title = {The Value of Sensory Information to a Robot},
  author = {Arjun Krishna and Edward S. Hu and Dinesh Jayaraman},
  abstract = {A decision-making agent, such as a robot, must observe and react to any new task-relevant information that becomes available from its environment. We seek to study a fundamental scientific question: what value does sensory information hold to an agent at various moments in time during the execution of a task? Towards this, we empirically study agents of varying architectures, generated with varying policy synthesis approaches (imitation, RL, model-based control), on diverse robotics tasks. For each robotic agent, we characterize its regret in terms of performance degradation when state observations are withheld from it at various task states for varying lengths of time. We find that sensory information is surprisingly rarely task-critical in many commonly studied task setups. Task characteristics such as stochastic dynamics largely dictate the value of sensory information for a well-trained robot; policy architectures such as planning vs. reactive control generate more nuanced second-order effects. Further, sensing efficiency is curiously correlated with task proficiency: in particular, fully trained high-performing agents are more robust to sensor loss than novice agents early in their training. Overall, our findings characterize the tradeoffs between sensory information and task performance in practical sequential decision making tasks, and pave the way towards the design of more resource-efficient decision-making agents.},
  journal = {ICLR},
  year = {2025}
}

Vision Language Models are In-Context Value Learners. Ma, Y. J., Hejna, J., Fu, C., Shah, D., Liang, J., Xu, Z., Kirmani, S., Xu, P., Driess, D., Xiao, T., Bastani, O., Jayaraman, D., Yu, W., Zhang, T., Sadigh, D., & Xia, F. ICLR, 2025.

@article{ma2025gvl,
  title = {Vision Language Models are In-Context Value Learners},
  author = {Yecheng Jason Ma and Joey Hejna and Chuyuan Fu and Dhruv Shah and Jacky Liang and Zhuo Xu and Sean Kirmani and Peng Xu and Danny Driess and Ted Xiao and Osbert Bastani and Dinesh Jayaraman and Wenhao Yu and Tingnan Zhang and Dorsa Sadigh and Fei Xia},
  abstract = {Predicting temporal progress from visual trajectories is important for intelligent robots that can learn, adapt, and improve. However, learning such progress estimator, or temporal value function, across different tasks and domains requires both a large amount of diverse data and methods which can scale and generalize. To address these challenges, we present Generative Value Learning (GVL), a universal value function estimator that leverages the world knowledge embedded in vision-language models (VLMs) to predict task progress. Naively asking a VLM to predict values for a video sequence performs poorly due to the strong temporal correlation between successive frames. Instead, GVL poses value estimation as a temporal ordering problem over shuffled video frames; this seemingly more challenging task encourages VLMs to more fully exploit their underlying semantic and temporal grounding capabilities to differentiate frames based on their perceived task progress, consequently producing significantly better value predictions. Without any robot or task specific training, GVL can in-context zero-shot and few-shot predict effective values for more than 300 distinct real-world tasks across diverse robot platforms, including challenging bimanual manipulation tasks. Furthermore, we demonstrate that GVL permits flexible multi-modal in-context learning via examples from heterogeneous tasks and embodiments, such as human videos. The generality of GVL enables various downstream applications pertinent to visuomotor policy learning, including dataset filtering, success detection, and value-weighted regression -- all without any model training or finetuning.},
  journal = {ICLR},
  year = {2025}
}

Learning to Achieve Goals with Belief State Transformers. Hu, E. S., Ahn, K., Liu, Q., Xu, H., Tomar, M., Langford, A., Jayaraman, D., Lamb, A., & Langford, J. ICLR, 2025.

@article{hu2025belief,
  title = {Learning to Achieve Goals with Belief State Transformers},
  author = {Edward S. Hu and Kwangjun Ahn and Qinghua Liu and Haoran Xu and Manan Tomar and Ada Langford and Dinesh Jayaraman and Alex Lamb and John Langford},
  abstract = {We introduce the "Belief State Transformer", a next-token predictor that takes both a prefix and suffix as inputs, with a novel objective of predicting both the next token for the prefix and the previous token for the suffix. The Belief State Transformer effectively learns to solve challenging problems that conventional forward-only transformers struggle with, in a domain-independent fashion. Key to this success is learning a compact belief state that captures all relevant information necessary for accurate predictions. Empirical ablations show that each component of the model is essential in difficult scenarios where standard Transformers fall short. For the task of story writing with known prefixes and suffixes, our approach outperforms the Fill-in-the-Middle method for reaching known goals and demonstrates improved performance even when the goals are unknown. Altogether, the Belief State Transformer enables more efficient goal-conditioned decoding, better test-time inference, and high-quality text representations on small scale problems.},
  journal = {ICLR},
  year = {2025}
}

Articulate-Anything: Automatic Modeling of Articulated Objects via a Vision-Language Foundation Model. Le, L., Xie, J., Liang, W., Wang, H., Yang, Y., Ma, Y. J., Vedder, K., Krishna, A., Jayaraman, D., & Eaton, E. ICLR, 2025.

@article{le2025articulate,
  title = {Articulate-Anything: Automatic Modeling of Articulated Objects via a Vision-Language Foundation Model},
  author = {Long Le and Jason Xie and William Liang and Hung-Ju Wang and Yue Yang and Yecheng Jason Ma and Kyle Vedder and Arjun Krishna and Dinesh Jayaraman and Eric Eaton},
  abstract = {Interactive 3D simulated objects are crucial in AR/VR, animations, and robotics, driving immersive experiences and advanced automation. However, creating these articulated objects requires extensive human effort and expertise, limiting their broader applications. To overcome this challenge, we present Articulate-Anything, a system that automates the articulation of diverse, complex objects from many input modalities, including text, images, and videos. Articulate-Anything leverages vision-language models (VLMs) to generate code that can be compiled into an interactable digital twin for use in standard 3D simulators. Our system exploits existing 3D asset datasets via a mesh retrieval mechanism, along with an actor-critic system that iteratively proposes, evaluates, and refines solutions for articulating the objects, self-correcting errors to achieve a robust outcome. Qualitative evaluations demonstrate Articulate-Anything's capability to articulate complex and even ambiguous object affordances by leveraging rich grounded inputs. In extensive quantitative experiments on the standard PartNet-Mobility dataset, Articulate-Anything substantially outperforms prior work, increasing the success rate from 8.7-11.6% to 75% and setting a new bar for state-of-art performance. We further showcase the utility of our generated assets by using them to train robotic policies for fine-grained manipulation tasks that go beyond basic pick and place.},
  journal = {ICLR},
  year = {2025}
}

REGENT: A Retrieval-Augmented Generalist Agent That Can Act In-Context in New Environments. Sridhar, K., Dutta, S., Jayaraman, D., & Lee, I. ICLR, 2025.

@article{sridhar2025regent,
  title = {REGENT: A Retrieval-Augmented Generalist Agent That Can Act In-Context in New Environments},
  author = {Kaustubh Sridhar and Souradeep Dutta and Dinesh Jayaraman and Insup Lee},
  abstract = {Do generalist agents require large models pre-trained on massive amounts of data to rapidly adapt to new environments? We propose a novel approach to pre-train relatively small models and adapt them to unseen environments via in-context learning, without any finetuning. Our key idea is that retrieval offers a powerful bias for fast adaptation. Indeed, we demonstrate that even a simple retrieval-based 1-nearest neighbor agent offers a surprisingly strong baseline for today's state-of-the-art generalist agents. From this starting point, we construct a semi-parametric agent, REGENT, that trains a transformer-based policy on sequences of queries and retrieved neighbors. REGENT can generalize to unseen robotics and game-playing environments via retrieval augmentation and in-context learning, achieving this with up to 3x fewer parameters and up to an order-of-magnitude fewer pre-training datapoints, significantly outperforming today's state-of-the-art generalist agents.},
  journal = {ICLR},
  year = {2025}
}

ZeroMimic: Distilling Robotic Manipulation Skills from Web Videos. Zhao*, Z., Shi*, J., Wang, T., Ma, J., & Jayaraman, D. ICRA, 2025.

@article{shi2025zeromimic,
  title = {ZeroMimic: Distilling Robotic Manipulation Skills from Web Videos},
  author = {Zhuolun Zhao* and Junyao Shi* and Tianyou Wang and Jason Ma and Dinesh Jayaraman},
  abstract = {Many recent advances in robotic manipulation have come through imitation learning, yet these rely largely on mimicking a particularly hard-to-acquire form of demonstrations: those collected on the same robot in the same room with the same objects as the trained policy must handle at test time. In contrast, large pre-recorded human video datasets demonstrating manipulation skills in-the-wild already exist, which contain valuable information for robots. Is it possible to distill a repository of useful robotic skill policies out of such data without any additional requirements on robot-specific demonstrations or exploration? We present the first such system ZeroMimic, that generates immediately deployable image goal-conditioned skill policies for several common categories of manipulation tasks (opening, closing, pouring, pick&place, cutting, and stirring) each capable of acting upon diverse objects and across diverse unseen task setups. ZeroMimic is carefully designed to exploit recent advances in semantic and geometric visual understanding of human videos, together with modern grasp affordance detectors and imitation policy classes. After training ZeroMimic on the popular EpicKitchens dataset of egocentric human videos, we evaluate its out-of-the-box performance in varied kitchen settings, demonstrating its impressive abilities to handle these varied tasks. To enable plug-and-play reuse of ZeroMimic policies on other task setups and robots, we will release software and policy checkpoints for all skills.},
  journal = {ICRA},
  year = {2025}
}

Leveraging Symmetry to Accelerate Learning of Trajectory Tracking Controllers for Free-Flying Robotic Systems. Welde*, J., Rao*, N., Kunapuli*, P., Jayaraman, D., & Kumar, V. ICRA, 2025.

@article{welde2025symmetry,
  title = {Leveraging Symmetry to Accelerate Learning of Trajectory Tracking Controllers for Free-Flying Robotic Systems},
  author = {Jake Welde* and Nishanth Rao* and Pratik Kunapuli* and Dinesh Jayaraman and Vijay Kumar},
  abstract = {Tracking controllers enable robotic systems to accurately follow planned reference trajectories. In particular, reinforcement learning (RL) has shown promise in the synthesis of controllers for systems with complex dynamics and modest online compute budgets. However, the poor sample efficiency of RL and the challenges of reward design make training slow and sometimes unstable, especially for high-dimensional systems. In this work, we leverage the inherent Lie group symmetries of robotic systems with a floating base to mitigate these challenges when learning tracking controllers. We model a general tracking problem as a Markov decision process (MDP) that captures the evolution of both the physical and reference states. Next, we prove that symmetry in the underlying dynamics and running costs leads to an MDP homomorphism, a mapping that allows a policy trained on a lower-dimensional “quotient” MDP to be lifted to an optimal tracking controller for the original system. We compare this symmetry-informed approach to an unstructured baseline, using Proximal Policy Optimization (PPO) to learn tracking controllers for three systems: the Particle (a forced point mass), the Astrobee (a fully-actuated space robot), and the Quadrotor (an underactuated system). Results show that a symmetry-aware approach both accelerates training and reduces tracking error after the same number of training steps.},
  journal = {ICRA},
  year = {2025}
}

2024 (17)

Environment Curriculum Generation via Large Language Models. Liang, W., Wang, S., Wang, H., Bastani, O., Jayaraman*, D., & Ma*, Y. J. CORL, 2024.

@article{liang2024eurekaverse,
  title = {Environment Curriculum Generation via Large Language Models},
  author = {William Liang and Sam Wang and Hungju Wang and Osbert Bastani and Dinesh Jayaraman* and Yecheng Jason Ma*},
  abstract = {Recent work has demonstrated that a promising strategy for teaching robots a wide range of complex skills is by training them on a curriculum of progressively more challenging environments. However, developing an effective curriculum of environment distributions currently requires significant expertise, which must be repeated for every new domain. Our key insight is that environments are often naturally represented as code. Thus, we probe whether effective environment curriculum design can be achieved and automated via code generation by large language models (LLM). In this paper, we introduce Eurekaverse, an unsupervised environment design algorithm that uses LLMs to sample progressively more challenging, diverse, and learnable environments for skill training. We validate Eurekaverse's effectiveness in the domain of quadrupedal parkour learning, in which a quadruped robot must traverse through a variety of obstacle courses. The automatic curriculum designed by Eurekaverse enables gradual learning of complex parkour skills in simulation and can successfully transfer to the real-world, outperforming manual training courses designed by humans.},
  journal = {CORL},
  year = {2024},
  url = {https://eureka-research.github.io/eurekaverse/}
}

Task-Oriented Hierarchical Object Decomposition for Visuomotor Control. Qian, J., Bucher, B., & Jayaraman, D. CORL, 2024.

@article{qian2024hodor,
  title = {Task-Oriented Hierarchical Object Decomposition for Visuomotor Control},
  author = {Jianing Qian and Bernadette Bucher and Dinesh Jayaraman},
  abstract = {Good pre-trained visual representations could enable robots to learn visuomotor policy efficiently. Still, existing representations take a one-size-fits-all-tasks approach that comes with two important drawbacks: (1) Being completely task-agnostic, these representations cannot effectively ignore any task-irrelevant information in the scene, and (2) They often lack the representational capacity to handle unconstrained/complex real-world scenes. Instead, we propose to train a large combinatorial family of representations organized by scene entities: objects and object parts. This \underline{h}ierarchical \underline{o}bject \underline{d}ecomposition for task-\underline{o}riented \underline{r}epresentations (\methodname) permits selectively assembling different representations specific to each task while scaling in representational capacity with the complexity of the scene and the task. In our experiments, we find that \methodname outperforms prior pre-trained representations, both scene vector representations and object-centric representations, for sample-efficient imitation learning across 5 simulated and 5 real-world manipulation tasks. We further find that the invariances captured in \methodname are inherited into downstream policies, which can robustly generalize to out-of-distribution test conditions, permitting zero-shot skill chaining. Appendix and videos: https://sites.google.com/view/hodor-corl24.},
  journal = {CORL},
  year = {2024},
  url = {https://sites.google.com/view/hodor-corl24}
}

Open X-Embodiment: Robotic Learning Datasets and RT-X Models. Large collaboration. ICRA, 2024.

@article{open_x_embodiment_rt_x_2024,
  title = {Open {X-E}mbodiment: Robotic Learning Datasets and {RT-X} Models},
  author = {Large collaboration},
  journal = {ICRA},
  year = {2024},
  url = {https://robotics-transformer-x.github.io/}
}

DrEureka: Language Model Guided Sim-To-Real Transfer. Ma, Y. J., Liang, W., Wang, H., Wang, S., Zhu, Y., Fan, L., Bastani, O., & Jayaraman, D. RSS, 2024.

@article{ma2024dreureka,
  title = {DrEureka: Language Model Guided Sim-To-Real Transfer},
  author = {Yecheng Jason Ma and William Liang and Hungju Wang and Sam Wang and Yuke Zhu and Linxi Fan and Osbert Bastani and Dinesh Jayaraman},
  journal = {RSS},
  year = {2024}
}

DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset. Khazatsky, A., Pertsch, K., Nair, S., et al. RSS, 2024.

@article{khazatsky2024droid,
  title = {DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset},
  author = {Alexander Khazatsky and Karl Pertsch and Suraj Nair and Ashwin Balakrishna and Sudeep Dasari and Siddharth Karamcheti and Soroush Nasiriany and Mohan Kumar Srirama and Lawrence Yunliang Chen and Kirsty Ellis and Peter David Fagan and Joey Hejna and Masha Itkina and Marion Lepert and Yecheng Jason Ma and Patrick Tree Miller and Jimmy Wu and Suneel Belkhale and Shivin Dass and Huy Ha and Arhan Jain and Abraham Lee and Youngwoon Lee and Marius Memmel and Sungjae Park and Ilija Radosavovic and Kaiyuan Wang and Albert Zhan and Kevin Black and Cheng Chi and Kyle Beltran Hatch and Shan Lin and Jingpei Lu and Jean Mercat and Abdul Rehman and Pannag R Sanketi and Archit Sharma and Cody Simpson and Quan Vuong and Homer Rich Walke and Blake Wulfe and Ted Xiao and Jonathan Heewon Yang and Arefeh Yavary and Tony Z. Zhao and Christopher Agia and Rohan Baijal and Mateo Guaman Castro and Daphne Chen and Qiuyu Chen and Trinity Chung and Jaimyn Drake and Ethan Paul Foster and Jensen Gao and David Antonio Herrera and Minho Heo and Kyle Hsu and Jiaheng Hu and Donovon Jackson and Charlotte Le and Yunshuang Li and Kevin Lin and Roy Lin and Zehan Ma and Abhiram Maddukuri and Suvir Mirchandani and Daniel Morton and Tony Nguyen and Abigail O'Neill and Rosario Scalise and Derick Seale and Victor Son and Stephen Tian and Emi Tran and Andrew E. Wang and Yilin Wu and Annie Xie and Jingyun Yang and Patrick Yin and Yunchu Zhang and Osbert Bastani and Glen Berseth and Jeannette Bohg and Ken Goldberg and Abhinav Gupta and Abhishek Gupta and Dinesh Jayaraman and Joseph J Lim and Jitendra Malik and Roberto Martín-Martín and Subramanian Ramamoorthy and Dorsa Sadigh and Shuran Song and Jiajun Wu and Michael C. Yip and Yuke Zhu and Thomas Kollar and Sergey Levine and Chelsea Finn},
  journal = {RSS},
  year = {2024}
}

Training self-learning circuits for power-efficient solutions. Stern, M., Dillavou, S., Jayaraman, D., Durian, D. J., & Liu, A. J. Applied Physics Letters (APL) Machine Learning, 2024.

@article{stern2024physical,
  title = {Training self-learning circuits for power-efficient solutions},
  author = {Stern, Menachem and Dillavou, Sam and Jayaraman, Dinesh and Durian, Douglas J and Liu, Andrea J},
  journal = {Applied Physics Letters (APL) Machine Learning},
  year = {2024}
}

Universal Visual Decomposer: Long-Horizon Manipulation Made Easy. Zhang, Z., Li, Y., Bastani, O., Gupta, A., Jayaraman, D., Ma, Y. J., & Weihs, L. ICRA, 2024.

@article{zhang2024universal,
  title = {Universal Visual Decomposer: Long-Horizon Manipulation Made Easy},
  author = {Zichen Zhang and Yunshuang Li and Osbert Bastani and Abhishek Gupta and Dinesh Jayaraman and Yecheng Jason Ma and Luca Weihs},
  journal = {ICRA},
  year = {2024}
}

Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies. Qian, J., Panagopoulos, A., & Jayaraman, D. ICRA, 2024.

@article{qian2024soft,
  title = {Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies},
  author = {Jianing Qian and Anastasios Panagopoulos and Dinesh Jayaraman},
  abstract = {Generic re-usable pre-trained image representation encoders have become a standard component of methods for many computer vision tasks. As visual representations for robots however, their utility has been limited, leading to a recent wave of efforts to pre-train robotics-specific image encoders that are better suited to robotic tasks than their generic counterparts. We propose SOFT, a wrapper around pre-trained vision transformer PVT models that bridges this gap without any further training. Rather than construct representations out of only the final layer activations, SOFT individuates and locates object-like entities from PVT attentions, and describes them with PVT activations, producing an object-centric representation. Across standard choices of generic pre-trained vision transformers PVT, we demonstrate in each case that policies trained on SOFT(PVT) far outstrip standard PVT representations for manipulation tasks in simulated and real settings, approaching the state-of-the-art robotics-aware representations.},
  url = {https://sites.google.com/view/robot-soft/},
  journal = {ICRA},
  year = {2024}
}

Composing Pre-Trained Object-Centric Representations for Robotics From “What” and “Where” Foundation Models. Shi*, J., Qian*, J., Ma, Y. J., & Jayaraman, D. ICRA, 2024.

@article{shi2024plug,
  title = {Composing Pre-Trained Object-Centric Representations for Robotics From “What” and “Where” Foundation Models},
  author = {Shi*, Junyao and Qian*, Jianing and Ma, Yecheng Jason and Jayaraman, Dinesh},
  abstract = {There have recently been large advances both in pre-training visual representations for robotic control and segmenting unknown category objects in general images. To leverage these for improved robot learning, we propose POCR, a new framework for building pre-trained object-centric representations for robotic control. Building on theories of “what-where” representations in psychology and computer vision, we use segmentations from a pre-trained model to stably locate across timesteps, various entities in the scene, capturing “where” information. To each such segmented entity, we apply other pre-trained models that build vector descriptions suitable for robotic control tasks, thus capturing “what” the entity is. Thus, our pre-trained object-centric representations for control are constructed by appropriately combining the outputs of off-the-shelf pre-trained models, with no new training. On various simulated and real robotic tasks, we show that imitation policies for robotic manipulators trained on POCR achieve better performance and systematic generalization than state of the art pre-trained representations for robotics, as well as prior object-centric representations that are typically trained from scratch.},
  url = {https://sites.google.com/view/pocr},
  journal = {ICRA},
  year = {2024}
}

Long-HOT: A Modular Hierarchical Approach for Long-Horizon Object Transport. Narayanan, S., Jayaraman, D., & Chandraker, M. ICRA, 2024.

@article{narayanan2024long,
  title = {Long-HOT: A Modular Hierarchical Approach for Long-Horizon Object Transport},
  author = {Narayanan, Sriram and Jayaraman, Dinesh and Chandraker, Manmohan},
  journal = {ICRA},
  abstract = {We aim to address key challenges in long-horizon embodied exploration and navigation by proposing a long-horizon object transport task called Long-HOT and a novel modular framework for temporally extended navigation. Agents in Long-HOT need to efficiently find and pick up target objects that are scattered in the environment, carry them to a goal location with load constraints, and optionally have access to a container. We propose a modular topological graph-based transport policy (HTP) that explores efficiently with the help of weighted frontiers. Our approach uses a combination of motion planning to reach point goals within explored locations and object navigation policies for moving towards semantic targets at unknown locations. Experiments on both our proposed Habitat transport task and on MultiOn benchmarks show that our method outperforms baselines and prior works. Further, we analyze the agent's behavior for the usage of the container and demonstrate meaningful generalization to much harder transport scenes with training only on simpler versions of the task. We will release all the code and data.},
  year = {2024}
}

Privileged Sensing Scaffolds Reinforcement Learning. Hu, E., Springer, J., Rybkin, O., & Jayaraman, D. ICLR, 2024.

@article{hu2024scaffolder,
  title = {Privileged Sensing Scaffolds Reinforcement Learning},
  author = {Edward Hu and James Springer and Oleh Rybkin and Dinesh Jayaraman},
  journal = {ICLR},
  year = {2024},
  abstract = {We need to look at our shoelaces as we first learn to tie them but having mastered this skill, can do it from touch alone. We call this phenomenon “sensory scaffolding”: observation streams that are not needed by a master might yet aid a novice learner. We consider such sensory scaffolding setups for training artificial agents. For example, a robot arm may need to be deployed with just a low-cost, robust, general-purpose camera; yet its performance may improve by having privileged training-time-only access to informative albeit expensive and unwieldy motion capture rigs or fragile tactile sensors. For these settings, we propose Scaffolder, a reinforcement learning approach which effectively exploits privileged sensing in critics, world models, reward estimators, and other such auxiliary components that are only used at training time, to improve the target policy. For evaluating sensory scaffolding agents, we design a new “S3” suite of ten diverse simulated robotic tasks that explore a wide range of practical sensor setups. Agents must use privileged camera sensing to train blind hurdlers, privileged active visual perception to help robot arms overcome visual occlusions, privileged touch sensors to train robot hands, and more. Scaffolder easily outperforms relevant prior baselines and frequently performs comparably even to policies that have test-time access to the privileged sensors.}
}

Can Transformers Capture Spatial Relations between Objects? Wen, C., Jayaraman, D., & Gao, Y. ICLR, 2024.

@article{wen2024relativit,
  title = {Can Transformers Capture Spatial Relations between Objects?},
  author = {Chuan Wen and Dinesh Jayaraman and Yang Gao},
  journal = {ICLR},
  year = {2024},
  abstract = {Spatial relationships between objects represent key scene information for humans to understand and interact with the world. To study the capability of current computer vision systems to recognize physically grounded spatial relations, we start by proposing precise relation definitions that permit consistently annotating a benchmark dataset. Despite the apparent simplicity of this task relative to others in the recognition literature, we observe that existing approaches perform poorly on this benchmark. We propose new approaches exploiting the long-range attention capabilities of transformers for this task, and evaluating key design principles. We identify a simple "RelatiViT" architecture and demonstrate that it outperforms all current approaches. To our knowledge, this is the first method to convincingly outperform naive baselines on spatial relation prediction in in-the-wild settings.}
}

Eureka: Human-Level Reward Design via Coding Large Language Models. Ma, Y. J., Liang, W., Wang, G., Huang, D., Bastani, O., Jayaraman, D., Zhu, Y., Fan, L., & Anandkumar, A. ICLR, 2024.

@article{ma2024eureka,
  title = {Eureka: Human-Level Reward Design via Coding Large Language Models},
  author = {Yecheng Jason Ma and William Liang and Guanzhi Wang and De-An Huang and Osbert Bastani and Dinesh Jayaraman and Yuke Zhu and Linxi Fan and Anima Anandkumar},
  journal = {ICLR},
  year = {2024}
}

Memory-Consistent Neural Networks for Imitation Learning. Sridhar, K., Dutta, S., Jayaraman, D., Weimer, J., & Lee, I. ICLR, 2024.

@article{sridhar2024memoryconsistent,
  title = {Memory-Consistent Neural Networks for Imitation Learning},
  author = {Kaustubh Sridhar and Souradeep Dutta and Dinesh Jayaraman and James Weimer and Insup Lee},
  journal = {ICLR},
  year = {2024}
}

ZeroFlow: Fast Zero Label Scene Flow via Distillation. Vedder, K., Peri, N., Chodosh, N., Khatri, I., Eaton, E., Jayaraman, D., Liu, Y., Ramanan, D., & Hays, J. ICLR, 2024.

@article{vedder2024zeroflow,
  title = {ZeroFlow: Fast Zero Label Scene Flow via Distillation},
  author = {Vedder, Kyle and Peri, Neehar and Chodosh, Nathaniel and Khatri, Ishan and Eaton, Eric and Jayaraman, Dinesh and Liu, Yang and Ramanan, Deva and Hays, James},
  journal = {ICLR},
  year = {2024}
}

TLControl: Trajectory and Language Control for Human Motion Synthesis. Wan, W., Dou, Z., Komura, T., Wang, W., Jayaraman, D., & Liu, L. ECCV, 2024.

@article{wan2024tlcontrol,
  title = {TLControl: Trajectory and Language Control for Human Motion Synthesis},
  author = {Weilin Wan and Zhiyang Dou and Taku Komura and Wenping Wang and Dinesh Jayaraman and Lingjie Liu},
  abstract = {Controllable human motion synthesis is essential for applications in AR/VR, gaming and embodied AI. Existing methods often focus solely on either language or full trajectory control, lacking precision in synthesizing motions aligned with user-specified trajectories, especially for multi-joint control. To address these issues, we present TLControl, a novel method for realistic human motion synthesis, incorporating both low-level Trajectory and high-level Language semantics controls, through the integration of neural-based and optimization-based techniques. Specifically, we begin with training a VQ-VAE for a compact and well-structured latent motion space organized by body parts. We then propose a Masked Trajectories Transformer (MTT) for predicting a motion distribution conditioned on language and trajectory. Once trained, we use MTT to sample initial motion predictions given user-specified partial trajectories and text descriptions as conditioning. Finally, we introduce a test-time optimization to refine these coarse predictions for precise trajectory control, which offers flexibility by allowing users to specify various optimization goals and ensures high runtime efficiency. Comprehensive experiments show that TLControl significantly outperforms the state-of-the-art in trajectory accuracy and time efficiency, making it practical for interactive and high-quality animation generation.},
  journal = {ECCV},
  year = {2024}
}

\n \n\n \n \n \n \n \n Learning a Meta-Controller for Dynamic Grasping.\n \n \n \n\n\n \n Jia, Y., Xu, J., Jayaraman, D., & Song, S.\n\n\n \n\n\n\n CASE. 2024.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jia2024learning,\n  title={Learning a Meta-Controller for Dynamic Grasping},\n  author={Jia, Yinsen and Xu, Jingxi and Jayaraman, Dinesh and Song, Shuran},\n  abstract={Grasping moving objects is a challenging task that requires multiple submodules such as object pose predictor, arm motion planner, etc. Each submodule operates under its own set of meta-parameters. For example, how far the pose predictor should look into the future (i.e., \\textit{look-ahead time}) and the maximum amount of time the motion planner can spend planning a motion (i.e., \\textit{time budget}). Many previous works assign fixed values to these parameters; however, at different moments \\textit{within} a single episode of dynamic grasping, the optimal values should vary depending on the current scene. In this work, we propose a dynamic grasping pipeline with a meta-controller that controls the look-ahead time and time budget dynamically. We learn the meta-controller through reinforcement learning with a sparse reward. Our experiments show the meta-controller improves the grasping success rate (up to 28\\% in the most cluttered environment) and reduces grasping time, compared to the strongest baseline. Our meta-controller learns to reason about the reachable workspace and maintain the predicted pose within the reachable region. In addition, it assigns a small but sufficient time budget for the motion planner. Our method can handle different objects, trajectories, and obstacles. Despite being trained only with 3-6 random cuboidal obstacles, our meta-controller generalizes well to 7-9 obstacles and more realistic out-of-domain household setups with unseen obstacle shapes.},\n  journal={CASE},\n  year={2024}\n}\n
\n
\n\n\n
\n Grasping moving objects is a challenging task that requires multiple submodules such as object pose predictor, arm motion planner, etc. Each submodule operates under its own set of meta-parameters. For example, how far the pose predictor should look into the future (i.e., look-ahead time) and the maximum amount of time the motion planner can spend planning a motion (i.e., time budget). Many previous works assign fixed values to these parameters; however, at different moments within a single episode of dynamic grasping, the optimal values should vary depending on the current scene. In this work, we propose a dynamic grasping pipeline with a meta-controller that controls the look-ahead time and time budget dynamically. We learn the meta-controller through reinforcement learning with a sparse reward. Our experiments show the meta-controller improves the grasping success rate (up to 28% in the most cluttered environment) and reduces grasping time, compared to the strongest baseline. Our meta-controller learns to reason about the reachable workspace and maintain the predicted pose within the reachable region. In addition, it assigns a small but sufficient time budget for the motion planner. Our method can handle different objects, trajectories, and obstacles. Despite being trained only with 3-6 random cuboidal obstacles, our meta-controller generalizes well to 7-9 obstacles and more realistic out-of-domain household setups with unseen obstacle shapes.\n
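As a rough illustration of the idea described above (a learned meta-controller that picks the look-ahead time and planner time budget under a sparse terminal success reward), here is a toy tabular Q-learning sketch. The environment, state bins, action sets, and success model are invented stand-ins, not the paper's simulator or learning setup.

import random

random.seed(0)

LOOK_AHEAD = [0.2, 0.5, 1.0]       # how far (s) the pose predictor looks into the future
TIME_BUDGET = [0.05, 0.1, 0.3]     # how long (s) the motion planner may spend planning
ACTIONS = [(la, tb) for la in LOOK_AHEAD for tb in TIME_BUDGET]
STATES = list(range(5))            # e.g., coarse bins of predicted-pose reachability

Q = {(s, a): 0.0 for s in STATES for a in ACTIONS}
alpha, gamma, eps = 0.1, 0.95, 0.2

def eps_greedy(s):
    if random.random() < eps:
        return random.choice(ACTIONS)
    return max(ACTIONS, key=lambda a: Q[(s, a)])

def toy_episode(policy, horizon=20):
    """Stand-in for the grasping rollout: only a sparse success signal at the end."""
    s, transitions = random.choice(STATES), []
    for _ in range(horizon):
        a = policy(s)
        s_next = min(4, max(0, s + random.choice([-1, 0, 1])))
        transitions.append((s, a, s_next))
        s = s_next
    # toy success model: moderate budgets help (purely illustrative)
    p_success = 0.2 + 0.03 * sum(a == (0.5, 0.1) for _, a, _ in transitions)
    return transitions, float(random.random() < p_success)

for _ in range(2000):
    transitions, reward = toy_episode(eps_greedy)
    for i, (s, a, s_next) in enumerate(transitions):
        if i == len(transitions) - 1:
            target = reward                                   # terminal step carries the sparse reward
        else:
            target = gamma * max(Q[(s_next, b)] for b in ACTIONS)
        Q[(s, a)] += alpha * (target - Q[(s, a)])

print({s: max(ACTIONS, key=lambda a: Q[(s, a)]) for s in STATES})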
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2023\n \n \n (6)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Prospective Learning: Principled Extrapolation to the Future.\n \n \n \n \n\n\n \n De Silva, A., Ramesh, R., Ungar, L., Shuler, M. H., Cowan, N. J., Platt, M., Li, C., Isik, L., Roh, S., Charles, A., Venkataraman, A., Caffo, B., How, J. J., Kebschull, J. M, Krakauer, J. W., Bichuch, M., Kinfu, K. A., Yezerets, E., Jayaraman, D., Shin, J. M., Villar, S., Phillips, I., Priebe, C. E., Hartung, T., Miller, M. I., Dey, J., Huang, N., Eaton, E., Etienne-Cummings, R., Ogburn, E. L., Burns, R., Osuagwu, O., Mensh, B., Muotri, A. R., Brown, J., White, C., Yang, W., Rusu, A. A., Verstynen, T., Kording, K. P., Chaudhari, P., & Vogelstein, J. T.\n\n\n \n\n\n\n In Proceedings of The 2nd Conference on Lifelong Learning Agents, 2023. PMLR\n \n\n\n\n
\n\n\n\n \n \n \"ProspectivePaper\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@InProceedings{desilva23prospective,\n  title = \t {Prospective Learning: Principled Extrapolation to the Future},\n  author =       {De Silva, Ashwin and Ramesh, Rahul and Ungar, Lyle and Shuler, Marshall Hussain and Cowan, Noah J. and Platt, Michael and Li, Chen and Isik, Leyla and Roh, Seung-Eon and Charles, Adam and Venkataraman, Archana and Caffo, Brian and How, Javier J. and Kebschull, Justus M and Krakauer, John W. and Bichuch, Maxim and Kinfu, Kaleab Alemayehu and Yezerets, Eva and Jayaraman, Dinesh and Shin, Jong M. and Villar, Soledad and Phillips, Ian and Priebe, Carey E. and Hartung, Thomas and Miller, Michael I. and Dey, Jayanta and Huang, Ningyuan and Eaton, Eric and Etienne-Cummings, Ralph and Ogburn, Elizabeth L. and Burns, Randal and Osuagwu, Onyema and Mensh, Brett and Muotri, Alysson R. and Brown, Julia and White, Chris and Yang, Weiwei and Verstynen, Andrei A. Rusu Timothy and Kording, Konrad P. and Chaudhari, Pratik and Vogelstein, Joshua T.},\n  booktitle = \t {Proceedings of The 2nd Conference on Lifelong Learning Agents},\n  year = \t {2023},\n  publisher =    {PMLR},\n  pdf = \t {https://proceedings.mlr.press/v232/de-silva23a/de-silva23a.pdf},\n  url = \t {https://proceedings.mlr.press/v232/de-silva23a.html},\n  abstract = \t {Learning is a process which can update decision rules, based on past experience, such that future performance improves. Traditionally, machine learning is often evaluated under the assumption that the future will be identical to the past in distribution or change adversarially. But these assumptions can be either too optimistic or pessimistic for many problems in the real world. Real world scenarios evolve over multiple spatiotemporal scales with partially predictable dynamics. Here we reformulate the learning problem to one that centers around this idea of dynamic futures that are partially learnable. We conjecture that certain sequences of tasks are not retrospectively learnable (in which the data distribution is fixed), but are prospectively learnable (in which distributions may be dynamic), suggesting that prospective learning is more difficult in kind than retrospective learning. We argue that prospective learning more accurately characterizes many real world problems that (1) currently stymie existing artificial intelligence solutions and/or (2) lack adequate explanations for how natural intelligences solve them. Thus, studying prospective learning will lead to deeper insights and solutions to currently vexing challenges in both natural and artificial intelligences.}\n}\n
\n
\n\n\n
\n Learning is a process which can update decision rules, based on past experience, such that future performance improves. Traditionally, machine learning is often evaluated under the assumption that the future will be identical to the past in distribution or change adversarially. But these assumptions can be either too optimistic or pessimistic for many problems in the real world. Real world scenarios evolve over multiple spatiotemporal scales with partially predictable dynamics. Here we reformulate the learning problem to one that centers around this idea of dynamic futures that are partially learnable. We conjecture that certain sequences of tasks are not retrospectively learnable (in which the data distribution is fixed), but are prospectively learnable (in which distributions may be dynamic), suggesting that prospective learning is more difficult in kind than retrospective learning. We argue that prospective learning more accurately characterizes many real world problems that (1) currently stymie existing artificial intelligence solutions and/or (2) lack adequate explanations for how natural intelligences solve them. Thus, studying prospective learning will lead to deeper insights and solutions to currently vexing challenges in both natural and artificial intelligences.\n
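A toy numerical example of the retrospective-vs-prospective distinction drawn above: when the data-generating distribution drifts in a partially predictable way, an estimator that assumes the future matches the past does worse than one that extrapolates the drift. This example is mine, not from the paper.

import numpy as np

rng = np.random.default_rng(0)
t = np.arange(100)
y = 0.05 * t + rng.normal(0, 0.2, size=t.size)        # mean drifts linearly over time

past, future = y[:80], y[80:]
retrospective = past.mean()                            # iid assumption: future looks like the past
slope, intercept = np.polyfit(np.arange(80), past, 1)
prospective = slope * np.arange(80, 100) + intercept   # extrapolate the predictable drift

print("retrospective MSE:", np.mean((future - retrospective) ** 2))
print("prospective  MSE:", np.mean((future - prospective) ** 2))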
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Vision-Based Contact Localization Without Touch or Force Sensing.\n \n \n \n\n\n \n Kim, L., Li, Y., Posa, M., & Jayaraman, D.\n\n\n \n\n\n\n CORL. 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{kim2023im2contact,\n  title={Vision-Based Contact Localization Without Touch or Force Sensing},\n  author={Leon Kim and Yunshuang Li and Michael Posa and Dinesh Jayaraman},\n  journal={CORL},\n  year={2023}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n LIV: Language-Image Representations and Rewards for Robotic Control.\n \n \n \n\n\n \n Ma, Y. J., Kumar, V., Zhang, A., Bastani, O., & Jayaraman, D.\n\n\n \n\n\n\n ICML. 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2023liv,\n  title         = "{LIV}: Language-Image Representations and Rewards for Robotic Control",\n  author        = "Yecheng Jason Ma and Vikash Kumar and Amy Zhang and Osbert Bastani and Dinesh Jayaraman",\n  journal= {ICML}, \n  year = {2023},\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Planning Goals for Exploration.\n \n \n \n\n\n \n Hu, E., Chang, R., Rybkin, O., & Jayaraman, D.\n\n\n \n\n\n\n ICLR (top 25 percent) and Best Workshop Paper at CORL 2022 Robot Adaptation Workshop. 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{hu2023peg,\n  title = "Planning Goals for Exploration",\n  author = "Edward Hu and Richard Chang and Oleh Rybkin and Dinesh Jayaraman",\n  journal= {ICLR (top 25 percent) and Best Workshop Paper at CORL 2022 Robot Adaptation Workshop}, \n  year = 2023,\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Learning Policy-Aware Models for Model-Based Reinforcement Learning via Transition Occupancy Matching.\n \n \n \n\n\n \n Ma, Y. J., Sivakumar, K., Yen, J., Bastani, O., & Jayaraman, D.\n\n\n \n\n\n\n L4DC. 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{kausik2023tom,\n  title         = "Learning Policy-Aware Models for Model-Based Reinforcement Learning via Transition Occupancy Matching",\n  author        = "Yecheng Jason Ma and Kausik Sivakumar and Jason Yen and Osbert Bastani and Dinesh Jayaraman",\n  journal = {L4DC},\n  year          =  2023,\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n VIP: Towards Universal Visual Reward and Representation via Value-Implicit Pre-Training.\n \n \n \n\n\n \n Ma, Y. J., Sodhani, S., Jayaraman, D., Bastani, O., Kumar, V., & Zhang, A.\n\n\n \n\n\n\n ICLR (top 25 percent). 2023.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2023vip,\n  title         = "{VIP}: Towards Universal Visual Reward and Representation\n                   via {Value-Implicit} {Pre-Training}",\n  author        = "Ma, Yecheng Jason and Sodhani, Shagun and Jayaraman, Dinesh\n                   and Bastani, Osbert and Kumar, Vikash and Zhang, Amy",\n  journal= {ICLR (top 25 percent)}, \n  year = 2023,\n}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2022\n \n \n (8)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Training Robots to Evaluate Robots: Example-Based Interactive Reward Functions for Policy Learning.\n \n \n \n\n\n \n Huang, K., Hu, E., & Jayaraman, D.\n\n\n \n\n\n\n CORL. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{huang2022lirf,\n      title={Training Robots to Evaluate Robots: Example-Based Interactive Reward Functions for Policy Learning}, \n      author={Kun Huang and Edward Hu and Dinesh Jayaraman},\n      year={2022},\n      journal= {CORL}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Discovering Deformable Keypoint Pyramids.\n \n \n \n\n\n \n Qian, J., Panagopoulos, A., & Jayaraman, D.\n\n\n \n\n\n\n ECCV. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{qian2022dkp,\n      title={Discovering Deformable Keypoint Pyramids}, \n      author={Jianing Qian and Anastasios Panagopoulos and Dinesh Jayaraman},\n      year={2022},\n      journal= {ECCV}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n How Far I'll Go: Offline Goal-Conditioned Reinforcement Learning via f-Advantage Regression.\n \n \n \n\n\n \n Ma, Y. J., Yan, J., Jayaraman, D., & Bastani, O.\n\n\n \n\n\n\n NeurIPS. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2022far,\n  title={How Far I'll Go: Offline Goal-Conditioned Reinforcement Learning via $ f $-Advantage Regression},\n  author={Ma, Yecheng Jason and Yan, Jason and Jayaraman, Dinesh and Bastani, Osbert},\n  journal={NeurIPS},\n  year={2022}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Fighting Fire with Fire: Avoiding DNN Shortcuts through Priming.\n \n \n \n\n\n \n Wen, C., Qian, J., Lin, J., Teng, J., Jayaraman, D., & Gao, Y.\n\n\n \n\n\n\n ICML. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{wen2022priming,\n      title={Fighting Fire with Fire: Avoiding DNN Shortcuts through Priming}, \n      author={Chuan Wen and Jianing Qian and Jierui Lin and Jiaye Teng and Dinesh Jayaraman and Yang Gao},\n      year={2022},\n      journal= {ICML}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n SMODICE: Versatile Offline Imitation Learning via State Occupancy Matching.\n \n \n \n\n\n \n Ma, Y. J., Shen, A., Jayaraman, D., & Bastani, O.\n\n\n \n\n\n\n ICML. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2022smodice,\n      title={SMODICE: Versatile Offline Imitation Learning via State Occupancy Matching}, \n      author={Yecheng Jason Ma and Andrew Shen and Dinesh Jayaraman and Osbert Bastani},\n      year={2022},\n      journal= {ICML}\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Prospective Learning: Back to the Future.\n \n \n \n\n\n \n Vogelstein, J. T, Verstynen, T., Kording, K. P, Isik, L., Krakauer, J. W, Etienne-Cummings, R., Ogburn, E. L, Priebe, C. E, Burns, R., Kutten, K., Knierim, J. J, Potash, J. B, Hartung, T., Smirnova, L., Worley, P., Savonenko, A., Phillips, I., Miller, M. I, Vidal, R., Sulam, J., Charles, A., Cowan, N. J, Bichuch, M., Venkataraman, A., Li, C., Thakor, N., Kebschull, J. M, Albert, M., Xu, J., Shuler, M. H., Caffo, B., Ratnanather, T., Geisa, A., Roh, S., Yezerets, E., Madhyastha, M., How, J. J, Tomita, T. M, Dey, J., Huang, N., Shin, J. M, Kinfu, K. A., Chaudhari, P., Baker, B., Schapiro, A., Jayaraman, D., Eaton, E., Platt, M., Ungar, L., Wehbe, L., Kepecs, A., Christensen, A., Osuagwu, O., Brunton, B., Mensh, B., Muotri, A. R, Silva, G., Puppo, F., Engert, F., Hillman, E., Brown, J., White, C., & Yang, W.\n\n\n \n\n\n\n January 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@ARTICLE{Vogelstein2022-mn,\n  title         = "Prospective Learning: Back to the Future",\n  author        = "Vogelstein, Joshua T and Verstynen, Timothy and Kording,\n                   Konrad P and Isik, Leyla and Krakauer, John W and\n                   Etienne-Cummings, Ralph and Ogburn, Elizabeth L and Priebe,\n                   Carey E and Burns, Randal and Kutten, Kwame and Knierim,\n                   James J and Potash, James B and Hartung, Thomas and\n                   Smirnova, Lena and Worley, Paul and Savonenko, Alena and\n                   Phillips, Ian and Miller, Michael I and Vidal, Rene and\n                   Sulam, Jeremias and Charles, Adam and Cowan, Noah J and\n                   Bichuch, Maxim and Venkataraman, Archana and Li, Chen and\n                   Thakor, Nitish and Kebschull, Justus M and Albert, Marilyn\n                   and Xu, Jinchong and Shuler, Marshall Hussain and Caffo,\n                   Brian and Ratnanather, Tilak and Geisa, Ali and Roh,\n                   Seung-Eon and Yezerets, Eva and Madhyastha, Meghana and How,\n                   Javier J and Tomita, Tyler M and Dey, Jayanta and {Ningyuan}\n                   and {Huang} and Shin, Jong M and Kinfu, Kaleab Alemayehu and\n                   Chaudhari, Pratik and Baker, Ben and Schapiro, Anna and\n                   Jayaraman, Dinesh and Eaton, Eric and Platt, Michael and\n                   Ungar, Lyle and Wehbe, Leila and Kepecs, Adam and\n                   Christensen, Amy and Osuagwu, Onyema and Brunton, Bing and\n                   Mensh, Brett and Muotri, Alysson R and Silva, Gabriel and\n                   Puppo, Francesca and Engert, Florian and Hillman, Elizabeth\n                   and Brown, Julia and White, Chris and Yang, Weiwei",\n  month         =  jan,\n  year          =  2022,\n  archivePrefix = "arXiv",\n  primaryClass  = "cs.LG",\n  eprint        = "2201.07372"\n}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Conservative and Adaptive Penalty for Model-Based Safe Reinforcement Learning.\n \n \n \n\n\n \n Ma, Y. J., Shen, A., Bastani, O., & Jayaraman, D.\n\n\n \n\n\n\n AAAI. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2022cap, title= {Conservative and Adaptive Penalty for Model-Based Safe Reinforcement Learning}, author= {Ma, Yecheng Jason and Shen, Andrew and Bastani, Osbert and Jayaraman, Dinesh}, journal= {AAAI}, year= {2022}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Know Thyself: Transferable Visuomotor Control Through Robot-Awareness.\n \n \n \n\n\n \n Hu, E. S., Huang, K., Rybkin, O., & Jayaraman, D.\n\n\n \n\n\n\n ICLR. 2022.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{hu2022rac, author = {Edward S. Hu and Kun Huang and Oleh Rybkin and Dinesh Jayaraman}, journal = {ICLR}, title = {Know Thyself: Transferable Visuomotor Control Through Robot-Awareness}, year = {2022}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2021\n \n \n (9)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Conservative Offline Distributional Reinforcement Learning.\n \n \n \n\n\n \n Ma, Y. J., Jayaraman, D., & Bastani, O.\n\n\n \n\n\n\n NeurIPS. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2021conservative, title= {Conservative Offline Distributional Reinforcement Learning}, author= {Ma, Yecheng Jason and {Jayaraman}, {Dinesh} and Bastani, Osbert}, journal= {NeurIPS}, year= {2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Embracing the Reconstruction Uncertainty in 3D Human Pose Estimation.\n \n \n \n\n\n \n Kolotouros, N., Pavlakos, G., Jayaraman, D., & Daniilidis, K.\n\n\n \n\n\n\n ICCV. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{kolotouros2021embracing, title= {Embracing the Reconstruction Uncertainty in 3D Human Pose Estimation}, author= {Kolotouros, Nikos and Pavlakos, Georgios and {Jayaraman}, {Dinesh} and Daniilidis, Kostas}, journal= {ICCV}, year= {2021}}\n% - comments @article{ma2021uncertainty, title= {What Matters More and When: Epistemic or Aleatoric Uncertainty?}, author= {Ma, Yecheng and Moore, Juston and Pleiss, Geoff and {Jayaraman}, {Dinesh} and Gardner, Jacob}, journal= {(under review)}, year= {2021}}\n% - comments @article{lee2021perimeter, title= {Vision-Based Perimeter Defense Via Multi-View Active Pose Estimation}, author= {Lee, Elijah and Loianno, Giuseppe and {Jayaraman}, {Dinesh} and Kumar, Vijay}, journal= {(under review)}, year= {2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Object Representations Guided By Optical Flow.\n \n \n \n\n\n \n Qian, J., & Jayaraman, D.\n\n\n \n\n\n\n NeurIPS 4th Robot Learning Workshop: Self-Supervised and Lifelong Learning. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{qian2021flood, title= {Object Representations Guided By Optical Flow}, author= {Qian, Jianing and {Jayaraman}, {Dinesh}}, journal= {NeurIPS 4th Robot Learning Workshop: Self-Supervised and Lifelong Learning}, year= {2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Keyframe-focused visual imitation learning.\n \n \n \n\n\n \n Wen, C., Lin, J., Qian, J., Gao, Y., & Jayaraman, D.\n\n\n \n\n\n\n ICML. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{wen2021keyframe, title= {Keyframe-focused visual imitation learning}, author= {Wen, Chuan and Lin, Jierui and Qian, Jianing and Gao, Yang and {Jayaraman}, {Dinesh}}, journal= {ICML}, year= {2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n How Are Learned Perception-Based Controllers Impacted by the Limits of Robust Control?.\n \n \n \n\n\n \n Xu, J., Lee, B., Matni, N., & Jayaraman, D.\n\n\n \n\n\n\n L4DC. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{xu2021limits, title={How Are Learned Perception-Based Controllers Impacted by the Limits of Robust Control?},journal={L4DC}, author={Jingxi Xu and Bruce Lee and Nikolai Matni and {Dinesh} {Jayaraman}}, year={2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n An exploration of embodied visual exploration.\n \n \n \n\n\n \n Ramakrishnan, S. K, Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n IJCV. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ramakrishnan2021exploration, author = {Ramakrishnan, Santhosh K and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal = {IJCV}, title = {An exploration of embodied visual exploration}, year = {2021}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n SMiRL: Surprise Minimizing RL in Dynamic Environments.\n \n \n \n\n\n \n Berseth, G., Geng, D., Devin, C., Finn, C., Jayaraman, D., & Levine, S.\n\n\n \n\n\n\n ICLR. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{berseth2021smirl, author={Glen Berseth and Daniel Geng and Coline Devin and Chelsea Finn and {Dinesh} {Jayaraman} and Sergey Levine}, title={{SMiRL}: Surprise Minimizing RL in Dynamic Environments}, year = {2021}, journal ={ICLR}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Femtomolar SARS-CoV-2 Antigen Detection Using the Microbubbling Digital Assay with Smartphone Readout Enables Antigen Burden Quantitation and Dynamics Tracking.\n \n \n \n \n\n\n \n Chen, H., Li, Z., Feng, S., Wang, A., Richard-Greenblatt, M., Hutson, E., Andrianus, S., Glaser, L. J., Rodino, K. G., Qian, J., Jayaraman, D., Collman, R. G., Glascock, A., Bushman, F. D., Lee, J. S., Cherry, S., Fausto, A., Weiss, S. R., Koo, H., Corby, P. M., O'Doherty, U., Garfall, A. L., Vogl, D. T., Stadtmauer, E. A., & Wang, P.\n\n\n \n\n\n\n In 2021. Cold Spring Harbor Laboratory Press\n \n\n\n\n
\n\n\n\n \n \n \"FemtomolarPaper\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@inproceedings{chen2021covid, author= {Chen, Hui and Li, Zhao and Feng, Sheng and Wang, Anni and Richard-Greenblatt, Melissa and Hutson, Emily and Andrianus, Stefen and Glaser, Laurel J. and Rodino, Kyle G. and Qian, Jianing and {Jayaraman}, {Dinesh} and Collman, Ronald G. and Glascock, Abigail and Bushman, Frederic D. and Lee, Jae Seung and Cherry, Sara and Fausto, Alejandra and Weiss, Susan R. and Koo, Hyun and Corby, Patricia M. and O{\\textquoteright}Doherty, Una and Garfall, Alfred L. and Vogl, Dan T. and Stadtmauer, Edward A. and Wang, Ping}, title= {Femtomolar SARS-CoV-2 Antigen Detection Using the Microbubbling Digital Assay with Smartphone Readout Enables Antigen Burden Quantitation and Dynamics Tracking}, elocation-id= {2021.03.17.21253847}, year= {2021}, doi= {10.1101/2021.03.17.21253847}, publisher= {Cold Spring Harbor Laboratory Press}, abstract= {Background Little is known about the dynamics of SARS-CoV-2 antigen burden in respiratory samples in different patient populations at different stages of infection. Current rapid antigen tests cannot quantitate and track antigen dynamics with high sensitivity and specificity in respiratory samples.Methods We developed and validated an ultra-sensitive SARS-CoV-2 antigen assay with smartphone readout using the Microbubbling Digital Assay previously developed by our group, which is a platform that enables highly sensitive detection and quantitation of protein biomarkers. A computer vision-based algorithm was developed for microbubble smartphone image recognition and quantitation. A machine learning-based classifier was developed to classify the smartphone images based on detected microbubbles. Using this assay, we tracked antigen dynamics in serial swab samples from COVID patients hospitalized in ICU and immunocompromised COVID patients.Results The limit of detection (LOD) of the Microbubbling SARS-CoV-2 Antigen Assay was 0.5 pg/mL (10.6 fM) recombinant nucleocapsid (N) antigen or 4000 copies/mL inactivated SARS-CoV-2 virus in nasopharyngeal (NP) swabs, comparable to many rRT-PCR methods. The assay had high analytical specificity towards SARS-CoV-2. Compared to EUA-approved rRT-PCR methods, the Microbubbling Antigen Assay demonstrated a positive percent agreement (PPA) of 97\\% (95\\% confidence interval (CI), 92-99\\%) in symptomatic individuals within 7 days of symptom onset and positive SARS-CoV-2 nucleic acid results, and a negative percent agreement (NPA) of 97\\% (95\\% CI, 94-100\\%) in symptomatic and asymptomatic individuals with negative nucleic acid results. Antigen positivity rate in NP swabs gradually decreased as days-after-symptom-onset increased, despite persistent nucleic acid positivity of the same samples. The computer vision and machine learning-based automatic microbubble image classifier could accurately identify positives and negatives, based on microbubble counts and sizes. Total microbubble volume, a potential marker of antigen burden, correlated inversely with Ct values and days-after-symptom-onset. Antigen was detected for longer periods of time in immunocompromised patients with hematologic malignancies, compared to immunocompetent individuals. Simultaneous detectable antigens and nucleic acids may indicate the presence of replicating viruses in patients with persistent infections.Conclusions The Microbubbling SARS-CoV-2 Antigen Assay enables sensitive and specific detection of acute infections, and quantitation and tracking of antigen dynamics in different patient populations at various stages of infection. 
With smartphone compatibility and automated image processing, the assay is well-positioned to be adapted for point-of-care diagnosis and to explore the clinical implications of antigen dynamics in future studies.Competing Interest StatementThe authors have declared no competing interest.Funding StatementHC, ZL and PW have received support from National Institute of Health grants R01DA035868, R01EB029363 and National Science Foundation grant 1928334. SRW has received support from National Institute of Health grant R01AI40442 and Penn Center for Research on Coronaviruses and Other Emerging Pathogens. We thank the RADx-Tech Program, Penn Center for Precision Medicine, Penn Health-Tech and Penn Center for Innovation \\&amp; Precision Dentistry for providing funding for this project. This work was carried out in part at the Singh Center for Nanotechnology, part of the National Nanotechnology Coordinated Infrastructure Program, which is supported by the National Science Foundation grant NNCI-2025608.Author DeclarationsI confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained.YesThe details of the IRB/oversight body that provided approval or exemption for the research described are given below:The study was approved by the Institutional Review Board of the University of Pennsylvania.All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived.YesI understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance).YesI have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable.YesAll data referred to in the manuscript is available from the research team. The OpenCV-based Python code for the computer vision and machine learning pipeline is available at the following address: https://github.com/jianingq/microbuble-detection-and-classification.git.}, URL= {https://www.medrxiv.org/content/early/2021/03/26/2021.03.17.21253847}, eprint= {https://www.medrxiv.org/content/early/2021/03/26/2021.03.17.21253847.full.pdf}, journal= {medRxiv}}\n
\n
\n\n\n
\n Background Little is known about the dynamics of SARS-CoV-2 antigen burden in respiratory samples in different patient populations at different stages of infection. Current rapid antigen tests cannot quantitate and track antigen dynamics with high sensitivity and specificity in respiratory samples.Methods We developed and validated an ultra-sensitive SARS-CoV-2 antigen assay with smartphone readout using the Microbubbling Digital Assay previously developed by our group, which is a platform that enables highly sensitive detection and quantitation of protein biomarkers. A computer vision-based algorithm was developed for microbubble smartphone image recognition and quantitation. A machine learning-based classifier was developed to classify the smartphone images based on detected microbubbles. Using this assay, we tracked antigen dynamics in serial swab samples from COVID patients hospitalized in ICU and immunocompromised COVID patients.Results The limit of detection (LOD) of the Microbubbling SARS-CoV-2 Antigen Assay was 0.5 pg/mL (10.6 fM) recombinant nucleocapsid (N) antigen or 4000 copies/mL inactivated SARS-CoV-2 virus in nasopharyngeal (NP) swabs, comparable to many rRT-PCR methods. The assay had high analytical specificity towards SARS-CoV-2. Compared to EUA-approved rRT-PCR methods, the Microbubbling Antigen Assay demonstrated a positive percent agreement (PPA) of 97% (95% confidence interval (CI), 92-99%) in symptomatic individuals within 7 days of symptom onset and positive SARS-CoV-2 nucleic acid results, and a negative percent agreement (NPA) of 97% (95% CI, 94-100%) in symptomatic and asymptomatic individuals with negative nucleic acid results. Antigen positivity rate in NP swabs gradually decreased as days-after-symptom-onset increased, despite persistent nucleic acid positivity of the same samples. The computer vision and machine learning-based automatic microbubble image classifier could accurately identify positives and negatives, based on microbubble counts and sizes. Total microbubble volume, a potential marker of antigen burden, correlated inversely with Ct values and days-after-symptom-onset. Antigen was detected for longer periods of time in immunocompromised patients with hematologic malignancies, compared to immunocompetent individuals. Simultaneous detectable antigens and nucleic acids may indicate the presence of replicating viruses in patients with persistent infections.Conclusions The Microbubbling SARS-CoV-2 Antigen Assay enables sensitive and specific detection of acute infections, and quantitation and tracking of antigen dynamics in different patient populations at various stages of infection. With smartphone compatibility and automated image processing, the assay is well-positioned to be adapted for point-of-care diagnosis and to explore the clinical implications of antigen dynamics in future studies.Competing Interest StatementThe authors have declared no competing interest.Funding StatementHC, ZL and PW have received support from National Institute of Health grants R01DA035868, R01EB029363 and National Science Foundation grant 1928334. SRW has received support from National Institute of Health grant R01AI40442 and Penn Center for Research on Coronaviruses and Other Emerging Pathogens. We thank the RADx-Tech Program, Penn Center for Precision Medicine, Penn Health-Tech and Penn Center for Innovation & Precision Dentistry for providing funding for this project. 
This work was carried out in part at the Singh Center for Nanotechnology, part of the National Nanotechnology Coordinated Infrastructure Program, which is supported by the National Science Foundation grant NNCI-2025608.Author DeclarationsI confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained.YesThe details of the IRB/oversight body that provided approval or exemption for the research described are given below:The study was approved by the Institutional Review Board of the University of Pennsylvania.All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived.YesI understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance).YesI have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable.YesAll data referred to in the manuscript is available from the research team. The OpenCV-based Python code for the computer vision and machine learning pipeline is available at the following address: https://github.com/jianingq/microbuble-detection-and-classification.git.\n
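The abstract above points to an OpenCV-based Python pipeline for microbubble detection and classification (the GitHub link at the end). As a minimal, hypothetical sketch of that kind of pipeline, the following detects bright, roughly circular blobs in a synthetic image and thresholds on the count and a crude volume proxy; the detector parameters and decision thresholds are illustrative only and are not taken from the released code.

import cv2
import numpy as np

# synthetic stand-in for a smartphone image: bright bubbles on a dark background
img = np.zeros((240, 320), dtype=np.uint8)
for (x, y, r) in [(60, 60, 10), (150, 100, 14), (240, 180, 8)]:
    cv2.circle(img, (x, y), r, 255, thickness=-1)

params = cv2.SimpleBlobDetector_Params()
params.filterByColor = True
params.blobColor = 255          # look for bright blobs
params.filterByCircularity = True
params.minCircularity = 0.7     # bubbles are approximately round
params.filterByArea = True
params.minArea = 20

detector = cv2.SimpleBlobDetector_create(params)
keypoints = detector.detect(img)

radii = np.array([kp.size / 2.0 for kp in keypoints])               # kp.size is the blob diameter
volume_proxy = float(np.sum((4.0 / 3.0) * np.pi * radii ** 3))      # crude antigen-burden proxy

count_threshold, volume_threshold = 2, 1000.0                       # illustrative, not validated
is_positive = len(keypoints) >= count_threshold and volume_proxy >= volume_threshold
print(f"bubbles={len(keypoints)}, volume_proxy={volume_proxy:.0f}, positive={is_positive}")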
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Likelihood-Based Diverse Sampling for Trajectory Forecasting.\n \n \n \n\n\n \n Ma, Y. J., Inala, J. P., Jayaraman, D., & Bastani, O.\n\n\n \n\n\n\n ICCV. 2021.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ma2021diverse, title= {Likelihood-Based Diverse Sampling for Trajectory Forecasting}, author= {Ma, Yecheng Jason and Inala, Jeevana Priya and {Jayaraman}, {Dinesh} and Bastani, Osbert}, journal= {ICCV}, year= {2021}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2020\n \n \n (6)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Model-Based Inverse Reinforcement Learning from Visual Demonstrations.\n \n \n \n\n\n \n Das, N., Bechtle, S., Davchev, T., Jayaraman, D., Rai, A., & Meier, F.\n\n\n \n\n\n\n CORL. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{das2020keypointirl, author = {Neha Das and Sarah Bechtle and Todor Davchev and {Dinesh} {Jayaraman} and Akshara Rai and Franziska Meier}, journal = {CORL}, title = {Model-Based Inverse Reinforcement Learning from Visual Demonstrations}, year = {2020}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Cautious adaptation for reinforcement learning in safety-critical settings.\n \n \n \n\n\n \n Zhang, J., Cheung, B., Finn, C., Levine, S., & Jayaraman, D.\n\n\n \n\n\n\n ICML. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{zhang2020cautious, title= {Cautious adaptation for reinforcement learning in safety-critical settings}, author= {Zhang, Jesse and Cheung, Brian and Finn, Chelsea and Levine, Sergey and {Jayaraman}, {Dinesh}}, journal= {ICML}, year= {2020}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Digit: A novel design for a low-cost compact high-resolution tactile sensor with application to in-hand manipulation.\n \n \n \n\n\n \n Lambeta, M., Chou, P., Tian, S., Yang, B., Maloon, B., Most, V. R., Stroud, D., Santos, R., Byagowi, A., Kammerer, G., Jayaraman, D., & Calandra, R.\n\n\n \n\n\n\n ICRA and IEEE RA-L. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{lambeta2020digit, title= {Digit: A novel design for a low-cost compact high-resolution tactile sensor with application to in-hand manipulation}, author= {Lambeta, Mike and Chou, Po-Wei and Tian, Stephen and Yang, Brian and Maloon, Benjamin and Most, Victoria Rose and Stroud, Dave and Santos, Raymond and Byagowi, Ahmad and Kammerer, Gregg and {Jayaraman}, {Dinesh} and Calandra, Roberto}, journal= {ICRA and IEEE RA-L}, year= {2020}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Fighting Copycat Agents in Behavioral Cloning from Observation Histories.\n \n \n \n\n\n \n Wen, C., Lin, J., Darrell, T., Jayaraman, D., & Gao, Y.\n\n\n \n\n\n\n NeurIPS. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{wen2020copycat, title= {Fighting Copycat Agents in Behavioral Cloning from Observation Histories}, author= {Wen, Chuan and Lin, Jierui and Darrell, Trevor and {Jayaraman}, {Dinesh} and Gao, Yang}, journal= {NeurIPS}, year= {2020}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Long-horizon visual planning with goal-conditioned hierarchical predictors.\n \n \n \n\n\n \n Pertsch, K., Rybkin, O., Ebert, F., Jayaraman, D., Finn, C., & Levine, S.\n\n\n \n\n\n\n NeurIPS. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{pertsch2020long, title= {Long-horizon visual planning with goal-conditioned hierarchical predictors}, author= {Pertsch, Karl and Rybkin, Oleh and Ebert, Frederik and {Jayaraman}, {Dinesh} and Finn, Chelsea and Levine, Sergey}, journal= {NeurIPS}, year= {2020}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n MAVRIC: Morphology-Agnostic Visual Robotic Control.\n \n \n \n\n\n \n Yang*, B., Jayaraman*, D., Berseth, G., Efros, A., & Levine, S.\n\n\n \n\n\n\n ICRA and IEEE RA-L. 2020.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{yang2020mavric, title={{MAVRIC}: Morphology-Agnostic Visual Robotic Control}, author={Yang*, Brian and {Jayaraman}*, {Dinesh} and Berseth, Glen and Efros, Alexei and Levine, Sergey}, journal={ICRA and IEEE RA-L}, year={2020}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2019\n \n \n (5)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Causal Confusion in Imitation Learning.\n \n \n \n\n\n \n de Haan, P., Jayaraman, D., & Levine, S.\n\n\n \n\n\n\n NeurIPS. 2019.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{de2019causal, title= {Causal Confusion in Imitation Learning}, author= {de Haan, Pim and {Jayaraman}, {Dinesh} and Levine, Sergey}, journal= {NeurIPS}, year= {2019}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Emergence of exploratory look-around behaviors through active observation completion.\n \n \n \n\n\n \n Ramakrishnan*, S. K, Jayaraman*, D., & Grauman, K.\n\n\n \n\n\n\n Science Robotics. 2019.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{ramakrishnan2019emergence, title= {Emergence of exploratory look-around behaviors through active observation completion}, author= {Ramakrishnan*, Santhosh K and {Jayaraman}*, {Dinesh} and Grauman, Kristen}, journal= {Science Robotics}, year= {2019}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Manipulation by feel: Touch-based control with deep predictive models.\n \n \n \n\n\n \n Tian, S., Ebert, F., Jayaraman, D., Mudigonda, M., Finn, C., Calandra, R., & Levine, S.\n\n\n \n\n\n\n ICRA. 2019.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{tian2019manipulation, title= {Manipulation by feel: Touch-based control with deep predictive models}, author= {Tian, Stephen and Ebert, Frederik and {Jayaraman}, {Dinesh} and Mudigonda, Mayur and Finn, Chelsea and Calandra, Roberto and Levine, Sergey}, journal= {ICRA}, year= {2019}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n REPLAB: A reproducible low-cost arm benchmark for robotic learning.\n \n \n \n\n\n \n Yang, B., Jayaraman, D., Zhang, J., & Levine, S.\n\n\n \n\n\n\n ICRA. 2019.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{yang2019replab, title= {REPLAB: A reproducible low-cost arm benchmark for robotic learning}, author= {Yang, Brian and {Jayaraman}, {Dinesh} and Zhang, Jesse and Levine, Sergey}, journal= {ICRA}, year={2019}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Time-agnostic prediction: Predicting predictable video frames.\n \n \n \n\n\n \n Jayaraman, D., Ebert, F., Efros, A. A, & Levine, S.\n\n\n \n\n\n\n ICLR. 2019.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2019time, title= {Time-agnostic prediction: Predicting predictable video frames}, author= {{Jayaraman}, {Dinesh} and Ebert, Frederik and Efros, Alexei A and Levine, Sergey}, journal= {ICLR}, year= {2019}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2018\n \n \n (5)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Techniques for rectification of camera arrays.\n \n \n \n\n\n \n Ma, T., Sun, W., Nestares, O., Seshadrinathan, K., & Jayaraman, D.\n\n\n \n\n\n\n jan 23 2018.\n US Patent 9,875,543\n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@misc{ma2018techniques, title= {Techniques for rectification of camera arrays}, author= {Ma, Tao and Sun, Wei and Nestares, Oscar and Seshadrinathan, Kalpana and {Jayaraman}, {Dinesh}}, year= {2018}, month= {jan~23}, note= {US Patent 9,875,543}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n End-to-end policy learning for active visual categorization.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n IEEE TPAMI. 2018.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2018end, title= {End-to-end policy learning for active visual categorization}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {IEEE TPAMI}, year= {2018}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Learning to look around: Intelligently exploring unseen environments for unknown tasks.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n CVPR. 2018.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2018learning, title= {Learning to look around: Intelligently exploring unseen environments for unknown tasks}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {CVPR}, year= {2018}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n More Than a Feeling: Learning to Grasp and Regrasp using Vision and Touch.\n \n \n \n\n\n \n Calandra, R., Owens, A., Jayaraman, D., Lin, J., Yuan, W., Malik, J., Adelson, E. H, & Levine, S.\n\n\n \n\n\n\n IROS and IEEE RA-L. 2018.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{calandra2018more, title= {More Than a Feeling: Learning to Grasp and Regrasp using Vision and Touch}, author= {Calandra, Roberto and Owens, Andrew and {Jayaraman}, {Dinesh} and Lin, Justin and Yuan, Wenzhen and Malik, Jitendra and Adelson, Edward H and Levine, Sergey}, journal= {IROS and IEEE RA-L}, year= {2018}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Shapecodes: self-supervised feature learning by lifting views to viewgrids.\n \n \n \n\n\n \n Jayaraman, D., Gao, R., & Grauman, K.\n\n\n \n\n\n\n ECCV. 2018.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2018shapecodes, title= {Shapecodes: self-supervised feature learning by lifting views to viewgrids}, author= {{Jayaraman}, {Dinesh} and Gao, Ruohan and Grauman, Kristen}, journal= {ECCV}, year= {2018}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2017\n \n \n (4)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Techniques for improved focusing of camera arrays.\n \n \n \n\n\n \n Nestares, O., Seshadrinathan, K., & Jayaraman, D.\n\n\n \n\n\n\n aug 22 2017.\n US Patent 9,743,016\n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@misc{nestares2017techniques, title= {Techniques for improved focusing of camera arrays}, author= {Nestares, Oscar and Seshadrinathan, Kalpana and {Jayaraman}, {Dinesh}}, year= {2017}, month= {aug~22}, note= {US Patent 9,743,016}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Divide, share, and conquer: Multi-task attribute learning with selective sharing.\n \n \n \n\n\n \n Jayaraman, D., Chen, C., Sha, F., & Grauman, K.\n\n\n \n\n\n\n In Visual attributes, pages 49–85. Springer, Cham, 2017.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@incollection{chen2017divide, title= {Divide, share, and conquer: Multi-task attribute learning with selective sharing}, author= {{Jayaraman}, {{Dinesh}} and Chen, Chao-Yeh and Sha, Fei and Grauman, Kristen}, booktitle= {Visual attributes}, pages= {49--85}, year= {2017}, publisher= {Springer, Cham}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Embodied learning for visual recognition.\n \n \n \n\n\n \n Jayaraman, D.\n\n\n \n\n\n\n Ph.D. Thesis, UT Austin, 2017.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@phdthesis{jayaraman2017embodied, title= {Embodied learning for visual recognition}, author= {{Jayaraman}, {Dinesh}}, year= {2017}, school= {UT Austin}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Learning Image Representations Tied to Egomotion from Unlabeled Video.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n IJCV Special Issue of Best Papers from ICCV 2015. 2017.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2017learning, title= {Learning Image Representations Tied to Egomotion from Unlabeled Video}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {IJCV Special Issue of Best Papers from ICCV 2015}, year= {2017}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2016\n \n \n (4)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Look-ahead before you leap: end-to-end active recognition by forecasting the effect of motion.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n ECCV. 2016.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2016look, title= {Look-ahead before you leap: end-to-end active recognition by forecasting the effect of motion}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ECCV}, year= {2016}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Object-Centric Representation Learning from Unlabeled Videos.\n \n \n \n\n\n \n Gao, R., Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n ACCV. 2016.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{gao2016object, title= {Object-Centric Representation Learning from Unlabeled Videos}, author= {Gao, Ruohan and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ACCV}, year= {2016}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Pano2Vid: Automatic cinematography for watching 360-degree videos.\n \n \n \n\n\n \n Su, Y., Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n ACCV. 2016.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{su2016pano2vid, title= {Pano2Vid: Automatic cinematography for watching 360-degree videos}, author= {Su, Yu-Chuan and {Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ACCV}, year= {2016}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Slow and steady feature analysis: higher order temporal coherence in video.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n CVPR. 2016.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2016slow, title= {Slow and steady feature analysis: higher order temporal coherence in video}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {CVPR}, year= {2016}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2015\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Learning image representations tied to ego-motion.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n ICCV. 2015.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2015learning, title= {Learning image representations tied to ego-motion}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {ICCV}, year= {2015}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2014\n \n \n (2)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Decorrelating semantic visual attributes by resisting the urge to share.\n \n \n \n\n\n \n Jayaraman, D., Sha, F., & Grauman, K.\n\n\n \n\n\n\n CVPR. 2014.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2014decorrelating, title= {Decorrelating semantic visual attributes by resisting the urge to share}, author= {{Jayaraman}, {Dinesh} and Sha, Fei and Grauman, Kristen}, journal={CVPR}, year= {2014}}\n
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n Zero-shot recognition with unreliable attributes.\n \n \n \n\n\n \n Jayaraman, D., & Grauman, K.\n\n\n \n\n\n\n NeurIPS. 2014.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2014zero, title= {Zero-shot recognition with unreliable attributes}, author= {{Jayaraman}, {Dinesh} and Grauman, Kristen}, journal= {NeurIPS}, year= {2014}}\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2012\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Objective quality assessment of multiply distorted images.\n \n \n \n\n\n \n Jayaraman, D., Mittal, A., Moorthy, A. K, & Bovik, A. C\n\n\n \n\n\n\n ASILOMAR Signals, Systems and Computers. 2012.\n \n\n\n\n
\n\n\n\n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n\n\n\n
\n
@article{jayaraman2012objective, title= {Objective quality assessment of multiply distorted images}, author= {{Jayaraman}, {Dinesh} and Mittal, Anish and Moorthy, Anush K and Bovik, Alan C}, journal= {ASILOMAR Signals, Systems and Computers}, year= {2012}}
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n\n\n\n
\n\n\n \n\n \n \n \n \n\n
\n"}; document.write(bibbase_data.data);