Towards the next generation of Geospatial Artificial Intelligence.
Mai, G.; Xie, Y.; Jia, X.; Lao, N.; Rao, J.; Zhu, Q.; Liu, Z.; Chiang, Y.; and Jiao, J.
International Journal of Applied Earth Observation and Geoinformation, 136: 104368, February 2025.
@article{10.1016/j.jag.2025.104368,
  author = {Mai, Gengchen and Xie, Yiqun and Jia, Xiaowei and Lao, Ni and Rao, Jinmeng and Zhu, Qing and Liu, Zeping and Chiang, Yao-Yi and Jiao, Junfeng},
  doi = {10.1016/j.jag.2025.104368},
  issn = {1569-8432},
  journal = {International Journal of Applied Earth Observation and Geoinformation},
  month = {February},
  pages = {104368},
  publisher = {Elsevier BV},
  title = {Towards the next generation of Geospatial Artificial Intelligence},
  url = {https://doi.org/10.1016/j.jag.2025.104368},
  volume = {136},
  year = {2025}
}
Fine-Scale Soil Mapping in Alaska with Multimodal Machine Learning.
Lin, Y.; Chen, T.; Brungard, C.; Grunwald, S.; Ives, S.; Macander, M.; Nawrocki, T.; Chiang, Y.; and Jelinski, N.
In Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems (SIGSPATIAL ’25), pages 995–1007, November 2025. ACM.
@inproceedings{10.1145/3748636.3764170,
  author = {Lin, Yijun and Chen, Theresa and Brungard, Colby and Grunwald, Sabine and Ives, Sue and Macander, Matt and Nawrocki, Timm and Chiang, Yao-Yi and Jelinski, Nic},
  booktitle = {Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems},
  collection = {SIGSPATIAL ’25},
  doi = {10.1145/3748636.3764170},
  month = {November},
  pages = {995–1007},
  publisher = {ACM},
  series = {SIGSPATIAL ’25},
  title = {Fine-Scale Soil Mapping in Alaska with Multimodal Machine Learning},
  url = {https://doi.org/10.1145/3748636.3764170},
  year = {2025}
}
Benchmarking Geospatial Question Answering with MapQA.
Li, Z.; Grossman, M.; Qasemi, E.; Kulkarni, M.; Chen, M.; and Chiang, Y.
In Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems (SIGSPATIAL ’25), pages 1042–1045, November 2025. ACM.
@inproceedings{10.1145/3748636.3764174,
  author = {Li, Zekun and Grossman, Malcolm and Qasemi, Ehsan and Kulkarni, Mihir and Chen, Muhao and Chiang, Yao-Yi},
  booktitle = {Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems},
  collection = {SIGSPATIAL ’25},
  doi = {10.1145/3748636.3764174},
  month = {November},
  pages = {1042–1045},
  publisher = {ACM},
  series = {SIGSPATIAL ’25},
  title = {Benchmarking Geospatial Question Answering with MapQA},
  url = {https://doi.org/10.1145/3748636.3764174},
  year = {2025}
}
Transit for All: Mapping Equitable Bike2Subway Connection using Region Representation Learning.
Namgung, M.; Lee, J.; Ding, F.; and Chiang, Y.
In Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems (SIGSPATIAL ’25), pages 1056–1066, November 2025. ACM.
@inproceedings{10.1145/3748636.3764176,
  author = {Namgung, Min and Lee, Janghyeon and Ding, Fangyi and Chiang, Yao-Yi},
  booktitle = {Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems},
  collection = {SIGSPATIAL ’25},
  doi = {10.1145/3748636.3764176},
  month = {November},
  pages = {1056–1066},
  publisher = {ACM},
  series = {SIGSPATIAL ’25},
  title = {Transit for All: Mapping Equitable Bike2Subway Connection using Region Representation Learning},
  url = {https://doi.org/10.1145/3748636.3764176},
  year = {2025}
}
DIGMAPPER: A Modular System for Automated Geologic Map Digitization.
Duan, W.; Chiang, Y.; Chen, T.; Gerlek, M. P.; Jang, L.; Kirsanova, S.; Knoblock, C. A.; Lin, F.; Lin, Y.; Li, Z.; and Minton, S. N.
In Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems (SIGSPATIAL ’25), pages 717–728, November 2025. ACM.
@inproceedings{10.1145/3748636.3764602,
  author = {Duan, Weiwei and Chiang, Yao-Yi and Chen, Theresa and Gerlek, Michael P. and Jang, Leeje and Kirsanova, Sofia and Knoblock, Craig A. and Lin, Fandel and Lin, Yijun and Li, Zekun and Minton, Steven N.},
  booktitle = {Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems},
  collection = {SIGSPATIAL ’25},
  doi = {10.1145/3748636.3764602},
  month = {November},
  pages = {717–728},
  publisher = {ACM},
  series = {SIGSPATIAL ’25},
  title = {DIGMAPPER: A Modular System for Automated Geologic Map Digitization},
  url = {https://doi.org/10.1145/3748636.3764602},
  year = {2025}
}
MoVER: Modeling User Heterogeneity with Enriched Trajectory Representations for Human Mobility Prediction.
Lin, Y.; Lin, F.; Kim, J.; and Chiang, Y.
In Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems (SIGSPATIAL ’25), pages 1234–1237, November 2025. ACM.
@inproceedings{10.1145/3748636.3771315,
  author = {Lin, Yijun and Lin, Fandel and Kim, Jina and Chiang, Yao-Yi},
  booktitle = {Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems},
  collection = {SIGSPATIAL ’25},
  doi = {10.1145/3748636.3771315},
  month = {November},
  pages = {1234–1237},
  publisher = {ACM},
  series = {SIGSPATIAL ’25},
  title = {MoVER: Modeling User Heterogeneity with Enriched Trajectory Representations for Human Mobility Prediction},
  url = {https://doi.org/10.1145/3748636.3771315},
  year = {2025}
}
BeSTAD: Behavior-Aware Spatio-Temporal Anomaly Detection for Human Mobility Data.
Xie, J.; Kim, J.; Chiang, Y.; Zhao, L.; and Shafique, K.
In Proceedings of the 2nd ACM SIGSPATIAL International Workshop on Geospatial Anomaly Detection (GeoAnomalies ’25), pages 56–59, November 2025. ACM.
@inproceedings{10.1145/3764914.3770888,
  author = {Xie, Junyi and Kim, Jina and Chiang, Yao-Yi and Zhao, Lingyi and Shafique, Khurram},
  booktitle = {Proceedings of the 2nd ACM SIGSPATIAL International Workshop on Geospatial Anomaly Detection},
  collection = {GeoAnomalies ’25},
  doi = {10.1145/3764914.3770888},
  month = {November},
  pages = {56–59},
  publisher = {ACM},
  series = {GeoAnomalies ’25},
  title = {BeSTAD: Behavior-Aware Spatio-Temporal Anomaly Detection for Human Mobility Data},
  url = {https://doi.org/10.1145/3764914.3770888},
  year = {2025}
}
HiCoTraj: Zero-Shot Demographic Reasoning via Hierarchical Chain-of-Thought Prompting from Trajectory.
Xie, J.; Jiao, Y.; Kim, J.; Chiang, Y.; Zhao, L.; and Shafique, K.
In Proceedings of the 1st ACM SIGSPATIAL International Workshop on Generative and Agentic AI for Multi-Modality Space-Time Intelligence (GeoGenAgent ’25), pages 49–53, November 2025. ACM.
@inproceedings{10.1145/3764915.3770723,
  author = {Xie, Junyi and Jiao, Yuankun and Kim, Jina and Chiang, Yao-Yi and Zhao, Lingyi and Shafique, Khurram},
  booktitle = {Proceedings of the 1st ACM SIGSPATIAL International Workshop on Generative and Agentic AI for Multi-Modality Space-Time Intelligence},
  collection = {GeoGenAgent ’25},
  doi = {10.1145/3764915.3770723},
  month = {November},
  pages = {49–53},
  publisher = {ACM},
  series = {GeoGenAgent ’25},
  title = {HiCoTraj: Zero-Shot Demographic Reasoning via Hierarchical Chain-of-Thought Prompting from Trajectory},
  url = {https://doi.org/10.1145/3764915.3770723},
  year = {2025}
}
Augmenting Human-Centered Racial Covenant Detection and Georeferencing with Plug-and-Play NLP Pipelines.
Pyo, J.; Jiao, Y.; Chiang, Y.; and Corey, M.
In Proceedings of the 1st ACM SIGSPATIAL International Workshop on Human-Centered Geospatial Computing (GeoHCC ’25), pages 10–14, November 2025. ACM.
@inproceedings{10.1145/3764917.3771333,
  author = {Pyo, Jiyoon and Jiao, Yuankun and Chiang, Yao-Yi and Corey, Michael},
  booktitle = {Proceedings of the 1st ACM SIGSPATIAL International Workshop on Human-Centered Geospatial Computing},
  collection = {GeoHCC ’25},
  doi = {10.1145/3764917.3771333},
  month = {November},
  pages = {10–14},
  publisher = {ACM},
  series = {GeoHCC ’25},
  title = {Augmenting Human-Centered Racial Covenant Detection and Georeferencing with Plug-and-Play NLP Pipelines},
  url = {https://doi.org/10.1145/3764917.3771333},
  year = {2025}
}
StreetLens: Enabling Human-Centered AI Agents for Neighborhood Assessment from Street View Imagery.
Kim, J.; Jang, L.; Chiang, Y.; Wang, G.; and Pasco, M. C.
In Proceedings of the 1st ACM SIGSPATIAL International Workshop on Human-Centered Geospatial Computing (GeoHCC ’25), pages 15–19, November 2025. ACM.
@inproceedings{10.1145/3764917.3771334,
  author = {Kim, Jina and Jang, Leeje and Chiang, Yao-Yi and Wang, Guanyu and Pasco, Michelle C.},
  booktitle = {Proceedings of the 1st ACM SIGSPATIAL International Workshop on Human-Centered Geospatial Computing},
  collection = {GeoHCC ’25},
  doi = {10.1145/3764917.3771334},
  month = {November},
  pages = {15–19},
  publisher = {ACM},
  series = {GeoHCC ’25},
  title = {StreetLens: Enabling Human-Centered AI Agents for Neighborhood Assessment from Street View Imagery},
  url = {https://doi.org/10.1145/3764917.3771334},
  year = {2025}
}
Detecting Legend Items on Historical Maps Using GPT-4o with In-Context Learning.
Kirsanova, S.; Duan, W.; and Chiang, Y.
In Proceedings of the 4th ACM SIGSPATIAL International Workshop on Searching and Mining Large Collections of Geospatial Data (GeoSearch ’25), pages 35–38, November 2025. ACM.
@inproceedings{10.1145/3764920.3770590,
  author = {Kirsanova, Sofia and Duan, Weiwei and Chiang, Yao-Yi},
  booktitle = {Proceedings of the 4th ACM SIGSPATIAL International Workshop on Searching and Mining Large Collections of Geospatial Data},
  collection = {GeoSearch ’25},
  doi = {10.1145/3764920.3770590},
  month = {November},
  pages = {35–38},
  publisher = {ACM},
  series = {GeoSearch ’25},
  title = {Detecting Legend Items on Historical Maps Using GPT-4o with In-Context Learning},
  url = {https://doi.org/10.1145/3764920.3770590},
  year = {2025}
}
Region Context from Unifying Points, Lines, and Polygons.
Kim, J.; and Chiang, Y.
In Proceedings of the 3rd ACM SIGSPATIAL International Workshop on Advances in Urban-AI (UrbanAI ’25), pages 94–95, November 2025. ACM.
@inproceedings{10.1145/3764926.3771941,
  author = {Kim, Jina and Chiang, Yao-Yi},
  booktitle = {Proceedings of the 3rd ACM SIGSPATIAL International Workshop on Advances in Urban-AI},
  collection = {UrbanAI ’25},
  doi = {10.1145/3764926.3771941},
  month = {November},
  pages = {94–95},
  publisher = {ACM},
  series = {UrbanAI ’25},
  title = {Region Context from Unifying Points, Lines, and Polygons},
  url = {https://doi.org/10.1145/3764926.3771941},
  year = {2025}
}
CareWELL: Multimodal Region Representation Learning with Spatial Contexts for Urban Health.
Namgung, M.; Chiang, Y.; and Omitaomu, O. A.
In Proceedings of the 3rd ACM SIGSPATIAL International Workshop on Advances in Urban-AI (UrbanAI ’25), pages 27–36, November 2025. ACM.
@inproceedings{10.1145/3764926.3771947,
  author = {Namgung, Min and Chiang, Yao-Yi and Omitaomu, Olufemi A.},
  booktitle = {Proceedings of the 3rd ACM SIGSPATIAL International Workshop on Advances in Urban-AI},
  collection = {UrbanAI ’25},
  doi = {10.1145/3764926.3771947},
  month = {November},
  pages = {27–36},
  publisher = {ACM},
  series = {UrbanAI ’25},
  title = {CareWELL: Multimodal Region Representation Learning with Spatial Contexts for Urban Health},
  url = {https://doi.org/10.1145/3764926.3771947},
  year = {2025}
}
Validating Machine Learning–Derived Built Environment Measures From Google Street View for Urban Aging Research in India.
Atshan, S.; Namgung, M.; Lee, J.; Dhankhar, A.; Khobragade, P.; Cole, A.; Ailshire, J. A.; Adar, S. D.; Chiang, Y.; Lee, J.; and Nichols, E.
SSRN Electronic Journal, 2025.
@article{10.2139/ssrn.5943954,
  author = {Atshan, Samer and Namgung, Min and Lee, Janghyeon and Dhankhar, Anushikha and Khobragade, Pranali and Cole, Aidan and Ailshire, Jennifer A. and Adar, Sara D. and Chiang, Yao-Yi and Lee, Jinkook and Nichols, Emma},
  doi = {10.2139/ssrn.5943954},
  issn = {1556-5068},
  journal = {SSRN Electronic Journal},
  publisher = {Elsevier BV},
  title = {Validating Machine Learning–Derived Built Environment Measures From Google Street View for Urban Aging Research in India},
  url = {https://doi.org/10.2139/ssrn.5943954},
  year = {2025}
}
GeoAnomaly Detection: Towards finding Needles of Anomalous Behavior in a Haystack of Geospatial Data.
Chiang, Y.; Kim, J.; Krause, C.; Mattei, E.; Shafique, K.; Wenk, C.; and Züfle, A.
The SIGSPATIAL Special, 15(1), 2025.
@misc{Chiang2025_geoanomaly_detection_towards_finding_needles_of,
  author = {Yao-Yi Chiang and Joon-Seok Kim and Cory Krause and Enrico Mattei and Khurram Shafique and Carola Wenk and Andreas Züfle},
  howpublished = {The SIGSPATIAL Special 15 (1)},
  title = {GeoAnomaly Detection: Towards finding Needles of Anomalous Behavior in a Haystack of Geospatial Data},
  url = {https://dl.acm.org/doi/abs/10.1145/3757932.3757935},
  year = {2025}
}
Fine-Scale Soil Mapping in Alaska with Multimodal Machine Learning.
Jelinski, N. A.; Chiang, Y.; Nawrocki, T.; Macander, M.; Ives, S.; Grunwald, S.; Brungard, C.; Chen, T.; and Lin, Y.
ACM SIGSPATIAL 2025, 2025.
@misc{Jelinski2025_finescale_soil_mapping_in_alaska_with,
  author = {Nicolas A. Jelinski and Yao-Yi Chiang and Timm Nawrocki and Matt Macander and Sue Ives and Sabine Grunwald and Colby Brungard and Theresa Chen and Yijun Lin},
  howpublished = {ACM SIGSPATIAL 2025},
  title = {Fine-Scale Soil Mapping in Alaska with Multimodal Machine Learning},
  url = {https://experts.umn.edu/en/publications/fine-scale-soil-mapping-in-alaska-with-multimodal-machine-learnin/},
  year = {2025}
}
Less is More: Multimodal Region Representation via Pairwise Inter-view Learning.
Namgung, M.; Lin, Y.; Lee, J.; and Chiang, Y.
arXiv, 2025.
@misc{Namgung2025_less_is_more_multimodal_region_representation,
  abstract = {With the increasing availability of geospatial datasets, researchers have explored region representation learning (RRL) to analyze complex region characteristics. Recent RRL methods use contrastive learning (CL) to capture shared information between two modalities but often overlook task-relevant unique information specific to each modality. Such modality-specific details can explain region characteristics that shared information alone cannot capture. Bringing information factorization to RRL can address this by factorizing multimodal data into shared and unique information. However, existing factorization approaches focus on two modalities, whereas RRL can benefit from various geospatial data. Extending factorization beyond two modalities is non-trivial because modeling high-order relationships introduces a combinatorial number of learning objectives, increasing model complexity. We introduce Cross modal Knowledge Injected Embedding, an information factorization approach for RRL that captures both shared and unique representations. CooKIE uses a pairwise inter-view learning approach that captures high-order information without modeling high-order dependency, avoiding exhaustive combinations. We evaluate CooKIE on three regression tasks and a land use classification task in New York City and Delhi, India. Results show that CooKIE outperforms existing RRL methods and a factorized RRL model, capturing multimodal information with fewer training parameters and floating-point operations per second (FLOPs). We release the code: https://github.com/MinNamgung/CooKIE.},
  author = {Min Namgung and Yijun Lin and JangHyeon Lee and Yao-Yi Chiang},
  howpublished = {arXiv},
  primaryclass = {cs.LG},
  title = {Less is More: Multimodal Region Representation via Pairwise Inter-view Learning},
  url = {https://arxiv.org/api/RnQIRUtWQ/Dj2XGXX8a3TcyOxS0},
  year = {2025}
}
With the increasing availability of geospatial datasets, researchers have explored region representation learning (RRL) to analyze complex region characteristics. Recent RRL methods use contrastive learning (CL) to capture shared information between two modalities but often overlook task-relevant unique information specific to each modality. Such modality-specific details can explain region characteristics that shared information alone cannot capture. Bringing information factorization to RRL can address this by factorizing multimodal data into shared and unique information. However, existing factorization approaches focus on two modalities, whereas RRL can benefit from various geospatial data. Extending factorization beyond two modalities is non-trivial because modeling high-order relationships introduces a combinatorial number of learning objectives, increasing model complexity. We introduce Cross modal Knowledge Injected Embedding (CooKIE), an information factorization approach for RRL that captures both shared and unique representations. CooKIE uses a pairwise inter-view learning approach that captures high-order information without modeling high-order dependency, avoiding exhaustive combinations. We evaluate CooKIE on three regression tasks and a land use classification task in New York City and Delhi, India. Results show that CooKIE outperforms existing RRL methods and a factorized RRL model, capturing multimodal information with fewer training parameters and floating-point operations per second (FLOPs). We release the code: https://github.com/MinNamgung/CooKIE.
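For readers who want a concrete picture of the pairwise inter-view idea summarized above, the sketch below shows one plausible way to set it up: each region gets one embedding per modality view, and a contrastive loss is applied only to pairs of views rather than to every higher-order combination of views. This is a minimal illustrative sketch, not the released CooKIE implementation (see the repository linked in the abstract); the function names, the InfoNCE formulation, and the temperature value are assumptions.

import itertools
import torch
import torch.nn.functional as F

def info_nce(z_a, z_b, temperature=0.1):
    # Symmetric InfoNCE between two batches of region embeddings from two views;
    # row i of each batch is assumed to describe the same region (the positive pair).
    z_a, z_b = F.normalize(z_a, dim=-1), F.normalize(z_b, dim=-1)
    logits = z_a @ z_b.t() / temperature
    labels = torch.arange(z_a.size(0), device=z_a.device)
    return 0.5 * (F.cross_entropy(logits, labels) + F.cross_entropy(logits.t(), labels))

def pairwise_interview_loss(view_embeddings):
    # Sum the contrastive loss over modality *pairs* only, avoiding the
    # combinatorial number of objectives that modeling every higher-order
    # subset of views would require.
    total = 0.0
    for z_a, z_b in itertools.combinations(view_embeddings, 2):
        total = total + info_nce(z_a, z_b)
    return total

# Example: three hypothetical views (e.g., imagery, POIs, mobility) of 8 regions, 64-dim each.
views = [torch.randn(8, 64) for _ in range(3)]
loss = pairwise_interview_loss(views)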
Modeling approaches for estimating the effects of risk factors using longitudinal lifecourse exposure data in dementia research.
Nichols, E.; Bindas, A.; Atshan, S.; Chang, H.; Chiang, Y.; Henn, B. C.; Hayes-Larson, E.; Keller, K. P.; Kezios, K. L.; Shih, R. A.; Szpiro, A. A.; Weiss, J.; Adar, S. D.; Knapp, D. M.; Lee, J.; and Weuve, J.
Alzheimer's & Dementia, 21(12), 2025.
@misc{Nichols2025_modeling_approaches_for_estimating_the_effects,
  author = {Emma Nichols and Ava Bindas and Samer Atshan and Howard Chang and Yao-Yi Chiang and Birgit Claus Henn and Eleanor Hayes-Larson and Kayleigh P Keller and Katrina L Kezios and Regina A Shih and Adam A Szpiro and Jordan Weiss and Sara D Adar and David M Knapp and Jinkook Lee and Jennifer Weuve},
  howpublished = {Alzheimer's \& Dementia 21 (12)},
  title = {Modeling approaches for estimating the effects of risk factors using longitudinal lifecourse exposure data in dementia research},
  url = {https://alz-journals.onlinelibrary.wiley.com/doi/abs/10.1002/alz.70971},
  year = {2025}
}
FRIEDA: Benchmarking Multi-Step Cartographic Reasoning in Vision-Language Models.
Pyo, J.; Jiao, Y.; Jung, D.; Li, Z.; Jang, L.; Kirsanova, S.; Kim, J.; Lin, Y.; Liu, Q.; Xie, J.; Askari, H.; Xu, N.; Chen, M.; and Chiang, Y.
arXiv, 2025.
@misc{Pyo2025_frieda_benchmarking_multistep_cartographic_reasoning_in,
  abstract = {Cartographic reasoning is the skill of interpreting geographic relationships by aligning legends, map scales, compass directions, map texts, and geometries across one or more map images. Although essential as a concrete cognitive capability and for critical tasks such as disaster response and urban planning, it remains largely unevaluated. Building on progress in chart and infographic understanding, recent large vision language model studies on map visual question-answering often treat maps as a special case of charts. In contrast, map VQA demands comprehension of layered symbology (e.g., symbols, geometries, and text labels) as well as spatial relations tied to orientation and distance that often span multiple maps and are not captured by chart-style evaluations. To address this gap, we introduce FRIEDA, a benchmark for testing complex open-ended cartographic reasoning in LVLMs. FRIEDA sources real map images from documents and reports in various domains and geographical areas. Following classifications in Geographic Information System (GIS) literature, FRIEDA targets all three categories of spatial relations: topological (border, equal, intersect, within), metric (distance), and directional (orientation). All questions require multi-step inference, and many require cross-map grounding and reasoning. We evaluate eleven state-of-the-art LVLMs under two settings: (1) the direct setting, where we provide the maps relevant to the question, and (2) the contextual setting, where the model may have to identify the maps relevant to the question before reasoning. Even the strongest models, Gemini-2.5-Pro and GPT-5-Think, achieve only 38.20% and 37.20% accuracy, respectively, far below human performance of 84.87%. These results reveal a persistent gap in multi-step cartographic reasoning, positioning FRIEDA as a rigorous benchmark to drive progress on spatial intelligence in LVLMs.},
  author = {Jiyoon Pyo and Yuankun Jiao and Dongwon Jung and Zekun Li and Leeje Jang and Sofia Kirsanova and Jina Kim and Yijun Lin and Qin Liu and Junyi Xie and Hadi Askari and Nan Xu and Muhao Chen and Yao-Yi Chiang},
  howpublished = {arXiv},
  primaryclass = {cs.CV},
  title = {FRIEDA: Benchmarking Multi-Step Cartographic Reasoning in Vision-Language Models},
  url = {https://arxiv.org/api/cAn1zHHcKlclQ3NWAR92PR4QKLc},
  year = {2025}
}
Cartographic reasoning is the skill of interpreting geographic relationships by aligning legends, map scales, compass directions, map texts, and geometries across one or more map images. Although essential as a concrete cognitive capability and for critical tasks such as disaster response and urban planning, it remains largely unevaluated. Building on progress in chart and infographic understanding, recent large vision language model studies on map visual question-answering often treat maps as a special case of charts. In contrast, map VQA demands comprehension of layered symbology (e.g., symbols, geometries, and text labels) as well as spatial relations tied to orientation and distance that often span multiple maps and are not captured by chart-style evaluations. To address this gap, we introduce FRIEDA, a benchmark for testing complex open-ended cartographic reasoning in LVLMs. FRIEDA sources real map images from documents and reports in various domains and geographical areas. Following classifications in Geographic Information System (GIS) literature, FRIEDA targets all three categories of spatial relations: topological (border, equal, intersect, within), metric (distance), and directional (orientation). All questions require multi-step inference, and many require cross-map grounding and reasoning. We evaluate eleven state-of-the-art LVLMs under two settings: (1) the direct setting, where we provide the maps relevant to the question, and (2) the contextual setting, where the model may have to identify the maps relevant to the question before reasoning. Even the strongest models, Gemini-2.5-Pro and GPT-5-Think, achieve only 38.20% and 37.20% accuracy, respectively, far below human performance of 84.87%. These results reveal a persistent gap in multi-step cartographic reasoning, positioning FRIEDA as a rigorous benchmark to drive progress on spatial intelligence in LVLMs.
WalkCLIP: Multimodal Learning for Urban Walkability Prediction.
Xiang, S.; Lee, J.; Namgung, M.; and Chiang, Y.
arXiv, 2025.
@misc{Xiang2025_walkclip_multimodal_learning_for_urban_walkability,
  abstract = {Urban walkability is a cornerstone of public health, sustainability, and quality of life. Traditional walkability assessments rely on surveys and field audits, which are costly and difficult to scale. Recent studies have used satellite imagery, street view imagery, or population indicators to estimate walkability, but these single-source approaches capture only one dimension of the walking environment. Satellite data describe the built environment from above, but overlook the pedestrian perspective. Street view imagery captures conditions at the ground level, but lacks broader spatial context. Population dynamics reveal patterns of human activity but not the visual form of the environment. We introduce WalkCLIP, a multimodal framework that integrates these complementary viewpoints to predict urban walkability. WalkCLIP learns walkability-aware vision-language representations from GPT-4o generated image captions, refines these representations with a spatial aggregation module that incorporates neighborhood context, and fuses the resulting features with representations from a population dynamics foundation model. Evaluated at 4,660 locations throughout Minneapolis-Saint Paul, WalkCLIP outperforms unimodal and multimodal baselines in both predictive accuracy and spatial alignment. These results show that the integration of visual and behavioral signals yields reliable predictions of the walking environment.},
  author = {Shilong Xiang and JangHyeon Lee and Min Namgung and Yao-Yi Chiang},
  howpublished = {arXiv},
  primaryclass = {cs.CV},
  title = {WalkCLIP: Multimodal Learning for Urban Walkability Prediction},
  url = {https://arxiv.org/api/4piXGGnxE2zcdD3UZMJB+nH1NA8},
  year = {2025}
}
Urban walkability is a cornerstone of public health, sustainability, and quality of life. Traditional walkability assessments rely on surveys and field audits, which are costly and difficult to scale. Recent studies have used satellite imagery, street view imagery, or population indicators to estimate walkability, but these single-source approaches capture only one dimension of the walking environment. Satellite data describe the built environment from above, but overlook the pedestrian perspective. Street view imagery captures conditions at the ground level, but lacks broader spatial context. Population dynamics reveal patterns of human activity but not the visual form of the environment. We introduce WalkCLIP, a multimodal framework that integrates these complementary viewpoints to predict urban walkability. WalkCLIP learns walkability-aware vision-language representations from GPT-4o generated image captions, refines these representations with a spatial aggregation module that incorporates neighborhood context, and fuses the resulting features with representations from a population dynamics foundation model. Evaluated at 4,660 locations throughout Minneapolis-Saint Paul, WalkCLIP outperforms unimodal and multimodal baselines in both predictive accuracy and spatial alignment. These results show that the integration of visual and behavioral signals yields reliable predictions of the walking environment.
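As a rough illustration of the fusion described in this abstract, the sketch below combines a vision-language embedding of a location with attention over embeddings of neighboring locations (a stand-in for the spatial aggregation module) and a population-dynamics feature vector, followed by a small regression head for the walkability score. This is not the authors' code; the class name, tensor dimensions, and the choice of multi-head attention for neighborhood aggregation are assumptions made only for illustration.

import torch
import torch.nn as nn

class WalkabilityFusion(nn.Module):
    # Hypothetical fusion head: CLIP-style location embeddings, attention over
    # neighboring locations for spatial context, then concatenation with
    # population-dynamics features and a small regression head.
    def __init__(self, clip_dim=512, pop_dim=256, hidden=256, num_heads=4):
        super().__init__()
        self.spatial_agg = nn.MultiheadAttention(clip_dim, num_heads, batch_first=True)
        self.head = nn.Sequential(
            nn.Linear(clip_dim + pop_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, clip_emb, neighbor_emb, pop_emb):
        # clip_emb:     (N, D)    embedding of each target location
        # neighbor_emb: (N, K, D) embeddings of K nearby locations
        # pop_emb:      (N, P)    population-dynamics foundation-model features
        ctx, _ = self.spatial_agg(clip_emb.unsqueeze(1), neighbor_emb, neighbor_emb)
        fused = torch.cat([ctx.squeeze(1), pop_emb], dim=-1)
        return self.head(fused).squeeze(-1)  # one walkability score per location

# Example with random tensors standing in for real embeddings.
model = WalkabilityFusion()
scores = model(torch.randn(4, 512), torch.randn(4, 8, 512), torch.randn(4, 256))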