Towards the Integration of Reinforcement Learning into MASPY. Mellado, A. L. L., Borges, A. P., Cardoso, R. C., & Alves, G. V. In Workshop-Escola de Sistemas de Agentes, seus Ambientes e Aplicações (WESAAC), pages 21–28, September 2025.
@inproceedings{mellado_towards_2025,
	title = {Towards the {Integration} of {Reinforcement} {Learning} into {MASPY}},
	copyright = {Copyright (c)},
	issn = {2326-5434},
	url = {https://sol.sbc.org.br/index.php/wesaac/article/view/37544},
	doi = {10.5753/wesaac.2025.37544},
	abstract = {Learning in symbolic agent architectures remains a key challenge in the development of adaptive multi-agent systems. This paper introduces a learning module for MASPY, a Python-based framework inspired by the Belief-Desire-Intention (BDI) model. The module enables agents to learn optimal actions using tabular reinforcement learning algorithms, such as Q-Learning and SARSA. To support this, we propose the SART methodology, which decomposes the learning environment into four structured components: States, Actions, Rewards, and Transitions. This structure allows MASPY agents to perceive their environment through defined percepts, act through decorated functions, and adapt over time using discrete learning strategies. The learning module offers a unified Python-based architecture for symbolic reasoning agents that learn through reinforcement training. This is shown practically with a toy problem where agents are able to learn to execute the actions of a previously unknown environment.},
	language = {en},
	urldate = {2026-03-27},
	booktitle = {Workshop-{Escola} de {Sistemas} de {Agentes}, seus {Ambientes} e {Aplicações} ({WESAAC})},
	author = {Mellado, Alexandre L. L. and Borges, André Pinz and Cardoso, Rafael C. and Alves, Gleifer Vaz},
	month = sep,
	year = {2025},
	pages = {21--28},
}
