Safe Reinforcement Learning via Shielding

Safe Reinforcement Learning via Shielding. Alshiekh, M., Bloem, R., Ehlers, R., Könighofer, B., Niekum, S., & Topcu, U. Proceedings of the AAAI Conference on Artificial Intelligence, 32(1), April 2018.

Paper doi abstract bibtex

Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield monitors the actions from the learner and corrects them only if the chosen action causes a violation of the specification. We discuss which requirements a shield must meet to preserve the convergence guarantees of the learner. Finally, we demonstrate the versatility of our approach on several challenging reinforcement learning scenarios.

% Alshiekh et al., "Safe Reinforcement Learning via Shielding" (2018).
% Kept as an article entry: the proceedings carry a volume, an issue number
% and an ISSN, so journal/volume/number render the venue correctly.
% Accented letters use LaTeX escapes so the entry works with classic BibTeX
% as well as biblatex/Biber.
@article{alshiekh_safe_2018,
	author = {Alshiekh, Mohammed and Bloem, Roderick and Ehlers, R{\"u}diger and K{\"o}nighofer, Bettina and Niekum, Scott and Topcu, Ufuk},
	title = {Safe Reinforcement Learning via Shielding},
	journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
	volume = {32},
	number = {1},
	month = apr,
	year = {2018},
	issn = {2374-3468},
	doi = {10.1609/aaai.v32i1.11797},
	url = {https://ojs.aaai.org/index.php/AAAI/article/view/11797},
	urldate = {2023-01-06},
	language = {en},
	keywords = {Formal Methods},
	abstract = {Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield monitors the actions from the learner and corrects them only if the chosen action causes a violation of the specification. We discuss which requirements a shield must meet to preserve the convergence guarantees of the learner. Finally, we demonstrate the versatility of our approach on several challenging reinforcement learning scenarios.},
}

Downloads: 0

{"_id":"EW2LXJciHR8Z4g6vX","bibbaseid":"alshiekh-bloem-ehlers-knighofer-niekum-topcu-safereinforcementlearningviashielding-2018","author_short":["Alshiekh, M.","Bloem, R.","Ehlers, R.","Könighofer, B.","Niekum, S.","Topcu, U."],"bibdata":{"bibtype":"article","type":"article","title":"Safe Reinforcement Learning via Shielding","volume":"32","copyright":"Copyright (c)","issn":"2374-3468","url":"https://ojs.aaai.org/index.php/AAAI/article/view/11797","doi":"10.1609/aaai.v32i1.11797","abstract":"Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield monitors the actions from the learner and corrects them only if the chosen action causes a violation of the specification. We discuss which requirements a shield must meet to preserve the convergence guarantees of the learner. 
Finally, we demonstrate the versatility of our approach on several challenging reinforcement learning scenarios.","language":"en","number":"1","urldate":"2023-01-06","journal":"Proceedings of the AAAI Conference on Artificial Intelligence","author":[{"propositions":[],"lastnames":["Alshiekh"],"firstnames":["Mohammed"],"suffixes":[]},{"propositions":[],"lastnames":["Bloem"],"firstnames":["Roderick"],"suffixes":[]},{"propositions":[],"lastnames":["Ehlers"],"firstnames":["Rüdiger"],"suffixes":[]},{"propositions":[],"lastnames":["Könighofer"],"firstnames":["Bettina"],"suffixes":[]},{"propositions":[],"lastnames":["Niekum"],"firstnames":["Scott"],"suffixes":[]},{"propositions":[],"lastnames":["Topcu"],"firstnames":["Ufuk"],"suffixes":[]}],"month":"April","year":"2018","note":"Number: 1","keywords":"Formal Methods","bibtex":"@article{alshiekh_safe_2018,\n\ttitle = {Safe {Reinforcement} {Learning} via {Shielding}},\n\tvolume = {32},\n\tcopyright = {Copyright (c)},\n\tissn = {2374-3468},\n\turl = {https://ojs.aaai.org/index.php/AAAI/article/view/11797},\n\tdoi = {10.1609/aaai.v32i1.11797},\n\tabstract = {Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield monitors the actions from the learner and corrects them only if the chosen action causes a violation of the specification. We discuss which requirements a shield must meet to preserve the convergence guarantees of the learner. 
Finally, we demonstrate the versatility of our approach on several challenging reinforcement learning scenarios.},\n\tlanguage = {en},\n\tnumber = {1},\n\turldate = {2023-01-06},\n\tjournal = {Proceedings of the AAAI Conference on Artificial Intelligence},\n\tauthor = {Alshiekh, Mohammed and Bloem, Roderick and Ehlers, Rüdiger and Könighofer, Bettina and Niekum, Scott and Topcu, Ufuk},\n\tmonth = apr,\n\tyear = {2018},\n\tnote = {Number: 1},\n\tkeywords = {Formal Methods},\n}\n\n\n\n\n\n\n\n\n\n\n\n","author_short":["Alshiekh, M.","Bloem, R.","Ehlers, R.","Könighofer, B.","Niekum, S.","Topcu, U."],"key":"alshiekh_safe_2018","id":"alshiekh_safe_2018","bibbaseid":"alshiekh-bloem-ehlers-knighofer-niekum-topcu-safereinforcementlearningviashielding-2018","role":"author","urls":{"Paper":"https://ojs.aaai.org/index.php/AAAI/article/view/11797"},"keyword":["Formal Methods"],"metadata":{"authorlinks":{}},"downloads":0,"html":""},"bibtype":"article","biburl":"https://bibbase.org/zotero/alukina","dataSources":["Cfgnp5s4HQSBd8tAf"],"keywords":["formal methods"],"search_terms":["safe","reinforcement","learning","via","shielding","alshiekh","bloem","ehlers","könighofer","niekum","topcu"],"title":"Safe Reinforcement Learning via Shielding","year":2018}