Convergence of simulation-based policy iteration. Cooper, W., Henderson, S., & Lewis, M. Probability in the Engineering and Informational Sciences, 17:213–234, 2003.
Paper abstract bibtex Simulation-based policy iteration (SBPI) is a modification of the policy iteration algorithm for computing optimal policies for Markov decision processes. At each iteration, rather than solving the average evaluation equations, SBPI employs simulation to estimate a solution to these equations. For recurrent average-reward Markov decision processes with finite state and action spaces, we provide easily-verifiable conditions that ensure that simulation-based policy iteration almost-surely eventually never leaves the set of optimal decision rules. We analyze three simulation estimators for solutions to the average evaluation equations. Using our general results, we derive simple conditions on the simulation runlengths that guarantee the almost-sure convergence of the algorithm.
@article{coohenlew02,
abstract = {Simulation-based policy iteration (SBPI) is a modification of the policy iteration algorithm for computing optimal policies for Markov decision processes. At each iteration, rather than solving the average evaluation equations, SBPI employs simulation to estimate a solution to these equations. For recurrent average-reward Markov decision processes with finite state and action spaces, we provide easily-verifiable conditions that ensure that simulation-based policy iteration almost-surely eventually never leaves the set of optimal decision rules. We analyze three simulation estimators for solutions to the average evaluation equations. Using our general results, we derive simple conditions on the simulation runlengths that guarantee the almost-sure convergence of the algorithm.},
author = {W.~L.\ Cooper and S.~G.\ Henderson and M.~E.\ Lewis},
date-added = {2016-01-10 16:07:54 +0000},
date-modified = {2016-01-10 16:07:54 +0000},
journal = {Probability in the Engineering and Informational Sciences},
pages = {213--234},
title = {Convergence of simulation-based policy iteration},
url_paper = {pubs/csbpi2.pdf},
volume = {17},
year = {2003}}
Downloads: 0
{"_id":"daaZATdNZnMXKoKru","bibbaseid":"cooper-henderson-lewis-convergenceofsimulationbasedpolicyiteration-2003","downloads":0,"creationDate":"2016-01-10T16:41:19.137Z","title":"Convergence of simulation-based policy iteration","author_short":["Cooper, W.","Henderson, S.","Lewis, M."],"year":2003,"bibtype":"article","biburl":"https://people.orie.cornell.edu/shane/ShanePubs.bib","bibdata":{"bibtype":"article","type":"article","abstract":"Simulation-based policy iteration (SBPI) is a modification of the policy iteration algorithm for computing optimal policies for Markov decision processes. At each iteration, rather than solving the average evaluation equations, SBPI employs simulation to estimate a solution to these equations. For recurrent average-reward Markov decision processes with finite state and action spaces, we provide easily-verifiable conditions that ensure that simulation-based policy iteration almost-surely eventually never leaves the set of optimal decision rules. We analyze three simulation estimators for solutions to the average evaluation equations. Using our general results, we derive simple conditions on the simulation runlengths that guarantee the almost-sure convergence of the algorithm.","author":[{"firstnames":["W. L.\\"],"propositions":[],"lastnames":["Cooper"],"suffixes":[]},{"firstnames":["S. G.\\"],"propositions":[],"lastnames":["Henderson"],"suffixes":[]},{"firstnames":["M. E.\\"],"propositions":[],"lastnames":["Lewis"],"suffixes":[]}],"date-added":"2016-01-10 16:07:54 +0000","date-modified":"2016-01-10 16:07:54 +0000","journal":"Probability in the Engineering and Informational Sciences","pages":"213–234","title":"Convergence of simulation-based policy iteration","url_paper":"pubs/csbpi2.pdf","volume":"17","year":"2003","bibtex":"@article{coohenlew02,\n\tabstract = {Simulation-based policy iteration (SBPI) is a modification of the policy iteration algorithm for computing optimal policies for Markov decision processes. At each iteration, rather than solving the average evaluation equations, SBPI employs simulation to estimate a solution to these equations. For recurrent average-reward Markov decision processes with finite state and action spaces, we provide easily-verifiable conditions that ensure that simulation-based policy iteration almost-surely eventually never leaves the set of optimal decision rules. We analyze three simulation estimators for solutions to the average evaluation equations. Using our general results, we derive simple conditions on the simulation runlengths that guarantee the almost-sure convergence of the algorithm.},\n\tauthor = {W.~L.\\ Cooper and S.~G.\\ Henderson and M.~E.\\ Lewis},\n\tdate-added = {2016-01-10 16:07:54 +0000},\n\tdate-modified = {2016-01-10 16:07:54 +0000},\n\tjournal = {Probability in the Engineering and Informational Sciences},\n\tpages = {213--234},\n\ttitle = {Convergence of simulation-based policy iteration},\n\turl_paper = {pubs/csbpi2.pdf},\n\tvolume = {17},\n\tyear = {2003}}\n\n","author_short":["Cooper, W.","Henderson, S.","Lewis, M."],"key":"coohenlew02","id":"coohenlew02","bibbaseid":"cooper-henderson-lewis-convergenceofsimulationbasedpolicyiteration-2003","role":"author","urls":{" paper":"https://people.orie.cornell.edu/shane/pubs/csbpi2.pdf"},"metadata":{"authorlinks":{"henderson, s":"https://people.orie.cornell.edu/shane/"}},"downloads":0},"search_terms":["convergence","simulation","based","policy","iteration","cooper","henderson","lewis"],"keywords":[],"authorIDs":["26gYvdajdPXPJE7x9","2L38RWkDRcZ7pNrCP","2S28FaHcvRe82HALu","2pim5rDqZtSdo8kRL","36vBNrAKQ6ZdJEe2Y","38s23udQojQibpA47","3EcfFsBN3R7JvEDRR","4jsvAjiatq5t2WPmq","569289aeec54c0f347000121","5FnGFhTQX8Nqf69Q4","5XfXT7CCH2CdbGTSi","5ZzCj7tQGxpgMuCrv","5de6d8d7abd988de0100011f","5de7df655e1638de0100030f","5de8ab873cfb74df01000059","5de8b31f3cfb74df010000cc","5de8d64b9e80cdde010001c8","5de8e6b2978afbdf010000a9","5de9d90f5e5ac8de01000168","5dea6e444f43c9de010000bb","5dec3180d39dc7de01000077","5dec3e3bd39dc7de010000fc","5dec9b5893ac84df0100001d","5def306ae83f7dde010000c3","5defa21d706001de01000114","5defac28706001de010001cd","5defc6b1d6a2dcde01000266","5df0d46796fa76de010000f2","5df14b68630a9ee0010001b8","5df21728e4cb4ede01000173","5df26d8f27cd2fde01000026","5df60db92b34d0de01000012","5df68a7d72bbd4df01000092","5df6c36bf9318fdf0100010b","5df7f538b9bb17df010000bf","5df896d110b1d1de01000042","5df94503ccc001de010000af","5dfa00842e791dde01000001","5dfaa028669fc3df0100011f","5dfea6522331dfdf0100004b","5e000c759292b5de01000008","5e031b17f2d70dde010000d5","5e0461336ef264df0100005d","5e05486e1965a8de010000d0","5e061c888e1565f201000063","5e07ea8ef1089ddf01000086","5e086de7ffbbabdf01000047","5e08b6697dc1dcdf010000c8","5e090cba79e131f3010000ea","5e095acc72022bde0100009b","5e0bfcf7e0da25de0100007e","5e0c51a3abd50cdf01000084","5e0c68f28d508bde010004f0","5e0db73bc7ca67df01000070","5e0e082ae2dbbedf0100006a","5e0ef46b5500a7e0010000ad","5e110755d6a01ede0100006d","5e1179e67da100de01000153","5e11980d91bc7ade010000d8","5e11ebb93e1c29de010000b0","5e123faac196d3de01000101","5e136663f16095df01000094","5e154e21edfb1ede0100011f","5e17839ecf35a4de0100014c","5e1bac6261cb16df01000080","5e1c006bbadffbde0100008b","5e1cc7ff0744c2de010001d9","5e1cebbeabed9bde010001ba","5e1d36df6b18c4df0100009f","5e1d928c3a6d8cde010001de","5e1e08aa2cced5de0100025d","5e1e30d4407a20de010000f9","5e1e6aaece9ed9de0100009b","5e1ea72ebedb58de010000c3","5e1fc88b2b05b2de010000d0","5e20de47b46c27ee0100014e","5e2399c8ed52aedf0100009b","5e28c8316acacbdf010001a3","5e28d0f6a3df5bdf0100003e","5e28f7f641639df30100005e","5e2d13616b217bdf0100009b","5e2d9f5c481fd6de0100008d","5e2dfea9524f94de01000041","5e2f694c0c34b9f20100007f","5e2f90ad48b7a4df01000149","5e31377d5a3ceade010000cf","5e31f8497c8d24df010000c5","5e333e71e0067bde01000011","5e3389e17a676dee01000134","5e35ea7d5cd57fde01000067","5e39930ed14579de01000204","5e3cca90948886de010000fe","5e3d52cadc4cd0f30100012b","5e404f26668183de01000123","5e41855cd9f47bee010002a3","5e420d0cebe241de01000157","5e422cec70cecede01000180","5e423354ac099bde01000037","5e424120ac099bde010000d7","5e43badf44c4f9df0100003b","5e4423effdc393de010001f2","5e4451e7df3c2af3010000f1","5e44be577759a7df0100006e","5e49b478cb98e8de01000091","5e4aa2afb70966df01000296","5e4b63e4cf8e89df01000096","5e4be9588f0677df010000ed","5e4cb94a160d65de01000182","5e4e5f347aa348df010001d9","5e4f1191e5389bde0100007b","5e4f3af9aa67a8de010000f8","5e5124c6fe63cfde01000034","5e51f9988240c0df0100008a","5e5322a512ed54df01000052","5e548ccafd5b13df01000038","5e54953dfd5b13df01000090","5e54b74e929495df01000102","5e555c86e89e5fde010000eb","5e55cff08061d1de010000b0","5e592a2ee60e02de01000059","5e5be9f8d49321e00100003e","5e5d776c0b73f6de010000c8","5e5ea2ce2fd1fade01000005","5e5eeaa3cc2eefde010000a5","5e6051b64a1c7edf01000064","5e606b639119f0de01000048","5e62c494cb259cde01000102","5e65403e0c7028de01000036","5e657ec86e5f4cf3010000d5","5e65cc3f14abd4de01000021","5e66600c46e828de01000176","5e69549caf718af2010002b3","5e6960a7af718af201000734","5e699a5020d4e9de010003f1","5e69e4013aab3cdf01000161","5e69ed893aab3cdf0100025e","5e6aabf3f216f6de01000138","5h4sgGHdCH72vAZaM","5ogF2CXCHfy6f484d","679Lm4jHWxqDJ8TiQ","6DyHskuuDXDGAzxHc","6nxwuyYbC6RW6ctek","6xnaCE8mZhgGBwoWi","6yshnZNE7WuhXH3zH","7Z5DLqPvh2BSihDYW","7cLQ9ngEAmrJ8o7ng","855LJ9b6fiWcscwFN","8Bsp47GekpgnCPQm4","8LpJRKsPW6YmLjZJ9","8eBEDfxsYFTcr6D8t","8iYgnty9yNpPx8ww7","8mvfqLAJDTcR47GXK","93H6SkR5ZZFczSQtF","9K4PKpyNwMnu6auGZ","9Qa7okxpMsCqu8Evw","9mnDgN4uqrz56E7FJ","9r3r2fPaAJWs3wXTg","AHQynee39fDWtZgBp","AWRZiPM7bCygBnXyG","AWxK6srKr36K857wh","AjYpJy8GHz5zJsf4H","Ausug2EGy92q96QPz","AxPH3xkMhKWuSpuyE","BHRCwsQXFpwDGc7fp","BbCWAbwCksowiDgHx","Ci5qw2GnxzxcJKDKo","CzzsKfT9JDCHEGYro","DQpuYXoY9deoFBaTM","EcHzQ49GGQ5vDZNLD","Ef3oocL9JKQdn9Pqq","EoAdiTpWDQfvqNMzE","F7ZzX5AsxaZoSgMFe","FFJ9Ae9pqtG42WKB6","FMkfumjCrWrrKQczE","FncA2ugmrk7yCPcZa","G2jFvsGAsqtaAovYM","G4EPY8koyPYHu6css","G5ugWiyiNQhicyqLP","GBNP6u82dyKn7rRkb","GXNi3uL7npAJxPkGQ","GpLkACYbcdZQ4LwdL","HYZqQAsEvasCppbuQ","HcP5Y9NFYc8Yojerg","HmP6LEgH9pKWqqPTy","HxwuuTuGWzf92icM4","JQuMGxgwzdR7sgxer","KAC5CGdzyQP4Wkc3D","Kk5EXnqpC5Cz65qCC","KsGc3CYAdcxGs8gNr","Ksk7KbxxhY3Ns7TRi","LJtGuaMSm2sCckWtw","LczSiSueKtaPM7AE3","MQrjHaLK4jomDMbeQ","McRaN26eWz4pvucZr","MdAPEtvyZWwXGfraS","NRRzjdv4ndHYvfofz","NiQpvaq38Pyuxo7sy","NkGZ2B3Rbj6g2MGwq","NqzqFFoKxN6tYyk8w","P7YN63ELLz4NXJotE","PFN4Ei227rh2Qxg9m","PXHrDY3LzsWyjkfH4","PeQ4rmLD8wLeNY39b","PfeDfM2K83tid3rA3","PhxXW7otyx9WwjSKa","PuZ8iZc94SYeu4GXv","QSavEd56isEzWSaGw","QnxXSMc5dBy77wZrs","QrdwRk3KXpNtadmeu","Rzrm76FFjE8xtscXR","S7XiMg9rfWuwddJMk","SEuAab2YuQaZp9Kde","SJ3PFN9RrLd2xi73p","T5TEj4TjxX3m6Z9za","TWcF2afrECWeXE4zJ","Tid6mW8cSXEe6RhWC","TsgA3oJKfMznyrR3f","WBRZyawcRu8gHXhrb","WWw2gZPyC8uLobjBS","WkhJ2E4yWrEJZq4Bk","XZL2CrSt6cfr4AbTe","Xb5P5zQGyqXyETFcT","Xhz6teaHpeim5fgkz","Xs8K9NawGq7viWdx7","XxBEsxiJpvDSkHwXQ","YMgHnMefgLiJEcTE3","Yvc8TZP5uBbxkcoXx","ZEBGCShPtkc7dR47b","ZFhDzqJSGaWx3tXZS","ZKadrEXFEkyief8xP","ZMn96y9dnoZpNNFHy","ZNF4tdDfuxBcjPNQr","ZRwZoRp5JnXD7nE7A","a5L6Bovd4stCePXii","a8QTuBQsxdGiPaTWP","aBN9ZrMni4AQYdfJm","aPFfQEZ9vKNkXy76M","aWWb9vq6XiTGKydz4","aYnk9v2xWPghMLGyu","b7hFe2YtzFbaPjg9A","bAnfMPpLJEKZZPePo","bBtfBJuvfws2WTJ4Y","bFsrsBuRb5nZhPfsE","dMZZR69yri54uFRhF","dSvGC6HzQgogDHy8f","ddpGacRuPforDhNzP","dkycrSPuYCmYHGiyY","dpBkCSXvuqRsjmAvP","duuQg8k8ZdwwRrgHA","e4QDjo2cojpWbA2FB","eMZdNEpEwzHdJyiT9","eWhngvkkPZeEGDsNs","eiJ7CHbyR2bYkPABw","fJuitccD2ezptNyo4","fLZeo3fwtsLxbaCzY","fPL8orbGXeaqeC2jv","fTs69k9HD9EDA6n4t","fYKiLZc9uogemS8mR","gRZLEHEL3bZsH9zB4","gv4yfeAf5ETLsNJ3o","h5FuGphsRDoZ3ntjR","hYEDsPLfrnPSFD8ba","hmjQRFMMGibFRfywb","hvWJGBtXa7LprQrss","ikdxjgfSujMGaEeuK","jAR4mi7YHYiRob65L","jEy68rHQFAh3369Dq","jXvs75SNQbHk8n26n","jz2BcEwfjC54yRWKT","k9ToFTaCqpCuRZfiK","kGf4ver4WvHM7Henm","kJ8zAHjdfnS9evg8e","kQLgRGZtRtbCu5CWQ","kksLvcknF9XCph3o8","kqNzuZW5QzyMC3NxQ","m5g4d2Ji5svFKfux2","mAqBMDgpdft8xosuS","mAvNfBKRHJs6Tf8GS","mCWMz9bshhBGganAv","mT35iKsRN84gooYso","md2cqfvKnwptM6x4D","mj5gvsoL3jHE7ycpF","n6fxq5nmkMGpbtrBn","nK52pNES5XKuHbk93","nLLdweZausMkCGJBb","neSCfH5uTpXEYx4PZ","nzsudJ3iaZqRkGKJA","o4QTNyHrFNm2Tn5rB","oB5DAkJusk5QrvMXc","oKQpecoAfcKZKdcp7","oM2vTjZAxRGBJJ5yh","oaqwoetYHNQcyyPSH","ojXkTd4kwMMYDGLky","ooYc6K5B8R87e7tuQ","oq9D4NZrqtGAG7pNE","pMSrJwNhffGiXw6LM","pPKZF868cEFjrHvG9","pezgZ2rz2DpBXKoxW","pjNJ9XaNuJq3enQf4","pmqK5QgF7tvFYCK7c","qB98657rKoQ2NN3o9","qCEAs5D2dXWTaXmsd","qfv8TS6NXKzQ2dP3e","qq5hdyFc5iEE48bfd","qw6N6haZvsNpg6JLs","rrYEng8q6EdhSGXL7","rwbaSRgkFWuJfAun2","s42rEXJehoarEyz43","sRLXGDDkQWrRJJJxP","suFwo2T3nx8T8XC3z","tKwA9xoRXgMDvf2ga","tSqtNKBiZes7dnSGC","tpLYe4KxGtnSxemcF","uFB9zrWpuJcP5Rs6W","uHoncFSXgEFtykauH","ukX5cWtSNPoY4kapW","vGMGbRbSQgtAb528R","vYauPKschnTPzPTTS","wDimZdXeX2Zg8Y7Mg","wHWQ8xBQeaszmm7jJ","wXEQe57PS5wMwDeCA","wi4AvGDtqgJANYiyT","wkB4afQhPTsyctvpW","wnP349xGsoeknZpXk","xLBNmFpRSQzeXfmj5","xThWiQnYRhfLcCESh","yH8xrJTcnFSDbocHc","ybJ8CoH86jDtFXbrw","z6EnaMv3Ph9XbKHDA","zKBMW5DJAxBj3bokS","zmYD3Z7SyZwWnpv2c","zuXc5NysK9hRSYYcJ","zzkyM84igmCKJcDup"],"dataSources":["ZCuKDjctePZJeeaBw","SEqonpKnx4miWre2P"]}