% NOTE(review): removed GitHub web-viewer chrome (navigation text and the gutter
% line numbers 1-262) that was accidentally captured with this file. BibTeX
% ignores text outside entries, but it does not belong in the database.
% Journal name normalised to its canonical casing ("Machine Learning") so it
% matches the other entries citing the same venue (watkins1992q, mihatsch2002risk).
@article{sutton1988learning,
  title     = {Learning to predict by the methods of temporal differences},
  author    = {Sutton, Richard S},
  journal   = {Machine Learning},
  volume    = {3},
  number    = {1},
  pages     = {9--44},
  year      = {1988},
  publisher = {Springer},
}
% Publisher casing fixed ("MIT press" -> "MIT Press").
@book{sutton2018reinforcement,
  title     = {Reinforcement learning: An introduction},
  author    = {Sutton, Richard S and Barto, Andrew G},
  year      = {2018},
  publisher = {MIT Press},
}
% Braced the acronym "AI" so sentence-casing styles do not lowercase it.
@article{leike2017ai,
  title   = {{AI} safety gridworlds},
  author  = {Leike, Jan and Martic, Miljan and Krakovna, Victoria and Ortega, Pedro A and Everitt, Tom and Lefrancq, Andrew and Orseau, Laurent and Legg, Shane},
  journal = {arXiv preprint arXiv:1711.09883},
  year    = {2017},
}
% The OpenAI blog post "Faulty Reward Functions in the Wild" is by Jack Clark
% (not James) and Dario Amodei. Year braced for consistency with the rest of
% the file.
@online{clark2016faulty,
  title   = {Faulty Reward Functions in the Wild},
  author  = {Clark, Jack and Amodei, Dario},
  year    = {2016},
  url     = {https://openai.com/blog/faulty-reward-functions/},
  urldate = {2020-08-20},
}
% {\i} is a bare dotless i with no accent; the author's name is Garc\'ia, so
% the correct BibTeX special character is {\'\i}.
@article{garcia2015comprehensive,
  title   = {A comprehensive survey on safe reinforcement learning},
  author  = {Garc{\'\i}a, Javier and Fern{\'a}ndez, Fernando},
  journal = {Journal of Machine Learning Research},
  volume  = {16},
  number  = {1},
  pages   = {1437--1480},
  year    = {2015},
}
% "Markov" is a proper noun: capitalised and brace-protected so styles keep it.
@article{moldovan2012safe,
  title   = {Safe exploration in {Markov} decision processes},
  author  = {Moldovan, Teodor Mihai and Abbeel, Pieter},
  journal = {arXiv preprint arXiv:1205.4810},
  year    = {2012},
}
@article{fisac2018general,
  author    = {Fisac, Jaime F and Akametalu, Anayo K and Zeilinger, Melanie N and Kaynama, Shahab and Gillula, Jeremy and Tomlin, Claire J},
  title     = {A general safety framework for learning-based control in uncertain robotic systems},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {64},
  number    = {7},
  pages     = {2737--2752},
  year      = {2018},
  publisher = {IEEE},
}
% "Markov" and "Gaussian" are proper nouns: capitalised and brace-protected.
@inproceedings{turchetta2016safe,
  title     = {Safe exploration in finite {Markov} decision processes with {Gaussian} processes},
  author    = {Turchetta, Matteo and Berkenkamp, Felix and Krause, Andreas},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4312--4320},
  year      = {2016},
}
@article{paternain2019safe,
  author  = {Paternain, Santiago and Calvo-Fullana, Miguel and Chamon, Luiz FO and Ribeiro, Alejandro},
  title   = {Safe policies for reinforcement learning via primal-dual methods},
  journal = {arXiv preprint arXiv:1911.09101},
  year    = {2019},
}
@article{geibel2005risk,
  author  = {Geibel, Peter and Wysotzki, Fritz},
  title   = {Risk-sensitive reinforcement learning applied to control under constraints},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {24},
  pages   = {81--108},
  year    = {2005},
}
% Booktitle normalised to the casing used elsewhere in this file for the same
% venue (cf. turchetta2016safe); "Bayesian" brace-protected.
@inproceedings{kim2012cost,
  title     = {Cost-sensitive exploration in {Bayesian} reinforcement learning},
  author    = {Kim, Dongho and Kim, Kee-Eung and Poupart, Pascal},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3068--3076},
  year      = {2012},
}
@article{achiam2017constrained,
  author  = {Achiam, Joshua and Held, David and Tamar, Aviv and Abbeel, Pieter},
  title   = {Constrained policy optimization},
  journal = {arXiv preprint arXiv:1705.10528},
  year    = {2017},
}
% "Lyapunov" is a proper noun: capitalised and brace-protected. Booktitle
% normalised to the casing used elsewhere in this file for the same venue.
@inproceedings{chow2018lyapunov,
  title     = {A {Lyapunov}-based approach to safe reinforcement learning},
  author    = {Chow, Yinlam and Nachum, Ofir and Duenez-Guzman, Edgar and Ghavamzadeh, Mohammad},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {8092--8101},
  year      = {2018},
}
% Dropped the stray trailing period in the title (a DBLP-export artifact);
% bibliography styles supply their own end-of-title punctuation.
@inproceedings{hans2008safe,
  title     = {Safe exploration for reinforcement learning},
  author    = {Hans, Alexander and Schneega{\ss}, Daniel and Sch{\"a}fer, Anton Maximilian and Udluft, Steffen},
  booktitle = {ESANN},
  pages     = {143--148},
  year      = {2008},
}
% "Markov" brace-protected so sentence-casing styles keep the capital.
@book{altman1999constrained,
  title     = {Constrained {Markov} decision processes},
  author    = {Altman, Eitan},
  volume    = {7},
  year      = {1999},
  publisher = {CRC Press},
}
@article{zheng2020constrained,
  author  = {Zheng, Liyuan and Ratliff, Lillian J},
  title   = {Constrained upper confidence reinforcement learning},
  journal = {arXiv preprint arXiv:2001.09377},
  year    = {2020},
}
% Journal name normalised to its canonical casing ("Machine Learning").
@article{mihatsch2002risk,
  title     = {Risk-sensitive reinforcement learning},
  author    = {Mihatsch, Oliver and Neuneier, Ralph},
  journal   = {Machine Learning},
  volume    = {49},
  number    = {2-3},
  pages     = {267--290},
  year      = {2002},
  publisher = {Springer},
}
@article{ray2019benchmarking,
  author  = {Ray, Alex and Achiam, Joshua and Amodei, Dario},
  title   = {Benchmarking safe exploration in deep reinforcement learning},
  journal = {arXiv preprint arXiv:1910.01708},
  year    = {2019},
}
@comment{Removed: exact duplicate of sutton2018reinforcement, which is defined
earlier in this file. A repeated entry key is an error for BibTeX/Biber.}
% "Q-learning" brace-protected (the Q must stay capitalised); journal name
% normalised to its canonical casing.
@article{watkins1992q,
  title     = {{Q-learning}},
  author    = {Watkins, Christopher JCH and Dayan, Peter},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3-4},
  pages     = {279--292},
  year      = {1992},
  publisher = {Springer},
}
@book{aubin2011viability,
  author    = {Aubin, Jean-Pierre and Bayen, Alexandre M and Saint-Pierre, Patrick},
  title     = {Viability theory: new directions},
  year      = {2011},
  publisher = {Springer Science \& Business Media},
}
% Second author is Alexander von Rohr; the "von" particle was missing. Written
% in "von Last, First" form so BibTeX parses the particle correctly.
@inproceedings{heim2020learnable,
  title        = {A Learnable Safety Measure},
  author       = {Heim, Steve and von Rohr, Alexander and Trimpe, Sebastian and Badri-Spr{\"o}witz, Alexander},
  booktitle    = {Conference on Robot Learning},
  pages        = {627--639},
  year         = {2020},
  organization = {PMLR},
}
@article{sutton1992reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G and Williams, Ronald J},
  title     = {Reinforcement learning is direct adaptive optimal control},
  journal   = {IEEE Control Systems Magazine},
  volume    = {12},
  number    = {2},
  pages     = {19--22},
  year      = {1992},
  publisher = {IEEE},
}
@article{baker2019emergent,
  author  = {Baker, Bowen and Kanitscheider, Ingmar and Markov, Todor and Wu, Yi and Powell, Glenn and McGrew, Bob and Mordatch, Igor},
  title   = {Emergent tool use from multi-agent autocurricula},
  journal = {arXiv preprint arXiv:1909.07528},
  year    = {2019},
}
% Added the article number (Science Robotics uses eIDs; this paper is
% 4(26):eaau5872) and replaced the publisher, which merely duplicated the
% journal name, with the journal's actual publisher (AAAS).
@article{hwangbo2019learning,
  title     = {Learning agile and dynamic motor skills for legged robots},
  author    = {Hwangbo, Jemin and Lee, Joonho and Dosovitskiy, Alexey and Bellicoso, Dario and Tsounis, Vassilios and Koltun, Vladlen and Hutter, Marco},
  journal   = {Science Robotics},
  volume    = {4},
  number    = {26},
  pages     = {eaau5872},
  year      = {2019},
  publisher = {American Association for the Advancement of Science},
}
% The author field contained the degree title "M Sc" parsed as a given name;
% the author is Alonso Marco Valle (compound surname braced so it is not
% split). "Bayesian" brace-protected. The entry is still missing its
% year/venue — this appears to be a doctoral thesis, not a journal article;
% internal-note left for whoever can verify the source.
@article{marcobayesian,
  title         = {{Bayesian} Optimization in Robot Learning},
  author        = {{Marco Valle}, Alonso},
  internal-note = {NOTE(review): missing year and venue; likely a PhD thesis -- confirm and switch to @phdthesis with school/year},
}
@comment{Removed: exact duplicate of hans2008safe, which is defined earlier in
this file. A repeated entry key is an error for BibTeX/Biber.}
% "Gaussian" brace-protected so sentence-casing styles keep the capital.
@inproceedings{berkenkamp2016safe,
  title        = {Safe controller optimization for quadrotors with {Gaussian} processes},
  author       = {Berkenkamp, Felix and Schoellig, Angela P and Krause, Andreas},
  booktitle    = {2016 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {491--496},
  year         = {2016},
  organization = {IEEE},
}
% "Hamilton-Jacobi" brace-protected so sentence-casing styles keep both capitals.
@inproceedings{bansal2017hamilton,
  title        = {{Hamilton-Jacobi} reachability: A brief overview and recent advances},
  author       = {Bansal, Somil and Chen, Mo and Herbert, Sylvia and Tomlin, Claire J},
  booktitle    = {2017 IEEE 56th Annual Conference on Decision and Control (CDC)},
  pages        = {2242--2253},
  year         = {2017},
  organization = {IEEE},
}
% Removed the spurious space in the publisher ("JMLR. org" -> "JMLR.org"),
% a common Google-Scholar export artifact.
@article{chow2017risk,
  title     = {Risk-constrained reinforcement learning with percentile risk criteria},
  author    = {Chow, Yinlam and Ghavamzadeh, Mohammad and Janson, Lucas and Pavone, Marco},
  journal   = {The Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6070--6120},
  year      = {2017},
  publisher = {JMLR.org},
}
@article{heim2019beyond,
  author    = {Heim, Steve and Spr{\"o}witz, Alexander},
  title     = {Beyond basins of attraction: Quantifying robustness of natural dynamics},
  journal   = {IEEE Transactions on Robotics},
  volume    = {35},
  number    = {4},
  pages     = {939--952},
  year      = {2019},
  publisher = {IEEE},
}
@inproceedings{pardo2018time,
  author    = {Pardo, Fabio and Tavakoli, Arash and Levdik, Vitaly and Kormushev, Petar},
  title     = {Time limits in reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {4045--4054},
  year      = {2018},
}
% The canonical author order for this monograph is Rasmussen and Williams.
% Dropped the volume/number fields (a Google-Scholar export artifact — the
% book is not part of a numbered series issue), brace-protected "Gaussian",
% and split the publisher city into the address field.
@book{williams2006gaussian,
  title     = {{Gaussian} processes for machine learning},
  author    = {Rasmussen, Carl Edward and Williams, Christopher KI},
  year      = {2006},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}
% Removed the duplicated first author ("Boyd, Stephen and Boyd, Stephen P"
% is a Google-Scholar export artifact — the book has two authors) and fixed
% the publisher casing.
@book{boyd2004convex,
  title     = {Convex optimization},
  author    = {Boyd, Stephen P and Vandenberghe, Lieven},
  year      = {2004},
  publisher = {Cambridge University Press},
}
@inproceedings{zaytsev2015two,
  author       = {Zaytsev, Petr and Hasaneini, S Javad and Ruina, Andy},
  title        = {Two steps is enough: No need to plan far ahead for walking balance},
  booktitle    = {2015 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {6295--6300},
  year         = {2015},
  organization = {IEEE},
}