% NOTE(review): removed GitHub web-viewer chrome (navigation text and the gutter
% line numbers 1-262) that was accidentally captured with this file. BibTeX
% ignores text outside entries, but it does not belong in the database.
% Journal name normalised to its canonical casing ("Machine Learning") so it
% matches the other entries citing the same venue (watkins1992q, mihatsch2002risk).
@article{sutton1988learning,
  title     = {Learning to predict by the methods of temporal differences},
  author    = {Sutton, Richard S},
  journal   = {Machine Learning},
  volume    = {3},
  number    = {1},
  pages     = {9--44},
  year      = {1988},
  publisher = {Springer},
}
% Publisher casing fixed ("MIT press" -> "MIT Press").
@book{sutton2018reinforcement,
  title     = {Reinforcement learning: An introduction},
  author    = {Sutton, Richard S and Barto, Andrew G},
  year      = {2018},
  publisher = {MIT Press},
}
% Braced the acronym "AI" so sentence-casing styles do not lowercase it.
@article{leike2017ai,
  title   = {{AI} safety gridworlds},
  author  = {Leike, Jan and Martic, Miljan and Krakovna, Victoria and Ortega, Pedro A and Everitt, Tom and Lefrancq, Andrew and Orseau, Laurent and Legg, Shane},
  journal = {arXiv preprint arXiv:1711.09883},
  year    = {2017},
}
% The OpenAI blog post "Faulty Reward Functions in the Wild" is by Jack Clark
% (not James) and Dario Amodei. Year braced for consistency with the rest of
% the file.
@online{clark2016faulty,
  title   = {Faulty Reward Functions in the Wild},
  author  = {Clark, Jack and Amodei, Dario},
  year    = {2016},
  url     = {https://openai.com/blog/faulty-reward-functions/},
  urldate = {2020-08-20},
}
% {\i} is a bare dotless i with no accent; the author's name is Garc\'ia, so
% the correct BibTeX special character is {\'\i}.
@article{garcia2015comprehensive,
  title   = {A comprehensive survey on safe reinforcement learning},
  author  = {Garc{\'\i}a, Javier and Fern{\'a}ndez, Fernando},
  journal = {Journal of Machine Learning Research},
  volume  = {16},
  number  = {1},
  pages   = {1437--1480},
  year    = {2015},
}
% "Markov" is a proper noun: capitalised and brace-protected so styles keep it.
@article{moldovan2012safe,
  title   = {Safe exploration in {Markov} decision processes},
  author  = {Moldovan, Teodor Mihai and Abbeel, Pieter},
  journal = {arXiv preprint arXiv:1205.4810},
  year    = {2012},
}
@article{fisac2018general,
  author    = {Fisac, Jaime F and Akametalu, Anayo K and Zeilinger, Melanie N and Kaynama, Shahab and Gillula, Jeremy and Tomlin, Claire J},
  title     = {A general safety framework for learning-based control in uncertain robotic systems},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {64},
  number    = {7},
  pages     = {2737--2752},
  year      = {2018},
  publisher = {IEEE},
}
% "Markov" and "Gaussian" are proper nouns: capitalised and brace-protected.
@inproceedings{turchetta2016safe,
  title     = {Safe exploration in finite {Markov} decision processes with {Gaussian} processes},
  author    = {Turchetta, Matteo and Berkenkamp, Felix and Krause, Andreas},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4312--4320},
  year      = {2016},
}
@article{paternain2019safe,
  author  = {Paternain, Santiago and Calvo-Fullana, Miguel and Chamon, Luiz FO and Ribeiro, Alejandro},
  title   = {Safe policies for reinforcement learning via primal-dual methods},
  journal = {arXiv preprint arXiv:1911.09101},
  year    = {2019},
}
@article{geibel2005risk,
  author  = {Geibel, Peter and Wysotzki, Fritz},
  title   = {Risk-sensitive reinforcement learning applied to control under constraints},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {24},
  pages   = {81--108},
  year    = {2005},
}
% Booktitle normalised to the casing used elsewhere in this file for the same
% venue (cf. turchetta2016safe); "Bayesian" brace-protected.
@inproceedings{kim2012cost,
  title     = {Cost-sensitive exploration in {Bayesian} reinforcement learning},
  author    = {Kim, Dongho and Kim, Kee-Eung and Poupart, Pascal},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3068--3076},
  year      = {2012},
}
@article{achiam2017constrained,
  author  = {Achiam, Joshua and Held, David and Tamar, Aviv and Abbeel, Pieter},
  title   = {Constrained policy optimization},
  journal = {arXiv preprint arXiv:1705.10528},
  year    = {2017},
}
% "Lyapunov" is a proper noun: capitalised and brace-protected. Booktitle
% normalised to the casing used elsewhere in this file for the same venue.
@inproceedings{chow2018lyapunov,
  title     = {A {Lyapunov}-based approach to safe reinforcement learning},
  author    = {Chow, Yinlam and Nachum, Ofir and Duenez-Guzman, Edgar and Ghavamzadeh, Mohammad},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {8092--8101},
  year      = {2018},
}
% Dropped the stray trailing period in the title (a DBLP-export artifact);
% bibliography styles supply their own end-of-title punctuation.
@inproceedings{hans2008safe,
  title     = {Safe exploration for reinforcement learning},
  author    = {Hans, Alexander and Schneega{\ss}, Daniel and Sch{\"a}fer, Anton Maximilian and Udluft, Steffen},
  booktitle = {ESANN},
  pages     = {143--148},
  year      = {2008},
}
% "Markov" brace-protected so sentence-casing styles keep the capital.
@book{altman1999constrained,
  title     = {Constrained {Markov} decision processes},
  author    = {Altman, Eitan},
  volume    = {7},
  year      = {1999},
  publisher = {CRC Press},
}
@article{zheng2020constrained,
  author  = {Zheng, Liyuan and Ratliff, Lillian J},
  title   = {Constrained upper confidence reinforcement learning},
  journal = {arXiv preprint arXiv:2001.09377},
  year    = {2020},
}
% Journal name normalised to its canonical casing ("Machine Learning").
@article{mihatsch2002risk,
  title     = {Risk-sensitive reinforcement learning},
  author    = {Mihatsch, Oliver and Neuneier, Ralph},
  journal   = {Machine Learning},
  volume    = {49},
  number    = {2-3},
  pages     = {267--290},
  year      = {2002},
  publisher = {Springer},
}
@article{ray2019benchmarking,
  author  = {Ray, Alex and Achiam, Joshua and Amodei, Dario},
  title   = {Benchmarking safe exploration in deep reinforcement learning},
  journal = {arXiv preprint arXiv:1910.01708},
  year    = {2019},
}
@comment{Removed: exact duplicate of sutton2018reinforcement, which is defined
earlier in this file. A repeated entry key is an error for BibTeX/Biber.}
% "Q-learning" brace-protected (the Q must stay capitalised); journal name
% normalised to its canonical casing.
@article{watkins1992q,
  title     = {{Q-learning}},
  author    = {Watkins, Christopher JCH and Dayan, Peter},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3-4},
  pages     = {279--292},
  year      = {1992},
  publisher = {Springer},
}
@book{aubin2011viability,
  author    = {Aubin, Jean-Pierre and Bayen, Alexandre M and Saint-Pierre, Patrick},
  title     = {Viability theory: new directions},
  year      = {2011},
  publisher = {Springer Science \& Business Media},
}
% Second author is Alexander von Rohr; the "von" particle was missing. Written
% in "von Last, First" form so BibTeX parses the particle correctly.
@inproceedings{heim2020learnable,
  title        = {A Learnable Safety Measure},
  author       = {Heim, Steve and von Rohr, Alexander and Trimpe, Sebastian and Badri-Spr{\"o}witz, Alexander},
  booktitle    = {Conference on Robot Learning},
  pages        = {627--639},
  year         = {2020},
  organization = {PMLR},
}
@article{sutton1992reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G and Williams, Ronald J},
  title     = {Reinforcement learning is direct adaptive optimal control},
  journal   = {IEEE Control Systems Magazine},
  volume    = {12},
  number    = {2},
  pages     = {19--22},
  year      = {1992},
  publisher = {IEEE},
}
@article{baker2019emergent,
  author  = {Baker, Bowen and Kanitscheider, Ingmar and Markov, Todor and Wu, Yi and Powell, Glenn and McGrew, Bob and Mordatch, Igor},
  title   = {Emergent tool use from multi-agent autocurricula},
  journal = {arXiv preprint arXiv:1909.07528},
  year    = {2019},
}
% Added the article number (Science Robotics uses eIDs; this paper is
% 4(26):eaau5872) and replaced the publisher, which merely duplicated the
% journal name, with the journal's actual publisher (AAAS).
@article{hwangbo2019learning,
  title     = {Learning agile and dynamic motor skills for legged robots},
  author    = {Hwangbo, Jemin and Lee, Joonho and Dosovitskiy, Alexey and Bellicoso, Dario and Tsounis, Vassilios and Koltun, Vladlen and Hutter, Marco},
  journal   = {Science Robotics},
  volume    = {4},
  number    = {26},
  pages     = {eaau5872},
  year      = {2019},
  publisher = {American Association for the Advancement of Science},
}
% The author field contained the degree title "M Sc" parsed as a given name;
% the author is Alonso Marco Valle (compound surname braced so it is not
% split). "Bayesian" brace-protected. The entry is still missing its
% year/venue — this appears to be a doctoral thesis, not a journal article;
% internal-note left for whoever can verify the source.
@article{marcobayesian,
  title         = {{Bayesian} Optimization in Robot Learning},
  author        = {{Marco Valle}, Alonso},
  internal-note = {NOTE(review): missing year and venue; likely a PhD thesis -- confirm and switch to @phdthesis with school/year},
}
@comment{Removed: exact duplicate of hans2008safe, which is defined earlier in
this file. A repeated entry key is an error for BibTeX/Biber.}
% "Gaussian" brace-protected so sentence-casing styles keep the capital.
@inproceedings{berkenkamp2016safe,
  title        = {Safe controller optimization for quadrotors with {Gaussian} processes},
  author       = {Berkenkamp, Felix and Schoellig, Angela P and Krause, Andreas},
  booktitle    = {2016 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {491--496},
  year         = {2016},
  organization = {IEEE},
}
% "Hamilton-Jacobi" brace-protected so sentence-casing styles keep both capitals.
@inproceedings{bansal2017hamilton,
  title        = {{Hamilton-Jacobi} reachability: A brief overview and recent advances},
  author       = {Bansal, Somil and Chen, Mo and Herbert, Sylvia and Tomlin, Claire J},
  booktitle    = {2017 IEEE 56th Annual Conference on Decision and Control (CDC)},
  pages        = {2242--2253},
  year         = {2017},
  organization = {IEEE},
}
% Removed the spurious space in the publisher ("JMLR. org" -> "JMLR.org"),
% a common Google-Scholar export artifact.
@article{chow2017risk,
  title     = {Risk-constrained reinforcement learning with percentile risk criteria},
  author    = {Chow, Yinlam and Ghavamzadeh, Mohammad and Janson, Lucas and Pavone, Marco},
  journal   = {The Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6070--6120},
  year      = {2017},
  publisher = {JMLR.org},
}
@article{heim2019beyond,
  author    = {Heim, Steve and Spr{\"o}witz, Alexander},
  title     = {Beyond basins of attraction: Quantifying robustness of natural dynamics},
  journal   = {IEEE Transactions on Robotics},
  volume    = {35},
  number    = {4},
  pages     = {939--952},
  year      = {2019},
  publisher = {IEEE},
}
@inproceedings{pardo2018time,
  author    = {Pardo, Fabio and Tavakoli, Arash and Levdik, Vitaly and Kormushev, Petar},
  title     = {Time limits in reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {4045--4054},
  year      = {2018},
}
% The canonical author order for this monograph is Rasmussen and Williams.
% Dropped the volume/number fields (a Google-Scholar export artifact — the
% book is not part of a numbered series issue), brace-protected "Gaussian",
% and split the publisher city into the address field.
@book{williams2006gaussian,
  title     = {{Gaussian} processes for machine learning},
  author    = {Rasmussen, Carl Edward and Williams, Christopher KI},
  year      = {2006},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}
% Removed the duplicated first author ("Boyd, Stephen and Boyd, Stephen P"
% is a Google-Scholar export artifact — the book has two authors) and fixed
% the publisher casing.
@book{boyd2004convex,
  title     = {Convex optimization},
  author    = {Boyd, Stephen P and Vandenberghe, Lieven},
  year      = {2004},
  publisher = {Cambridge University Press},
}
@inproceedings{zaytsev2015two,
  author       = {Zaytsev, Petr and Hasaneini, S Javad and Ruina, Andy},
  title        = {Two steps is enough: No need to plan far ahead for walking balance},
  booktitle    = {2015 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {6295--6300},
  year         = {2015},
  organization = {IEEE},
}