# lunarlandingdraftwind.py
import os
import math
import torch
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import DQN
from stable_baselines3.common.results_plotter import ts2xy, load_results
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
import gym

# # @title Plotting/Video functions
# from IPython.display import HTML
# from pyvirtualdisplay import Display
# from IPython import display as ipythondisplay
# Constant parameters (these match the values used in gym's LunarLander implementation).
FPS = 50
SCALE = 30.0  # affects how fast-paced the game is; forces should be adjusted as well

MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6

INITIAL_RANDOM = 1000.0  # Set 1500 to make the game harder

LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40

SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0

VIEWPORT_W = 600
VIEWPORT_H = 400
print(f"Is CUDA supported by this system?{torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
# Storing ID of current CUDA device
cuda_id = torch.cuda.current_device()
print(f"ID of current CUDA device:{torch.cuda.current_device()}")
print(f"Name of current CUDA device:{torch.cuda.get_device_name(cuda_id)}")
import gym
cuda = torch.device('cuda') # Default CUDA device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# with torch.cuda.device(0):
nn_layers = [64, 64]  # Neural-network configuration: two hidden layers of 64 neurons each.
                      # For three layers of 64 neurons, use [64, 64, 64], and so on.
learning_rate = 0.001  # Step size for gradient descent. Tip: use smaller step sizes for larger networks.
                       # (Note: the DQN below passes learning_rate=6.3e-4 explicitly.)

env = gym.make('LunarLander-v2')

# Directory where monitor and evaluation logs are written.
# log_dir = "/tmp/gym/"  # Linux default; overridden by the Windows path below.
log_dir = 'C:/Users/carla/PycharmProjects/atchekegroup1lunarlanding/gym2007/'
os.makedirs(log_dir, exist_ok=True)
from gym.envs.box2d import LunarLander
def step(self, action):
    # Module-level copy of gym's LunarLander.step with wind; nothing calls it directly,
    # since Custom_LunarLander_wind below defines its own step with the same logic.
    assert self.lander is not None, "You forgot to call reset()"
    # Update wind
if self.enable_wind and not (
self.legs[0].ground_contact or self.legs[1].ground_contact
):
# the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
wind_mag = (
math.tanh(
math.sin(0.02 * self.wind_idx) + (math.sin(math.pi * 0.01 * self.wind_idx))
)
* self.wind_power
)
self.wind_idx += 1
self.lander.ApplyForceToCenter(
(wind_mag, 0.0),
True,
)
# the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
torque_mag = math.tanh(
math.sin(0.02 * self.torque_idx)
+ (math.sin(math.pi * 0.01 * self.torque_idx))
) * (self.turbulence_power)
self.torque_idx += 1
self.lander.ApplyTorque(
(torque_mag),
True,
)
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid "
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (
not self.continuous and action == 2
):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
# 4 is move a bit downwards, +-2 for randomness
ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(
3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power,
) # particles are just a decoration
p.ApplyLinearImpulse(
(ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (
not self.continuous and action in [1, 3]
):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action - 2
s_power = 1.0
ox = tip[0] * dispersion[0] + side[0] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
oy = -tip[1] * dispersion[0] - side[1] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse(
(ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0,
]
assert len(state) == 8
reward = 0
shaping = (
-100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
- 100 * abs(state[4])
+ 10 * state[6]
+ 10 * state[7]
) # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= (
m_power * 0.30
) # less fuel spent is better, about -30 for heuristic landing. You should modify these values.
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
done = True
reward = -100
if not self.lander.awake:
done = True
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
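# Illustrative sketch (not part of the original script): both the wind force and the
# turbulence torque above use tanh(sin(2*k*x) + sin(pi*k*x)) with k = 0.01, scaled by
# wind_power / turbulence_power. Plotting the raw function over the step index shows
# the bounded, non-periodic disturbance applied to the lander. Disabled by default.
PLOT_WIND_PROFILE = False
if PLOT_WIND_PROFILE:
    idx = np.arange(0, 2000)
    wind_profile = np.tanh(np.sin(0.02 * idx) + np.sin(np.pi * 0.01 * idx))
    plt.plot(idx, wind_profile)
    plt.xlabel('wind_idx')
    plt.ylabel('normalised disturbance magnitude')
    plt.title('Wind/turbulence profile: tanh(sin(2kx) + sin(pi kx)), k = 0.01')
    plt.show()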
class Custom_LunarLander_wind(LunarLander):
    """Custom LunarLander environment with wind and turbulence, following the gym interface."""
    metadata = {'render.modes': ['human']}

    def __init__(self, enable_wind=True, wind_power=15.0, turbulence_power=1.5):
        LunarLander.__init__(self)
        # Wind parameters are set explicitly here because the installed gym version's
        # LunarLander may not define them (defaults follow gym's documented wind values).
        self.enable_wind = enable_wind
        self.wind_power = wind_power
        self.turbulence_power = turbulence_power
        self.wind_idx = 0
        self.torque_idx = 0

    def step(self, action):
        # assert self.enable_wind
        # Update wind. Note: do not call reset() here; resetting on every step
        # would throw away the current episode.
        assert self.lander is not None, "You forgot to call reset()"
if self.enable_wind and not (
self.legs[0].ground_contact or self.legs[1].ground_contact
):
# the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
##modifying wind function 21/07/2022
wind_mag = (
math.tanh(
math.sin(0.02 * self.wind_idx)
+ (math.sin(math.pi * 0.01 * self.wind_idx))
)
* self.wind_power
)
self.wind_idx += 1
self.lander.ApplyForceToCenter(
(wind_mag, 0.0),
True,
)
# the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
# which is proven to never be periodic, k = 0.01
torque_mag = math.tanh(
math.sin(0.02 * self.torque_idx)
+ (math.sin(math.pi * 0.01 * self.torque_idx))
) * (self.turbulence_power)
self.torque_idx += 1
self.lander.ApplyTorque(
(torque_mag),
True,
)
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(
action
), f"{action!r} ({type(action)}) invalid "
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (
not self.continuous and action == 2
):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
# 4 is move a bit downwards, +-2 for randomness
ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(
3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power,
) # particles are just a decoration
p.ApplyLinearImpulse(
(ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (
not self.continuous and action in [1, 3]
):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action - 2
s_power = 1.0
ox = tip[0] * dispersion[0] + side[0] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
oy = -tip[1] * dispersion[0] - side[1] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse(
(ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0,
]
assert len(state) == 8
reward = 0
shaping = (
-100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
- 100 * abs(state[4])
+ 10 * state[6]
+ 10 * state[7]
) # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= (
m_power * 0.30
) # less fuel spent is better, about -30 for heuristic landing. You should modify these values.
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
done = True
reward = -100
if not self.lander.awake:
done = True
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
    def reset(self):
        # Delegate to the parent reset so the moon, lander and legs are re-created.
        # Only the observation is returned (reward, done, info can't be included).
        return LunarLander.reset(self)

    def render(self, mode='human'):
        return LunarLander.render(self, mode=mode)

    def close(self):
        return LunarLander.close(self)
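# Optional sanity check (illustrative, not part of the original script): roll the custom
# environment forward with random actions to confirm reset() and step() work before
# handing it to the DQN trainer. Uses the same 4-tuple step API as the class above.
RUN_ENV_SMOKE_TEST = False
if RUN_ENV_SMOKE_TEST:
    check_env = Custom_LunarLander_wind()
    obs = check_env.reset()
    for _ in range(10):
        obs, rew, done, info = check_env.step(check_env.action_space.sample())
        if done:
            obs = check_env.reset()
    check_env.close()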
env = Custom_LunarLander_wind()
env = Monitor(env, log_dir)  # Monitor writes monitor.csv so load_results() below can read episode rewards.
callback = EvalCallback(env, log_path=log_dir, deterministic=True)  # Periodically evaluates the agent and logs the results.
policy_kwargs = dict(activation_fn=torch.nn.ReLU,
net_arch=nn_layers)
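# Note (illustrative): with net_arch=[64, 64] the Q-network is an MLP with two hidden
# layers of 64 ReLU units, mapping the 8-dimensional LunarLander observation to Q-values
# for the 4 discrete actions (no-op, left engine, main engine, right engine).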
model_test = DQN("MlpPolicy", env, policy_kwargs=policy_kwargs,
                 learning_rate=6.3e-4,
                 batch_size=128,                # number of transitions sampled per gradient update
                 buffer_size=50000,             # size of the experience replay buffer
                 learning_starts=0,             # learning starts immediately
                 gamma=0.99,                    # discount factor; range is between 0 and 1
                 tau=1,                         # soft-update coefficient for the target network (1 = hard update)
                 target_update_interval=250,    # update the target network every 250 steps
                 train_freq=(4, "step"),        # train the network every 4 steps
                 # max_grad_norm=10,            # the maximum value for gradient clipping
                 exploration_initial_eps=0.9,   # initial value of the random-action probability; range is between 0 and 1
                 exploration_fraction=0.8,      # fraction of the training period over which the exploration rate is reduced
                 gradient_steps=-1,             # do as many gradient steps as environment steps collected
                 exploration_final_eps=0.1,     # final value of the random-action probability (library default is 0.05)
                 seed=1,                        # seed for the pseudo-random generators
                 device=device,                 # use the device selected above (GPU if available, else CPU)
                 verbose=1)
model_test.learn(total_timesteps=100000, log_interval=10, callback=callback)
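# Optional follow-up (illustrative, not part of the original script): save the trained
# weights and report the mean evaluation return using stable_baselines3's evaluate_policy.
# The file name "dqn_lunarlander_wind" is an arbitrary choice.
from stable_baselines3.common.evaluation import evaluate_policy
model_test.save(os.path.join(log_dir, "dqn_lunarlander_wind"))
mean_reward, std_reward = evaluate_policy(model_test, env, n_eval_episodes=10, deterministic=True)
print(f"Mean evaluation reward: {mean_reward:.1f} +/- {std_reward:.1f}")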
x, y = ts2xy(load_results(log_dir), 'timesteps')  # Organising the logged results into a clean format for plotting.
plt.plot(x, y)
plt.ylim([-300, 300])
plt.xlabel('Timesteps')
plt.ylabel('Episode Rewards')
plt.title('Carl parameters model')
plt.show()
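# Optional smoothing (illustrative): raw episode rewards are noisy, so a moving average
# over a 50-episode window makes the learning trend easier to read. The window size is
# an arbitrary choice, not taken from the original script.
window = 50
if len(y) >= window:
    y_smooth = np.convolve(y, np.ones(window) / window, mode='valid')
    plt.plot(x[window - 1:], y_smooth)
    plt.ylim([-300, 300])
    plt.xlabel('Timesteps')
    plt.ylabel('Episode Rewards (50-episode moving average)')
    plt.title('Carl parameters model (smoothed)')
    plt.show()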