evaluate_model.py
import gym
from random import randint
from csv import reader

from stable_baselines3 import A2C
# Load the trained A2C agent and create the (modified) LunarLander
# environment used for evaluation.
model = A2C.load("./best_models/combined_600_1000")
env = gym.make('LunarLander-v2')

# Read the generated test levels: each CSV row is one terrain, stored as a
# list of floats.
with open('./moderate_dataset/urgan_test_samples.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    list_of_rows = list(csv_reader)
test_samples = [[float(j) for j in i] for i in list_of_rows]

TEST_LEVEL_NUMS = 20
cumulative_reward_ls = []
last_reward_ls = []

for i in range(TEST_LEVEL_NUMS):
    # load_terrain() and set_initial_x() are custom methods of the modified
    # environment: they install the generated terrain and place the lander
    # at a random horizontal start position.
    env.load_terrain(test_samples[i])
    init_position = randint(1, 18)
    env.set_initial_x(init_position)
    obs = env.reset()

    # Roll out one full episode with the deterministic policy.
    cumulative_reward = 0.0
    last_reward = 0.0
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        cumulative_reward += reward
        last_reward = reward
        env.render(mode='rgb_array')
        if done:
            break

    print("Cumulative Reward", cumulative_reward, "Last Reward", last_reward)
    cumulative_reward_ls.append(cumulative_reward)
    last_reward_ls.append(last_reward)

print(" -------------- Final Average Results -----------------")
print("Mean Cumulative Reward", sum(cumulative_reward_ls) / len(cumulative_reward_ls),
      "Mean Last Reward", sum(last_reward_ls) / len(last_reward_ls))
env.close()
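
# `load_terrain` and `set_initial_x` are not part of the standard
# LunarLander-v2 API, so this script assumes a modified environment that
# exposes them. A minimal, hypothetical sketch of what that interface might
# look like (the class name and reset() behavior below are assumptions for
# illustration, not this repo's actual implementation):
#
#   from gym.envs.box2d.lunar_lander import LunarLander
#
#   class TerrainLunarLander(LunarLander):
#       def load_terrain(self, y_values):
#           # Store the generated terrain heights; reset() would rebuild
#           # the world from these values.
#           self.terrain_y_values = y_values
#
#       def set_initial_x(self, x):
#           # Store the lander's initial horizontal start position.
#           self.initial_x = x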