# -*- coding: utf-8 -*-
"""
Created on Sat Apr 18 03:38:58 2020
@author: yyimi
"""
# Test script: train Sarsa and Q-learning with value-function approximation
# on a maze environment and plot the results.
import numpy as np
import environment.env_maze as env
import environment.maze_sim as mazesim
import algorithm.qlearning_funct_app as QL
import algorithm.sarsa_func_app as SA
from plot.graph import (test_parameters, get_trace, plot_weights,
                        get_determinant_policy, plot_reward, val_path,
                        plot_step)
# General Information
#---------------------------------
# Alternative maze layouts. Each assignment overwrites the previous one, so
# only the last (5x5) maze below is actually used; the 8x8 maze in the
# docstring that follows is a further, currently disabled, alternative.
maze = [[2,1,1,3],
        [0,0,1,0],
        [1,1,1,0],
        [0,0,1,2]]

maze = [[2,1,0,1,1],
        [0,1,1,1,0],
        [3,0,1,0,0],
        [1,1,1,1,2]]

maze = [[2,1,1,1,1],
        [0,0,1,0,1],
        [3,1,1,0,1],
        [0,0,1,1,1],
        [0,0,1,1,2]]
"""
maze = [[2,1,1,0,1,0,0,0],
[1,0,1,0,1,1,1,3],
[1,0,1,1,1,0,0,1],
[1,0,1,0,1,0,0,1],
[1,1,1,0,1,0,0,1],
[1,0,1,1,1,1,1,1],
[1,0,0,0,0,0,1,0],
[0,0,1,1,1,1,1,2]]
"""
action = ["up","right","down","left"]
maze_grid = env.maze_grid(maze,action)
feature_maze_sim = mazesim.MazeSim_Features(maze_grid,feature = "onehot_tiling")
num_episodes = 300
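# The positional arguments 0.9, 0.1, 0.01 passed to both algorithms below are
# hyperparameters; their exact meaning (presumably discount factor, exploration
# rate, and learning rate, in some order) is defined in sarsa_func_app and
# qlearning_funct_app rather than documented here.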
# Analysis for Sarsa
#---------------------------------------
# Returns the final weights, the weights tracked for the key/door stages
# (per the variable names), and per-episode rewards and step counts.
sasw, sasw_key, sasw_door, sasreward, sasteps = SA.sarsa_value_function_approx(
    feature_maze_sim, 0.9, 0.1, 0.01, num_episodes)
# Deterministic (greedy) policy derived from the learned Sarsa weights
determinant_policy = get_determinant_policy(feature_maze_sim, sasw)
print(determinant_policy)
# 1. Trace of the policy through the maze
get_trace(determinant_policy)
val_path(feature_maze_sim, determinant_policy)
# 2. Weight evolution over episodes
plot_weights(num_episodes, sasw_key)
plot_weights(num_episodes, sasw_door)
# 3. Total reward per episode
plot_reward(num_episodes, sasreward)
# 4. Steps per episode
plot_step(num_episodes, sasteps)
# Analysis for Q-Learning
#----------------------------------------
qw, qw_key, qw_door, qreward, qsteps = QL.q_learning_value_function_approx(
    feature_maze_sim, 0.9, 0.1, 0.01, num_episodes)
# Deterministic (greedy) policy derived from the learned Q-learning weights
determinant_policy = get_determinant_policy(feature_maze_sim, qw)
print(determinant_policy)
# 1. Trace of the policy through the maze
get_trace(determinant_policy)
val_path(feature_maze_sim, determinant_policy)
# 2. Weight evolution over episodes
plot_weights(num_episodes, qw_key)
plot_weights(num_episodes, qw_door)
# 3. Total reward per episode
plot_reward(num_episodes, qreward)
# 4. Steps per episode
plot_step(num_episodes, qsteps)
#------------------------------------------------
# Compare two algorithms
#------------------------------------------------
import matplotlib.pyplot as plt

episodes = [j + 1 for j in range(num_episodes)]

# 1. Total reward per episode
plt.figure()
plt.plot(episodes, sasreward, label="Sarsa")
plt.plot(episodes, qreward, label="Q-Learning")
plt.legend()
plt.ylabel("Total Reward")
plt.xlabel("Episodes")
plt.title("Total Reward per Episode: Sarsa vs Q-Learning")
plt.show()

# 2. Total steps per episode
plt.figure()
plt.plot(episodes, sasteps, label="Sarsa")
plt.plot(episodes, qsteps, label="Q-Learning")
plt.legend()
plt.ylabel("Total Steps")
plt.xlabel("Episodes")
plt.title("Total Steps per Episode: Sarsa vs Q-Learning")
plt.show()

# 3. Q-learning vs Q-learning with TDC
# Note: `a` is not defined anywhere in this script; it is apparently meant to
# hold the results of a Q-learning-with-TDC run, with a[-1] being its
# per-episode step counts. The block is left commented out until such a run
# is wired in, so the script runs end to end.
# plt.figure()
# plt.plot(episodes, qsteps, label="Q-learning")
# plt.plot(episodes, a[-1], label="Q-learning with TDC")
# plt.ylabel("Total Steps")
# plt.xlabel("Episodes")
# plt.legend()
# plt.title("Total Steps per Episode: Q-learning vs Q-learning with TDC")
# plt.show()