-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathForzenLake.py
83 lines (63 loc) · 2.06 KB
/
ForzenLake.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import time, random, math
import numpy as np
import gym
def updateQTable(prevState, prevAction, reward, curState):
    """Apply a one-step Q-learning update to the module-level ``q_table``.

    The entry ``q_table[prevState][prevAction]`` is moved towards
    ``reward + DISCOUNT * max(q_table[curState])`` by a fraction
    ``LEARNING_RATE`` of the difference.

    Args:
        prevState: Row index of the state the action was taken from.
        prevAction: Column index of the action that was taken.
        reward: Reward credited to that transition.
        curState: Row index of the state reached after the action.

    Returns:
        None. ``q_table`` is modified in place.
    """
    old_estimate = q_table[prevState][prevAction]
    # Bootstrapped target: immediate reward plus the discounted best value
    # currently recorded for the successor state.
    td_target = reward + DISCOUNT * max(q_table[curState])
    q_table[prevState][prevAction] += LEARNING_RATE * (td_target - old_estimate)
def getAction(curState):
    """Choose an action for ``curState`` with an epsilon-greedy rule.

    One uniform random number is drawn on every call. If it is below
    ``EPSILON`` a random action is sampled from ``env.action_space``;
    otherwise the index of the largest entry in ``q_table[curState]`` is
    returned (``np.argmax`` resolves ties to the lowest index).

    Args:
        curState: Row index into ``q_table`` for the current state.

    Returns:
        The chosen action index.
    """
    explore = random.random() < EPSILON
    if explore:
        return env.action_space.sample()
    return np.argmax(q_table[curState])
# --- Run configuration -----------------------------------------------------
GAME = 'FrozenLake-v0'
RECORD = None  # not referenced anywhere else in this script
MAX_EPISODES = 10000
MAX_STEPS = 100  # 100 for FrozenLake v0
EPSILON = 0  # chance that getAction samples a random action; 0 => always argmax
DISCOUNT = 0.5  # weight of the successor state's best value in updateQTable
LEARNING_RATE = 0.01  # step size used by updateQTable

# --- Environment and Q-table -----------------------------------------------
env = gym.make(GAME)

# Both spaces are read through ``.n``; the sizes double as the Q-table shape
# and as the "High" figures printed below.
# NOTE(review): if ``.n`` is a count of values starting at 0, the printed
# "High" is an exclusive bound rather than the largest valid index - confirm.
in_dimen = obsMax = env.observation_space.n
out_dimen = actionMax = env.action_space.n
obsMin = actionMin = 0

# One row per state, one column per action, all estimates starting at zero.
q_table = np.zeros((in_dimen, out_dimen))

# --- Summary banner ----------------------------------------------------------
print("\nObservation\n--------------------------------")
print("Shape :", in_dimen, " | High :", obsMax, " | Low :", obsMin)
print("\nAction\n--------------------------------")
print("Shape :", out_dimen, " | High :", actionMax, " | Low :", actionMin, "\n")
# Train the Q-table for MAX_EPISODES episodes of at most MAX_STEPS steps each,
# printing the average raw reward per episode for every window of
# REPORT_EVERY episodes, then print the learned table.
REPORT_EVERY = 1000

# Sum of the environment's own (unshaped) rewards in the current window.
totalreward = 0
for episode in range(MAX_EPISODES):
    if episode % REPORT_EVERY == 0:
        print("Episode =", episode)

    curState = env.reset()
    for step in range(MAX_STEPS):
        # Call env.render() here to watch training step by step.
        prevState = curState
        action = getAction(curState)
        curState, reward, done, info = env.step(action)

        # Accumulate before shaping so the report reflects the raw score.
        totalreward += reward

        # Reward shaping for zero-reward steps: a large penalty when the
        # episode ended, a small per-step cost otherwise.
        # NOTE(review): every `done` with zero reward gets the -1 penalty; if
        # the environment also ends episodes on a step limit, that case is
        # penalised the same way - confirm this is intended.
        if reward == 0:
            reward = -1 if done else -0.001

        updateQTable(prevState, action, reward, curState)
        if done:
            break

    # Report once the window is complete. Checking at the end of the episode
    # (rather than at the start of the next window) avoids printing an
    # average before any episode has run and ensures the final window is
    # reported as well.
    if (episode + 1) % REPORT_EVERY == 0:
        print("Avg Reward =", totalreward / REPORT_EVERY)
        totalreward = 0

print(q_table)
# Play one rendered episode using the trained table and print the total raw
# reward collected. Actions still come from getAction, so this rollout is
# purely greedy only while EPSILON is 0.
curState = env.reset()
totalreward = 0
for step in range(MAX_STEPS):
    env.render()
    # prevState is recorded for symmetry with the training loop; nothing in
    # this rollout reads it.
    prevState, action = curState, getAction(curState)
    curState, reward, done, info = env.step(action)
    totalreward = totalreward + reward
    if done:
        break

print(totalreward)