-
Notifications
You must be signed in to change notification settings - Fork 0
/
gridworld.py
125 lines (86 loc) · 3.54 KB
/
gridworld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import numpy as np
class GridWorld():
    """Simple grid-navigation environment with a Gym-like step/reset API.

    The agent starts each episode at cell (0, 0) and moves one cell per
    step (up/down/left/right, clipped at the grid edges) until it reaches
    the goal cell. Every step costs -1 reward, so shorter paths score higher.
    """

    def __init__(self, grid_dim=(10, 10), randomize_goal=False, goal_position=(9, 9)):
        """Configure the grid.

        Args:
            grid_dim: (rows, cols) size of the grid.
            randomize_goal: if True, a fresh goal cell (never the start
                cell (0, 0)) is sampled on every reset(); goal_position
                is then only the initial value.
            goal_position: fixed (row, col) of the goal when
                randomize_goal is False.

        Raises:
            AssertionError: if the fixed goal lies outside the grid, or
                if randomize_goal is True on a single-cell grid (no cell
                other than the start exists to place the goal on).
        """
        if not randomize_goal:
            # Check that the fixed goal position is a valid cell on the grid.
            assert 0 <= goal_position[0] < grid_dim[0]
            assert 0 <= goal_position[1] < grid_dim[1]
        else:
            # The goal must differ from the start cell, so the grid needs
            # more than one cell or resampling could never terminate.
            assert grid_dim[0] * grid_dim[1] > 1
        # Save environment parameters
        self.grid_dim = np.array(grid_dim)
        self.randomize_goal = randomize_goal
        self.goal_position = np.array(goal_position)
        # Episode state; populated by reset()
        self.state = None
        self.steps = 0
        self.total_reward = 0

    def __observation__(self):
        # Observation is the agent position concatenated with the goal position.
        return [self.state[0], self.state[1], self.goal_position[0], self.goal_position[1]]

    def __take_action__(self, action):
        """Move the agent one cell; moves off the grid edge are no-ops.

        Args:
            action: integer 0-3 inclusive (0=up, 1=down, 2=left, 3=right).
                Any other value leaves the state unchanged.
        """
        if action == 0:
            # Up: do nothing on the top edge
            if self.state[0] > 0:
                self.state[0] -= 1
        elif action == 1:
            # Down: do nothing on the bottom edge
            if self.state[0] < self.grid_dim[0] - 1:
                self.state[0] += 1
        elif action == 2:
            # Left: do nothing on the left edge
            if self.state[1] > 0:
                self.state[1] -= 1
        elif action == 3:
            # Right: do nothing on the right edge
            if self.state[1] < self.grid_dim[1] - 1:
                self.state[1] += 1

    def __reward__(self):
        # Constant -1 per step; also accumulated into the episode total.
        self.total_reward -= 1
        return -1

    def reset(self):
        """Start a new episode; returns the initial observation."""
        # The agent always starts at (0, 0).
        self.state = np.array((0, 0))
        # Reset per-episode counters.
        self.steps = 0
        self.total_reward = 0
        if self.randomize_goal:
            # Re-sample until the goal differs from the starting position.
            # Bug fix vs. original: `goal != state` on numpy arrays raises
            # (ambiguous truth value), the loop condition was inverted, and
            # the sampled goal was never written back to self.goal_position.
            goal = self.state
            while np.array_equal(goal, self.state):
                goal = np.array([np.random.randint(0, self.grid_dim[0]),
                                 np.random.randint(0, self.grid_dim[1])])
            self.goal_position = goal
        return self.__observation__()

    def step(self, action):
        """Advance the environment one time step.

        Args:
            action: integer 0-3 inclusive (see __take_action__).

        Returns:
            (observation, reward, done, info) where done is True once the
            agent occupies the goal cell, and info carries the running
            step count and total reward.
        """
        self.__take_action__(action)
        reward = self.__reward__()
        self.steps += 1
        # Episode terminates when the agent reaches the goal.
        done = np.array_equal(self.state, self.goal_position)
        info = {
            "steps": self.steps,
            "total_reward": self.total_reward
        }
        return self.__observation__(), reward, done, info

    def __str__(self):
        """Render the grid: 'O' is the agent, 'X' the goal (agent drawn on top)."""
        if self.state is None:
            return "Environment not yet initialized. Call reset() to initialize"
        outStr = ""
        for i in range(self.grid_dim[0]):
            for j in range(self.grid_dim[1]):
                if i == self.state[0] and j == self.state[1]:
                    outStr += "O"
                # Bug fix vs. original: this was a chained comparison
                # (i == g0 == j == g1), which is only True when
                # i == j == both goal coordinates, so for most goals the
                # 'X' was never drawn.
                elif i == self.goal_position[0] and j == self.goal_position[1]:
                    outStr += "X"
                else:
                    outStr += " "
            outStr += "\n"
        return outStr