Random plant grid and new json write format
w07wong committed Dec 6, 2019
1 parent 7953c71 commit f3941fe
Showing 7 changed files with 51 additions and 20 deletions.
Binary file not shown.
@@ -28,6 +28,9 @@ def _next_observation(self):
    def _take_action(self, action):
        return self.wrapper_env.take_action(action)

    def get_current_step(self):
        return self.current_step

    def step(self, action):
        state = self._take_action(action)
        self.current_step += 1
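The new get_current_step accessor is read in pipeline.py through the vectorized environment wrapper as env.env_method('get_current_step')[0]. A minimal, self-contained sketch of that call pattern, assuming a stable-baselines DummyVecEnv; the CounterEnv class is a hypothetical stand-in, not part of this repo:

import gym
import numpy as np
from stable_baselines.common.vec_env import DummyVecEnv

class CounterEnv(gym.Env):
    """Stand-in env (hypothetical) exposing the same accessor the commit adds."""
    observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)
    action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

    def __init__(self):
        self.current_step = 0

    def get_current_step(self):
        return self.current_step

    def reset(self):
        self.current_step = 0
        return np.zeros(1, dtype=np.float32)

    def step(self, action):
        self.current_step += 1
        return np.zeros(1, dtype=np.float32), 0.0, False, {}

env = DummyVecEnv([lambda: CounterEnv()])
env.reset()
env.step(np.zeros((1, 1), dtype=np.float32))

# env_method invokes the named method on every sub-environment and returns a
# list of results, one per env -- hence the [0] indexing in evaluate_policy.
print(env.env_method('get_current_step')[0])  # -> 1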
46 changes: 35 additions & 11 deletions RL_Framework/pipeline.py
@@ -154,15 +154,23 @@ def graph_evaluations(self, folder_path, garden_time_steps, garden_x, garden_y,
        self.plot_average_reward(folder_path, r, time_steps, min_r, max_r, abs(min_r - max_r) / 10)
        self.plot_stddev_reward(folder_path, garden_time_steps, rewards, rewards_stddev, time_steps, min_r, max_r, abs(min_r - max_r) / 10)

    def evaluate_policy(self, folder_path, num_evals, env, is_baseline=False, baseline_policy=None, step=1):
    def evaluate_policy(self, folder_path, num_evals, env, garden_x, garden_y, is_baseline=False, baseline_policy=None, step=1):
        model = None
        if not is_baseline:
            model = PPO2.load('./' + folder_path + '/model')
        done = False
        for i in range(num_evals):
            obs = env.reset()
            garden_obs = env.env_method('get_garden_state')
            e = {'obs_action': [], 'obs': [], 'rewards': [], 'action': []}
            e = {'obs_avg_action': [], 'obs_action': [], 'obs': [], 'rewards': [], 'action': []}

            obs_avg_action = {}
            for x in range(garden_x):
                for y in range(garden_y):
                    obs_avg_action[x, y] = 0

            step_counter = 0

            while not done:
                action = None
                if is_baseline:
@@ -172,15 +180,31 @@ def evaluate_policy(self, folder_path, num_evals, env, is_baseline=False, baseli
                obs, rewards, done, _ = env.step(action)
                garden_obs = env.env_method('get_garden_state')
                radius_grid = env.env_method('get_radius_grid')
                #print([garden_obs[0][i] for i in range()])
                #print('\t'.join([str(r) for r in radius_grid[0].tolist()] + [str(a) for a in action[0].tolist()] + [str(rewards.item())]))
                e['obs_action'].append((radius_grid[0].tolist(), action[0].tolist()))
                e['obs'].append(garden_obs[0].tolist())
                e['rewards'].append(rewards.item())
                e['action'].append(action[0].tolist())
                env.render()

                if not done:
                    step_counter = env.env_method('get_current_step')[0]

                rg_list = radius_grid[0].tolist()
                obs_action_pairs = []
                for x in range(garden_x):
                    for y in range(garden_y):
                        cell = (x, y)
                        cell_action = action[0][x * garden_x + y]
                        obs_action_pairs.append({str(cell) : (str(rg_list[x][y][0]), str(cell_action))})
                        obs_avg_action[cell] += cell_action
                e['obs_action'].append({step_counter : obs_action_pairs})

                e['obs'].append(garden_obs[0].tolist())
                e['rewards'].append(rewards.item())
                e['action'].append(action[0].tolist())
                env.render()
            done = False

            for x in range(garden_x):
                for y in range(garden_y):
                    obs_avg_action[(x, y)] /= step_counter
                    e['obs_avg_action'].append({str((x, y)) : obs_avg_action[(x, y)], 'final': rg_list[x][y][0]})

            # env.env_method('show_animation')

            pathlib.Path(folder_path + '/Returns').mkdir(parents=True, exist_ok=True)
@@ -234,7 +258,7 @@ def single_run(self, folder_path, num_evals, policy_kwargs=None, is_baseline=Fal
            copyfile('gym_config/config.ini', folder_path + '/config.ini')

            # Evaluate baseline on 50 random environments of same parameters.
            self.evaluate_policy(folder_path=folder_path, num_evals=num_evals, env=env, is_baseline=True, baseline_policy=baseline_policy, step=1)
            self.evaluate_policy(folder_path, num_evals, env, garden_x, garden_y, is_baseline=True, baseline_policy=baseline_policy, step=1)

            # Graph evaluations
            self.graph_evaluations(folder_path, garden_time_steps, garden_x, garden_y, time_steps, step, num_evals, num_plant_types)
@@ -252,7 +276,7 @@ def single_run(self, folder_path, num_evals, policy_kwargs=None, is_baseline=Fal
            copyfile('gym_config/config.ini', folder_path + '/config.ini')

            # Evaluate model on 50 random environments of same parameters.
            self.evaluate_policy(folder_path=folder_path, num_evals=num_evals, env=env, is_baseline=False)
            self.evaluate_policy(folder_path, num_evals, env, garden_x, garden_y, is_baseline=False)

            # Graph evaluations
            self.graph_evaluations(folder_path, garden_time_steps, garden_x, garden_y, time_steps, step, num_evals, num_plant_types)
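For orientation on the commit title's "new json write format": below is a sketch of the per-evaluation record e that the rewritten evaluate_policy assembles, with illustrative placeholder values for a tiny 2x1 garden. The actual output file name and dump call are not shown in this hunk, so the json.dumps usage here is an assumption.

import json

# Illustrative shape of one evaluation record `e` (placeholder values).
e = {
    # Per-cell irrigation action averaged over the episode, plus the final
    # plant radius observed in that cell.
    'obs_avg_action': [
        {'(0, 0)': 0.13, 'final': 1.7},
        {'(0, 1)': 0.02, 'final': 0.0},
    ],
    # One entry per timestep: {step: [{'(x, y)': (radius, action)}, ...]}.
    'obs_action': [
        {1: [{'(0, 0)': ('1.2', '0.25')}, {'(0, 1)': ('0.0', '0.0')}]},
    ],
    'obs': [[[0.0, 1.2], [0.3, 0.0]]],  # raw garden state per timestep
    'rewards': [0.8],
    'action': [[0.25, 0.0]],
}

print(json.dumps(e, indent=2))  # integer step keys are serialized as strings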
1 change: 1 addition & 0 deletions Simulator_v2/simulatorv2/SimAlphaGardenWrapper.py
@@ -54,6 +54,7 @@ def reward(self, state):
    '''
    def take_action(self, action):
        self.curr_action = action
        # print('ACTION', action)
        self.garden.perform_timestep(irrigations=action)
        return self.garden.get_state()

1 change: 0 additions & 1 deletion Simulator_v2/simulatorv2/__init__.py
@@ -1,4 +1,3 @@
from simulatorv2.sim_globals import sim_globals
from simulatorv2.logger import Logger, Event
from simulatorv2.plant_stage import GerminationStage, GrowthStage, WaitingStage, WiltingStage, DeathStage
from simulatorv2.plant_type import PlantType
2 changes: 2 additions & 0 deletions Simulator_v2/simulatorv2/garden.py
@@ -93,6 +93,7 @@ def perform_timestep(self, water_amt=0, irrigations=None):
        for i in np.nonzero(irrigations)[0]:
            location = (i // self.N, i % self.M)
            self.irrigate(location, irrigations[i])
            # print('IRRIGATION:', location, irrigations[i])
            self.irrigation_points[location] = irrigations[i]

        self.distribute_light()
@@ -102,6 +103,7 @@ def perform_timestep(self, water_amt=0, irrigations=None):

        if self.animate:
            self.anim_step()
        # print('RADIUS GRID', self.radius_grid.tolist())

        return self.plants.values()

18 changes: 10 additions & 8 deletions Simulator_v2/simulatorv2/plant_type.py
@@ -33,12 +33,14 @@ def get_random_plants(self, plant_types, num_x_steps, num_y_steps, plants_per_ty
        random.seed(datetime.now())
        np.random.seed(random.randint(0, 99999999))
        plants = []
        for color, (c1, growth_time), name in plant_types:
            x_locations = np.array([[num_x_steps - 1]])
            y_locations = np.array([[num_y_steps - 1]])
            if num_x_steps > 2 and num_y_steps > 2:
                x_locations = np.random.randint(1, num_x_steps - 1, (plants_per_type, 1))
                y_locations = np.random.randint(1, num_y_steps - 1, (plants_per_type, 1))
            locations = np.hstack((x_locations, y_locations))
            plants.extend([Plant(row, col, c1=c1, growth_time=growth_time, color=color, plant_type=name) for row, col in locations])
        for x in range(num_x_steps):
            for y in range(num_y_steps):
                if np.random.rand(1, 1)[0] > 0.5:
                    color, (c1, growth_time), name = plant_types[np.random.randint(0, len(plant_types))]
                    plants.extend([Plant(x, y, c1=c1, growth_time=growth_time, color=color, plant_type=name)])

        # for plant in plants:
        # print("PLANT: ", plant.type, plant.row, plant.col)

        # print("NUM PLANTS", len(plants))
        return plants
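The old loop placed a fixed number of plants per type at random interior coordinates; the new loop visits every grid cell and, with probability 0.5, plants one uniformly random type there, so the expected plant count is 0.5 * num_x_steps * num_y_steps. A minimal standalone sketch of that scheme; random_plant_grid and the example plant types are hypothetical, not the repo's API:

import numpy as np

def random_plant_grid(plant_types, num_x_steps, num_y_steps):
    """Hypothetical helper mirroring the new get_random_plants loop."""
    plants = []
    for x in range(num_x_steps):
        for y in range(num_y_steps):
            # Each cell independently receives a plant with probability 0.5.
            if np.random.rand() > 0.5:
                color, (c1, growth_time), name = plant_types[np.random.randint(0, len(plant_types))]
                plants.append((x, y, name))
    return plants

# Two made-up plant types; expect roughly 0.5 * 10 * 10 = 50 plants.
types = [('green', (0.1, 25), 'basil'), ('purple', (0.12, 30), 'lettuce')]
print(len(random_plant_grid(types, 10, 10)))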
