Merge pull request #21 from garethjns/bug_fixes
Fix bug with environment's automatic target selection for vaccination…
garethjns authored Aug 10, 2020
2 parents 7d8c0c7 + f280d92 commit 23266a4
Showing 13 changed files with 235 additions and 41 deletions.
scripts/stats_compare_basic_agents.py (20 changes: 8 additions & 12 deletions)

@@ -32,8 +32,8 @@ def plot_dists(multi_sims: List[sim.MultiSim],
              'TreatmentAgent': axs[4],
              'MaskingAgent': axs[5]}

-    min_score = 0
-    max_score = 0
+    min_score = np.inf
+    max_score = -np.inf
     for run in multi_sims:
         min_score = min(min_score, run.results[result].min())
         max_score = max(max_score, run.results[result].max())

@@ -61,21 +61,17 @@ def plot_dists(multi_sims: List[sim.MultiSim],
 class EnvTemplate(TemplateBase):
     def build(self):
         env_ = env.Environment(name=f"stats_compare_basic_agents_custom_env",
-                               action_space=env.ActionSpace(isolate_efficiency=0.7,
-                                                            reconnect_efficiency=0.7,
-                                                            treatment_recovery_rate_modifier=4,
-                                                            nothing_cost=0,
+                               action_space=env.ActionSpace(nothing_cost=0,
                                                             vaccinate_cost=0,
                                                             isolate_cost=0,
                                                             reconnect_cost=0,
                                                             treat_cost=0,
                                                             mask_cost=0),
                                disease=env.Disease(name='COVID-19',
-                                                   virulence=0.02,
-                                                   seed=None,
-                                                   immunity_mean=0.8,
-                                                   recovery_rate=0.85,
-                                                   immunity_decay_mean=0.004),
+                                                   virulence=0.01,
+                                                   immunity_mean=0.95,
+                                                   recovery_rate=0.9,
+                                                   immunity_decay_mean=0.005),
                                healthcare=env.Healthcare(capacity=50),
                                observation_space=env.ObservationSpace(graph=env.Graph(community_n=15,
                                                                                       community_size_mean=10,

@@ -111,7 +107,7 @@ class CustomEnv(GymEnv):
     sim_ = sim.Sim(env_spec=env_spec, agent=agt_, n_steps=125)

     multi_sims.append(sim.MultiSim(sim_, name='basic agent comparison',
-                                   n_reps=300, n_jobs=30))
+                                   n_reps=300, n_jobs=60))

     # Run all the sims. No need to parallelize here as it's done across n reps in MultiSim.run()
     for ms in tqdm(multi_sims):
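A note on the `min_score`/`max_score` change above: seeding both with 0 is only correct when the scores straddle zero. Seeding with `np.inf`/`-np.inf` is the usual running-extremes idiom, since any real value beats the seed on the first comparison. A minimal sketch of the failure mode, outside the sim code:

```python
import numpy as np

scores = [12.5, 30.0, 18.2]  # e.g. all-positive run scores

# Old behaviour: a 0 seed can never rise to the true minimum of 12.5.
lo, hi = 0, 0
for s in scores:
    lo, hi = min(lo, s), max(hi, s)
assert (lo, hi) == (0, 30.0)  # lo is wrong

# Fixed behaviour: infinite seeds lose to the first real value.
lo, hi = np.inf, -np.inf
for s in scores:
    lo, hi = min(lo, s), max(hi, s)
assert (lo, hi) == (12.5, 30.0)
```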
scripts/stats_run_single_population.py (52 changes: 52 additions & 0 deletions)

@@ -0,0 +1,52 @@
+import gym
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+
+import social_distancing_sim.environment as env
+from social_distancing_sim.environment.gym.gym_env import GymEnv
+from social_distancing_sim.sim import MultiSim, Sim
+from social_distancing_sim.templates.template_base import TemplateBase
+
+
+class EnvTemplate(TemplateBase):
+    def build(self):
+        return env.Environment(name="visual_run_simulation_with_agent_custom_env",
+                               action_space=env.ActionSpace(isolate_efficiency=0.5,
+                                                            vaccinate_efficiency=0.95),
+                               disease=env.Disease(name='COVID-19',
+                                                   virulence=0.008,
+                                                   immunity_mean=0.7,
+                                                   recovery_rate=0.9,
+                                                   immunity_decay_mean=0.01),
+                               healthcare=env.Healthcare(capacity=75),
+                               environment_plotting=env.EnvironmentPlotting(ts_fields_g2=["Actions taken",
+                                                                                          "Overall score"]),
+                               observation_space=env.ObservationSpace(
+                                   graph=env.Graph(community_n=20,
+                                                   community_size_mean=15,
+                                                   considered_immune_threshold=0.7),
+                                   test_rate=1),
+                               initial_infections=15)
+
+
+class CustomEnv(GymEnv):
+    template = EnvTemplate()
+
+
+if __name__ == "__main__":
+    # Prepare a custom environment
+    env_name = f"SDSTests-CustomEnv{np.random.randint(2e6)}-v0"
+    gym.envs.register(id=env_name,
+                      entry_point='scripts.stats_run_single_population:CustomEnv',
+                      max_episode_steps=1000)
+
+    sim = Sim(env_spec=gym.make(env_name).spec)
+
+    ms = MultiSim(sim, n_reps=10, n_jobs=50)
+
+    ms.run()
+
+    sns.distplot(ms.results['Overall score'])
+    sns.distplot(ms.results['Total deaths'])
+    plt.show()
scripts/train_and_evaluate_untargeted_dqn.py (45 changes: 39 additions & 6 deletions)

@@ -1,36 +1,69 @@
 from functools import partial

 import gym
+import numpy as np
 from reinforcement_learning_keras.agents.components.helpers.virtual_gpu import VirtualGPU

+import social_distancing_sim.environment as env
 from social_distancing_sim.agent.rl_agents.q_learning.dqn_untargeted import DQNUntargeted
 from social_distancing_sim.agent.rl_agents.rlk_agent_configs import RLKAgentConfigs
+from social_distancing_sim.environment import ActionSpace, EnvironmentPlotting
+from social_distancing_sim.environment.gym.gym_env import GymEnv
 from social_distancing_sim.environment.gym.wrappers.flatten_obs_wrapper import FlattenObsWrapper
 from social_distancing_sim.environment.gym.wrappers.limit_obs_wrapper import LimitObsWrapper
 from social_distancing_sim.sim import Sim
+from social_distancing_sim.templates.template_base import TemplateBase


+class EnvTemplate(TemplateBase):
+
+    @classmethod
+    def build(cls) -> env.Environment:
+        return env.Environment(name="agent training example",
+                               action_space=ActionSpace(),
+                               environment_plotting=EnvironmentPlotting(
+                                   ts_fields_g2=['Vaccinate actions completed', 'Isolate actions completed',
+                                                 'Reconnect actions completed', 'Treat actions completed',
+                                                 'Mask actions completed']),
+                               disease=env.Disease(name='COVID-19',
+                                                   virulence=0.006,
+                                                   immunity_mean=0.6,
+                                                   immunity_decay_mean=0.15),
+                               healthcare=env.Healthcare(),
+                               observation_space=env.ObservationSpace(graph=env.Graph(community_n=50,
+                                                                                      community_size_mean=15,
+                                                                                      community_p_in=0.1,
+                                                                                      community_p_out=0.05,
+                                                                                      seed=20200423),
+                                                                      test_rate=1))
+
+
+class CustomEnv(GymEnv):
+    template = EnvTemplate()
+
+
 if __name__ == "__main__":
     gpu = VirtualGPU(gpu_memory_limit=2048,
                      gpu_device_id=0)

-    gym.envs.register(id='SDS-746-v0',
-                      entry_point='social_distancing_sim.environment.gym.environments.sds_746:SDS746',
+    env_name = f"SDS-CustomEnv{np.random.randint(2e6)}-v0"
+    gym.envs.register(id=env_name,
+                      entry_point='scripts.train_and_evaluate_untargeted_dqn:CustomEnv',
                       max_episode_steps=1000)

-    config_dict = RLKAgentConfigs(agent_name='flat_obs_dqn', env_spec='SDS-746-v0', expected_obs_shape=(746 * 6,),
+    config_dict = RLKAgentConfigs(agent_name='flat_obs_dqn', env_spec=env_name, expected_obs_shape=(746 * 6,),
                                   env_wrappers=(partial(LimitObsWrapper, output=2),
                                                 FlattenObsWrapper),
                                   n_actions=5).build_for_dqn_untargeted()

     # Train agent using rlk agents built in train function. Note that the agent only takes a single action per turn
     # unless the multiple actions wrapper is added. TODO: Add this wrapper for training but remove for future use.
     agent = DQNUntargeted(**config_dict)
-    agent.train(render=False, n_episodes=16)
+    agent.train(render=False, n_episodes=25)
     agent.save()

     # Eval
-    env_spec = gym.make('SDS-746-v0').spec
-    sim = Sim(env_spec=env_spec, agent=agent, n_steps=200, plot=True, save=True, tqdm_on=True,
+    env_spec = gym.make(env_name).spec
+    sim = Sim(env_spec=env_spec, agent=agent, n_steps=200, plot=False, save=True, tqdm_on=True, logging=True,
               save_dir='exps/untargeted_dqn')
     sim.run()
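Two details worth noting in this script: the hard-coded `expected_obs_shape=(746 * 6,)` presumably stays valid because the graph is built with a fixed seed (20200423), so the node count is reproducible across runs; and `env_wrappers` is a tuple of wrapper constructors, with `functools.partial` pre-binding extra arguments such as `output=2`. A small sketch of how such a wrapper tuple is typically composed, assuming innermost-first application (the convention is not shown in this diff):

```python
from functools import partial  # used to pre-bind wrapper kwargs, as in the config above
from typing import Callable, Iterable

import gym


def apply_wrappers(env: gym.Env, wrappers: Iterable[Callable[..., gym.Env]]) -> gym.Env:
    """Wrap env with each constructor in order, innermost first."""
    for wrapper in wrappers:
        env = wrapper(env)  # partial(...) entries carry their pre-bound kwargs
    return env


# e.g. apply_wrappers(gym.make(env_name),
#                     (partial(LimitObsWrapper, output=2), FlattenObsWrapper))
```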
scripts/visual_compare_basic_agents.py (7 changes: 4 additions & 3 deletions)

@@ -50,14 +50,15 @@ class CustomEnv(GymEnv):
 if __name__ == "__main__":

     # Prepare a custom environment
-    env_name = f"SDSTests-CustomEnv{np.random.randint(2e6)}-v0"
+    env_name = f"SDS-CustomEnv{np.random.randint(2e6)}-v0"
     gym.envs.register(id=env_name,
                       entry_point='scripts.visual_compare_basic_agents:CustomEnv',
                       max_episode_steps=1000)
     env_spec = gym.make(env_name).spec

     # Prepare agents
-    agents = [agent.DummyAgent, agent.RandomAgent, agent.VaccinationAgent, agent.IsolationAgent, agent.MaskingAgent]
+    agents = [agent.DummyAgent, agent.RandomAgent, agent.VaccinationAgent, agent.IsolationAgent, agent.TreatmentAgent,
+              agent.MaskingAgent]
     n_actions = [3, 6, 12]

     # Prepare Sims

@@ -69,4 +70,4 @@ class CustomEnv(GymEnv):
                          tqdm_on=True, logging=True))  # Show progress bars for running sims

     # Run all the prepared Sims
-    Parallel(n_jobs=-2, backend='loky')(delayed(run_and_replay)(sim) for sim in sims)
+    Parallel(n_jobs=-1, backend='loky')(delayed(run_and_replay)(sim) for sim in sims)
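On the `n_jobs` change: joblib resolves negative values as `cpu_count() + 1 + n_jobs`, so `-1` uses every core where `-2` left one free for the rest of the system. A quick check of the mapping:

```python
from joblib import cpu_count

# joblib's rule for negative n_jobs: n_workers = cpu_count() + 1 + n_jobs
for n_jobs in (-1, -2):
    print(f"n_jobs={n_jobs} -> {cpu_count() + 1 + n_jobs} workers")
```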
scripts/visual_compare_two_diseases_immunity_small.py (2 changes: 1 addition & 1 deletion)

@@ -61,7 +61,7 @@ def run_and_replay(pop, *args, **kwargs):
                    healthcare=env.Healthcare(capacity=300),
                    seed=124)

-Parallel(n_jobs=1,
+Parallel(n_jobs=2,
         backend='loky')(delayed(run_and_replay)(pop,
                                                 steps=365,
                                                 plot=False,
social_distancing_sim/__init__.py (2 changes: 1 addition & 1 deletion)

@@ -1,5 +1,5 @@
 MAJOR = 0
 MINOR = 10
-PATCH = 1
+PATCH = 2

 __version__ = ".".join(str(v) for v in [MAJOR, MINOR, PATCH])
social_distancing_sim/environment/action_space.py (4 changes: 2 additions & 2 deletions)

@@ -21,8 +21,8 @@ class ActionSpace:
     vaccinate_efficiency: float = 0.95
     isolate_efficiency: float = 0.95
     reconnect_efficiency: float = 0.95
-    treatment_conclusion_chance: float = 0.9
-    treatment_recovery_rate_modifier: float = 1.5
+    treatment_conclusion_chance: float = 0.6
+    treatment_recovery_rate_modifier: float = 1.2
     mask_efficiency: float = 0.25
     seed: Union[int, None] = None
social_distancing_sim/environment/environment.py (8 changes: 6 additions & 2 deletions)

@@ -171,7 +171,7 @@ def select_reasonable_targets(self, actions: List[int]) -> Dict[int, int]:

         suggested_targets = {
             0: [],  # Nothing
-            1: self.observation_space.current_immune_nodes,  # Vaccinate
+            1: self.observation_space.current_clear_nodes,  # Vaccinate
             2: list(  # Isolate
                 set(self.observation_space.current_infected_nodes).difference(
                     self.observation_space.current_isolated_nodes)),

@@ -200,6 +200,8 @@ def select_reasonable_targets(self, actions: List[int]) -> Dict[int, int]:
     def _act(self, actions: List[int], targets: List[int] = None) -> Tuple[Dict[int, int], float]:
         # If no targets supplied, select automatically
         if targets is None:
+            targets = []
+        if len(targets) == 0:
             actions_dict = self.select_reasonable_targets(actions)
         else:
             actions_dict = {t: a for t, a in zip(targets, actions)}

@@ -340,7 +342,9 @@ def clone(self) -> "Environment":
                            scoring=self.scoring.clone(),
                            environment_plotting=self.environment_plotting.clone(),
                            name=self.name,
-                           seed=self.seed)
+                           seed=self.seed,
+                           initial_infections=self.initial_infections,
+                           random_infection_chance=self.random_infection_chance)
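These three hunks are the substance of the fix named in the commit title. First, automatic target selection for vaccination (action 1) was drawing from `current_immune_nodes`, i.e. nodes that are already protected, so auto-targeted vaccination was wasted; it now draws from `current_clear_nodes`. Second, `_act` now treats an explicitly empty target list the same as `None`, falling back to automatic selection in both cases. Third, `clone()` now carries `initial_infections` and `random_infection_chance` over to the copy, so a cloned environment reproduces the original's starting conditions (which the new gym reset test below appears to exercise). A stripped-down sketch of the corrected selection logic, using hypothetical stand-in node lists and a random pick, since the rest of the method is elided in this diff:

```python
import random
from typing import Dict, List

# Hypothetical stand-ins for the observation-space properties used above.
current_clear_nodes = [1, 4, 7, 9]    # healthy and unprotected: sensible vaccine targets
current_immune_nodes = [2, 3]         # already protected: vaccinating these is wasted
current_infected_nodes = [5, 6, 8]
current_isolated_nodes = [6]


def select_reasonable_targets(actions: List[int]) -> Dict[int, int]:
    """Pick one not-yet-used node per requested action, mirroring the fixed logic."""
    suggested = {
        0: [],                                   # Nothing
        1: current_clear_nodes,                  # Vaccinate (was current_immune_nodes)
        2: list(set(current_infected_nodes)
                - set(current_isolated_nodes)),  # Isolate
    }
    targets: Dict[int, int] = {}
    for action in actions:
        candidates = [n for n in suggested.get(action, []) if n not in targets]
        if candidates:
            targets[random.choice(candidates)] = action
    return targets


print(select_reasonable_targets([1, 1, 2]))  # two clear nodes vaccinated, node 5 or 8 isolated
```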
social_distancing_sim/environment/gym/gym_env.py (2 changes: 1 addition & 1 deletion)

@@ -75,7 +75,7 @@ def step(self, actions_targets: Union[int,
         # Return the sds internal observation space in info for convenience. self.state returns a more limited set of
         # arrays for state, which are derived from the same internal state.

-        if isinstance(actions_targets, (int, np.int64)):
+        if isinstance(actions_targets, (int, np.integer)):
             actions_targets = ([actions_targets], [])
         actions, targets = actions_targets
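`np.integer` is the abstract parent of every NumPy integer scalar type, so the widened check also accepts `np.int32` actions (NumPy's default integer on Windows) rather than only `np.int64`. A quick demonstration:

```python
import numpy as np

for value in (3, np.int64(3), np.int32(3)):
    old = isinstance(value, (int, np.int64))
    new = isinstance(value, (int, np.integer))
    print(f"{type(value).__name__}: old={old}, new={new}")
# int: old=True, new=True
# int64: old=True, new=True
# int32: old=False, new=True  <- only the widened check accepts this
```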
social_distancing_sim/sim/multi_sim.py (17 changes: 10 additions & 7 deletions)

@@ -43,15 +43,15 @@ def _run(self):
         return results

     def run(self):
-        results = Parallel(n_jobs=self.n_jobs,
-                           backend='loky')(delayed(self._run)()
-                                           for _ in tqdm(range(self.n_reps),
-                                                         desc=self.sim.agent.name))
+        self.full_results = Parallel(n_jobs=self.n_jobs,
+                                     backend='loky')(delayed(self._run)()
+                                                     for _ in tqdm(range(self.n_reps),
+                                                                   desc=self.sim.agent.name))

         # Place in fake history container for now
         results_hist = History()
-        for h in results:
-            results_hist.log({k: v[0] for k, v in h.items()})
+        for h in self.full_results:
+            results_hist.log({k: v[-1] for k, v in h.items()})

         self.results = pd.DataFrame(results_hist)
         self.log()

@@ -112,8 +112,11 @@ def log(self):
                 })  # TODO: Other action costs, etc.

         metrics_to_log = {}
-        for c in ["Observed overall score", "Observed turn score", "Overall score", "Turn score"]:
+
+        # These are already totals
+        for c in ["Observed overall score", "Observed turn score", "Overall score", "Turn score", "Total deaths"]:
             metrics_to_log.update(self._agg_stats(self.results[c]))
+
         mlflow.log_metrics(metrics_to_log)

         mlflow.end_run()
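Two related fixes here: the per-rep histories are kept on `self.full_results` instead of a local, and the summary now takes `v[-1]` (each metric's final, end-of-run value) rather than `v[0]`, which silently logged the value from the first step of every rep. A minimal sketch of the difference, assuming histories map metric names to per-step lists as `History` does:

```python
# Hypothetical two-rep histories: metric name -> per-step values.
rep_histories = [
    {"Overall score": [0.0, 5.0, 12.0], "Total deaths": [0, 1, 3]},
    {"Overall score": [0.0, 4.0, 9.0], "Total deaths": [0, 2, 2]},
]

# Old: v[0] logged step 0 of every metric, i.e. always the initial value.
first = [{k: v[0] for k, v in h.items()} for h in rep_histories]
assert first[0] == {"Overall score": 0.0, "Total deaths": 0}  # uninformative

# Fixed: v[-1] logs each rep's end-of-run value, which is what the score
# distributions and mlflow summaries are meant to compare.
final = [{k: v[-1] for k, v in h.items()} for h in rep_histories]
assert final[0] == {"Overall score": 12.0, "Total deaths": 3}
```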
tests/integration/environment/gym/test_gym_env.py (22 changes: 22 additions & 0 deletions)

@@ -1,3 +1,5 @@
+import copy
+import sys
 import unittest
 from functools import partial
 from typing import Union

@@ -163,3 +165,23 @@ def test_gym_env_with_rl_agent(self):
         self.assertIsInstance(agent.env, FlattenObsWrapper)
         self.assertIsInstance(agent.env.unwrapped, GymEnv)
         self.assertEqual(agent.env_builder.env_spec, 'SDSTests-GymEnvFixedSeedFixture-v0')
+
+    @unittest.skipUnless(int(f"{sys.version_info.major}{sys.version_info.minor}") > 36, 'deepcopy breaks in 3.6')
+    def test_reset_matches_original_env(self):
+        """env.reset() relies on sds_env cloning. This should return the original object. Make sure it does."""
+
+        # Arrange
+        env1 = gym.make('SDSTests-GymEnvFixedSeedFixture-v0')
+        env2 = copy.deepcopy(env1)
+
+        # Act
+        _ = env1.reset()
+        _ = env1.step(([], []))
+
+        # Assert
+        # gym env equality will not eval as equal
+        self.assertNotEqual(env1, env2)
+        # Should match to initial conditions
+        self.assertEqual(env1.sds_env, env2.sds_env)
+        # But not on history or changes by stepping
+        self.assertNotEqual(env1.sds_env.history, env2.sds_env.history)
(2 further changed files not loaded)
