Merge branch 'master' of https://github.com/Limmen/csle
Showing 1 changed file with 149 additions and 0 deletions.
examples/training/pomcp/rampart_castle/run_cyborg_version_two_b_line_ucb.py
@@ -0,0 +1,149 @@
from typing import List
import csle_common.constants.constants as constants
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.metastore.metastore_facade import MetastoreFacade
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_agents.agents.pomcp.pomcp_agent import POMCPAgent
from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
import csle_agents.constants.constants as agents_constants
from csle_agents.common.objective_type import ObjectiveType
from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil


def heuristic_value(o: List[List[int]]) -> float:
    """
    A heuristic value function for truncated POMCP rollouts

    :param o: the observation vector (one sub-vector per host)
    :return: the estimated value of the observation
    """
    host_costs = CyborgEnvUtil.get_host_compromised_costs()
    val = 0
    for i in range(len(o)):
        if o[i][2] > 0:  # the host appears compromised in the observation
            val += host_costs[i]
    return val


if __name__ == '__main__':
    emulation_name = "csle-level9-040"
    emulation_env_config = None
    simulation_name = "csle-cyborg-001"
    simulation_env_config = SimulationEnvConfig(name="", version="", gym_env_name="", simulation_env_input_config="",
                                                players_config="", joint_action_space_config="",
                                                joint_observation_space_config="", time_step_type=None,
                                                reward_function_config=None, transition_operator_config=None,
                                                observation_function_config=None,
                                                initial_state_distribution_config=None, env_parameters_config=None,
                                                plot_transition_probabilities=False, plot_observation_function=False,
                                                plot_reward_function=False, descr="", state_space_config=None)
    eval_env_config = CSLECyborgConfig(
        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
        decoy_state=True, decoy_optimization=False, cache_visited_states=True, save_trace=False,
        randomize_topology=False)
    simulation_env_config.simulation_env_input_config = CSLECyborgWrapperConfig(
        gym_env_name="csle-cyborg-scenario-two-wrapper-v1", maximum_steps=100, save_trace=False, scenario=2,
        reward_shaping=True, red_agent_type=RedAgentType.B_LINE_AGENT)
    simulation_env_config.gym_env_name = "csle-cyborg-scenario-two-wrapper-v1"
    csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
    A = csle_cyborg_env.get_action_space()
    initial_particles = csle_cyborg_env.initial_particles
    rollout_policy = lambda x, deterministic: 35
    value_function = heuristic_value
    experiment_config = ExperimentConfig(
        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test",
        random_seeds=[555512, 98912, 999, 555],
        agent_type=AgentType.POMCP,
        log_every=1,
        hparams={
            agents_constants.POMCP.N: HParam(value=5000, name=agents_constants.POMCP.N,
                                             descr="the number of episodes"),
            agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
                value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
                descr="the type of objective (max or min)"),
            agents_constants.POMCP.ROLLOUT_POLICY: HParam(
                value=rollout_policy, name=agents_constants.POMCP.ROLLOUT_POLICY,
                descr="the policy to use for rollouts"),
            agents_constants.POMCP.VALUE_FUNCTION: HParam(
                value=value_function, name=agents_constants.POMCP.VALUE_FUNCTION,
                descr="the value function to use for truncated rollouts"),
            agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
            agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA,
                                                 descr="the discount factor"),
            agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION,
                                                          descr="whether reinvigoration should be used"),
            agents_constants.POMCP.REINVIGORATED_PARTICLES_RATIO: HParam(
                value=0.0, name=agents_constants.POMCP.REINVIGORATED_PARTICLES_RATIO,
                descr="the ratio of reinvigorated particles in the particle filter"),
            agents_constants.POMCP.INITIAL_PARTICLES: HParam(value=initial_particles,
                                                             name=agents_constants.POMCP.INITIAL_PARTICLES,
                                                             descr="the initial belief"),
            agents_constants.POMCP.PLANNING_TIME: HParam(value=3.75, name=agents_constants.POMCP.PLANNING_TIME,
                                                         descr="the planning time"),
            agents_constants.POMCP.PRUNE_ACTION_SPACE: HParam(
                value=False, name=agents_constants.POMCP.PRUNE_ACTION_SPACE,
                descr="boolean flag indicating whether the action space should be pruned or not"),
            agents_constants.POMCP.PRUNE_SIZE: HParam(
                value=3, name=agents_constants.POMCP.PRUNE_SIZE, descr="the size of the pruned action space"),
            agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES,
                                                          descr="the maximum number of belief particles"),
            agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
                value=50, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
            agents_constants.POMCP.MAX_ROLLOUT_DEPTH: HParam(value=4, name=agents_constants.POMCP.MAX_ROLLOUT_DEPTH,
                                                             descr="the maximum depth for rollout"),
            agents_constants.POMCP.C: HParam(value=0.5, name=agents_constants.POMCP.C,
                                             descr="the weighting factor for UCB exploration"),
            agents_constants.POMCP.C2: HParam(value=15000, name=agents_constants.POMCP.C2,
                                              descr="the weighting factor for AlphaGo exploration"),
            agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
                value=False, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
                descr="boolean flag indicating whether rollout policy should be used"),
            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=5, name=agents_constants.POMCP.PRIOR_WEIGHT,
                                                        descr="the weight on the prior"),
            agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
                                                            descr="the prior confidence"),
            agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE: HParam(
                value=POMCPAcquisitionFunctionType.UCB, name=agents_constants.POMCP.ACQUISITION_FUNCTION_TYPE,
                descr="the type of acquisition function"),
            agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
                value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
            agents_constants.POMCP.MAX_NEGATIVE_SAMPLES: HParam(
                value=20, name=agents_constants.POMCP.MAX_NEGATIVE_SAMPLES,
                descr="maximum number of negative samples when filling belief particles"),
            agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
                value=0, name=agents_constants.POMCP.DEFAULT_NODE_VALUE,
                descr="the default node value in the search tree"),
            agents_constants.POMCP.VERBOSE: HParam(value=False, name=agents_constants.POMCP.VERBOSE,
                                                   descr="verbose logging flag"),
            agents_constants.POMCP.EVAL_ENV_NAME: HParam(value="csle-cyborg-scenario-two-v1",
                                                         name=agents_constants.POMCP.EVAL_ENV_NAME,
                                                         descr="the name of the evaluation environment"),
            agents_constants.POMCP.EVAL_ENV_CONFIG: HParam(value=eval_env_config,
                                                           name=agents_constants.POMCP.EVAL_ENV_CONFIG,
                                                           descr="the configuration of the evaluation environment"),
            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                            descr="number of evaluation episodes"),
            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
                descr="confidence interval"),
            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
                value=100, name=agents_constants.COMMON.MAX_ENV_STEPS,
                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
                descr="the number of samples to include when computing the running avg")
        },
        player_type=PlayerType.DEFENDER, player_idx=0
    )
    agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                       experiment_config=experiment_config, save_to_metastore=False)
    experiment_execution = agent.train()
    MetastoreFacade.save_experiment_execution(experiment_execution)
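
Note: the script above configures the UCB acquisition function with exploration constant C = 0.5. As a reminder of what that constant controls, here is a minimal, self-contained sketch of the standard UCB1 action-selection rule used in POMCP-style tree search. It is a generic illustration under assumed data structures (plain dicts of value estimates and visit counts), not the csle implementation, and the example numbers are made up.

import math
from typing import Dict


def ucb_select(q_values: Dict[int, float], visit_counts: Dict[int, int], total_visits: int, c: float = 0.5) -> int:
    """Return the action maximizing Q(h, a) + c * sqrt(ln N(h) / N(h, a)) (UCB1)."""
    best_action, best_score = None, -math.inf
    for action, q in q_values.items():
        n = visit_counts.get(action, 0)
        if n == 0:
            return action  # untried actions are expanded first
        score = q + c * math.sqrt(math.log(total_visits) / n)
        if score > best_score:
            best_action, best_score = action, score
    return best_action


# With c = 0.5, the rarely tried action 1 is selected here even though its mean value is slightly lower
print(ucb_select({0: -2.0, 1: -2.1}, {0: 90, 1: 5}, total_visits=95, c=0.5))  # -> 1

A larger C shifts the balance toward exploring under-visited actions; C = 0.5 keeps the search close to exploiting the current value estimates.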