-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
65 lines (53 loc) · 2.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
##
## Main code: template to run a single (default) experiment on AdLeap-MAS
##
# 1. Setting the environment
method = 'pomcp' # choose your method
kwargs = {} # define your additional hyperparameters to it (optional)
env_name = 'LevelForagingEnv' # choose your environment
scenario_id = 0 # define your scenario configuration (check the available configuration in our GitHub)
display = True # choosing to turn on or off the display
# 2. Creating the environment
# a. importing necessary modules
from importlib import import_module
from src.log import LogFile
import time
# b. creating the environment
env_module = import_module('src.envs.'+env_name)
load_default_scenario_method = getattr(env_module, 'load_default_scenario')
env, scenario_id = load_default_scenario_method(method,scenario_id,display=display)
###
# ADLEAP-MAS MAIN ROUTINE
###
state = env.reset()
adhoc_agent = env.get_adhoc_agent()
exp_num = 0
header = ['Iteration','Reward','Time to reason','N Rollouts', 'N Simulations']
log = LogFile(env_name,scenario_id,method,exp_num,header)
MAX_EPISODES = 200
done = False
while not done and env.episode < MAX_EPISODES:
# 1. Importing agent method
method = env.import_method(adhoc_agent.type)
# 2. Reasoning about next action and target
start = time.time()
adhoc_agent.next_action, adhoc_agent.target = method(state, adhoc_agent,kwargs=kwargs)
end = time.time()
# 3. Taking a step in the environment
next_state,reward,done,info = env.step(adhoc_agent.next_action)
# 4. For learning methods
if 'dqn_model' in adhoc_agent.smart_parameters.keys():
adhoc_agent.smart_parameters['dqn_model'].add_memory(state,adhoc_agent.next_action,next_state,reward,done)
# 5. Logging the Data
data = {'it':env.episode,
'reward':reward,
'time':end-start,
'nrollout':adhoc_agent.smart_parameters['count']['nrollouts'],
'nsimulation':adhoc_agent.smart_parameters['count']['nsimulations']}
log.write(data)
# 6. Updating the state
state = next_state
env.close()
###
# THE END - That's all folks :)
###