-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_exp1.py
79 lines (70 loc) · 2.24 KB
/
main_exp1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import tensorflow as tf
import numpy as np
import gym
from collections import deque
from matplotlib import pyplot as plt
import random
from utils import NormalizedActions
# Gym environment ids this script can be pointed at; ENVNUM indexes this list.
ENVS = [
    'Pendulum-v0',
    'MountainCarContinuous-v0',
    'InvertedPendulum-v1',
    'SemisuperPendulumNoise-v0',
    'SemisuperPendulumRandom-v0',
]
ENVNUM = 2

# Each environment family uses its own NAF implementation module.
if ENVNUM == 1:
    from NAF_mc import NAF
elif ENVNUM == 2:
    from NAF_ip import NAF
elif ENVNUM in (0, 3, 4):
    from NAF import NAF

TRY_NUM = 1

# Pure QNAF algorithm.
# Results directory naming: exp0<env_num><try_num><run_num>
# (<run_num> is appended per run inside main()).
ENV_NAME = ENVS[ENVNUM]
MONITOR_DIR = './results/exp0{}{}'.format(ENVNUM, TRY_NUM)

# Hyperparameters handed to the NAF constructor in main().
LEARNING_RATE = 0.001
GAMMA = 0.99
TAU = 0.001
BUFFER_SIZE = 800000
RANDOM_SEED = 42

# Episode / update sizing handed to NAF.run_n_episodes in main().
MAX_EP_STEPS = 200
MINIBATCH_SIZE = 64

# Gym monitor switches read by main().
RENDER_ENV = False
GYM_MONITOR_EN = True

# Noise settings; not referenced by the code visible in this file.
# NOTE(review): presumably Gaussian / Ornstein-Uhlenbeck exploration-noise
# parameters consumed elsewhere -- confirm before removing.
NOISE_MEAN = 0
NOISE_VAR = 1
OU_THETA = 0.15
OU_MU = 0.
OU_SIGMA = 0.3

# Number of episodes run with explore=True, and total episodes per run.
# Environment 2 (InvertedPendulum-v1) gets a longer schedule.
EXPLORATION_TIME = 200 if ENVNUM == 2 else 50
MAX_EPISODES = 4500 if ENVNUM == 2 else 200
def main(_):
    """Train five separately scoped NAF agents on ENV_NAME and plot each run.

    Seeds NumPy, TensorFlow and the environment with RANDOM_SEED, optionally
    wraps the environment in a gym Monitor writing to MONITOR_DIR, then, in
    a single TensorFlow session, builds one NAF agent per run. Each agent is
    run for EXPLORATION_TIME episodes with explore=True, then for the
    remaining MAX_EPISODES - EXPLORATION_TIME episodes with explore=False,
    and finally its rewards are plotted into the run's own directory.

    Args:
        _: Argument list supplied by tf.app.run; unused.
    """
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    env = NormalizedActions(gym.make(ENV_NAME))
    env.seed(RANDOM_SEED)

    if GYM_MONITOR_EN:
        # force=False on purpose: it is always better to create n + 1 new
        # dirs than to recreate n + 1 old experiments.
        monitor_options = {'force': False}
        if not RENDER_ENV:
            # Rendering is off, so video recording is disabled as well.
            monitor_options['video_callable'] = False
        env = gym.wrappers.Monitor(env, MONITOR_DIR, **monitor_options)

    # Episodes left for the explore=False phase once exploration is done.
    remaining_episodes = MAX_EPISODES - EXPLORATION_TIME

    with tf.Session() as sess:
        for run_index in range(5):
            run_tag = str(run_index)
            run_dir = MONITOR_DIR + run_tag

            # Every run gets its own scope name and output directory.
            agent = NAF(
                sess, env, LEARNING_RATE, TAU, GAMMA,
                BUFFER_SIZE, RANDOM_SEED, run_dir,
                sigma_P_dep=False,
                det=True,
                qnaf=True,
                scope=run_tag,
                hn=0,
                ac=False,
                sep_V=True,
            )

            agent.run_n_episodes(
                EXPLORATION_TIME, MAX_EP_STEPS, MINIBATCH_SIZE,
                explore=True, num_updates=5,
            )
            agent.run_n_episodes(
                remaining_episodes, MAX_EP_STEPS, MINIBATCH_SIZE,
                explore=False, num_updates=5,
            )
            agent.plot_rewards(run_dir)
# Script entry point: let TensorFlow parse the command-line flags and then
# invoke main() with the remaining argv.
if __name__ == '__main__':
    tf.app.run(main=main)