-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmain.py
104 lines (81 loc) · 3.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import argparse
import json
# silence TensorFlow's v1/v2 compatibility log noise: only messages at ERROR level and above are shown
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import gym
import numpy as np
from tensorforce.agents import Agent
from tensorforce.execution import Runner
import trade_gym
from utils import plot
def _load_json_config(path, missing_message):
    '''
    Parse the JSON file at `path` and return the decoded object.

    Raises AttributeError(missing_message) when the file cannot be opened or
    does not hold valid JSON. The underlying error is chained as __cause__ so
    the real reason for the failure stays visible in the traceback.
    '''
    try:
        with open(path, 'r', encoding='utf-8') as config_file:
            return json.load(fp=config_file)
    except (OSError, TypeError, ValueError) as err:
        # OSError: missing/unreadable file; TypeError: path is None;
        # ValueError: malformed JSON (json.JSONDecodeError) or undecodable bytes
        raise AttributeError(missing_message) from err


def main(args):
    '''
    Train an agent, then run it for one more episode and plot the result.

    Note that I've created a custom OpenAI Gym environment
    to allow for quick plug and play in comparing performance across
    different RL models.

    NOTE: as of now, because states are 2d, I've added an additional flatten layer to the network
    this needs to be fixed if we wanted to do CONVOLUTIONS, which we probably do

    `args` must provide the attributes read below: `window`, `preprocess`,
    `episodes`, `agent` (path to an agent JSON config) and `network` (path to
    a network JSON config holding a list of layer specs).

    Raises AttributeError('no agent config') / AttributeError('no network config')
    when the corresponding file is missing, unreadable or not valid JSON.
    '''
    # Validate both configs before building the environment, so a wrong path
    # fails immediately instead of after the environment has been constructed.
    agent_spec = _load_json_config(args.agent, 'no agent config')
    network = _load_json_config(args.network, 'no network config')

    # prepend the flatten layer, see notes above
    network.insert(0, {'type': 'flatten'})

    env = gym.make('Trade-v0',
                   window=args.window,
                   datadir='stocks/s_coinbaseUSD_1_min_data_2014-12-01_to_2018-11-11.csv',
                   preprocesses=[args.preprocess],
                   )

    agent = Agent.from_spec(spec=agent_spec,
                            kwargs=dict(
                                states=env.observation_space,
                                actions=env.action_space,
                                network=network
                            )
                            )

    print('*'*60)
    print('\n')
    print('Training for {} episodes...'.format(args.episodes))
    runner = Runner(agent=agent, environment=env)

    def episode_finished(r):
        # Progress callback handed to the runner; returning True keeps training going.
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep,
                                                                                     reward=r.episode_rewards[-1]))
        return True

    runner.run(episodes=args.episodes, episode_finished=episode_finished)

    # Average over (at most) the last 5 episodes and report the real counts;
    # the message used to claim "last 10 episodes (of 10)" regardless.
    reward_window = 5
    recent_rewards = runner.episode_rewards[-reward_window:]
    # np.mean of an empty sequence warns, so report NaN explicitly when nothing ran.
    average_reward = np.mean(recent_rewards) if len(recent_rewards) else float('nan')
    print("Learning finished. Total episodes: {ep}. Average reward of last {n} episodes (of {ep}): {ar}.".format(
        ep=runner.episode,
        n=len(recent_rewards),
        ar=average_reward)
    )

    print('Testing for an episode...')
    s = env.reset()
    collectables = []
    # NOTE(review): agent.observe() is still called in this loop, so the agent
    # presumably keeps learning during the "test" episode -- confirm this is intended.
    while True:
        action = agent.act(s)
        s, r, d, i = env.step(action)
        agent.observe(reward=r, terminal=d)
        collectables.append((s[0][0], action))  # to be replaced by env.render() when i get it fixed
        if d:
            break

    # NOTE(review): the old comment said "plot only .1% of one episode", but 0.01
    # would be 1% if plot() treats it as a fraction -- confirm against utils.plot.
    plot(collectables, 0.01)
if __name__ == '__main__':
    # Script entry point: declare the command-line options as data, register
    # them on the parser in order, then hand the parsed namespace to main().
    cli_options = (
        (('-w', '--window'), dict(
            type=int,
            default=50,
            help='number of time-steps to use in each state',
        )),
        (('-p', '--preprocess'), dict(
            default='log_transform',
            help='how to preprocess data. options: minMax, ..',
        )),
        (('-e', '--episodes'), dict(
            type=int,
            default=10,
            help='number of episodes to train for',
        )),
        (('-a', '--agent'), dict(
            default='configs/agents/dqn.json',
            help='agent config, examples in configs/agents/ taken from TensorForce at https://github.com/tensorforce/tensorforce/tree/major-revision/examples/configs',
        )),
        (('-n', '--network'), dict(
            default='configs/networks/mlp2_network.json',
            help="network architecture (for agent), examples in configs/networks/ taken from same url as above",
        )),
    )

    parser = argparse.ArgumentParser(description='Process command line args...')
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()
    main(args)