train.py
from __future__ import division
from __future__ import print_function
import glob
import json
import math
import numpy as np
import os
import random
import sys
import tensorflow as tf
import time
import audio_producer
import models

tf.app.flags.DEFINE_string("config", "configs/kws.json",
                           "Configuration json for model building and training")
tf.app.flags.DEFINE_integer("num_gpus", 1,
                            "Number of GPUs to train with.")

FLAGS = tf.app.flags.FLAGS
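
# Example invocation (illustrative; the config path is just the default
# defined above and the GPU count is an arbitrary choice):
#   python train.py --config configs/kws.json --num_gpus 2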


def check_path(path):
    """Create `path` if it doesn't exist; otherwise ask before reusing it."""
    if os.path.exists(path):
        overwrite = raw_input(("The path \'{}\' exists. Do you want "
                               "to use it anyway (y)? ").format(path))
        if overwrite != 'y':
            sys.exit(0)
    else:
        os.mkdir(path)


def run_epoch(model, producer, session, save_path, saver):
    """Run one pass over the training data, checkpointing and logging
    every 1000 steps."""
    summary_writer = tf.summary.FileWriter(save_path, flush_secs=30)
    model_path = os.path.join(save_path, "model.ckpt")
    summary_op = tf.summary.scalar('cost', model.avg_cost)
    ops = [model.grad_norm, model.cost, model.avg_cost,
           model.global_step, summary_op, model.train_op]

    start_time = time.time()
    compute_time = 0
    step, = session.run([model.global_step])

    # Only sort the examples on the very first epoch (global step 0).
    sort = (step == 0)
    for e, (inputs, labels) in enumerate(producer.iterator(sort=sort)):
        compute_time -= time.time()
        feed_dict = model.feed_dict(inputs, labels)
        res = session.run(ops, feed_dict)
        grad_norm, cost, avg_cost, step, summary, _ = res
        compute_time += time.time()

        if math.isnan(grad_norm):
            print("NaN GradNorm. Exiting")
            sys.exit(1)

        if step == 100:
            model.start_momentum(session)

        if step % 1000 == 0:
            saver.save(session, model_path)
            summary_writer.add_summary(summary, global_step=step)
            log_str = ("Iter {}: AvgCost {}, Cost {:.2f}, "
                       "GradNorm {:.2f}, CumTime {:.2f} (s), "
                       "CompTime {:.2f} (s), AvgItTime {:.2f} (s)")
            cum_time = time.time() - start_time
            print(log_str.format(step, avg_cost, cost,
                                 grad_norm, cum_time,
                                 compute_time, cum_time / (e + 1)))

    saver.save(session, model_path)
    print("Total time: ", time.time() - start_time)


def main(argv=None):
    """Build the model from the config, then train for the configured
    number of epochs."""
    with open(FLAGS.config) as fid:
        config = json.load(fid)

    train_jsons = config['data']['train_jsons']
    sample_rate = config['data']['sample_rate']
    batch_size = config['inference']['batch_size']
    epochs = config['train']['epochs']

    # TODO, awni, for now it's on the user to get this right.
    # E.g. all the config params which have to remain the same
    # to "restore" a model.
    restore_path = config['io'].get('restore_path', None)
    save_path = config['io']['save_path']
    check_path(save_path)

    producer = audio_producer.AudioProducer(train_jsons, batch_size,
                                            sample_rate=sample_rate)
    config['inference']['alphabet_size'] = producer.alphabet_size
    config['inference']['input_dim'] = producer.input_dim

    # Save the completed config alongside the checkpoints so the model
    # can be rebuilt later.
    with open(os.path.join(save_path, "model.json"), 'w') as fid:
        json.dump(config, fid)

    with tf.Graph().as_default():
        model = models.MultiSpeechModel(FLAGS.num_gpus)
        model.init_inference(config['inference'])
        model.init_cost()
        model.init_train(config['train'])

        sess_conf = tf.ConfigProto(allow_soft_placement=True)
        session = tf.Session(config=sess_conf)

        saver = tf.train.Saver()
        if restore_path:
            saver.restore(session, os.path.join(restore_path, "model.ckpt"))
        else:
            # Fresh model: initialize variables and estimate input feature
            # statistics from the training data.
            session.run(tf.global_variables_initializer())
            print("Estimating and setting the mean and standard...")
            mean, std = producer.estimate_mean_std()
            model.set_mean_std(mean, std, session)

        print("Begin training...")
        for e in range(epochs):
            run_epoch(model, producer, session, save_path, saver)
            print("Finished epoch", e)


if __name__ == "__main__":
    # For determinism
    random.seed(10)
    np.random.seed(10)
    tf.app.run()