-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_training_data.py
65 lines (48 loc) · 2.11 KB
/
create_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
from model.input import Serializer
def read_binary(example, feature):
    """Return the first bytes value stored under `feature` in `example`.

    `example` is accessed as `example.features.feature[feature]`, and the
    first element of that entry's `bytes_list.value` is returned.
    """
    entry = example.features.feature[feature]
    return entry.bytes_list.value[0]
def read_int64(example, feature):
    """Return the first int64 value stored under `feature` in `example`.

    `example` is accessed as `example.features.feature[feature]`, and the
    first element of that entry's `int64_list.value` is returned.
    """
    entry = example.features.feature[feature]
    return entry.int64_list.value[0]
# Eager execution is switched on as soon as this module is loaded. The
# __main__ section iterates a tf.data dataset with a plain `for` loop and calls
# `.numpy()` on each element, which is presumably why eager mode is required.
# NOTE(review): `tf.enable_eager_execution` looks like the TF 1.x spelling --
# confirm the TensorFlow version this script is pinned to.
tf.enable_eager_execution()
class RecordWriter:
    """Write serialized examples into numbered TFRecord files.

    Files are named ``records-<n>`` (``n`` starts at 1) inside ``PATH``. Once
    the current file holds ``BATCH_SIZE`` examples, the next call to
    :meth:`write` closes it and opens the following file.

    Call :meth:`close` when finished (or use the instance in a ``with``
    statement) so the last file is closed as well.
    """

    # Directory the record files are written to.
    PATH = 'training-data/small'
    # Maximum number of examples stored in a single record file.
    BATCH_SIZE = 256

    def __init__(self):
        self.num_pics_saved = 0
        self.num_file = 0
        # Make sure the output directory exists before the first file is
        # opened, so a fresh checkout does not fail on a missing folder.
        os.makedirs(self.PATH, exist_ok=True)
        self.writer = self.new_writer()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always close the current file; exceptions are not suppressed.
        self.close()

    def current_filename(self):
        """Return the base name of the file currently being written."""
        return 'records-' + str(self.num_file)

    def new_writer(self):
        """Advance to the next file number and open a writer for it.

        Resets the per-file example counter. The caller is responsible for
        closing the previous writer first.
        """
        self.num_file += 1
        self.num_pics_in_current_file = 0
        return tf.io.TFRecordWriter(os.path.join(self.PATH, self.current_filename()))

    def write(self, example):
        """Serialize `example` and append it to the current record file.

        `example` must provide ``SerializeToString()``. If the current file is
        already full, it is closed and a new one is started first.

        Raises:
            ValueError: if the writer has already been closed.
        """
        if self.writer is None:
            raise ValueError('write() called on a closed RecordWriter')
        if self.num_pics_in_current_file >= self.BATCH_SIZE:
            self.writer.close()
            self.writer = self.new_writer()
        self.writer.write(example.SerializeToString())
        self.num_pics_saved += 1
        self.num_pics_in_current_file += 1
        print('[saved image {}, {} in current file ({})]'
              .format(self.num_pics_saved, self.num_pics_in_current_file, self.num_file))

    def close(self):
        """Close the current record file. Safe to call more than once."""
        if self.writer is not None:
            self.writer.close()
            self.writer = None
def main():
    """Re-serialize every record found under ``imagenet-records/``.

    Each input record is parsed as a ``tf.train.Example`` with the features
    ``bw`` and ``color`` (raw float32 buffers) and ``height`` / ``width``
    (int64). The buffers are reshaped into arrays, passed through
    ``Serializer.serialize_example`` and written out with ``RecordWriter``.
    """
    files = tf.data.Dataset.list_files('imagenet-records/*')
    dataset = files.interleave(tf.data.TFRecordDataset, cycle_length=2)
    serializer = Serializer()
    writer = RecordWriter()
    try:
        for serialized in dataset:
            parsed = tf.train.Example()
            parsed.ParseFromString(serialized.numpy())

            bw = read_binary(parsed, 'bw')
            color = read_binary(parsed, 'color')
            height = read_int64(parsed, 'height')
            width = read_int64(parsed, 'width')

            # NOTE(review): after `.T` the bw image has shape
            # (width, height, 1), while the color channels stay
            # (2, height, width) -- confirm this is the layout
            # Serializer.serialize_example expects.
            img_bw = np.frombuffer(bw, dtype=np.float32).reshape((1, height, width)).T
            color_channels = np.frombuffer(color, dtype=np.float32).reshape(2, height, width)

            writer.write(serializer.serialize_example(img_bw, color_channels))
    finally:
        # RecordWriter keeps its active TFRecordWriter in `.writer`; close it
        # so the last (usually partially filled) file is not left open.
        writer.writer.close()


if __name__ == "__main__":
    main()