# fruit_categorizer.py
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import PIL
import PIL.Image
import tensorflow as tf
from keras import backend
from sklearn.metrics import confusion_matrix
from tensorflow.keras import layers
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import RandomNormal
# Working directory must contain one sub-folder of images per fruit.
# Set the FRUIT_DATA_DIR environment variable to point at your own data;
# the original author's path is kept as the fallback default.
os.chdir(os.environ.get(
    "FRUIT_DATA_DIR",
    "C:\\Users\\tedjt\\Desktop\\School\\2020_summer\\final\\data"))
### Prepare data, split into train/test
def get_fruits(images_per_fruit = 50, skip_pixels = 1, *, data_dir = None,
               excluded = ("Apple", "Guava", "Kiwi"),
               expected_shape = (258, 320, 3)):
    """Load fruit images from per-label folders and one-hot encode the labels.

    Every sub-directory of the data directory is treated as one label and
    every regular file inside it as one candidate image.

    Args:
        images_per_fruit: Maximum number of images kept per label folder.
        skip_pixels: Stride used to subsample both spatial axes.
        data_dir: Directory holding the label folders; ``None`` means the
            current working directory.
        excluded: Folder names to ignore (by default the fruits whose
            folders contain nested sub-folders). Missing names are fine.
        expected_shape: Only images whose decoded RGB array has exactly
            this shape are kept; others are skipped.

    Returns:
        ``(images, dummied_labels)`` where ``images`` is a float32 array of
        stacked images scaled to [-1, 1] and ``dummied_labels`` is a
        ``(n_images, n_labels)`` array of one-hot rows. Label columns follow
        the alphabetical order of the folder names.

    Raises:
        ValueError: If no image with the expected shape was found.
    """
    root = os.curdir if data_dir is None else data_dir
    skipped_names = set(excluded)
    # Sorted because os.listdir order is arbitrary; this keeps the
    # label -> column mapping reproducible between runs.
    label_names = sorted(
        name for name in os.listdir(root)
        if name not in skipped_names
        and os.path.isdir(os.path.join(root, name))
    )
    wanted_shape = tuple(expected_shape)

    images = []
    label_columns = []
    for column, folder in enumerate(label_names):
        print("\n...Collecting images from {0}...".format(folder))
        folder_path = os.path.join(root, folder)
        kept = 0
        for file in sorted(os.listdir(folder_path)):
            # Check the quota before loading so exactly images_per_fruit
            # images (not one extra) are kept.
            if kept >= images_per_fruit:
                break
            file_path = os.path.join(folder_path, file)
            # Nested sub-folders are not images; skip instead of crashing
            if not os.path.isfile(file_path):
                continue
            # Context manager closes the underlying file handle promptly
            with PIL.Image.open(file_path) as handle:
                image = np.array(handle.convert("RGB")).astype('float32')
            if image.shape != wanted_shape:
                continue
            image = image[::skip_pixels, ::skip_pixels, :]
            # Map pixel values from [0, 255] to [-1, 1]
            images.append((image - 127.5) / 127.5)
            label_columns.append(column)
            kept += 1

    if not images:
        raise ValueError(
            "No images with shape {0} found under {1!r}".format(
                wanted_shape, root))

    print("\n...Stacking images...")
    images = np.stack(images, axis = 0)
    # Turn column indices into one-hot rows, e.g. 0 -> [1, 0, 0, ...]
    dummied_labels = np.zeros((len(label_columns), len(label_names)))
    dummied_labels[np.arange(len(label_columns)), label_columns] = 1
    return(images, dummied_labels)
# Load the dataset, subsampling every third pixel along each spatial axis
images, labels = get_fruits(skip_pixels = 3, images_per_fruit = 200)
# Per-image shape (all axes after the sample axis) and one-hot vector width
image_shape, label_quantity = images.shape[1:], labels.shape[1]
print("\nData: \n\t Input: {0}, \t Output: {1}.".format(images.shape, labels.shape))
def train_test(images, labels, train_percent = .8):
    """Randomly partition paired samples into training and test subsets.

    A shuffled list of row positions is cut at ``int(train_percent * n)``;
    the first part selects the training rows and the rest the test rows.
    The same positions are applied to ``images`` and ``labels`` so the
    pairs stay aligned. Shuffling uses the ``random`` module's global state.

    Args:
        images: Array indexable by a list of row positions.
        labels: Array with one row per entry of ``images``.
        train_percent: Fraction of rows assigned to the training subset.

    Returns:
        ``(x_train, x_test, y_train, y_test)``.
    """
    sample_count = len(labels)
    order = list(range(sample_count))
    random.shuffle(order)
    cut = int(train_percent * sample_count)
    train_rows, test_rows = order[:cut], order[cut:]
    return (
        images[train_rows],
        images[test_rows],
        labels[train_rows],
        labels[test_rows],
    )
# Hold out 20% of the samples for evaluation
x_train, x_test, y_train, y_test = train_test(
    images,
    labels,
    train_percent = .8,
)
# Report the shapes of both partitions
print(
    "\nTrain: \n\t Input: {0}, \t Output: {1}.".format(x_train.shape, y_train.shape)
)
print(
    "\nTest: \n\t Input: {0}, \t Output: {1}.".format(x_test.shape, y_test.shape)
)
### Shared settings for the convolutional network
# d: dropout rate applied after each convolutional stage
d = 0.5
# b: standard deviation of the Gaussian noise added to input images
b = 0.05
# init: kernel initializer drawing from a normal distribution (mean 0, std 0.1)
init = RandomNormal(stddev = 0.1, mean = 0)
# Reflective padding
def ref_pad(tensor, paddings = None):
    """Pad ``tensor`` by mirroring its borders.

    Uses ``tf.pad`` in "SYMMETRIC" mode, which mirrors values *including*
    the edge element (unlike "REFLECT", which excludes it).

    Args:
        tensor: Tensor to pad. The default padding assumes rank 4.
        paddings: One ``[before, after]`` pair per axis of ``tensor``.
            ``None`` (the default) means zero padding on four axes, which
            leaves a rank-4 tensor unchanged.

    Returns:
        The padded tensor.
    """
    if paddings is None:
        # Built per call so no mutable default is shared between calls
        paddings = [[0, 0], [0, 0], [0, 0], [0, 0]]
    return tf.pad(tensor, paddings = paddings, mode = "SYMMETRIC")
# Constraint for neurons
class ClipConstraint(Constraint):
    """Weight constraint clamping every weight to [-clip_value, clip_value].

    Clipping goes through ``tf.clip_by_value`` so the class depends only on
    TensorFlow, like the ``tensorflow.keras`` layers it is attached to,
    rather than on the standalone ``keras`` backend module.
    """

    def __init__(self, clip_value):
        """Store the half-width of the allowed weight range."""
        self.clip_value = clip_value

    def __call__(self, weights):
        """Return ``weights`` clipped element-wise to the allowed range."""
        return tf.clip_by_value(weights, -self.clip_value, self.clip_value)

    def get_config(self):
        """Return the constructor arguments for serialization."""
        return {'clip_value': self.clip_value}
# Shared kernel constraint: clamp convolution weights to [-0.1, 0.1]
constraint = ClipConstraint(.1)
### Check how noisy we're making images
# Images are stored scaled to [-1, 1], but plt.imshow expects RGB floats in
# [0, 1]; rescale (and clip noise overshoot) for display only so the preview
# is not silently truncated. The stored arrays are left untouched.
print("\nExample image:")
sample_image = images[0]
plt.imshow(np.clip((sample_image + 1) / 2, 0, 1))
plt.show()
print("\nExample image with noise:")
# Same noise level (std b) that the model's GaussianNoise layer is given
noise = np.random.normal(loc=0.0, scale=b, size=sample_image.shape)
sample_image_noisy = sample_image + noise
plt.imshow(np.clip((sample_image_noisy + 1) / 2, 0, 1))
plt.show()
### Build categorizer
def _add_conv_stage(model):
    """Append one mirror-padded, strided 5x5 convolution stage to ``model``."""
    # Pad 2 pixels on each side of both spatial axes before the 5x5 kernel
    model.add(layers.Lambda(
        lambda t: ref_pad(t, paddings = [[0,0],[2,2],[2,2],[0,0]])))
    model.add(layers.Conv2D(
        filters = 64,
        kernel_size = (5, 5),
        strides = (2,2),
        kernel_initializer = init,
        kernel_constraint = constraint,
        use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(d))


def categorizer_model():
    """Build the (uncompiled) convolutional fruit categorizer.

    Architecture: input -> Gaussian noise (std ``b``) -> two identical
    padded convolution stages -> flatten -> dense layer with
    ``label_quantity`` outputs and no activation.

    The input shape is declared once with an explicit ``Input`` at the front
    of the model. Previously ``input_shape`` was passed to the first Conv2D,
    which is not the first layer of the model, so it was not the place where
    the model's input was defined.

    Reads the module-level ``image_shape``, ``label_quantity``, ``b``, ``d``,
    ``init`` and ``constraint``.

    Returns:
        The built ``tf.keras.Sequential`` model (its summary is printed).
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape = image_shape))
    # Add slight noise to input images
    model.add(layers.GaussianNoise(stddev = b))
    # Two identical convolution stages
    for _ in range(2):
        _add_conv_stage(model)
    # Flatten and add final categorization layer
    model.add(layers.Flatten())
    model.add(layers.Dense(label_quantity))
    model.summary()
    return model
# Build the network and compile it with Adam and a mean-squared-error loss
categorizer = categorizer_model()
categorizer.compile(loss='mean_squared_error', optimizer='adam')
### Fit the categorizer, validating on the held-out split each epoch
epochs = 50
batch = 64
history = categorizer.fit(
    x=x_train,
    y=y_train,
    batch_size=batch,
    epochs=epochs,
    validation_data=(x_test, y_test),
).history
# Kept as a list of per-run history dicts
histories = [history]
### Plot training loss and validation loss per epoch
loss, val_loss = (histories[0][key] for key in ("loss", "val_loss"))
# Epoch numbers start at 1
x = list(range(1, len(loss) + 1))
plt.plot(x, loss, color='blue')
plt.plot(x, val_loss, color='orange')
plt.title('Training Loss and Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Losses')
# Axes start at zero and extend to the last epoch / largest recorded loss
plt.xlim(0, x[-1])
plt.ylim(0, max(max(loss), max(val_loss)))
plt.show()
### Make confusion matrix
# Sequential.predict_classes was removed from tf.keras (TensorFlow 2.6);
# taking the argmax over the model outputs gives the same class indices.
y_pred = list(np.argmax(categorizer.predict(x_test), axis = -1))
# Column of the 1 in each one-hot row is the true class index
y_true = list(np.argmax(y_test, axis = 1))
# Print the matrix: a bare expression is discarded when run as a script
print(confusion_matrix(y_true, y_pred))