# coding: utf-8
# # Behavioral Cloning | End to End Learning for Self Driving Cars
# **In this project, I used a deep neural network (built with [Keras](https://keras.io/)) to clone car driving behavior.**
#
# **The dataset used to train the network is generated from [Udacity's Self-Driving Car Simulator](https://github.com/udacity/self-driving-car-sim), and it consists of images taken from three different camera angles (Center - Left - Right), in addition to the steering angle, throttle, brake, and speed during each frame.**
#
# **The network is based on NVIDIA's paper [End to End Learning for Self-Driving Cars](https://arxiv.org/pdf/1604.07316v1.pdf), which has been proven to work in this problem domain.**
# ## Pipeline architecture:
# - **Data Loading.**
# - **Data Preprocessing.**
# - **Data Augmentation.**
# - **Model Architecture.**
# - **Model Training and Evaluation.**
# - **Model Testing on the simulator.**
#
# I'll explain each step in detail below.
# #### Environment:
# - Ubuntu 16.04
# - Anaconda 5.0.1
# - Python 3.6.2
# - Keras 1.2.1
# - TensorFlow 0.12.1 (GPU: NVIDIA GTX 1070)
# In[1]:
# Importing Python libraries
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.layers import Lambda, Conv2D, MaxPooling2D, Dropout, Dense, Flatten
import csv
import cv2
import matplotlib.pyplot as plt
import random
import matplotlib.image as mpimg
# ---
# ## Step 1: Data Loading
# Download the dataset from [here](https://d17h27t6h515a5.cloudfront.net/topher/2016/December/584f6edd_data/data.zip).
# This dataset contains more than 8,000 frame images taken from the 3 cameras (3 images for each frame), in addition to a `csv` file with the steering angle, throttle, brake, and speed during each frame.
# In[2]:
data_dir = './data/'
labels_file = './data/driving_log.csv'
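# For orientation, here's a quick peek at the driving log. The `center`, `left`,
# `right`, and `steering` columns are the ones used below; `throttle`, `brake`,
# and `speed` are recorded as well (this assumes the dataset has been extracted
# into `./data/`):
log = pd.read_csv(labels_file)
print(log.columns.tolist())
print(log[['steering', 'throttle', 'brake', 'speed']].describe())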
# In[3]:
def load_data(labels_file, test_size):
    """
    Load the dataset from the labels CSV file and split it into training and validation sets.
    Parameters:
        labels_file: The labels CSV file.
        test_size: The fraction of the data to reserve for the validation set.
    """
labels = pd.read_csv(labels_file)
X = labels[['center', 'left', 'right']].values
y = labels['steering'].values
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=test_size, random_state=0)
return X_train, X_valid, y_train, y_valid
# In[4]:
def load_image(data_dir, image_file):
"""
Load RGB image.
Parameters:
data_dir: The directory where the images are.
image_file: The image file name.
"""
return mpimg.imread(os.path.join(data_dir, image_file.strip()))
# In[5]:
data = load_data(labels_file, 0.2)
# ---
# ## Step 2: Data Preprocessing
#
# ### Preprocessing steps:
# - Cropping the image to cut off the sky and the car's hood.
# - Resizing the image to 66x200, the input size the model expects.
# - Converting the image to the YUV color space.
# - Normalizing the image data (dividing by 127.5 and subtracting 1.0). As stated in the Model Architecture section, this is done to avoid saturation and to make the gradients work better.
# In[7]:
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 66, 200, 3
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
# In[8]:
def preprocess(img):
"""
Preprocessing (Crop - Resize - Convert to YUV) the input image.
Parameters:
img: The input image to be preprocessed.
"""
# Cropping the image
img = img[60:-25, :, :]
# Resizing the image
    img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
# Converting the image to YUV
img = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
return img
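# As a quick sanity check, we can run the pipeline on the first training frame
# (a sketch; `data[0]` holds the `[center, left, right]` path triples from Step 1):
sample = preprocess(load_image(data_dir, data[0][0][0]))
assert sample.shape == INPUT_SHAPE  # (66, 200, 3), YUV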
# ---
# ## Step 3: Data Augmentation
#
# ### Augmentation steps:
# - Randomly choosing between the center, left, and right camera images, with steering angle adjustment.
# - Flipping random images horizontally, with steering angle adjustment.
# - Shifting (translating) random images, with steering angle adjustment.
# - Adding shadows to random images.
# - Altering the brightness of random images.
# In[11]:
def random_adjust(data_dir, center, left, right, steering_angle):
    """
    Randomly choose an image from the center, left, or right camera,
    and adjust the steering angle accordingly.
    Parameters:
        data_dir: The directory where the images are.
        center: Center image.
        left: Left image.
        right: Right image.
        steering_angle: The steering angle of the input frame.
    """
choice = np.random.choice(3)
if choice == 0:
return load_image(data_dir, left), steering_angle + 0.2
elif choice == 1:
return load_image(data_dir, right), steering_angle - 0.2
return load_image(data_dir, center), steering_angle
# In[64]:
def random_flip(image, steering_angle):
    """
    Randomly flip the input image horizontally, with steering angle adjustment.
    Parameters:
        image: The input image.
        steering_angle: The steering angle related to the input image.
    """
if np.random.rand() < 0.5:
image = cv2.flip(image, 1)
steering_angle = -steering_angle
return image, steering_angle
# In[66]:
def random_shift(image, steering_angle, range_x, range_y):
    """
    Shift (translate) the input image, with steering angle adjustment.
    Parameters:
        image: The input image.
        steering_angle: The steering angle related to the input image.
        range_x: Horizontal translation range.
        range_y: Vertical translation range.
    """
trans_x = range_x * (np.random.rand() - 0.5)
trans_y = range_y * (np.random.rand() - 0.5)
steering_angle += trans_x * 0.002
trans_m = np.float32([[1, 0, trans_x], [0, 1, trans_y]])
height, width = image.shape[:2]
image = cv2.warpAffine(image, trans_m, (width, height))
return image, steering_angle
# In[68]:
def random_shadow(image):
    """
    Add a shadow to a random region of the input image.
    Parameters:
        image: The input image.
    """
    bright_factor = 0.3
    # Pick a random top-left corner and a random region size,
    # clamped so the shadow region stays inside the image
    x = random.randint(0, image.shape[1])
    y = random.randint(0, image.shape[0])
    width = random.randint(image.shape[1] // 2, image.shape[1])
    if x + width > image.shape[1]:
        width = image.shape[1] - x
    height = random.randint(image.shape[0] // 2, image.shape[0])
    if y + height > image.shape[0]:
        height = image.shape[0] - y
    # Darken the V (brightness) channel inside the shadow region
    image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    shadow = image[y:y+height, x:x+width, 2] * bright_factor
    image[y:y+height, x:x+width, 2] = shadow.astype(np.uint8)
    return cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
# In[70]:
def random_brightness(image):
    """
    Randomly alter the brightness of the input image.
    Parameters:
        image: The input image.
    """
    # HSV (Hue, Saturation, Value) is also called HSB ('B' for Brightness).
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    ratio = 1.0 + (np.random.rand() - 0.5)
    # Clip to [0, 255] so bright pixels don't wrap around when cast back to uint8
    hsv[:,:,2] = np.clip(hsv[:,:,2] * ratio, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
# In[72]:
def augument(data_dir, center, left, right, steering_angle, range_x=100, range_y=10):
    """
    Generate an augmented image and adjust the associated steering angle.
    Parameters:
        data_dir: The directory where the images are.
        center: Center image.
        left: Left image.
        right: Right image.
        steering_angle: The steering angle related to the input frame.
        range_x (Default = 100): Horizontal translation range.
        range_y (Default = 10): Vertical translation range.
    """
image, steering_angle = random_adjust(data_dir, center, left, right, steering_angle)
image, steering_angle = random_flip(image, steering_angle)
image, steering_angle = random_shift(image, steering_angle, range_x, range_y)
image = random_shadow(image)
image = random_brightness(image)
return image, steering_angle
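# Example: draw one augmented sample from the first training frame
# (the steering label comes from data[2], the training angles):
center, left, right = data[0][0]
aug_img, aug_angle = augument(data_dir, center, left, right, data[2][0])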
# ---
# ## Step 4: Model Architecture
#
# In this step, we will design and implement a deep learning model that can clone the vehicle's behavior.
# We'll use a convolutional neural network (CNN) to map raw pixels from a single front-facing camera directly to steering commands.
#
# We'll use the ConvNet from NVIDIA's paper [End to End Learning for Self-Driving Cars](https://arxiv.org/pdf/1604.07316v1.pdf), which has been proven to work in this problem domain.
#
# <figure>
# <img src="Images/Training_the_neural_network.png" width="1654" alt="Combined Image" />
# <figcaption>
# <p></p>
# </figcaption>
# </figure>
#
# According to the paper: *"We train the weights of our network to minimize the mean squared error between the steering command output by the network and the command of either the human driver, or the adjusted steering command for off-center and rotated images. Our network architecture is shown in Figure 4. The network consists of 9 layers, including a normalization layer, 5 convolutional layers and 3 fully connected layers. The input image is split into YUV planes and passed to the network.
# The first layer of the network performs image normalization. The normalizer is hard-coded and is not adjusted in the learning process. Performing normalization in the network allows the normalization scheme to be altered with the network architecture and to be accelerated via GPU processing.
# The convolutional layers were designed to perform feature extraction and were chosen empirically through a series of experiments that varied layer configurations. We use strided convolutions in the first three convolutional layers with a 2×2 stride and a 5×5 kernel and a non-strided convolution with a 3×3 kernel size in the last two convolutional layers.
# We follow the five convolutional layers with three fully connected layers leading to an output control value which is the inverse turning radius. The fully connected layers are designed to function as a controller for steering, but we note that by training the system end-to-end, it is not possible to make a clean break between which parts of the network function primarily as feature extractor and which serve as controller."*
# <figure>
# <img src="Images/NVIDIA_model.png" width="624" alt="Combined Image" />
# <figcaption>
# <p></p>
# </figcaption>
# </figure>
#
# I've added a dropout layer to the model to prevent overfitting.
# In[210]:
def NVIDIA_model():
    """
    Build the NVIDIA end-to-end model, with an added dropout layer.
    """
    model = Sequential()
    # Hard-coded normalization layer (not adjusted during learning)
    model.add(Lambda(lambda x: x/127.5-1.0, input_shape=INPUT_SHAPE))
    # Three strided 5x5 convolutions (2x2 stride), as in the NVIDIA paper
    model.add(Conv2D(24, 5, 5, activation='relu', subsample=(2, 2)))
    model.add(Conv2D(36, 5, 5, activation='relu', subsample=(2, 2)))
    model.add(Conv2D(48, 5, 5, activation='relu', subsample=(2, 2)))
    # Two non-strided 3x3 convolutions
    model.add(Conv2D(64, 3, 3, activation='relu'))
    model.add(Conv2D(64, 3, 3, activation='relu'))
    # Dropout (added to the original architecture) to prevent overfitting
    model.add(Dropout(0.5))
    model.add(Flatten())
    # Three fully connected layers leading to the single steering output
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.summary()
    return model
# ---
# ## Step 5: Model Training and Evaluation
#
# - I've split the data into an 80% training set and a 20% validation set to measure performance after each epoch.
# - I used Mean Squared Error (MSE) as the loss function, measuring how close the model's predicted steering angle is to the ground truth for each input frame.
# - I used the Adaptive Moment Estimation (Adam) algorithm to minimize the loss function. Adam is an optimization algorithm introduced by D. Kingma and J. Lei Ba in their 2015 paper [Adam: A Method for Stochastic Optimization](https://arxiv.org/abs/1412.6980). Adam computes adaptive learning rates for each parameter. In addition to storing an exponentially decaying average of past squared gradients like the [Adadelta](https://arxiv.org/pdf/1212.5701.pdf) and [RMSprop](https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) algorithms, Adam also keeps an exponentially decaying average of past gradients $m_t$, similar to the [momentum algorithm](http://www.sciencedirect.com/science/article/pii/S0893608098001166?via%3Dihub), which in turn produces better results (the update equations are sketched after this list).
# - I used `ModelCheckpoint` from Keras to check the validation loss after each epoch and save the model only if the validation loss improved.
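# For reference, a sketch of Adam's per-parameter update rule from the paper, with
# gradient $g_t$, decay rates $\beta_1, \beta_2$, learning rate $\alpha$, and a small $\epsilon$:
#
# $$m_t = \beta_1 m_{t-1} + (1 - \beta_1)\, g_t \qquad v_t = \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2$$
#
# $$\hat{m}_t = \frac{m_t}{1 - \beta_1^t} \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t} \qquad \theta_t = \theta_{t-1} - \alpha\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}$$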
# In[227]:
batch_size = 40
samples_per_epoch = 20000
nb_epoch = 10
# In[213]:
def batcher(data_dir, image_paths, steering_angles, batch_size, training_flag):
    """
    Generate batches of images and associated steering angles from the given image paths.
    Parameters:
        data_dir: The directory where the images are.
        image_paths: Paths to the input images.
        steering_angles: The steering angles associated with the input frames.
        batch_size: The batch size used to train the model.
        training_flag: A flag to determine whether we're in training or validation mode.
    """
images = np.empty([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
steers = np.empty(batch_size)
while True:
i = 0
for index in np.random.permutation(image_paths.shape[0]):
center, left, right = image_paths[index]
steering_angle = steering_angles[index]
            # During training, augment ~60% of the samples; otherwise use the center image as-is
            if training_flag and np.random.rand() < 0.6:
image, steering_angle = augument(data_dir, center, left, right, steering_angle)
else:
image = load_image(data_dir, center)
images[i] = preprocess(image)
steers[i] = steering_angle
i += 1
if i == batch_size:
break
yield images, steers
# In[225]:
def train_model(model, X_train, X_valid, y_train, y_valid):
    """
    Compile and train the model, checkpointing on validation loss.
    """
    # Save the model after any epoch where the validation loss improves
    checkpoint = ModelCheckpoint('model-{val_loss:.3f}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True,
                                 mode='auto')
    model.compile(loss='mse', optimizer=Adam(lr=1.0e-4))
    # Keras 1.x fit_generator signature: (generator, samples_per_epoch, nb_epoch, ...)
    model.fit_generator(batcher(data_dir, X_train, y_train, batch_size, True),
                        samples_per_epoch,
                        nb_epoch,
                        max_q_size=1,
                        validation_data=batcher(data_dir, X_valid, y_valid, batch_size, False),
                        nb_val_samples=len(X_valid),
                        callbacks=[checkpoint],
                        verbose=1)
# In[228]:
model = NVIDIA_model()
train_model(model, *data)
# ---
# ## Step 6: Model Testing on the simulator
# The model was able to drive the car safely through the track without leaving the drivable portion of the track surface.
#
# ### First Track:
# [![First Track](./Images/driving.PNG)](https://youtu.be/EWvqiYJnKfY)
# **[YouTube Link](https://youtu.be/hTPADovdyfA)**
# ---
# ## Conclusion
#
# Using NVIDIA's end-to-end learning network, the model was able to drive the car through the first track, using only the training data provided by Udacity.
# One way to improve the model is to collect more data from the mountain track and train the model to handle that more challenging track as well.
# We could also modify the `drive.py` script to tie the maximum speed to the steering angle, so that the car speeds up on the straight parts of the track and slows down on the curves; a sketch of this idea follows.
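# A minimal sketch of that idea, assuming a telemetry handler in `drive.py` that
# receives the predicted `steering_angle` and the current `speed`. The constants
# and function names below are illustrative, not part of the current script:
MAX_SPEED = 25
MIN_SPEED = 10

def target_speed(steering_angle):
    # Slow down in proportion to how sharp the predicted turn is
    turn = min(abs(steering_angle) / 0.5, 1.0)
    return MAX_SPEED - (MAX_SPEED - MIN_SPEED) * turn

def throttle_for(steering_angle, speed):
    # Simple proportional throttle toward the target speed
    return (target_speed(steering_angle) - speed) / MAX_SPEED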