#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 19 17:31:05 2017
@author: sahand
"""
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Flatten
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import cv2
# On Kaggle the input data lives in "../input/"; this local run reads from
# directory_path instead. Listing the directory confirms the files are present.
from subprocess import check_output
directory_path = "/home/sahand/Desktop/dogbreed/"
print(check_output(["ls", directory_path]).decode("utf8"))
df_train = pd.read_csv(directory_path+'labels.csv')
df_test = pd.read_csv(directory_path+'sample_submission.csv')
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse=True)
one_hot_labels = np.asarray(one_hot)
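# Each row of one_hot_labels is a 0/1 vector with a single 1 in the column for
# that image's breed; get_dummies orders columns alphabetically, and that same
# column order is reused for the submission columns at the end.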
im_size = 90
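# VGG19's native input is 224x224; with include_top=False Keras accepts smaller
# inputs (down to 32px for the VGG models), so 90x90 trades spatial detail for speed.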
x_train = []
y_train = []
x_test = []
i = 0
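# Load each training image, resize it, and pair it with its one-hot label.
# (cv2.imread returns None for a missing/corrupt file; the script assumes every
# id listed in the CSVs has a matching .jpg.)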
for f, breed in tqdm(df_train.values):
    img = cv2.imread(directory_path + 'train/{}.jpg'.format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(label)
    i += 1
for f in tqdm(df_test['id'].values):
    img = cv2.imread(directory_path + 'test/{}.jpg'.format(f))
    x_test.append(cv2.resize(img, (im_size, im_size)))
y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32) / 255.
x_test = np.array(x_test, np.float32) / 255.
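# Expected shapes: x_train_raw and x_test are (num_images, 90, 90, 3) with pixel
# values scaled to [0, 1]; y_train_raw is (num_images, num_breeds).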
print(x_train_raw.shape)
print(y_train_raw.shape)
print(x_test.shape)
num_class = y_train_raw.shape[1]
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size=0.3, random_state=1)
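# Plain random 70/30 split. (Passing stratify=y_train_raw would keep breed
# proportions similar across the two sets, at no extra cost.)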
# Create the base pre-trained model
# Can't download weights in the kernel
base_model = VGG19(weights=None,  # weights='imagenet' once downloads are possible
                   include_top=False,
                   input_shape=(im_size, im_size, 3))
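# Caveat: with weights=None the convolutional base starts from random weights,
# so freezing it below trains the head on fixed random features. With access to
# the pretrained weights, weights='imagenet' plus a frozen base is the standard
# transfer-learning setup.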
# Add a new top layer
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)
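# For 90x90 inputs the VGG19 base ends in a (2, 2, 512) feature map, so Flatten
# yields a 2048-vector; the softmax layer outputs one probability per breed.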
# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
# First: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
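# Stop training once validation accuracy stalls for 3 epochs. Newer Keras/TF
# releases name this metric 'val_accuracy' rather than 'val_acc', and with
# epochs=1 below the callback cannot trigger; it matters for longer runs.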
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()
model.fit(X_train, Y_train, epochs=1, validation_data=(X_valid, Y_valid),
          callbacks=callbacks_list, verbose=1)
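# A possible second stage (a sketch, not part of the original run): after the
# head converges, unfreeze the last few VGG layers and fine-tune at a small
# learning rate. The layer count and lr below are illustrative guesses.
# for layer in base_model.layers[-5:]:
#     layer.trainable = True
# model.compile(loss='categorical_crossentropy',
#               optimizer=keras.optimizers.Adam(lr=1e-5),
#               metrics=['accuracy'])
# model.fit(X_train, Y_train, epochs=10, validation_data=(X_valid, Y_valid),
#           callbacks=callbacks_list, verbose=1)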
preds = model.predict(x_test, verbose=1)
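# preds has shape (num_test_images, num_class); each row is a softmax
# distribution over the breeds.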
sub = pd.DataFrame(preds)
# Set column names to those generated by the one-hot encoding earlier
col_names = one_hot.columns.values
sub.columns = col_names
# Insert the column id from the sample_submission at the start of the data frame
sub.insert(0, 'id', df_test['id'])
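# Final layout: an 'id' column followed by one probability column per breed,
# matching the sample_submission format.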
print(sub.head(5))  # preview the first rows (bare .head() only renders in a notebook)
sub.to_csv(directory_path+"result-2.csv", encoding='utf-8', index=False)