diff --git a/odir_image_crop.py b/odir_image_crop.py index d5e6441..282782e 100644 --- a/odir_image_crop.py +++ b/odir_image_crop.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_image_crop_job.py b/odir_image_crop_job.py index be97b87..b419f6f 100644 --- a/odir_image_crop_job.py +++ b/odir_image_crop_job.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_image_resizer.py b/odir_image_resizer.py index b94aebd..c3b2cc7 100644 --- a/odir_image_resizer.py +++ b/odir_image_resizer.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_image_testing_crop_job.py b/odir_image_testing_crop_job.py index 514f64c..fca1f15 100644 --- a/odir_image_testing_crop_job.py +++ b/odir_image_testing_crop_job.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_image_treatment.py b/odir_image_treatment.py new file mode 100644 index 0000000..cc82aa3 --- /dev/null +++ b/odir_image_treatment.py @@ -0,0 +1,91 @@ +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. 
class ImageTreatment:
    """Image augmentation helpers built on ``tf.image`` and ``skimage.exposure``.

    Every TensorFlow-backed method converts its result to a NumPy ``uint8``
    array before returning it. The three ``skimage`` based methods return
    whatever ``skimage.exposure`` produces, unchanged.
    """

    def __init__(self, image_size):
        """Store the side length (in pixels) of the square images to produce."""
        self.image_size = image_size

    @staticmethod
    def _to_uint8(tensor):
        """Return ``tensor`` as a new ``uint8`` NumPy array.

        Floating-point values are clipped to [0, 255] before the cast so that
        out-of-range results saturate instead of wrapping around. In-range
        values are truncated exactly as a plain ``uint8`` cast would do.
        """
        pixels = np.asarray(tensor)
        if np.issubdtype(pixels.dtype, np.floating):
            pixels = np.clip(pixels, 0, 255)
        return pixels.astype(np.uint8)

    @staticmethod
    def _centered_boxes(scale_vector):
        """Build one centred ``[y1, x1, y2, x2]`` box per scale factor.

        Coordinates are computed in float64 and then cast to float32, which
        matches computing each corner with Python floats. The result has
        shape ``(len(scale_vector), 4)``.
        """
        scales = np.asarray(scale_vector, dtype=np.float64)
        lower = 0.5 - 0.5 * scales
        upper = 0.5 + 0.5 * scales
        return np.stack([lower, lower, upper, upper], axis=1).astype(np.float32)

    def scaling(self, image, scale_vector):
        """Crop ``image`` around its centre once per scale and resize each crop.

        Args:
            image: Pixel data holding ``image_size * image_size * 3`` values.
            scale_vector: Sequence of scale factors; each one yields a box
                spanning ``0.5 - 0.5 * scale`` to ``0.5 + 0.5 * scale`` on
                both axes.

        Returns:
            A ``uint8`` array with one ``image_size`` x ``image_size`` crop per
            entry of ``scale_vector``.
        """
        # crop_and_resize expects a 4-D batch, so wrap the single image.
        batch = np.reshape(image, (1, self.image_size, self.image_size, 3))
        boxes = self._centered_boxes(scale_vector)
        # Every box refers to the only image in the batch (index 0).
        box_indices = np.zeros(len(scale_vector), dtype=np.int32)
        crop_size = np.array([self.image_size, self.image_size], dtype=np.int32)
        crops = tf.image.crop_and_resize(batch, boxes, box_indices, crop_size)
        return self._to_uint8(crops)

    def brightness(self, image, delta):
        """Apply ``tf.image.adjust_brightness`` with ``delta``; return uint8."""
        return self._to_uint8(tf.image.adjust_brightness(image, delta))

    def contrast(self, image, contrast_factor):
        """Apply ``tf.image.adjust_contrast`` with ``contrast_factor``; return uint8."""
        return self._to_uint8(tf.image.adjust_contrast(image, contrast_factor))

    def saturation(self, image, saturation_factor):
        """Apply ``tf.image.adjust_saturation`` with ``saturation_factor``; return uint8."""
        return self._to_uint8(tf.image.adjust_saturation(image, saturation_factor))

    def hue(self, image, delta):
        """Apply ``tf.image.adjust_hue`` with ``delta``; return uint8."""
        return self._to_uint8(tf.image.adjust_hue(image, delta))

    def central_crop(self, image, central_fraction):
        """Apply ``tf.image.central_crop`` with ``central_fraction``; return uint8.

        The crop is not resized afterwards.
        """
        return self._to_uint8(tf.image.central_crop(image, central_fraction))

    def crop_to_bounding_box(self, image, offset_height, offset_width, target_height, target_width):
        """Crop the given box and resize it to ``image_size`` x ``image_size``; return uint8."""
        cropped = tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width)
        resized = tf.image.resize(cropped, (self.image_size, self.image_size))
        return self._to_uint8(resized)

    def gamma(self, image, gamma):
        """Apply ``tf.image.adjust_gamma`` with ``gamma``; return uint8."""
        return self._to_uint8(tf.image.adjust_gamma(image, gamma))

    def rot90(self, image, k):
        """Apply ``tf.image.rot90`` with ``k``; return uint8."""
        return self._to_uint8(tf.image.rot90(image, k))

    def rescale_intensity(self, image):
        """Stretch intensities between the 2nd and 98th percentiles of ``image``."""
        p2, p98 = np.percentile(image, (2, 98))
        return exposure.rescale_intensity(image, in_range=(p2, p98))

    def equalize_histogram(self, image):
        """Return ``skimage.exposure.equalize_hist(image)``."""
        return exposure.equalize_hist(image)

    def equalize_adapthist(self, image):
        """Return ``skimage.exposure.equalize_adapthist`` of ``image`` with ``clip_limit=0.03``."""
        return exposure.equalize_adapthist(image, clip_limit=0.03)
def main(argv):
    """Evaluate a saved Keras model on the ODIR test split and report its scores.

    The function loads the data with ``odir.load_data``, plots the training
    inputs, normalises the test images with ``Normalizer.normalize_vgg16``,
    evaluates the model, plots a confusion matrix, writes the predictions,
    prints the final score and plots the prediction output.

    Args:
        argv: Command-line arguments as handed over by ``absl.app.run``
            (program name first). If a positional argument follows the
            program name it is used as the path of the model file;
            otherwise the original hard-coded path is used.
    """
    # Machine-specific default kept for backward compatibility; pass the model
    # path as the first positional argument to run on any other machine.
    default_model_path = r'C:\Users\thund\Source\Repos\TFM-ODIR\models\image_classification\modelvgg100.h5'
    model_path = argv[1] if argv and len(argv) > 1 else default_model_path

    print(tf.version.VERSION)
    image_size = 224
    test_run = 'zC'

    # load the data
    # NOTE(review): the meaning of the second argument (1) is defined in
    # odir.load_data, which is not visible here -- confirm before changing.
    (x_train, y_train), (x_test, y_test) = odir.load_data(image_size, 1)

    class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

    # plot data input
    plotter = Plotter(class_names)
    plotter.plot_input_images(x_train, y_train)

    # keep a reference to the un-normalised images for the final plot
    x_test_drawing = x_test

    # normalize input based on model
    normalizer = Normalizer()
    x_test = normalizer.normalize_vgg16(x_test)

    # load one of the test runs
    model = tf.keras.models.load_model(model_path)
    model.summary()

    # display the content of the model
    baseline_results = model.evaluate(x_test, y_test, verbose=2)
    for name, value in zip(model.metrics_names, baseline_results):
        print(name, ': ', value)
    print()

    # test a prediction
    test_predictions_baseline = model.predict(x_test)
    plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, test_run, 0)

    # save the predictions
    # NOTE(review): 400 is passed straight to Prediction; its meaning is
    # defined there -- confirm before changing.
    prediction_writer = Prediction(test_predictions_baseline, 400)
    prediction_writer.save()
    prediction_writer.save_all(y_test)

    # show the final score
    score = FinalScore()
    score.output()

    # plot output results
    plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing)
class FinalScore:
    """Compute and print the kappa / F1 / AUC score from two CSV files."""

    def odir_metrics(self, gt_data, pr_data, threshold=0.5):
        """Score predictions against the ground truth.

        Args:
            gt_data: Array of ground-truth labels (flattened before scoring).
            pr_data: Array of predicted scores with the same number of
                elements as ``gt_data`` (flattened before scoring).
            threshold: Scores strictly greater than this value count as a
                positive prediction for kappa and F1. Defaults to 0.5.

        Returns:
            Tuple ``(kappa, f1, auc, final_score)`` where ``final_score`` is
            the arithmetic mean of the other three values.
        """
        gt = gt_data.flatten()
        pr = pr_data.flatten()
        # Binarise once and reuse the result for both thresholded metrics.
        predicted_labels = pr > threshold
        kappa = metrics.cohen_kappa_score(gt, predicted_labels)
        f1 = metrics.f1_score(gt, predicted_labels, average='micro')
        # AUC is computed on the raw scores, not on the thresholded labels.
        auc = metrics.roc_auc_score(gt, pr)
        final_score = (kappa + f1 + auc) / 3.0
        return kappa, f1, auc, final_score

    def import_data(self, filepath):
        """Load a CSV file with a header row into a NumPy array.

        The first column of every data row is parsed as ``int`` and the
        remaining columns as ``float``. Blank rows are skipped, and a
        completely empty file yields an empty array instead of raising
        ``StopIteration``.

        Args:
            filepath: Path of the CSV file to read.

        Returns:
            ``numpy.ndarray`` with one row per non-blank data row.

        Raises:
            ValueError: If a cell cannot be converted to a number.
        """
        # newline='' is what the csv module documents for files it reads.
        with open(filepath, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # discard the header row, if there is one
            rows = [[int(row[0])] + [float(value) for value in row[1:]]
                    for row in reader if row]
        return np.array(rows)

    def output(self, ground_truth_path='odir_ground_truth.csv', predictions_path='odir_predictions.csv'):
        """Print the four scores for a ground-truth / prediction file pair.

        Args:
            ground_truth_path: CSV file holding the ground truth.
            predictions_path: CSV file holding the predictions.
        """
        gt_data = self.import_data(ground_truth_path)
        pr_data = self.import_data(predictions_path)
        # Column 0 holds the identifier; only the remaining columns are scored.
        kappa, f1, auc, final_score = self.odir_metrics(gt_data[:, 1:], pr_data[:, 1:])
        print("Kappa score:", kappa)
        print("F-1 score:", f1)
        print("AUC value:", auc)
        print("Final Score:", final_score)
class GroundTruthFiles:
    """Group ground-truth CSV rows by the disease columns flagged with '1'.

    After ``populate_vectors`` each list attribute (``diabetes``,
    ``glaucoma``, ``cataract``, ``amd``, ``hypertension``, ``myopia``,
    ``others``) holds the rows whose matching column equals ``'1'``. A row
    flagged for several diseases appears in several lists. No list is kept
    for the ``normal`` column.
    """

    # Number of leading CSV columns that make up one stored record:
    # ID, normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others.
    _COLUMN_COUNT = 9

    # (column index in the CSV row, attribute that collects the matching rows)
    _DISEASE_COLUMNS = (
        (2, 'diabetes'),
        (3, 'glaucoma'),
        (4, 'cataract'),
        (5, 'amd'),
        (6, 'hypertension'),
        (7, 'myopia'),
        (8, 'others'),
    )

    def __init__(self):
        """Create the empty per-disease row lists."""
        self.amd = []
        self.cataract = []
        self.diabetes = []
        self.glaucoma = []
        self.hypertension = []
        self.myopia = []
        self.others = []

    def populate_vectors(self, ground_truth_file):
        """Read the ground-truth CSV and append each row to its disease lists.

        Rows whose first column is ``"ID"`` (the header) and blank rows are
        skipped. Every stored record is a fresh list of the first nine
        columns, all kept as strings.

        Args:
            ground_truth_file: Path of the CSV file to read.

        Raises:
            IndexError: If a non-blank row has fewer than nine columns.
        """
        with open(ground_truth_file, newline='') as csv_data_file:
            for row in csv.reader(csv_data_file):
                if not row:
                    # csv.reader yields [] for blank lines; nothing to record.
                    continue
                # Explicit indexing keeps the IndexError for short rows.
                record = [row[index] for index in range(self._COLUMN_COUNT)]
                column_id = record[0]
                # just discard the first row
                if column_id == "ID":
                    continue
                print("Processing image: " + column_id + "_left.jpg")
                for column, attribute in self._DISEASE_COLUMNS:
                    if record[column] == '1':
                        # Store a copy so the lists never share a row object.
                        getattr(self, attribute).append(list(record))
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ def compile(self): model.add(layers.Flatten()) model.add(layers.Dense(64, activation='relu')) model.add(layers.Dense(8, activation='sigmoid')) - model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=self.metrics) self.show_summary(model) self.plot_summary(model, 'model_advanced.png') return model diff --git a/odir_model_base.py b/odir_model_base.py index d752a31..e0b1416 100644 --- a/odir_model_base.py +++ b/odir_model_base.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,8 +17,9 @@ class ModelBase: - def __init__(self, input_shape): + def __init__(self, input_shape, metrics): self.input_shape = input_shape + self.metrics = metrics def show_summary(self, model): model.summary() diff --git a/odir_model_factory.py b/odir_model_factory.py index 6a3df22..24ff2cc 100644 --- a/odir_model_factory.py +++ b/odir_model_factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -27,11 +27,11 @@ class ModelTypes(enum.Enum): class Factory: - def __init__(self, input_shape): + def __init__(self, input_shape, metrics): self.Makers = { - ModelTypes.vgg16: Vgg16(input_shape), - ModelTypes.inception_v1: InceptionV1(input_shape), - ModelTypes.advanced_testing: Advanced(input_shape) + ModelTypes.vgg16: Vgg16(input_shape, metrics), + ModelTypes.inception_v1: InceptionV1(input_shape, metrics), + ModelTypes.advanced_testing: Advanced(input_shape, metrics) } def compile(self, model_type): diff --git a/odir_model_inception_v1.py b/odir_model_inception_v1.py index 9ef1433..1bba1ca 100644 --- a/odir_model_inception_v1.py +++ b/odir_model_inception_v1.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ def compile(self): output = Dense(8, activation='sigmoid')(dense_3) model = Model([input_img], output) - model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=self.metrics) self.show_summary(model) self.plot_summary(model, 'model_inception_v1.png') diff --git a/odir_model_runner.py b/odir_model_runner.py index 0fed976..6f6d2ed 100644 --- a/odir_model_runner.py +++ b/odir_model_runner.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_model_vgg16.py b/odir_model_vgg16.py index c30f5fc..af55d0a 100644 --- a/odir_model_vgg16.py +++ b/odir_model_vgg16.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +import tensorflow from tensorflow.keras import models, layers +from tensorflow.python.keras.optimizers import SGD + from odir_model_base import ModelBase @@ -20,39 +23,83 @@ class Vgg16(ModelBase): def compile(self): x = models.Sequential() - + trainable = False # Block 1 - x.add(layers.Conv2D(input_shape=self.input_shape, filters=64,kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.MaxPooling2D((2, 2), strides=(2, 2))) + layer = layers.Conv2D(input_shape=self.input_shape, filters=64,kernel_size=(3,3),padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.MaxPooling2D((2, 2), strides=(2, 2)) + layer.trainable = trainable + x.add(layer) # Block 2 - x.add(layers.Conv2D(128, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(128, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.MaxPooling2D((2, 2), strides=(2, 2))) + layer = layers.Conv2D(128, kernel_size=(3,3),padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(128, kernel_size=(3,3),padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.MaxPooling2D((2, 2), strides=(2, 2)) + layer.trainable = trainable + x.add(layer) # Block 3 - x.add(layers.Conv2D(256, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(256, kernel_size=(3,3),padding="same", 
activation="relu")) - x.add(layers.Conv2D(256, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.MaxPooling2D((2, 2), strides=(2, 2))) + layer = layers.Conv2D(256, kernel_size=(3,3),padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(256, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(256, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.MaxPooling2D((2, 2), strides=(2, 2)) + layer.trainable = trainable + x.add(layer) # Block 4 - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.MaxPooling2D((2, 2), strides=(2, 2))) + layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.MaxPooling2D((2, 2), strides=(2, 2)) + layer.trainable = trainable + x.add(layer) # Block 5 - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.Conv2D(512, kernel_size=(3,3),padding="same", activation="relu")) - x.add(layers.MaxPooling2D((2, 2), strides=(2, 2))) + layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + 
layer = layers.Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu") + layer.trainable = trainable + x.add(layer) + layer = layers.MaxPooling2D((2, 2), strides=(2, 2)) + layer.trainable = trainable + x.add(layer) - x.add(layers.Flatten()) - x.add(layers.Dense(4096, activation='relu')) - x.add(layers.Dense(4096, activation='relu')) - x.add(layers.Dense(1000, activation='softmax')) + layer = layers.Flatten() + layer.trainable = trainable + x.add(layer) + layer = layers.Dense(4096, activation='relu') + layer.trainable = trainable + x.add(layer) + layer = layers.Dense(4096, activation='relu') + layer.trainable = trainable + x.add(layer) + layer = layers.Dense(1000, activation='softmax') + layer.trainable = trainable + x.add(layer) # Transfer learning, load previous weights x.load_weights(r'C:\temp\vgg16_weights_tf_dim_ordering_tf_kernels.h5') @@ -62,7 +109,9 @@ def compile(self): # Add new dense layer x.add(layers.Dense(8, activation='sigmoid')) - x.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + #optimizer = tensorflow.keras.optimizers.SGD(learning_rate=1e-3) + #sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) + x.compile(optimizer='adam', loss='binary_crossentropy', metrics=self.metrics) self.show_summary(x) self.plot_summary(x, 'model_vggnet.png') diff --git a/odir_normalize_input.py b/odir_normalize_input.py new file mode 100644 index 0000000..24fb75d --- /dev/null +++ b/odir_normalize_input.py @@ -0,0 +1,25 @@ +# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class Normalizer:
    """Input normalisation helpers applied before feeding images to a model."""

    # Default per-channel offsets, subtracted after the channel axis has been
    # reversed. NOTE(review): presumably the ImageNet B, G, R means used by
    # the Keras VGG16 preprocessing -- confirm.
    VGG16_BGR_MEAN = (103.939, 116.779, 123.68)

    def normalize_vgg16(self, x_test, mean=None):
        """Reverse the channel axis and subtract a per-channel mean.

        Args:
            x_test: NumPy array of images whose last axis is the channel
                axis (assumes RGB order on input -- TODO confirm against
                the loader). The caller's array is not modified.
            mean: Optional sequence of per-channel offsets applied to the
                reversed channels in order. Defaults to ``VGG16_BGR_MEAN``.

        Returns:
            A floating-point array of the same shape with the last axis
            reversed and the offsets subtracted.
        """
        channel_means = self.VGG16_BGR_MEAN if mean is None else mean
        # Dividing by 1.0 produces a new floating-point array, so the
        # in-place subtraction below never touches the caller's data.
        normalized = x_test / 1.0
        # Flip the channel order (e.g. RGB -> BGR); this is a view on the copy.
        normalized = normalized[..., ::-1]
        for channel, offset in enumerate(channel_means):
            normalized[..., channel] -= offset
        return normalized
class_names = ['normal', 'diabetes', 'glaucoma', 'cataract', 'amd', 'hypertension', 'myopia', 'others'] @@ -237,8 +243,7 @@ def npy_training_files_split_all(self, split_number, file_name_training, file_na if self.is_sickness(row, sickness) and class_count[sickness] < split_pocket: testing.append(image) images_used.append(row[0] + ',' + sickness + ',' + str(class_count[sickness])) - testing_labels.append( - [normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]) + testing_labels.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]) self.total_records_testing = self.total_records_testing + 1 class_count[sickness] = class_count[sickness] + 1 found = True @@ -246,9 +251,38 @@ def npy_training_files_split_all(self, split_number, file_name_training, file_na if not found: training.append(image) - training_labels.append( - [normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]) + training_labels.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]) self.total_records_training = self.total_records_training + 1 + count_images = count_images + 1 + + if include_augmented: + with open(self.csv_augmented_path) as csvDataFile: + csv_reader = csv.reader(csvDataFile) + for row in csv_reader: + column_id = row[0] + normal = row[1] + diabetes = row[2] + glaucoma = row[3] + cataract = row[4] + amd = row[5] + hypertension = row[6] + myopia = row[7] + others = row[8] + # just discard the first row + if column_id != "ID": + self.logger.debug("Processing image: " + column_id) + # load first the image from the folder + eye_image = os.path.join(self.augmented_path, column_id) + image = cv2.imread(eye_image) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + if count_images >= split_factor: + training_2.append(image) + training_labels_2.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]) + else: + training.append(image) + training_labels.append([normal, diabetes, 
glaucoma, cataract, amd, hypertension, myopia, others]) + self.total_records_training = self.total_records_training + 1 + count_images = count_images + 1 testing = np.array(testing, dtype='uint8') testing_labels = np.array(testing_labels, dtype='uint8') @@ -264,16 +298,31 @@ def npy_training_files_split_all(self, split_number, file_name_training, file_na # for example (6069 * 28 * 28 * 3)-> (6069 x 2352) (14,274,288) training = np.reshape(training, [training.shape[0], training.shape[1], training.shape[2], training.shape[3]]) + training_2 = np.array(training_2, dtype='uint8') + training_labels_2 = np.array(training_labels_2, dtype='uint8') + # convert (number of images x height x width x number of channels) to (number of images x (height * width *3)) + # for example (6069 * 28 * 28 * 3)-> (6069 x 2352) (14,274,288) + training_2 = np.reshape(training_2, [training_2.shape[0], training_2.shape[1], training_2.shape[2], training_2.shape[3]]) + + self.logger.debug(testing.shape) + self.logger.debug(testing_labels.shape) + self.logger.debug(training.shape) + self.logger.debug(training_labels.shape) + self.logger.debug(training_2.shape) + self.logger.debug(training_labels_2.shape) + # save numpy array as .npy formats - np.save(file_name_training, training) - np.save(file_name_training_labels, training_labels) + np.save(file_name_training + '_1', training) + np.save(file_name_training_labels + '_1', training_labels) + np.save(file_name_training + '_2', training_2) + np.save(file_name_training_labels + '_2', training_labels_2) self.logger.debug("Closing CSV file") for sickness in class_names: self.logger.debug('found ' + sickness + ' ' + str(class_count[sickness])) csv_writer = csv.writer(open("files_used.csv", 'w', newline='')) for item in images_used: self.logger.debug(item) - entries = item.split(","); + entries = item.split(",") csv_writer.writerow(entries) @@ -282,10 +331,13 @@ def main(argv): image_width = 224 training_path = r'C:\temp\ODIR-5K_Training_Dataset_treated' 
+ '_' + str(image_width) testing_path = r'C:\temp\ODIR-5K_Testing_Images_treated' + '_' + str(image_width) - csv_file = 'ground_truth\odir.csv' + augmented_path = r'C:\temp\ODIR-5K_Training_Dataset_augmented' + '_' + str(image_width) + csv_file = r'ground_truth\odir.csv' + csv_augmented_file = r'ground_truth\odir_augmented.csv' training_file = 'ground_truth\XYZ_ODIR.csv' logger.debug('Generating npy files') - generator = NumpyDataGenerator(training_path, testing_path, csv_file, training_file) + generator = NumpyDataGenerator(training_path, testing_path, csv_file, training_file, augmented_path, + csv_augmented_file) # Generate testing file # generator.npy_testing_files('odir_testing', 'odir_testing_labels') @@ -295,8 +347,11 @@ def main(argv): # generator.npy_training_files_split(1000, 'odir_training', # 'odir_training_labels', 'odir_testing', 'odir_testing_labels') - generator.npy_training_files_split_all(400, 'odir_training' + '_' + str(image_width), 'odir_training_labels' + '_' + str(image_width), 'odir_testing' + '_' + str(image_width), - 'odir_testing_labels' + '_' + str(image_width)) + generator.npy_training_files_split_all(400, 'odir_training' + '_' + str(image_width), + 'odir_training_labels' + '_' + str(image_width), + 'odir_testing' + '_' + str(image_width), + 'odir_testing_labels' + '_' + str(image_width), + True) end = time.time() logger.debug('Training Records ' + str(generator.total_records_training)) logger.debug('Testing Records ' + str(generator.total_records_testing)) diff --git a/odir_pipeline_runer.py b/odir_pipeline_runer.py index 8dd0a5a..0eb4a36 100644 --- a/odir_pipeline_runer.py +++ b/odir_pipeline_runer.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
def main(argv):
    """Evaluate a saved Keras model on the ODIR test split and report its scores.

    The function loads the data with ``odir.load_data``, plots the training
    inputs, normalises the test images with ``Normalizer.normalize_vgg16``,
    evaluates the model, plots a confusion matrix, writes the predictions,
    prints the final score and plots the prediction output.

    Args:
        argv: Command-line arguments as handed over by ``absl.app.run``
            (program name first). If a positional argument follows the
            program name it is used as the path of the model file;
            otherwise the original hard-coded path is used.
    """
    # Machine-specific default kept for backward compatibility; pass the model
    # path as the first positional argument to run on any other machine.
    default_model_path = r'C:\Users\thund\Source\Repos\TFM-ODIR\models\image_classification\modelvgg100.h5'
    model_path = argv[1] if argv and len(argv) > 1 else default_model_path

    print(tf.version.VERSION)
    image_size = 224
    test_run = 'zC'

    # load the data
    # NOTE(review): the meaning of the second argument (1) is defined in
    # odir.load_data, which is not visible here -- confirm before changing.
    (x_train, y_train), (x_test, y_test) = odir.load_data(image_size, 1)

    class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

    # plot data input
    plotter = Plotter(class_names)
    plotter.plot_input_images(x_train, y_train)

    # keep a reference to the un-normalised images for the final plot
    x_test_drawing = x_test

    # normalize input based on model
    normalizer = Normalizer()
    x_test = normalizer.normalize_vgg16(x_test)

    # load one of the test runs
    model = tf.keras.models.load_model(model_path)
    model.summary()

    # display the content of the model
    baseline_results = model.evaluate(x_test, y_test, verbose=2)
    for name, value in zip(model.metrics_names, baseline_results):
        print(name, ': ', value)
    print()

    # test a prediction
    test_predictions_baseline = model.predict(x_test)
    plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, test_run, 0)

    # save the predictions
    # NOTE(review): 400 is passed straight to Prediction; its meaning is
    # defined there -- confirm before changing.
    prediction_writer = Prediction(test_predictions_baseline, 400)
    prediction_writer.save()
    prediction_writer.save_all(y_test)

    # show the final score
    score = FinalScore()
    score.output()

    # plot output results
    plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing)


if __name__ == '__main__':
    # create logger
    logging.config.fileConfig('logging.conf')
    logger = logging.getLogger('odir')
    app.run(main)
diff --git a/odir_runner_inception.py b/odir_runner_inception.py new file mode 100644 index 0000000..0baaed3 --- /dev/null +++ b/odir_runner_inception.py @@ -0,0 +1,117 @@ +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import, division, print_function, unicode_literals + +import csv +import logging.config +import os + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf +from absl import app + +from odir_model_factory import Factory, ModelTypes +from odir_predictions_writer import Prediction + +os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/' +os.environ["CUDA_VISIBLE_DEVICES"]="-1" +from sklearn import metrics +import odir + + +def main(argv): + print(tf.version.VERSION) + image_size = 128 + + (x_train, y_train), (x_test, y_test) = odir.load_data(image_size) + + x_train, x_test = x_train / 255.0, x_test / 255.0 + x_train = (x_train - x_train.mean()) / x_train.std() + x_test = (x_test - x_test.mean()) / x_test.std() + + plt.figure(figsize=(9, 9)) + for i in range(100): + plt.subplot(10, 10, i + 1) + plt.xticks([]) + plt.yticks([]) + plt.grid(False) + plt.imshow(x_train[i]) + + plt.subplots_adjust(bottom=0.04, right=0.94, top=0.95, left=0.06, wspace=0.20, hspace=0.17) + plt.show() + + factory = Factory((image_size,image_size,3)) + model = 
factory.compile(ModelTypes.inception_v1) + + print("Training") + + class_weight = { 0:1., + 1:1.583802025, + 2:8.996805112, + 3:10.24, + 4:10.05714286, + 5:14.66666667, + 6:10.7480916, + 7:2.505338078 } + + history = model.fit(x_train, y_train, epochs=30,batch_size=32,verbose=1,shuffle=True, + validation_data=(x_test, y_test), class_weight=class_weight) + + plt.plot(history.history['accuracy'], label='accuracy') + plt.plot(history.history['val_accuracy'], label='val_accuracy') + plt.xlabel('Epoch') + plt.ylabel('Accuracy') + plt.legend(loc='lower right') + plt.show() + + test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2) + print(test_acc) + + predictions = model.predict(x_test) + + def odir_metrics(gt_data, pr_data): + th = 0.5 + gt = gt_data.flatten() + pr = pr_data.flatten() + kappa = metrics.cohen_kappa_score(gt, pr > th) + f1 = metrics.f1_score(gt, pr > th, average='micro') + auc = metrics.roc_auc_score(gt, pr) + final_score = (kappa + f1 + auc) / 3.0 + return kappa, f1, auc, final_score + + def import_data(filepath): + with open(filepath, 'r') as f: + reader = csv.reader(f) + header = next(reader) + pr_data = [[int(row[0])] + list(map(float, row[1:])) for row in reader] + pr_data = np.array(pr_data) + return pr_data + + prediction_writer = Prediction(predictions, 400) + prediction_writer.save() + prediction_writer.save_all(y_test) + + gt_data = import_data('odir_ground_truth.csv') + pr_data = import_data('odir_predictions.csv') + kappa, f1, auc, final_score = odir_metrics(gt_data[:, 1:], pr_data[:, 1:]) + print("kappa score:", kappa, " f-1 score:", f1, " AUC vlaue:", auc, " Final Score:", final_score) + +if __name__ == '__main__': + # create logger + logging.config.fileConfig('logging.conf') + logger = logging.getLogger('odir') + app.run(main) diff --git a/odir_runner_vgg.py b/odir_runner_vgg.py new file mode 100644 index 0000000..5bdc118 --- /dev/null +++ b/odir_runner_vgg.py @@ -0,0 +1,128 @@ +# Copyright 2019-2020 Jordi Corbilla. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import, division, print_function, unicode_literals + +import csv +import logging.config +import os + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf +from absl import app + +from odir_model_factory import Factory, ModelTypes +from odir_predictions_writer import Prediction + +os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/' +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +from sklearn import metrics +import odir + + +def main(argv): + print(tf.version.VERSION) + image_size = 224 + + (x_train, y_train), (x_test, y_test) = odir.load_data(image_size) + + x_train, x_test = x_train / 1.0, x_test / 1.0 + + x_train = x_train[..., ::-1] + x_test = x_test[..., ::-1] + mean = [103.939, 116.779, 123.68] + x_train[..., 0] -= mean[0] + x_train[..., 1] -= mean[1] + x_train[..., 2] -= mean[2] + x_test[..., 0] -= mean[0] + x_test[..., 1] -= mean[1] + x_test[..., 2] -= mean[2] + + x_train = (x_train - x_train.mean()) / x_train.std() + x_test = (x_test - x_test.mean()) / x_test.std() + + plt.figure(figsize=(9, 9)) + for i in range(100): + plt.subplot(10, 10, i + 1) + plt.xticks([]) + plt.yticks([]) + plt.grid(False) + plt.imshow(x_train[i]) + plt.subplots_adjust(bottom=0.04, right=0.94, top=0.95, left=0.06, wspace=0.20, hspace=0.17) + plt.show() + + 
factory = Factory((image_size, image_size, 3)) + model = factory.compile(ModelTypes.vgg16) + + print("Training") + + class_weight = {0: 1., + 1: 1.583802025, + 2: 8.996805112, + 3: 10.24, + 4: 10.05714286, + 5: 14.66666667, + 6: 10.7480916, + 7: 2.505338078} + + history = model.fit(x_train, y_train, epochs=30, batch_size=32, verbose=1, shuffle=True, + validation_data=(x_test, y_test), class_weight=class_weight) + + plt.plot(history.history['accuracy'], label='accuracy') + plt.plot(history.history['val_accuracy'], label='val_accuracy') + plt.xlabel('Epoch') + plt.ylabel('Accuracy') + plt.legend(loc='lower right') + plt.show() + + test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2) + print(test_acc) + + predictions = model.predict(x_test) + + def odir_metrics(gt_data, pr_data): + th = 0.5 + gt = gt_data.flatten() + pr = pr_data.flatten() + kappa = metrics.cohen_kappa_score(gt, pr > th) + f1 = metrics.f1_score(gt, pr > th, average='micro') + auc = metrics.roc_auc_score(gt, pr) + final_score = (kappa + f1 + auc) / 3.0 + return kappa, f1, auc, final_score + + def import_data(filepath): + with open(filepath, 'r') as f: + reader = csv.reader(f) + header = next(reader) + pr_data = [[int(row[0])] + list(map(float, row[1:])) for row in reader] + pr_data = np.array(pr_data) + return pr_data + + prediction_writer = Prediction(predictions, 400) + prediction_writer.save() + prediction_writer.save_all(y_test) + + gt_data = import_data('odir_ground_truth.csv') + pr_data = import_data('odir_predictions.csv') + kappa, f1, auc, final_score = odir_metrics(gt_data[:, 1:], pr_data[:, 1:]) + print("kappa score:", kappa, " f-1 score:", f1, " AUC vlaue:", auc, " Final Score:", final_score) + + +if __name__ == '__main__': + # create logger + logging.config.fileConfig('logging.conf') + logger = logging.getLogger('odir') + app.run(main) diff --git a/odir_testing_image_treatment_job.py b/odir_testing_image_treatment_job.py index 9583098..c1a925e 100644 --- 
a/odir_testing_image_treatment_job.py +++ b/odir_testing_image_treatment_job.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_training_data_parser.py b/odir_training_data_parser.py index 0f99f11..3b338e8 100644 --- a/odir_training_data_parser.py +++ b/odir_training_data_parser.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_training_image_treatment_job.py b/odir_training_image_treatment_job.py index ac2f747..ee04018 100644 --- a/odir_training_image_treatment_job.py +++ b/odir_training_image_treatment_job.py @@ -1,4 +1,4 @@ -# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# Copyright 2019-2020 Jordi Corbilla. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/odir_vgg_testing_inference.py b/odir_vgg_testing_inference.py new file mode 100644 index 0000000..bd99ed5 --- /dev/null +++ b/odir_vgg_testing_inference.py @@ -0,0 +1,77 @@ +# Copyright 2019 Jordi Corbilla. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging.config +import tensorflow as tf +from absl import app +from odir_advance_plotting import Plotter +from odir_kappa_score import FinalScore +from odir_normalize_input import Normalizer +from odir_predictions_writer import Prediction +import odir + +def main(argv): + print(tf.version.VERSION) + image_size = 224 + test_run = 'zC' + + # load the data + (x_train, y_train), (x_test, y_test) = odir.load_data(image_size, 1) + + class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others'] + + # plot data input + plotter = Plotter(class_names) + plotter.plot_input_images(x_train, y_train) + + x_test_drawing = x_test + + # normalize input based on model + normalizer = Normalizer() + x_test = normalizer.normalize_vgg16(x_test) + + # load one of the test runs + model = tf.keras.models.load_model(r'C:\Users\thund\Source\Repos\TFM-ODIR\models\image_classification\modelvgg100.h5') + model.summary() + + # display the content of the model + baseline_results = model.evaluate(x_test, y_test, verbose=2) + for name, value in zip(model.metrics_names, baseline_results): + print(name, ': ', value) + print() + + # test a prediction + test_predictions_baseline = model.predict(x_test) + plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, test_run, 0) + + # save the predictions + prediction_writer = Prediction(test_predictions_baseline, 400) + prediction_writer.save() + prediction_writer.save_all(y_test) + + # show the final score + score = FinalScore() + score.output() + + # plot output results + plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing) + + +if __name__ == '__main__': + # create logger + 
logging.config.fileConfig('logging.conf') + logger = logging.getLogger('odir') + app.run(main)