diff --git a/.gitignore b/.gitignore
index 594c572..50c7cab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,3 +114,4 @@ weights/
 plots/
 sub.csv
 core
+work_dirs/
diff --git a/configs/plates.yml b/configs/plates.yml
index d296b06..2c2abb0 100644
--- a/configs/plates.yml
+++ b/configs/plates.yml
@@ -1,21 +1,45 @@
 input_shape : [128, 128, 3]
-encodings_len: 256
-margin: 0.5
+encodings_len: 512
+margin: 0.4
 mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnext50'
+distance_type : 'l2'
+backbone : 'resnet18'
 backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'plates/'
 freeze_backbone : True
-augmentation_type : 'default'
+augmentation_type : None
 embeddings_normalization: True
 
-#paths
+# optimizer parameters
+optimizer : 'radam'
+learning_rate : 0.0001
+decay_factor : 0.99
+step_size : 10
+
+# embeddings learning training parameters
+n_epochs : 1000
+n_steps_per_epoch : 10
+val_batch_size : 8
+val_steps : 10
+negatives_selection_mode : 'semihard'
+mining_n_classes: 2
+mining_n_samples: 3
+
+# softmax pretraining parameters
+softmax_pretraining : True
+softmax_batch_size : 8
+softmax_val_steps : 2
+softmax_steps_per_epoch : 2
+softmax_epochs : 1000
+
+# paths
+work_dir : 'work_dirs/plates/'
 dataset_path : '/home/rauf/plates_competition/dataset/to_train/'
 tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_simple2_plates.h5'
\ No newline at end of file
+plot_history : True
+model_save_name : 'best_model_resnet18_plates.h5'
+encodings_save_name: 'encodings_resnet18_plates.pkl'
+
+# encodings parameters
+save_encodings : True
+max_num_samples_of_each_class : 30
+knn_k : 1
\ No newline at end of file
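Note on the reworked plates.yml: the keys are now grouped into optimizer, embeddings-training, softmax-pretraining, path, and encodings sections, and the separate weights/plots/encodings paths are replaced by a single work_dir. Also, 'augmentation_type : None' reaches the code as the string 'None' (YAML null would be 'null' or '~'), which still disables augmentation because get_aug() returns None for any name other than 'default'. A minimal, illustrative sketch of reading these keys with PyYAML (the same loader utils.py uses); the printed keys are the ones train.py consumes further down in this diff:

import yaml

# Sketch only: load the reworked config and inspect the new key groups.
with open('configs/plates.yml', 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)

# Step-decay schedule inputs used by the LearningRateScheduler in train.py.
print(cfg['learning_rate'], cfg['decay_factor'], cfg['step_size'])
# All outputs now live under a single work_dir.
print(cfg['work_dir'], cfg['model_save_name'], cfg['encodings_save_name'])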
diff --git a/configs/road_signs.yml b/configs/road_signs.yml
deleted file mode 100644
index a3acb4f..0000000
--- a/configs/road_signs.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 1024
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'simple2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_simple2.h5'
diff --git a/configs/road_signs_mobilenetv2.yml b/configs/road_signs_mobilenetv2.yml
deleted file mode 100644
index 8532ecd..0000000
--- a/configs/road_signs_mobilenetv2.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-input_shape : [96, 96, 3]
-encodings_len: 1024
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'mobilenetv2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_mobilenetv2.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18.yml b/configs/road_signs_resnet18.yml
index 5a394fa..28b76b3 100644
--- a/configs/road_signs_resnet18.yml
+++ b/configs/road_signs_resnet18.yml
@@ -5,16 +5,38 @@ mode : 'triplet'
 distance_type : 'l1'
 backbone : 'resnet18'
 backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
 project_name : 'road_signs/'
-freeze_backbone : True
+freeze_backbone : False
+augmentation_type : 'default'
 embeddings_normalization: True
+# optimizer parameters
+optimizer : 'radam'
+learning_rate : 0.0001
+decay_factor : 0.99
+step_size : 1
+
+# embeddings learning training parameters
+n_epochs : 1000
+n_steps_per_epoch : 200
+val_batch_size : 8
+val_steps : 200
+negatives_selection_mode : 'semihard'
+mining_n_classes: 5
+mining_n_samples: 3
+
+# softmax pretraining parameters
+softmax_pretraining : True
+softmax_batch_size : 8
+softmax_val_steps : 200
+softmax_steps_per_epoch : 500
+softmax_epochs : 20
+
 
 #paths
 dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
 tensorboard_log_path : 'tf_log/'
 weights_save_path : 'weights/'
 plots_path : 'plots/'
 encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18.h5'
\ No newline at end of file
+model_save_name : 'best_model_resnet18.h5'
+encodings_save_name: 'encodings_resnet18.pkl'
\ No newline at end of file
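In road_signs_resnet18.yml, mining_n_classes and mining_n_samples set the classes-per-batch and samples-per-class that train_generator_mining forwards to the data loader, and negatives_selection_mode: 'semihard' asks for negatives that are farther than the positive yet still inside the margin. The repository's generate_mining implementation is not part of this diff, so the sketch below only illustrates the standard semihard criterion with hypothetical names and NumPy distances:

import numpy as np

def pick_semihard_negative(dist_ap, dist_an_all, margin):
    # Illustrative only: a semihard negative satisfies d(a,p) < d(a,n) < d(a,p) + margin.
    candidates = np.where((dist_an_all > dist_ap) &
                          (dist_an_all < dist_ap + margin))[0]
    if len(candidates) == 0:
        # No semihard candidate available: fall back to the hardest negative.
        return int(np.argmin(dist_an_all))
    return int(np.random.choice(candidates))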
diff --git a/configs/road_signs_resnet18_all.yml b/configs/road_signs_resnet18_all.yml
deleted file mode 100644
index d104973..0000000
--- a/configs/road_signs_resnet18_all.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_all/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_all.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_max80_min30.yml b/configs/road_signs_resnet18_max80_min30.yml
deleted file mode 100644
index 2d44b22..0000000
--- a/configs/road_signs_resnet18_max80_min30.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_max80_min30/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_max80_min30.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_merged_dataset.yml b/configs/road_signs_resnet18_merged_dataset.yml
deleted file mode 100644
index 39211f9..0000000
--- a/configs/road_signs_resnet18_merged_dataset.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.7
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs_merged/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_merged.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_mini.yml b/configs/road_signs_resnet18_mini.yml
deleted file mode 100644
index b2c4f56..0000000
--- a/configs/road_signs_resnet18_mini.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.3
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: False
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_mini/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_mini.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_paper.yml b/configs/road_signs_resnet18_paper.yml
deleted file mode 100644
index ad84ab5..0000000
--- a/configs/road_signs_resnet18_paper.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_full/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_paper.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_paper_cutted.yml b/configs/road_signs_resnet18_paper_cutted.yml
deleted file mode 100644
index c0ebcc4..0000000
--- a/configs/road_signs_resnet18_paper_cutted.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.4
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_full_cutted/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_paper.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet18_paper_remaining.yml b/configs/road_signs_resnet18_paper_remaining.yml
deleted file mode 100644
index 356fa29..0000000
--- a/configs/road_signs_resnet18_paper_remaining.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet18'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : False
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_full_remaining/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet18_paper.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet50v2.yml b/configs/road_signs_resnet50v2.yml
deleted file mode 100644
index 7b406a0..0000000
--- a/configs/road_signs_resnet50v2.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 1024
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet50v2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet50v2.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnet50v2_merged_dataset.yml b/configs/road_signs_resnet50v2_merged_dataset.yml
deleted file mode 100644
index c9cfb17..0000000
--- a/configs/road_signs_resnet50v2_merged_dataset.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 1024
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnet50v2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs_merged/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnet50v2_merged.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnext50.yml b/configs/road_signs_resnext50.yml
deleted file mode 100644
index 79e43ba..0000000
--- a/configs/road_signs_resnext50.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 1024
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnext50'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model.h5'
\ No newline at end of file
diff --git a/configs/road_signs_resnext50_merged_dataset.yml b/configs/road_signs_resnext50_merged_dataset.yml
deleted file mode 100644
index d7f1d39..0000000
--- a/configs/road_signs_resnext50_merged_dataset.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.7
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'resnext50'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs_merged/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_resnext50_merged.h5'
\ No newline at end of file
diff --git a/configs/road_signs_simple2_merged_dataset.yml b/configs/road_signs_simple2_merged_dataset.yml
deleted file mode 100644
index 4eb3a93..0000000
--- a/configs/road_signs_simple2_merged_dataset.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'simple2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs_merged/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_simple2_merged.h5'
diff --git a/configs/road_signs_simple2_mini.yml b/configs/road_signs_simple2_mini.yml
deleted file mode 100644
index b2d54fb..0000000
--- a/configs/road_signs_simple2_mini.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-input_shape : [48, 48, 3]
-encodings_len: 256
-margin: 0.5
-mode : 'triplet'
-distance_type : 'l1'
-backbone : 'simple2'
-backbone_weights : 'imagenet'
-optimizer : 'radam'
-learning_rate : 0.0001
-project_name : 'road_signs/'
-freeze_backbone : True
-embeddings_normalization: True
-
-#paths
-dataset_path : '/home/rauf/datasets/road_signs/road_signs_mini/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
-encodings_path : 'encodings/'
-model_save_name : 'best_model_simple2_mini.h5'
\ No newline at end of file
diff --git a/embedding_net/augmentations.py b/embedding_net/augmentations.py
index ad16e65..8383c05 100644
--- a/embedding_net/augmentations.py
+++ b/embedding_net/augmentations.py
@@ -1,12 +1,13 @@
 import albumentations as A
 
-def get_aug(name='default',input_shape=[48,48,3]):
+
+def get_aug(name='default', input_shape=[48, 48, 3]):
     if name == 'default':
         augmentations = A.Compose([
             A.RandomBrightnessContrast(p=0.4),
             A.RandomGamma(p=0.4),
             A.HueSaturationValue(hue_shift_limit=20,
-                                sat_shift_limit=30, val_shift_limit=30, p=0.4),
+                                 sat_shift_limit=30, val_shift_limit=30, p=0.4),
             A.CLAHE(p=0.4),
             A.HorizontalFlip(p=0.5),
             A.VerticalFlip(p=0.5),
@@ -16,5 +17,5 @@ def get_aug(name='default',input_shape=[48,48,3]):
         ], p=1)
     else:
         augmentations = None
-
-    return albumentations
+
+    return augmentations
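The augmentations fix above makes get_aug return the composed pipeline instead of the module object. A minimal usage sketch of the resulting albumentations transform (the image path is illustrative):

import cv2
from embedding_net.augmentations import get_aug

aug = get_aug('default', input_shape=[48, 48, 3])
img = cv2.imread('some_image.png')
if aug is not None:                  # get_aug returns None for any name other than 'default'
    img = aug(image=img)['image']    # albumentations transforms are called with keyword arguments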
diff --git a/embedding_net/model.py b/embedding_net/model.py
index 26e6c99..ef34930 100644
--- a/embedding_net/model.py
+++ b/embedding_net/model.py
@@ -7,77 +7,64 @@ from keras import optimizers
 from keras.layers import Dense, Input, Lambda, concatenate
 import pickle
-from .utils import parse_net_params, load_encodings
+from .utils import load_encodings
 from .backbones import get_backbone
+from .pretrain_backbone_softmax import pretrain_backbone_softmax
 from . import losses_and_accuracies as lac
 import matplotlib.pyplot as plt
 from sklearn.neighbors import KNeighborsClassifier
 
-# TODO
+# TODO
 # [] - implement magnet loss
 # [] - finalize settings with l1 and l2 losses
+
 
 class EmbeddingNet:
     """
    SiameseNet for image classification
    distance_type = 'l1' -> l1_loss
    distance_type = 'l2' -> l2_loss
-
+
    mode = 'siamese' -> Siamese network
    mode = 'triplet' -> Triplen network
    """
 
-    def __init__(self, cfg_file=None):
-        if cfg_file:
-            params = parse_net_params(cfg_file)
-            self.input_shape = params['input_shape']
-            self.encodings_len = params['encodings_len']
-            self.backbone = params['backbone']
-            self.backbone_weights = params['backbone_weights']
-            self.distance_type = params['distance_type']
-            self.mode = params['mode']
-            self.project_name = params['project_name']
-            self.optimizer = params['optimizer']
-            self.freeze_backbone = params['freeze_backbone']
-            self.data_loader = params['loader']
-            self.embeddings_normalization = params['embeddings_normalization']
-            self.margin = params['margin']
-
-            self.model = []
-            self.base_model = []
-            self.l_model = []
-            self.backbone_model = []
-
-            self.encodings_path = params['encodings_path']
-            self.plots_path = params['plots_path']
-            self.tensorboard_log_path = params['tensorboard_log_path']
-            self.weights_save_path = params['weights_save_path']
-            self.model_save_name = params['model_save_name']
-
-            os.makedirs(self.encodings_path, exist_ok=True)
-            os.makedirs(self.plots_path, exist_ok=True)
-            os.makedirs(self.tensorboard_log_path, exist_ok=True)
-            os.makedirs(self.weights_save_path, exist_ok=True)
-
-            if self.mode == 'siamese':
-                self._create_model_siamese()
-            elif self.mode == 'triplet':
-                self._create_model_triplet()
-
-            self.encoded_training_data = {}
-        else:
-            self.margin = 0.5
+    def __init__(self, cfg_params):
+        self.input_shape = cfg_params['input_shape']
+        self.encodings_len = cfg_params['encodings_len']
+        self.backbone = cfg_params['backbone']
+        self.backbone_weights = cfg_params['backbone_weights']
+        self.distance_type = cfg_params['distance_type']
+        self.mode = cfg_params['mode']
+        self.optimizer = cfg_params['optimizer']
+        self.freeze_backbone = cfg_params['freeze_backbone']
+        self.data_loader = cfg_params['loader']
+        self.embeddings_normalization = cfg_params['embeddings_normalization']
+        self.margin = cfg_params['margin']
+        self.model = []
+        self.base_model = []
+        self.backbone_model = []
 
-    def _create_base_model(self):
-        self.base_model, self.backbone_model = get_backbone(input_shape=self.input_shape,
-                                                            encodings_len=self.encodings_len,
-                                                            backbone_type=self.backbone,
-                                                            embeddings_normalization=self.embeddings_normalization,
-                                                            backbone_weights=self.backbone_weights,
-                                                            freeze_backbone=self.freeze_backbone)
-
+        self.tensorboard_log_path = cfg_params['tensorboard_log_path']
+
+        if self.mode == 'siamese':
+            self._create_model_siamese()
+        elif self.mode == 'triplet':
+            self._create_model_triplet()
+        self.encoded_training_data = {}
+
+        if cfg_params['softmax_pretraining']:
+            pretrain_backbone_softmax(self.backbone_model, cfg_params)
+
+    def _create_base_model(self):
+        self.base_model, self.backbone_model = get_backbone(input_shape=self.input_shape,
+                                                            encodings_len=self.encodings_len,
+                                                            backbone_type=self.backbone,
+                                                            embeddings_normalization=self.embeddings_normalization,
+                                                            backbone_weights=self.backbone_weights,
+                                                            freeze_backbone=self.freeze_backbone)
 
     def _create_model_siamese(self):
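EmbeddingNet now takes the already-parsed params dict instead of a config file path, builds all output directories elsewhere, and runs softmax pretraining of the backbone from inside __init__ when softmax_pretraining is enabled. A minimal construction sketch, mirroring what main() in train.py does further down in this diff:

from embedding_net.model import EmbeddingNet
from embedding_net.utils import parse_net_params

# parse_net_params builds the dict (including 'optimizer' and the data 'loader');
# EmbeddingNet no longer accepts a config file path directly.
cfg_params = parse_net_params('configs/plates.yml')
model = EmbeddingNet(cfg_params)   # softmax pretraining runs here if enabled in the config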
@@ -130,19 +117,19 @@ def _create_model_triplet(self):
         image_encoding_p = self.base_model(input_image_p)
         image_encoding_n = self.base_model(input_image_n)
 
-        merged_vector = concatenate([image_encoding_a, image_encoding_p, image_encoding_n],
+        merged_vector = concatenate([image_encoding_a, image_encoding_p, image_encoding_n],
                                     axis=-1, name='merged_layer')
-        self.model = Model(inputs=[input_image_a,input_image_p, input_image_n],
-                           outputs=merged_vector)
-
+        self.model = Model(inputs=[input_image_a, input_image_p, input_image_n],
+                           outputs=merged_vector)
+
         print('Base model summary')
         self.base_model.summary()
 
         print('Whole model summary')
         self.model.summary()
 
-        self.model.compile(loss=lac.triplet_loss(self.margin), optimizer=self.optimizer)
-
+        self.model.compile(loss=lac.triplet_loss(
+            self.margin), optimizer=self.optimizer)
 
     def train_on_batch(self, batch_size=8, s="train"):
         generator = self.data_loader.generate(batch_size, s=s)
@@ -158,35 +145,41 @@ def validate_on_batch(self, batch_size=8, s="val"):
                                                            pairs, targets)
         return val_loss, val_accuracy
 
-    def train_generator(self, steps_per_epoch, epochs, callbacks = [], val_steps=100, with_val=True, batch_size=8, verbose=1):
+    def train_generator(self, steps_per_epoch, epochs, callbacks=[], val_steps=100, with_val=True, batch_size=8, verbose=1):
+
+        train_generator = self.data_loader.generate(
+            batch_size, mode=self.mode, s="train")
+        val_generator = self.data_loader.generate(
+            batch_size, mode=self.mode, s="val")
 
-        train_generator = self.data_loader.generate(batch_size, mode=self.mode, s="train")
-        val_generator = self.data_loader.generate(batch_size, mode=self.mode, s="val")
-
         history = self.model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs,
-                                           verbose=verbose, validation_data = val_generator, validation_steps = val_steps, callbacks=callbacks)
-        if self.plots_path:
-            self.plot_grapths(history)
+                                           verbose=verbose, validation_data=val_generator, validation_steps=val_steps, callbacks=callbacks)
+
         return history
-
-    def train_generator_mining(self,
-                            steps_per_epoch,
-                            epochs, callbacks = [],
-                            val_steps=100,
-                            with_val=True,
-                            n_classes=4, n_samples=4, val_batch=8,
-                            negative_selection_mode='semihard', verbose=1):
+
+    def train_generator_mining(self,
+                               steps_per_epoch,
+                               epochs, callbacks=[],
+                               val_steps=100,
+                               with_val=True,
+                               n_classes=4, n_samples=4, val_batch=8,
+                               negative_selection_mode='semihard', verbose=1):
-        train_generator = self.data_loader.generate_mining(self.base_model, n_classes, n_samples, margin=self.margin, negative_selection_mode=negative_selection_mode, s="train")
-        val_generator = self.data_loader.generate(val_batch, mode=self.mode, s="val")
-
-        history = self.model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs,
-                                           verbose=verbose, validation_data = val_generator, validation_steps = val_steps, callbacks=callbacks)
-        if self.plots_path:
-            self.plot_grapths(history)
+        train_generator = self.data_loader.generate_mining(
+            self.base_model, n_classes, n_samples, margin=self.margin, negative_selection_mode=negative_selection_mode, s="train")
+        val_generator = self.data_loader.generate(
+            val_batch, mode=self.mode, s="val")
+
+        history = self.model.fit_generator(train_generator,
+                                           steps_per_epoch=steps_per_epoch,
+                                           epochs=epochs,
+                                           verbose=verbose,
+                                           validation_data=val_generator,
+                                           validation_steps=val_steps,
+                                           callbacks=callbacks)
         return history
 
     def validate(self, number_of_comparisons=100, batch_size=4, s="val"):
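_create_model_triplet concatenates the anchor, positive, and negative embeddings into one 'merged_layer' output and compiles with lac.triplet_loss(self.margin). The losses_and_accuracies implementation is not part of this diff, so the Keras-backend sketch below only illustrates the standard triplet loss over such a concatenated vector; the function name is hypothetical, and the defaults reuse the margin and encodings_len from configs/plates.yml:

from keras import backend as K

def triplet_loss_sketch(margin=0.4, encodings_len=512):
    # Illustrative only: standard triplet loss over a merged [a | p | n] vector,
    # matching the merged_layer layout built above (squared L2 distances).
    def loss(y_true, y_pred):
        a = y_pred[:, :encodings_len]
        p = y_pred[:, encodings_len:2 * encodings_len]
        n = y_pred[:, 2 * encodings_len:]
        d_ap = K.sum(K.square(a - p), axis=-1)
        d_an = K.sum(K.square(a - n), axis=-1)
        return K.mean(K.maximum(d_ap - d_an + margin, 0.0))
    return loss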
@@ -212,20 +205,22 @@ def _generate_encoding(self, img_path):
         encoding = self.base_model.predict(np.expand_dims(img, axis=0))
         return encoding
 
-    def generate_encodings(self, save_file_name='encodings.pkl', max_num_samples_of_each_classes=10, knn_k = 1, shuffle = True):
+    def generate_encodings(self, save_file_name='encodings.pkl', max_num_samples_of_each_class=10, knn_k=1, shuffle=True):
         data_paths, data_labels, data_encodings = [], [], []
         classes_counter = {}
 
         if shuffle:
-            c = list(zip(self.data_loader.images_paths['train'], self.data_loader.images_labels['train']))
+            c = list(zip(
+                self.data_loader.images_paths['train'], self.data_loader.images_labels['train']))
             random.shuffle(c)
-            self.data_loader.images_paths['train'], self.data_loader.images_labels['train'] = zip(*c)
+            self.data_loader.images_paths['train'], self.data_loader.images_labels['train'] = zip(
+                *c)
 
         for img_path, img_label in zip(self.data_loader.images_paths['train'], self.data_loader.images_labels['train']):
             if img_label not in classes_counter:
                 classes_counter[img_label] = 0
-            if classes_counter[img_label] < max_num_samples_of_each_classes:
+            if classes_counter[img_label] < max_num_samples_of_each_class:
                 encod = self._generate_encoding(img_path)
                 if encod is not None:
                     data_paths.append(img_path)
@@ -236,25 +231,26 @@ def generate_encodings(self, save_file_name='encodings.pkl', max_num_samples_of_
         self.encoded_training_data['labels'] = data_labels
         self.encoded_training_data['encodings'] = np.squeeze(
             np.array(data_encodings))
-        self.encoded_training_data['knn_classifier'] = KNeighborsClassifier(n_neighbors=knn_k)
+        self.encoded_training_data['knn_classifier'] = KNeighborsClassifier(
+            n_neighbors=knn_k)
         self.encoded_training_data['knn_classifier'].fit(self.encoded_training_data['encodings'],
                                                          self.encoded_training_data['labels'])
-        f = open(os.path.join(self.encodings_path, save_file_name), "wb")
+        f = open(save_file_name, "wb")
         pickle.dump(self.encoded_training_data, f)
         f.close()
 
     def load_encodings(self, path_to_encodings):
         self.encoded_training_data = load_encodings(path_to_encodings)
 
-    def load_model(self,file_path):
+    def load_model(self, file_path):
         from keras_radam import RAdam
-        self.model = load_model(file_path,
-                                custom_objects={'contrastive_loss': lac.contrastive_loss,
-                                                'accuracy': lac.accuracy,
-                                                'loss_function': lac.triplet_loss(self.margin),
-                                                'RAdam': RAdam})
+        self.model = load_model(file_path,
+                                custom_objects={'contrastive_loss': lac.contrastive_loss,
+                                                'accuracy': lac.accuracy,
+                                                'loss_function': lac.triplet_loss(self.margin),
+                                                'RAdam': RAdam})
         self.input_shape = list(self.model.inputs[0].shape[1:])
-        self.base_model = Model(inputs=[self.model.layers[3].get_input_at(0)],
+        self.base_model = Model(inputs=[self.model.layers[3].get_input_at(0)],
                                 outputs=[self.model.layers[3].layers[-1].output])
         self.base_model._make_predict_function()
 
@@ -282,7 +278,8 @@ def predict_knn(self, image):
             img = image
         img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))
         encoding = self.base_model.predict(np.expand_dims(img, axis=0))
-        predicted_label = self.encoded_training_data['knn_classifier'].predict(encoding)
+        predicted_label = self.encoded_training_data['knn_classifier'].predict(
+            encoding)
         return predicted_label
 
     def calculate_prediction_accuracy(self):
@@ -292,18 +289,5 @@ def calculate_prediction_accuracy(self):
                                        self.data_loader.images_labels['val']):
             prediction = self.predict_knn(img_path)[0]
             if prediction == img_label:
-                correct+=1
+                correct += 1
         return correct/total_n_of_images
-
-    def plot_grapths(self, history):
-        for k, v in history.history.items():
-            t = list(range(len(v)))
-            fig, ax = plt.subplots()
-            ax.plot(t, v)
-
-            ax.set(xlabel='epoch', ylabel='{}'.format(k),
-                   title='{}'.format(k))
-            ax.grid()
-
-            fig.savefig("{}{}.png".format(self.plots_path, k))
-
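generate_encodings now writes the pickle to the full path it is given (train.py builds it from work_dir), and the stored dict carries the fitted KNeighborsClassifier that predict_knn uses. A minimal sketch of consuming that file offline; the path shown is just the one that the plates.yml work_dir and encodings_save_name would produce, and the dict layout is the one written above:

import pickle

# Sketch only: classify a precomputed embedding with the stored k-NN model.
with open('work_dirs/plates/encodings/encodings_resnet18_plates.pkl', 'rb') as f:
    encoded = pickle.load(f)

# Stand-in for base_model.predict(...) on a new image: reuse a stored embedding.
embedding = encoded['encodings'][0].reshape(1, -1)
print(encoded['knn_classifier'].predict(embedding))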
diff --git a/embedding_net/pretrain_backbone_softmax.py b/embedding_net/pretrain_backbone_softmax.py
index a75ea5b..857785a 100644
--- a/embedding_net/pretrain_backbone_softmax.py
+++ b/embedding_net/pretrain_backbone_softmax.py
@@ -1,50 +1,48 @@
 import keras
 import numpy as np
-import yaml
 from classification_models import Classifiers
 from .data_loader import SimpleNetImageLoader
 from keras.callbacks import TensorBoard, LearningRateScheduler
 from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
 
+def pretrain_backbone_softmax(input_model, cfg_params):
 
-def pretrain_backbone_softmax(input_model, config_file):
-
-    backbone_model = input_model.backbone_model
-    with open(config_file, 'r') as ymlfile:
-        cfg = yaml.safe_load(ymlfile)
-    input_shape = cfg['input_shape']
-    dataset_path = cfg['dataset_path']
-    image_loader = SimpleNetImageLoader(dataset_path, input_shape=input_shape, augmentations = None)
+    input_shape = cfg_params['input_shape']
+    dataset_path = cfg_params['dataset_path']
+    image_loader = SimpleNetImageLoader(
+        dataset_path, input_shape=input_shape, augmentations=None)
 
     n_classes = image_loader.n_classes['train']
 
-    x = keras.layers.GlobalAveragePooling2D()(backbone_model.output)
+    x = keras.layers.GlobalAveragePooling2D()(input_model.output)
     output = keras.layers.Dense(n_classes, activation='softmax')(x)
-    model = keras.models.Model(inputs=[backbone_model.input], outputs=[output])
+    model = keras.models.Model(inputs=[input_model.input], outputs=[output])
 
     # train
-    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
+    model.compile(optimizer='Adam',
+                  loss='categorical_crossentropy', metrics=['accuracy'])
+
+    batch_size = cfg_params['softmax_batch_size']
+    val_steps = cfg_params['softmax_val_steps']
+    steps_per_epoch = cfg_params['softmax_steps_per_epoch']
+    epochs = cfg_params['softmax_epochs']
 
-    batch_size = 8
-    val_steps = 200
-    steps_per_epoch = 500
-    epochs = 20
     train_generator = image_loader.generate(batch_size, s="train")
     val_generator = image_loader.generate(batch_size, s="val")
 
-    initial_lr = 1e-4
-    decay_factor = 0.95
-    step_size = 1
-
     callbacks = [
-        LearningRateScheduler(lambda x: initial_lr *
-                              decay_factor ** np.floor(x/step_size)),
-        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, verbose=1),
+        ReduceLROnPlateau(monitor='val_loss', factor=0.1,
+                          patience=4, verbose=1),
        EarlyStopping(patience=50, verbose=1),
        TensorBoard(log_dir='tf_log/')
    ]
 
-    history = model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs,
-                                  verbose=1, validation_data = val_generator, validation_steps = val_steps, callbacks=callbacks)
+    history = model.fit_generator(train_generator,
+                                  steps_per_epoch=steps_per_epoch,
+                                  epochs=epochs,
+                                  verbose=1,
+                                  validation_data=val_generator,
+                                  validation_steps=val_steps,
+                                  callbacks=callbacks)
 
-    return backbone_model
\ No newline at end of file
+    return input_model
diff --git a/embedding_net/utils.py b/embedding_net/utils.py
index f321b0f..658a052 100644
--- a/embedding_net/utils.py
+++ b/embedding_net/utils.py
@@ -73,16 +73,25 @@ def plot_tsne_interactive(encodings_path):
     fig.show()
 
 
+def plot_grapths(history, save_path):
+    for k, v in history.history.items():
+        t = list(range(len(v)))
+        fig, ax = plt.subplots()
+        ax.plot(t, v)
+
+        ax.set(xlabel='epoch', ylabel='{}'.format(k),
+               title='{}'.format(k))
+        ax.grid()
+
+        fig.savefig("{}{}.png".format(save_path, k))
+
+
 def parse_net_params(filename='configs/road_signs.yml'):
     params = {}
     with open(filename, 'r') as ymlfile:
         cfg = yaml.safe_load(ymlfile)
 
-    if cfg['learning_rate']:
-        learning_rate = cfg['learning_rate']
-    else:
-        learning_rate = 0.0004
-
+    learning_rate = cfg['learning_rate']
     if cfg['optimizer'] == 'adam':
         optimizer = optimizers.Adam(lr=learning_rate)
     elif cfg['optimizer'] == 'rms_prop':
@@ -100,19 +109,10 @@ def parse_net_params(filename='configs/road_signs.yml'):
     params = {k: v for k, v in cfg.items() if k not in ['optimizer']}
 
-    params['encodings_path'] = os.path.join(cfg['encodings_path'],
-                                            cfg['project_name'])
-    params['plots_path'] = os.path.join(cfg['plots_path'],
-                                        cfg['project_name'])
-    params['tensorboard_log_path'] = os.path.join(cfg['tensorboard_log_path'],
-                                                  cfg['project_name'])
-    params['weights_save_path'] = os.path.join(cfg['weights_save_path'],
-                                               cfg['project_name'])
-    params['model_save_name'] = cfg['model_save_name']
 
     if 'dataset_path' in cfg:
         params['loader'] = EmbeddingNetImageLoader(cfg['dataset_path'],
-                                                   input_shape=cfg['input_shape'],
-                                                   augmentations=augmentations)
+                                                   input_shape=cfg['input_shape'],
+                                                   augmentations=augmentations)
 
     params['optimizer'] = optimizer
     return params
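After this change, parse_net_params passes the YAML keys through unchanged and no longer derives per-project output paths (train.py builds them from work_dir); it also requires learning_rate to be present, since the 0.0004 fallback was removed. Besides the raw keys it adds two constructed objects, an instantiated Keras optimizer and the data loader. A minimal sketch of what the returned dict contains:

from embedding_net.utils import parse_net_params

cfg_params = parse_net_params('configs/plates.yml')
# All YAML keys are passed through, plus two constructed entries:
print(type(cfg_params['optimizer']))   # optimizer built from the 'optimizer'/'learning_rate' keys
print(type(cfg_params['loader']))      # EmbeddingNetImageLoader over dataset_path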
diff --git a/train.py b/train.py
index 7da72cb..89b8ee0 100644
--- a/train.py
+++ b/train.py
@@ -4,43 +4,77 @@ from embedding_net.pretrain_backbone_softmax import pretrain_backbone_softmax
 from keras.callbacks import TensorBoard, LearningRateScheduler
 from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
+from embedding_net.utils import parse_net_params, plot_grapths
+import argparse
 
-n_epochs = 1000
-n_steps_per_epoch = 200
-val_batch_size = 8
-val_steps = 200
-
-config_name = 'road_signs_resnet18_max80_min30'
-config_file_name = 'configs/{}.yml'.format(config_name)
-model = EmbeddingNet(config_file_name)
-
-pretrain_backbone_softmax(model, config_file_name)
-
-initial_lr = 1e-4
-decay_factor = 0.99
-step_size = 1
-
-callbacks = [
-    LearningRateScheduler(lambda x: initial_lr *
-                          decay_factor ** np.floor(x/step_size)),
-    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, verbose=1),
-    EarlyStopping(patience=5, verbose=1),
-    TensorBoard(log_dir=model.tensorboard_log_path),
-    ModelCheckpoint(filepath=os.path.join(model.weights_save_path, model.model_save_name),
-                    verbose=1, monitor='val_loss', save_best_only=True)
-]
-
-model.train_generator_mining(steps_per_epoch=n_steps_per_epoch,
-                             epochs=n_epochs,
-                             callbacks = callbacks,
-                             val_steps=val_steps,
-                             n_classes=5,
-                             n_samples=3,
-                             negative_selection_mode='semihard')
-
-model.generate_encodings(save_file_name='encodings_{}.pkl'.format(config_name),
-                         max_num_samples_of_each_classes=30, knn_k=1, shuffle=True)
-
-model_accuracy = model.calculate_prediction_accuracy()
-print('Model accuracy on validation set: {}'.format(model_accuracy))
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a classificator')
+    parser.add_argument('config', help='model config file path')
+    parser.add_argument(
+        '--resume_from', help='the checkpoint file to resume from')
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+    cfg_params = parse_net_params(args.config)
+    os.makedirs(cfg_params['work_dir'], exist_ok=True)
+    weights_save_path = os.path.join(cfg_params['work_dir'], 'weights/')
+    encodings_save_path = os.path.join(cfg_params['work_dir'], 'encodings/')
+    plots_save_path = os.path.join(cfg_params['work_dir'], 'plots/')
+    tensorboard_save_path = os.path.join(cfg_params['work_dir'], 'tf_log/')
+
+    os.makedirs(weights_save_path, exist_ok=True)
+    os.makedirs(encodings_save_path, exist_ok=True)
+
+    model = EmbeddingNet(cfg_params)
+
+    weights_save_file = os.path.join(
+        weights_save_path, cfg_params['model_save_name'])
+
+    initial_lr = cfg_params['learning_rate']
+    decay_factor = cfg_params['decay_factor']
+    step_size = cfg_params['step_size']
+
+    callbacks = [
+        LearningRateScheduler(lambda x: initial_lr *
+                              decay_factor ** np.floor(x/step_size)),
+        ReduceLROnPlateau(monitor='val_loss', factor=0.1,
+                          patience=4, verbose=1),
+        EarlyStopping(patience=5, verbose=1),
+        TensorBoard(log_dir=tensorboard_save_path),
+        ModelCheckpoint(filepath=weights_save_file,
+                        verbose=1, monitor='val_loss', save_best_only=True)
+    ]
+
+    history = model.train_generator_mining(steps_per_epoch=cfg_params['n_steps_per_epoch'],
+                                           epochs=cfg_params['n_epochs'],
+                                           callbacks=callbacks,
+                                           val_steps=cfg_params['val_steps'],
+                                           val_batch=cfg_params['val_batch_size'],
+                                           n_classes=cfg_params['mining_n_classes'],
+                                           n_samples=cfg_params['mining_n_samples'],
+                                           negative_selection_mode=cfg_params['negatives_selection_mode'])
+
+    if cfg_params['plot_history']:
+        os.makedirs(plots_save_path, exist_ok=True)
+        plot_grapths(history, plots_save_path)
+
+    if cfg_params['save_encodings']:
+        encodings_save_file = os.path.join(
+            encodings_save_path, cfg_params['encodings_save_name'])
+        model.generate_encodings(save_file_name=encodings_save_file,
+                                 max_num_samples_of_each_class=cfg_params['max_num_samples_of_each_class'],
+                                 knn_k=cfg_params['knn_k'],
+                                 shuffle=True)
+
+    model_accuracy = model.calculate_prediction_accuracy()
+    print('Model accuracy on validation set: {}'.format(model_accuracy))
+
+
+if __name__ == '__main__':
+    main()
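With this rewrite, training is launched from the CLI with a config path, e.g. "python train.py configs/plates.yml" (the --resume_from argument is parsed but not yet consumed by main()), and the LearningRateScheduler applies a step decay driven by the new config keys instead of hard-coded constants. A tiny sketch of that schedule, shown with the values from configs/plates.yml:

import numpy as np

# Step-decay schedule used by the LearningRateScheduler callback above:
# lr(epoch) = learning_rate * decay_factor ** floor(epoch / step_size)
initial_lr, decay_factor, step_size = 0.0001, 0.99, 10

def lr_at(epoch):
    return initial_lr * decay_factor ** np.floor(epoch / step_size)

print(lr_at(0), lr_at(10), lr_at(100))   # 1e-04, 9.9e-05, ~9.04e-05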