diff --git a/configs/bengali.yml b/configs/bengali.yml index 9ec7362..1693606 100644 --- a/configs/bengali.yml +++ b/configs/bengali.yml @@ -3,7 +3,7 @@ MODEL: encodings_len: 256 mode : 'triplet' distance_type : 'l1' - backbone : 'resnet18' + backbone_name : 'efficientnet-b0' backbone_weights : 'imagenet' freeze_backbone : False embeddings_normalization: True @@ -12,18 +12,18 @@ DATALOADER: dataset_path : '/home/rauf/datasets/bengali/pngs/train/' csv_file : '/home/rauf/datasets/bengali/train_new.csv' image_id_column : 'image_id' - label_column : 'label' + label_column : 'grapheme_root' validate : True val_ratio : 0.2 GENERATOR: negatives_selection_mode : 'semihard' - mining_n_classes: 5 - mining_n_samples: 3 + k_classes: 3 + k_samples: 5 margin: 0.5 - batch_size : 8 - n_batches : 200 - augmentation_type : 'default' + batch_size : 10 + n_batches : 10 + augmentations : 'default' TRAIN: # optimizer parameters @@ -38,23 +38,21 @@ TRAIN: # plot training history plot_history : True -SOFTMAX_PRETRAINING: - # softmax pretraining parameters - optimizer : 'radam' - learning_rate : 0.0001 - decay_factor : 0.99 - step_size : 1 +# SOFTMAX_PRETRAINING: +# # softmax pretraining parameters +# optimizer : 'radam' +# learning_rate : 0.0001 +# decay_factor : 0.99 +# step_size : 1 - batch_size : 8 - val_steps : 200 - steps_per_epoch : 500 - n_epochs : 20 +# batch_size : 8 +# val_steps : 200 +# steps_per_epoch : 10 +# n_epochs : 1 SAVE_PATHS: - work_dir : 'work_dirs/road_signs_resnet18/' - encodings_path : 'encodings/' - model_save_name : 'best_model_resnet18.h5' - encodings_save_name: 'encodings_resnet18.pkl' + project_name : 'bengali_efficientnet' + work_dir : 'work_dirs/' ENCODINGS: # encodings parameters diff --git a/embedding_net/backbones.py b/embedding_net/backbones.py index c5753a1..2fb7873 100644 --- a/embedding_net/backbones.py +++ b/embedding_net/backbones.py @@ -10,7 +10,8 @@ def get_backbone(input_shape, backbone_name='simple', embeddings_normalization=True, 
backbone_weights='imagenet', - freeze_backbone=False): + freeze_backbone=False, + **kwargs): if backbone_name == 'simple': input_image = Input(input_shape) x = Conv2D(64, (10, 10), activation='relu', diff --git a/embedding_net/datagenerators.py b/embedding_net/datagenerators.py index 86ab1c3..9795500 100644 --- a/embedding_net/datagenerators.py +++ b/embedding_net/datagenerators.py @@ -130,9 +130,9 @@ def __init__(self, embedding_model, k_classes=5, k_samples=5, margin=0.5, - negative_selection_mode='semihard'): + negatives_selection_mode='semihard'): super().__init__(class_files_paths=class_files_paths, - clas_names=class_names, + class_names=class_names, input_shape=input_shape, batch_size=batch_size, n_batches=n_batches, @@ -141,10 +141,10 @@ def __init__(self, embedding_model, 'hardest': self.hardest_negative, 'random_hard': self.random_hard_negative} self.embedding_model = embedding_model - self.k_classes=k_classes, - self.k_samples=k_samples, - self.margin=margin - self.negative_selection_fn = modes[negative_selection_mode] + self.k_classes = k_classes + self.k_samples = k_samples + self.margin = margin + self.negative_selection_fn = modes[negatives_selection_mode] def hardest_negative(self, loss_values, margin=0.5): hard_negative = np.argmax(loss_values) @@ -160,7 +160,7 @@ def semihard_negative(self, loss_values, margin=0.5): return np.random.choice(semihard_negatives) if len(semihard_negatives) > 0 else None def get_batch_triplets_mining(self): selected_classes_idxs = np.random.choice(self.n_classes, size=self.k_classes, replace=False) selected_classes = [self.class_names[cl] for cl in selected_classes_idxs] selected_classes_n_elements = [self.n_samples[cl] for cl in selected_classes] @@ -228,9 +227,10 @@ def __init__(self, class_files_paths, input_shape=None, batch_size = 32, n_batches = 10, - augmentations=None): + augmentations=None, + **kwargs): super().__init__(class_files_paths=class_files_paths, - clas_names=class_names, + class_names=class_names,
input_shape=input_shape, batch_size=batch_size, n_batches=n_batches, @@ -284,7 +284,7 @@ def __init__(self, class_files_paths, augmentations=None): super().__init__(class_files_paths=class_files_paths, - clas_names=class_names, + class_names=class_names, input_shape=input_shape, batch_size=batch_size, n_batches=n_batches, @@ -346,7 +346,7 @@ def __init__(self, class_files_paths, augmentations=None): super().__init__(class_files_paths=class_files_paths, - clas_names=class_names, + class_names=class_names, input_shape=input_shape, batch_size=batch_size, n_batches=n_batches, diff --git a/embedding_net/losses_and_accuracies.py b/embedding_net/losses_and_accuracies.py index 1c0fbcb..f817bf7 100644 --- a/embedding_net/losses_and_accuracies.py +++ b/embedding_net/losses_and_accuracies.py @@ -1,5 +1,4 @@ -import keras.backend as K -import keras +import tensorflow.keras.backend as K def contrastive_loss(y_true, y_pred): @@ -51,30 +50,30 @@ def accuracy(y_true, y_pred): return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) -class tSNECallback(keras.callbacks.Callback): +# class tSNECallback(keras.callbacks.Callback): - def __init__(self, save_file_name='tSNE.gif'): - super(tSNECallback, self).__init__() - self.save_file_name = save_file_name +# def __init__(self, save_file_name='tSNE.gif'): +# super(tSNECallback, self).__init__() +# self.save_file_name = save_file_name - def on_train_begin(self, logs={}): - self.aucs = [] - self.losses = [] +# def on_train_begin(self, logs={}): +# self.aucs = [] +# self.losses = [] - def on_train_end(self, logs={}): - return +# def on_train_end(self, logs={}): +# return - def on_epoch_begin(self, epoch, logs={}): - return +# def on_epoch_begin(self, epoch, logs={}): +# return - def on_epoch_end(self, epoch, logs={}): - self.losses.append(logs.get('loss')) - y_pred = self.model.predict(self.model.validation_data[0]) - self.aucs.append(roc_auc_score(self.model.validation_data[1], y_pred)) - return +# def on_epoch_end(self, epoch, 
logs={}): +# self.losses.append(logs.get('loss')) +# y_pred = self.model.predict(self.model.validation_data[0]) +# self.aucs.append(roc_auc_score(self.model.validation_data[1], y_pred)) +# return - def on_batch_begin(self, batch, logs={}): - return +# def on_batch_begin(self, batch, logs={}): +# return - def on_batch_end(self, batch, logs={}): - return +# def on_batch_end(self, batch, logs={}): +# return diff --git a/embedding_net/model_new.py b/embedding_net/model_new.py index 2e75c01..3e33cb7 100644 --- a/embedding_net/model_new.py +++ b/embedding_net/model_new.py @@ -3,7 +3,6 @@ import tensorflow.keras.backend as K import cv2 import random -import keras from tensorflow.keras.models import Model, load_model from tensorflow.keras import optimizers from tensorflow.keras.layers import Dense, Input, Lambda, concatenate, GlobalAveragePooling2D @@ -19,20 +18,22 @@ class EmbeddingNet: - def __init__(self, cfg): - self.params_backbone = cfg['backbone'] - self.params_dataloader = cfg['dataloader'] - self.params_generator = cfg['generator'] - self.params_save_paths = cfg['save_paths'] - self.params_train = cfg['train'] - if 'SOFTMAX_PRETRAINING' in cfg: - self.params_softmax = cfg['softmax'] + def __init__(self, params): + self.params_model = params['model'] + self.params_dataloader = params['dataloader'] + self.params_generator = params['generator'] + self.params_save_paths = params['save_paths'] + self.params_train = params['train'] + if 'softmax' in params: + self.params_softmax = params['softmax'] - self.base_model = {} - self.backbone_model = {} + self.base_model = None + self.backbone_model = None self.encoded_training_data = {} - self.data_loader = {} + self.dataloader = None + self.train_generator = None + self.val_generator = None def pretrain_backbone_softmax(self): @@ -61,7 +62,7 @@ def pretrain_backbone_softmax(self): metrics=['accuracy']) train_generator = SimpleDataGenerator(self.data_loader.train_data, - self.class_names, + self.data_loader.class_names, 
input_shape=input_shape, batch_size = batch_size, n_batches = steps_per_epoch, @@ -69,7 +70,7 @@ def pretrain_backbone_softmax(self): if self.data_loader.validate: val_generator = SimpleDataGenerator(self.data_loader.val_data, - self.class_names, + self.data_loader.class_names, input_shape=input_shape, batch_size = batch_size, n_batches = steps_per_epoch, @@ -80,11 +81,14 @@ def pretrain_backbone_softmax(self): checkpoint_callback_monitor = 'loss' tensorboard_save_path = os.path.join( - self.params_save_paths['work_dir'], 'tf_log/pretraining_model/') + self.params_save_paths['work_dir'], + self.params_save_paths['project_name'], + 'pretraining_model/tf_log/') weights_save_file = os.path.join( - self.params_save_paths['work_dir'], - 'weights/pretraining_model/', - self.params_save_paths['model_save_name']) + self.params_save_paths['work_dir'], + self.params_save_paths['project_name'], + 'pretraining_model/weights/', + self.params_save_paths['project_name']+'.h5') callbacks = [ LearningRateScheduler(lambda x: learning_rate * @@ -110,30 +114,30 @@ def pretrain_backbone_softmax(self): validation_steps=val_steps, callbacks=callbacks) - def _create_base_model(self, params_backbone): - self.base_model, self.backbone_model = get_backbone(**params_backbone) + def _create_base_model(self): + self.base_model, self.backbone_model = get_backbone(**self.params_model) - def _create_dataloader(self, dataloader_params): - return ENDataLoader(**dataloader_params) + def _create_dataloader(self): + self.data_loader = ENDataLoader(**self.params_dataloader) def _create_generators(self): pass - def train_generator(self, callbacks=[], verbose=1): - history = self.model.fit_generator(self.train_generator, - validation_data=self.val_generator, - epochs=self.params_train['n_epoch'], - callbacks=callbacks, - verbose=verbose) - - return history - def _generate_encoding(self, img_path): img = self.data_loader.get_image(img_path) if img is None: return None encoding = 
self.base_model.predict(np.expand_dims(img, axis=0)) return encoding + + def train(self, callbacks=[], verbose=1): + history = self.model.fit_generator(self.train_generator, + validation_data=self.val_generator, + epochs=self.params_train['n_epochs'], + callbacks=callbacks, + verbose=verbose) + + return history def generate_encodings(self, save_file_name='encodings.pkl', only_centers=False, @@ -258,35 +262,32 @@ def calculate_prediction_accuracy(self): class TripletNet(EmbeddingNet): - def __init__(self, cfg, training=False): - super().__init__(cfg) + def __init__(self, params, training=False): + super().__init__(params) self._create_base_model() self.base_model._make_predict_function() - self.model = self._create_model_triplet() + self.training = training - if training: - self.dataloader = {} - self.train_generator = {} - self.val_generator = {} + if self.training: + self._create_dataloader() self._create_generators() + self._create_model_triplet() def _create_generators(self): self.train_generator = TripletsDataGenerator(embedding_model=self.base_model, - self.data_loader.train_data, - self.data_loader.class_names, + class_files_paths=self.data_loader.train_data, + class_names=self.data_loader.class_names, **self.params_generator) if self.data_loader.validate: self.val_generator = SimpleTripletsDataGenerator(self.data_loader.val_data, self.data_loader.class_names, **self.params_generator) - else: - self.val_generator = None def _create_model_triplet(self): - input_image_a = Input(self.input_shape) - input_image_p = Input(self.input_shape) - input_image_n = Input(self.input_shape) + input_image_a = Input(self.params_model['input_shape']) + input_image_p = Input(self.params_model['input_shape']) + input_image_n = Input(self.params_model['input_shape']) image_encoding_a = self.base_model(input_image_a) image_encoding_p = self.base_model(input_image_p) @@ -303,42 +304,39 @@ def _create_model_triplet(self): print('Whole model summary') self.model.summary() - 
self.model.compile(loss=lac.triplet_loss( - self.margin), optimizer=self.optimizer) + self.model.compile(loss=lac.triplet_loss(self.params_generator['margin']), + optimizer=self.params_train['optimizer']) class SiameseNet(EmbeddingNet): - def __init__(self, cfg, training): - super().__init__(cfg) - self.model = self._create_model_siamese() + def __init__(self, params, training): + super().__init__(params) + self._create_base_model() + self._create_model_siamese() if training: self.dataloader = {} self.train_generator = {} self.val_generator = {} - self.train_generator = TripletsDataGenerator(**train_generator_params) - self.val_generator = TripletsDataGenerator(**val_generator_params) + self._create_dataloader() + self._create_generators() def _create_generators(self): - self.train_generator = TripletsDataGenerator(embedding_model=self.base_model, - self.data_loader.train_data, - self.data_loader.class_names, + self.train_generator = SiameseDataGenerator(class_files_paths=self.data_loader.train_data, + class_names=self.data_loader.class_names, **self.params_generator) if self.data_loader.validate: - self.val_generator = TripletsDataGenerator(embedding_model=self.base_model, - self.data_loader.val_data, - self.data_loader.class_names, + self.val_generator = SiameseDataGenerator(class_files_paths=self.data_loader.val_data, + class_names=self.data_loader.class_names, **self.params_generator) def _create_model_siamese(self): - input_image_1 = Input(self.input_shape) - input_image_2 = Input(self.input_shape) + input_image_1 = Input(self.params_model['input_shape']) + input_image_2 = Input(self.params_model['input_shape']) image_encoding_1 = self.base_model(input_image_1) image_encoding_2 = self.base_model(input_image_2) - if self.distance_type == 'l1': + if self.params_model['distance_type'] == 'l1': L1_layer = Lambda( lambda tensors: K.abs(tensors[0] - tensors[1])) distance = L1_layer([image_encoding_1, image_encoding_2]) @@ -346,7 +344,7 @@ prediction = Dense(units=1,
activation='sigmoid')(distance) metric = 'binary_accuracy' - elif self.distance_type == 'l2': + elif self.params_model['distance_type'] == 'l2': L2_layer = Lambda( lambda tensors: K.sqrt(K.maximum(K.sum(K.square(tensors[0] - tensors[1]), axis=1, keepdims=True), K.epsilon()))) @@ -365,4 +363,4 @@ def _create_model_siamese(self): self.model.summary() self.model.compile(loss=lac.contrastive_loss, metrics=[metric], - optimizer=self.optimizer) \ No newline at end of file + optimizer=self.params_train['optimizer']) \ No newline at end of file diff --git a/embedding_net/utils.py b/embedding_net/utils.py index 1749e86..1f19e7d 100644 --- a/embedding_net/utils.py +++ b/embedding_net/utils.py @@ -1,5 +1,6 @@ from sklearn.manifold import TSNE import os +os.environ["TF_KERAS"] = '1' import cv2 import pickle import numpy as np @@ -7,11 +8,6 @@ import yaml from tensorflow.keras import optimizers from .augmentations import get_aug -from .data_loader import EmbeddingNetImageLoader -from .datagenerators import ENDataLoader, - TripletsDataGenerator, - SiameseDataGenerator, - SimpleTripletsDataGenerator def load_encodings(path_to_encodings): @@ -162,7 +158,7 @@ def parse_params(filename='configs/road_signs.yml'): params_generator['input_shape'] = params_model['input_shape'] params_train['optimizer'] = optimizer - params_model['augmentations'] = augmentations + params_generator['augmentations'] = augmentations params = {'dataloader' : params_dataloader, 'generator' : params_generator, diff --git a/train.py b/train.py index c291f76..28c7bf2 100644 --- a/train.py +++ b/train.py @@ -1,10 +1,9 @@ import os import numpy as np -from embedding_net.model import EmbeddingNet -from embedding_net.pretrain_backbone_softmax import pretrain_backbone_softmax +from embedding_net.model_new import EmbeddingNet, TripletNet from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint -from 
embedding_net.utils import parse_net_params, plot_grapths +from embedding_net.utils import parse_params, plot_grapths import argparse @@ -18,37 +17,48 @@ def parse_args(): return args +def create_save_folders(params): + work_dir_path = os.path.join(params['work_dir'], params['project_name']) + weights_save_path = os.path.join(work_dir_path, 'weights/') + weights_pretrained_save_path = os.path.join(work_dir_path, 'pretraining_model/weights/') + encodings_save_path = os.path.join(work_dir_path, 'encodings/') + plots_save_path = os.path.join(work_dir_path, 'plots/') + tensorboard_save_path = os.path.join(work_dir_path, 'tf_log/') + tensorboard_pretrained_save_path = os.path.join(work_dir_path, 'pretraining_model/tf_log/') + weights_save_file_path = os.path.join(weights_save_path, 'best_' + params['project_name'] + '.h5') + + os.makedirs(work_dir_path , exist_ok=True) + os.makedirs(weights_save_path, exist_ok=True) + os.makedirs(weights_pretrained_save_path, exist_ok=True) + os.makedirs(encodings_save_path, exist_ok=True) + os.makedirs(plots_save_path, exist_ok=True) + os.makedirs(tensorboard_save_path, exist_ok=True) + os.makedirs(tensorboard_pretrained_save_path, exist_ok=True) + + return tensorboard_save_path, weights_save_file_path, plots_save_path def main(): args = parse_args() - cfg_params = parse_net_params(args.config) - os.makedirs(cfg_params['work_dir'], exist_ok=True) - weights_save_path = os.path.join(cfg_params['work_dir'], 'weights/') - weights_pretrained_save_path = os.path.join(weights_save_path, 'pretraining_model/') - encodings_save_path = os.path.join(cfg_params['work_dir'], 'encodings/') - plots_save_path = os.path.join(cfg_params['work_dir'], 'plots/') - tensorboard_save_path = os.path.join(cfg_params['work_dir'], 'tf_log/') + cfg_params = parse_params(args.config) + params_train = cfg_params['train'] + params_dataloader = cfg_params['dataloader'] + tensorboard_save_path, weights_save_file_path, plots_save_path = create_save_folders(cfg_params['save_paths']) - os.makedirs(weights_save_path,
exist_ok=True) - os.makedirs(weights_pretrained_save_path, exist_ok=True) - os.makedirs(encodings_save_path, exist_ok=True) + model = TripletNet(cfg_params, training=True) - model = EmbeddingNet(cfg_params) - if cfg_params['mode'] not in ['triplet', 'siamese']: - return + if 'softmax' in cfg_params: + model.pretrain_backbone_softmax() + if args.resume_from is not None: model.load_model(args.resume_from) - weights_save_file = os.path.join( - weights_save_path, cfg_params['model_save_name']) + initial_lr = params_train['learning_rate'] + decay_factor = params_train['decay_factor'] + step_size = params_train['step_size'] - initial_lr = cfg_params['learning_rate'] - decay_factor = cfg_params['decay_factor'] - step_size = cfg_params['step_size'] - - if cfg_params['to_validate']: + if params_dataloader['validate']: callback_monitor = 'val_loss' else: callback_monitor = 'loss' @@ -62,34 +71,26 @@ def main(): patience=10, verbose=1), TensorBoard(log_dir=tensorboard_save_path), - ModelCheckpoint(filepath=weights_save_file, + ModelCheckpoint(filepath=weights_save_file_path, verbose=1, monitor=callback_monitor, save_best_only=True) ] - history = model.train_generator_mining(steps_per_epoch=cfg_params['n_steps_per_epoch'], - epochs=cfg_params['n_epochs'], - callbacks=callbacks, - val_steps=cfg_params['val_steps'], - val_batch=cfg_params['val_batch_size'], - n_classes=cfg_params['mining_n_classes'], - n_samples=cfg_params['mining_n_samples'], - negative_selection_mode=cfg_params['negatives_selection_mode']) - - if cfg_params['plot_history']: - os.makedirs(plots_save_path, exist_ok=True) + history = model.train(callbacks=callbacks) + + if params_train['plot_history']: plot_grapths(history, plots_save_path) - if cfg_params['save_encodings']: - encodings_save_file = os.path.join( - encodings_save_path, cfg_params['encodings_save_name']) - model.generate_encodings(save_file_name=encodings_save_file, - max_num_samples_of_each_class=cfg_params['max_num_samples_of_each_class'], - 
knn_k=cfg_params['knn_k'], - shuffle=True) - if cfg_params['to_validate']: - model_accuracies = model.calculate_prediction_accuracy() - print('Model top1 accuracy on validation set: {}'.format(model_accuracies['top1'])) - print('Model top5 accuracy on validation set: {}'.format(model_accuracies['top5'])) + # if cfg_params['save_encodings']: + # encodings_save_file = os.path.join( + # encodings_save_path, cfg_params['encodings_save_name']) + # model.generate_encodings(save_file_name=encodings_save_file, + # max_num_samples_of_each_class=cfg_params['max_num_samples_of_each_class'], + # knn_k=cfg_params['knn_k'], + # shuffle=True) + # if cfg_params['to_validate']: + # model_accuracies = model.calculate_prediction_accuracy() + # print('Model top1 accuracy on validation set: {}'.format(model_accuracies['top1'])) + # print('Model top5 accuracy on validation set: {}'.format(model_accuracies['top5'])) if __name__ == '__main__':