modify training and testing

RocketFlash · Dec 17, 2019 · e4d6505 · e4d6505
1 parent 82ab143
commit e4d6505
Show file tree

Hide file tree

Showing 9 changed files with 146 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -61,18 +61,20 @@ Dataset
 
 Training requires a configuration file that specifies all network and training parameters. Example configuration files can be found in the **configs** folder. 
 
-After the configuration file is created, you can modify **train.py** file, and then start training:
+After the configuration file is created, run **train.py** to start training:
 
 ```bash
-$ python3 train.py
+$ python3 train.py [config (path to configuration_file)]
+                   [--resume_from (the checkpoint file to resume from)]
 ```
 
 # Test
 
 The trained model can be tested using the following command:
 
 ```bash
-$ python3 test.py [--weights (path to trained model weights file)] 
+$ python3 test.py [config (path to configuration_file)]
+                  [--weights (path to trained model weights file)] 
                   [--encodings (path to trained model encodings file)]
                   [--image (path to image file)]
 ```

diff --git a/configs/plates.yml b/configs/plates.yml
@@ -34,7 +34,6 @@ softmax_epochs : 1000
 # paths
 work_dir : 'work_dirs/plates/'
 dataset_path : '/home/rauf/plates_competition/dataset/to_train/'
-tensorboard_log_path : 'tf_log/'
 plot_history : True
 model_save_name : 'best_model_resnet18_plates.h5'
 encodings_save_name: 'encodings_resnet18_plates.pkl'

diff --git a/configs/road_signs_resnet18.yml b/configs/road_signs_resnet18.yml
@@ -33,10 +33,14 @@ softmax_steps_per_epoch : 500
 softmax_epochs : 20
 
 #paths
+work_dir : 'work_dirs/road_signs_resnet18/'  # NOTE(review): was 'work_dirs/plates/' (same as plates.yml); presumably a copy-paste - confirm
 dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
-tensorboard_log_path : 'tf_log/'
-weights_save_path : 'weights/'
-plots_path : 'plots/'
+plot_history : True
 encodings_path : 'encodings/'
 model_save_name : 'best_model_resnet18.h5'
-encodings_save_name: 'encodings_resnet18.pkl'
+encodings_save_name: 'encodings_resnet18.pkl'
+
+# encodings parameters
+save_encodings : True
+max_num_samples_of_each_class : 30
+knn_k : 1
diff --git a/configs/road_signs_resnet34.yml b/configs/road_signs_resnet34.yml
@@ -0,0 +1,46 @@
+input_shape :  [48, 48, 3]
+encodings_len: 256
+margin: 0.5
+mode : 'triplet'
+distance_type : 'l1'
+backbone : 'resnet34'
+backbone_weights : 'imagenet'
+project_name : 'road_signs/'
+freeze_backbone : False
+augmentation_type : 'default'
+embeddings_normalization: True
+
+# optimizer parameters
+optimizer : 'radam'
+learning_rate : 0.0001
+decay_factor : 0.99
+step_size : 1
+
+# embeddings learning training parameters
+n_epochs : 1000
+n_steps_per_epoch : 50
+val_batch_size : 8
+val_steps : 200
+negatives_selection_mode : 'semihard'
+mining_n_classes: 5
+mining_n_samples: 3
+
+# softmax pretraining parameters
+softmax_pretraining : True
+softmax_batch_size : 16
+softmax_val_steps : 200
+softmax_steps_per_epoch : 500
+softmax_epochs : 20
+
+#paths
+work_dir : 'work_dirs/road_signs_resnet34/'  # NOTE(review): was 'work_dirs/plates/' (same as plates.yml); presumably a copy-paste - confirm
+dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
+plot_history : True
+encodings_path : 'encodings/'
+model_save_name : 'best_model_resnet34.h5'  # file names follow this config's backbone instead of reusing the resnet18 names
+encodings_save_name: 'encodings_resnet34.pkl'
+
+# encodings parameters
+save_encodings : True
+max_num_samples_of_each_class : 30
+knn_k : 1
diff --git a/configs/road_signs_resnext50.yml b/configs/road_signs_resnext50.yml
@@ -0,0 +1,46 @@
+input_shape :  [48, 48, 3]
+encodings_len: 256
+margin: 0.4
+mode : 'triplet'
+distance_type : 'l1'
+backbone : 'resnext50'
+backbone_weights : 'imagenet'
+project_name : 'road_signs/'
+freeze_backbone : False
+augmentation_type : 'default'
+embeddings_normalization: True
+
+# optimizer parameters
+optimizer : 'radam'
+learning_rate : 0.001
+decay_factor : 0.95
+step_size : 2
+
+# embeddings learning training parameters
+n_epochs : 1000
+n_steps_per_epoch : 200
+val_batch_size : 8
+val_steps : 200
+negatives_selection_mode : 'semihard'
+mining_n_classes: 5 # training batch_size = mining_n_classes * mining_n_samples
+mining_n_samples: 3
+
+# softmax pretraining parameters
+softmax_pretraining : True
+softmax_batch_size : 8
+softmax_val_steps : 200
+softmax_steps_per_epoch : 500
+softmax_epochs : 20
+
+#paths
+work_dir : 'work_dirs/road_signs_resnext50/'  # NOTE(review): was 'work_dirs/plates/' (same as plates.yml); presumably a copy-paste - confirm
+dataset_path : '/home/rauf/datasets/road_signs/road_signs_separated/'
+plot_history : True
+encodings_path : 'encodings/'
+model_save_name : 'best_model_resnext50.h5'  # file names follow this config's backbone instead of reusing the resnet18 names
+encodings_save_name: 'encodings_resnext50.pkl'
+
+# encodings parameters
+save_encodings : True
+max_num_samples_of_each_class : 30
+knn_k : 1
diff --git a/embedding_net/data_loader.py b/embedding_net/data_loader.py
@@ -34,7 +34,7 @@ def _load_images_paths(self):
             self.images_labels[d] = []
             for root, dirs, files in os.walk(self.dataset_path+d):
                 for f in files:
-                    if f.endswith('.jpg') or f.endswith('.png'):
+                    if f.endswith('.jpg') or f.endswith('.png') and not f.startswith('._'):
                         self.images_paths[d].append(root+'/'+f)
                         self.images_labels[d].append(root.split('/')[-1])
 
@@ -136,7 +136,7 @@ def get_batch_triplets(self, batch_size,  s='train'):
 
     def get_batch_triplets_batch_all(self):
         """Stub with no behavior yet: the body is empty and the call returns ``None``."""
         return None
-    
+
     def hardest_negative(self, loss_values, margin=0.5):
         """Pick the position of the largest loss value, if that value is positive.

         Args:
             loss_values: indexable collection of loss values accepted by
                 ``np.argmax``.
             margin: not used here; it is part of the signature shared by the
                 negative-selection methods.

         Returns:
             The index returned by ``np.argmax(loss_values)`` when the value at
             that index is greater than zero, otherwise ``None``.
         """
         top_idx = np.argmax(loss_values)
         if loss_values[top_idx] > 0:
             return top_idx
         return None
@@ -146,16 +146,16 @@ def random_hard_negative(self, loss_values, margin=0.5):
         return np.random.choice(hard_negatives) if len(hard_negatives) > 0 else None
 
     def semihard_negative(self, loss_values, margin=0.5):
-        semihard_negatives = np.where(np.logical_and(loss_values < margin, loss_values > 0))[0]
+        semihard_negatives = np.where(np.logical_and(
+            loss_values < margin, loss_values > 0))[0]
         return np.random.choice(semihard_negatives) if len(semihard_negatives) > 0 else None
 
-
-    def get_batch_triplets_mining(self, 
-                                  embedding_model, 
-                                  n_classes, 
+    def get_batch_triplets_mining(self,
+                                  embedding_model,
+                                  n_classes,
                                   n_samples,
-                                  margin = 0.5, 
-                                  negative_selection_mode='semihard', 
+                                  margin=0.5,
+                                  negative_selection_mode='semihard',
                                   s='train'):
         if negative_selection_mode == 'semihard':
             negative_selection_fn = self.semihard_negative
@@ -166,10 +166,11 @@ def get_batch_triplets_mining(self,
 
         selected_classes_idxs = np.random.choice(
             self.n_classes[s], size=n_classes, replace=False)
-        selected_classes = [self.classes[s][cl] for cl in selected_classes_idxs]
+        selected_classes = [self.classes[s][cl]
+                            for cl in selected_classes_idxs]
         selected_classes_n_elements = [
             self.indexes[s][cl].shape[0] for cl in selected_classes]
-        
+
         selected_images = [np.random.choice(
             cl, size=n_samples, replace=False) for cl in selected_classes_n_elements]
 
@@ -186,7 +187,7 @@ def get_batch_triplets_mining(self,
         all_embeddings = np.vstack(all_embeddings_list)
         all_images = np.vstack(all_images_list)
         distance_matrix = pairwise_distances(all_embeddings)
-        
+
         triplet_anchors = []
         triplet_positives = []
         triplet_negatives = []
@@ -200,11 +201,15 @@ def get_batch_triplets_mining(self,
             anchor_positives = np.array(
                 list(combinations(positive_indices, 2)))
 
-            ap_distances = distance_matrix[anchor_positives[:,0], anchor_positives[:,1]]
+            ap_distances = distance_matrix[anchor_positives[:,
+                                                            0], anchor_positives[:, 1]]
             for anchor_positive, ap_distance in zip(anchor_positives, ap_distances):
-                loss_values = ap_distance - distance_matrix[anchor_positive[0], negative_indices] + margin
+                loss_values = ap_distance - \
+                    distance_matrix[anchor_positive[0],
+                                    negative_indices] + margin
                 loss_values = np.array(loss_values)
-                hard_negative = negative_selection_fn(loss_values, margin = margin)
+                hard_negative = negative_selection_fn(
+                    loss_values, margin=margin)
                 if hard_negative is not None:
                     hard_negative = negative_indices[hard_negative]
                     triplet_anchors.append(all_images[anchor_positive[0]])
@@ -217,13 +222,12 @@ def get_batch_triplets_mining(self,
             triplet_positives.append(all_images[anchor_positive[1]])
             triplet_negatives.append(all_images[negative_indices[0]])
             targets.append(1)
-        
+
         triplet_anchors = np.array(triplet_anchors)
         triplet_positives = np.array(triplet_positives)
         triplet_negatives = np.array(triplet_negatives)
         targets = np.array(targets)
 
-
         triplets = [triplet_anchors, triplet_positives, triplet_negatives]
         return triplets, targets
 
@@ -235,14 +239,14 @@ def generate(self, batch_size, mode='siamese', s='train'):
                 data, targets = self.get_batch_triplets(batch_size, s)
             yield (data, targets)
 
-    def generate_mining(self, embedding_model, n_classes, n_samples, margin = 0.5, negative_selection_mode='semihard', s='train'):
+    def generate_mining(self, embedding_model, n_classes, n_samples, margin=0.5, negative_selection_mode='semihard', s='train'):
         while True:
             data, targets = self.get_batch_triplets_mining(embedding_model,
-                                                                   n_classes, 
-                                                                   n_samples,
-                                                                   margin = margin,
-                                                                   negative_selection_mode='semihard', 
-                                                                   s=s)
+                                                           n_classes,
+                                                           n_samples,
+                                                           margin=margin,
+                                                           negative_selection_mode='semihard',
+                                                           s=s)
             yield (data, targets)
 
     def get_image(self, img_path):
@@ -275,8 +279,6 @@ def plot_batch(self, data, targets):
         plt.show()
 
 
-
-
 class SimpleNetImageLoader:
     """
     Image loader for Embedding network
@@ -298,23 +300,22 @@ def __init__(self, dataset_path, input_shape=None, augmentations=None, data_subs
         self.indexes = {d: {cl: np.where(np.array(self.images_labels[d]) == cl)[
             0] for cl in self.classes[d]} for d in data_subsets}
 
-
     def _load_images_paths(self):
         for d in self.data_subsets:
             self.images_paths[d] = []
             self.images_labels[d] = []
             for root, dirs, files in os.walk(self.dataset_path+d):
                 for f in files:
-                    if f.endswith('.jpg') or f.endswith('.png'):
+                    if f.endswith('.jpg') or f.endswith('.png') and not f.startswith('._'):
                         self.images_paths[d].append(root+'/'+f)
                         self.images_labels[d].append(root.split('/')[-1])
 
-
     def _get_images_set(self, clsss, idxs, s='train', with_aug=True):
         if type(clsss) is list:
             indxs = [self.indexes[s][cl][idx] for cl, idx in zip(clsss, idxs)]
         else:
             indxs = [self.indexes[s][clsss][idx] for idx in idxs]
+
         imgs = [cv2.imread(self.images_paths[s][idx]) for idx in indxs]
 
         if self.input_shape:
@@ -326,10 +327,10 @@ def _get_images_set(self, clsss, idxs, s='train', with_aug=True):
 
         return imgs
 
-
     def get_batch(self, batch_size,  s='train'):
-        images = [np.zeros((batch_size, self.input_shape[0], self.input_shape[1], 3))]
-        targets = np.zeros((batch_size,self.n_classes[s]))
+        images = [
+            np.zeros((batch_size, self.input_shape[0], self.input_shape[1], 3))]
+        targets = np.zeros((batch_size, self.n_classes[s]))
 
         count = 0
         with_aug = s == 'train' and self.augmentations
@@ -339,30 +340,27 @@ def get_batch(self, batch_size,  s='train'):
             selected_class_n_elements = len(self.indexes[s][selected_class])
 
             indx = random.randrange(0, selected_class_n_elements)
-            
+
             img = self._get_images_set(
                 [selected_class], [indx], s=s, with_aug=with_aug)
             images[0][count, :, :, :] = img[0]
             targets[i][selected_class_idx] = 1
-            count+=1
+            count += 1
 
         return images, targets
 
-
     def generate(self, batch_size, s='train'):
         """Endless generator over batches produced by ``get_batch``.

         Args:
             batch_size: forwarded to ``get_batch``.
             s: data subset name forwarded to ``get_batch`` (default ``'train'``).

         Yields:
             A two-element tuple holding the pair returned by
             ``self.get_batch(batch_size, s)``.
         """
         while True:
             images, labels = self.get_batch(batch_size, s)
             yield images, labels
 
-
     def get_image(self, img_path):
         """Read an image from disk and, if an input shape is set, resize it.

         Args:
             img_path: path handed to ``cv2.imread``.

         Returns:
             The result of ``cv2.imread``; when ``self.input_shape`` is truthy
             it is resized so that its first two axes are
             ``(input_shape[0], input_shape[1])``.
         """
         img = cv2.imread(img_path)
         if self.input_shape:
             # cv2.resize takes dsize as (width, height), while batches in this
             # class are allocated as (input_shape[0], input_shape[1], 3), i.e.
             # rows first, so the two entries are swapped here. Square shapes
             # are unaffected.
             img = cv2.resize(
                 img, (self.input_shape[1], self.input_shape[0]))
         return img
 
-
     def plot_batch(self, data, targets):
         num_imgs = data[0].shape[0]
         it_val = len(data)

diff --git a/embedding_net/model.py b/embedding_net/model.py
@@ -46,8 +46,6 @@ def __init__(self,  cfg_params):
         self.base_model = []
         self.backbone_model = []
 
-        self.tensorboard_log_path = cfg_params['tensorboard_log_path']
-
         if self.mode == 'siamese':
             self._create_model_siamese()
         elif self.mode == 'triplet':

diff --git a/test.py b/test.py
@@ -3,18 +3,21 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument("config", type=str,
+                        help="path to config file")
     parser.add_argument("--weights", type=str,
                         help="path to trained model weights file")
     parser.add_argument("--encodings", type=str,
                         help="path to trained model encodings file")
     parser.add_argument("--image", type=str, help="path to image file")
     opt = parser.parse_args()
 
+    config_path = opt.config
     weights_path = opt.weights
     encodings_path = opt.encodings
     image_path = opt.image
 
-    model = EmbeddingNet()
+    model = EmbeddingNet(config_path)
     model.load_model(weights_path)
     model.load_encodings(encodings_path)
 

diff --git a/train.py b/train.py
@@ -33,6 +33,9 @@ def main():
 
     model = EmbeddingNet(cfg_params)
 
+    if args.resume_from is not None:
+        model.load_model(args.resume_from)
+
     weights_save_file = os.path.join(
         weights_save_path, cfg_params['model_save_name'])