diff --git a/README.md b/README.md
index c4485bb..b9cec45 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,7 @@ Examples of augmentation are given in [https://github.com/matterport/Mask_RCNN](
 
 #### Training Loss
 
+The model was trained starting from pretrained COCO weights. Instead of a single long training run, it was trained in several shorter stages so that changes in the loss could be observed as parameters changed, and to avoid overfitting. Because the dataset was small, the network saturated quickly and needed extra augmentation to keep improving. I also did not want to go overboard with augmentation, so I tested which transformations worked best. Below are the logs of the final training run with the augmentation described above.
 
 | heads Epoch | all Epoch | loss | val_loss |
diff --git a/inference.py b/inference.py
index efe143c..93b0933 100644
--- a/inference.py
+++ b/inference.py
@@ -12,9 +12,8 @@ import matplotlib.pyplot as plt
 import skimage.io
 import time
 
-# from imgaug import augmenters as iaa
 
-# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
 ROOT_DIR = os.path.abspath("./")
 MODEL_DIR = os.path.join('./', "logs")
@@ -31,7 +30,7 @@
 
 
-class ShapesConfig(Config):
+class LyftChallengeConfig(Config):
     """Configuration for training on the toy shapes dataset.
     Derives from the base Config class and overrides values specific
     to the toy shapes dataset.
     """
@@ -68,10 +67,10 @@ class ShapesConfig(Config):
 
-config = ShapesConfig()
+config = LyftChallengeConfig()
 config.display()
 
-class InferenceConfig(ShapesConfig):
+class InferenceConfig(LyftChallengeConfig):
     GPU_COUNT = 1
     IMAGES_PER_GPU = 1
@@ -99,9 +98,6 @@ class InferenceConfig(ShapesConfig):
 def segment_images(original_image):
     results = model.detect([original_image], verbose=0)
-    # print ("-"*80)
-    # print(len(results))
-    # print ("-"*80)
     r = results[0]
     f_mask = r['masks']
     f_class = r["class_ids"]
@@ -123,34 +119,6 @@ def segment_images(original_image):
         final_img = cv2.addWeighted(final_img, 1, mask1.astype(np.uint8), 1, 0)
     return final_img
 
-def segment_images_batch(original_images):
-    results = model.detect(original_images, verbose=0)
-    print ("-"*80)
-    print(len(results))
-    print ("-"*80)
-    images = []
-    for idx,res in enumerate(results):
-        r = res
-        f_mask = r['masks']
-        f_class = r["class_ids"]
-
-
-        no_ch = f_mask.shape[2]
-        final_img = np.copy(original_images[idx,:,:,:])
-        for ch in range(no_ch):
-
-            _id = f_class[ch]
-            if _id==1:
-                color_id=0
-            else:
-                color_id=1
-            mask_1 = f_mask[:,:,ch]
-            mask1 = np.dstack([mask_1*colors[color_id][0],
-                        mask_1*colors[color_id][1],
-                        mask_1*colors[color_id][2]])
-            final_img = cv2.addWeighted(final_img, 1, mask1.astype(np.uint8), 1, 0)
-        images.append(final_img)
-    return np.dstack(images)
 
 import sys, skvideo.io, json, base64
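For context on what `segment_images` above consumes: in the matterport Mask_RCNN API, `model.detect` returns one dict per image whose `'masks'` entry is an `[H, W, N]` boolean array (one channel per detected instance) and whose `'class_ids'` entry gives the class of each channel. A minimal standalone sketch of the same overlay idea follows; the `colors` palette and `overlay_masks` helper are illustrative, not part of the repo:

```python
import numpy as np
import cv2

# Illustrative palette: class 1 (road) -> red, class 2 (car) -> green.
colors = [(255, 0, 0), (0, 255, 0)]

def overlay_masks(image, masks, class_ids):
    """Blend each instance mask onto the image in its class colour.

    image:     [H, W, 3] uint8 RGB frame
    masks:     [H, W, N] boolean array, one channel per detection
    class_ids: [N] array of class ids (1 = road, 2 = car)
    """
    out = np.copy(image)
    for ch in range(masks.shape[2]):
        color = colors[0] if class_ids[ch] == 1 else colors[1]
        # Turn the boolean mask into a 3-channel colour layer.
        layer = np.dstack([masks[:, :, ch] * c for c in color]).astype(np.uint8)
        # Additive blend, as in segment_images above.
        out = cv2.addWeighted(out, 1, layer, 1, 0)
    return out
```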
diff --git a/test_inference.py b/test_inference.py
index 3b7e736..845f7ef 100644
--- a/test_inference.py
+++ b/test_inference.py
@@ -8,11 +8,11 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
-ROOT_DIR = os.path.abspath("./Lyft_challenge")
-MODEL_DIR = os.path.join('./Lyft_challenge', "logs")
+ROOT_DIR = os.path.abspath("./")
+MODEL_DIR = os.path.join('./', "logs")
 
 sys.path.append(ROOT_DIR)  # To find local version of the library
-sys.path.append(os.path.join(os.getcwd(),"./Lyft_challenge/Mask_RCNN/"))
+sys.path.append(os.path.join(os.getcwd(),"./Mask_RCNN/"))
 
 from mrcnn.config import Config
 from mrcnn import utils
@@ -21,13 +21,13 @@ from mrcnn.model import log
 
-class ShapesConfig(Config):
+class LyftChallengeConfig(Config):
     """Configuration for training on the toy shapes
     dataset. Derives from the base Config class and overrides values specific
     to the toy shapes dataset.
     """
     # Give the configuration a recognizable name
-    NAME = "shapes"
+    NAME = "lyft_perception_challenge"
 
     # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
     # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
@@ -57,11 +57,11 @@ class ShapesConfig(Config):
     VALIDATION_STEPS = 5
 
-config = ShapesConfig()
+config = LyftChallengeConfig()
 # config.display()
 
-class InferenceConfig(ShapesConfig):
+class InferenceConfig(LyftChallengeConfig):
     GPU_COUNT = 1
     IMAGES_PER_GPU = 1
@@ -77,7 +77,7 @@ class InferenceConfig(ShapesConfig):
                           config=inference_config,
                           model_dir=MODEL_DIR)
 
-model_path = os.path.join('./Lyft_challenge', "mask_rcnn_lyft.h5")
+model_path = os.path.join('./', "mask_rcnn_lyft.h5")
 assert model_path != "", "Provide path to trained weights"
 # print("Loading weights from ", model_path)
 model.load_weights(model_path, by_name=True)
@@ -101,12 +101,6 @@ def segment_image(image_frame):
 
     return car_mask,road_mask
 
-# Define encoder function
-# def encode(array):
-#     pil_img = Image.fromarray(array)
-#     buff = BytesIO()
-#     pil_img.save(buff, format="PNG")
-#     return base64.b64encode(buff.getvalue()).decode("utf-8")
 
 def encode(array):
     retval, buffer = cv2.imencode('.png', array)
@@ -121,13 +115,7 @@ def encode(array):
 
 for rgb_frame in video:
 
-    # Grab red channel
-    # red = rgb_frame[:,:,0]
-    # Look for red cars :)
-    #
-    # Look for road :)
-
     car_mask,road_mask = segment_image(rgb_frame)
 
     binary_car_result = car_mask*1
     binary_road_result = road_mask*1
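The `encode` change above swaps the commented-out PIL/BytesIO implementation for OpenCV's in-memory PNG encoder. The hunk truncates the body after the `cv2.imencode` call, so the base64 return step in this sketch is an assumption modelled on the removed PIL version:

```python
import base64
import cv2
import numpy as np

def encode(array):
    # Encode the binary mask as a PNG entirely in memory (no temp file),
    # then base64 it for the JSON submission format.
    retval, buffer = cv2.imencode('.png', array)
    return base64.b64encode(buffer).decode("utf-8")

# Usage: a binary mask like binary_car_result above, as a uint8 array.
mask = np.zeros((600, 800), dtype=np.uint8)
encoded = encode(mask)
```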
diff --git a/train_mrcnn.py b/train_mrcnn.py
index a7e9283..607031a 100644
--- a/train_mrcnn.py
+++ b/train_mrcnn.py
@@ -30,18 +30,19 @@ MODEL_DIR = os.path.join('./', "logs")
 
 COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
 
+
 # Download COCO trained weights from Releases if needed
 if not os.path.exists(COCO_MODEL_PATH):
     utils.download_trained_weights(COCO_MODEL_PATH)
 
 class LyftChallengeConfig(Config):
-    """Configuration for training on the toy shapes dataset.
+    """Configuration for training on the lyft_perception_challenge dataset.
     Derives from the base Config class and overrides values specific
-    to the toy shapes dataset.
+    to the lyft_perception_challenge dataset.
     """
     # Give the configuration a recognizable name
-    NAME = "shapes"
+    NAME = "lyft_perception_challenge"
 
     # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
     # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
@@ -49,7 +50,7 @@ class LyftChallengeConfig(Config):
     IMAGES_PER_GPU = 1
 
     # Number of classes (including background)
-    NUM_CLASSES = 1 + 2  # background + 2 shapes
+    NUM_CLASSES = 1 + 2  # background + 2 classes
 
     # Use small images for faster training. Set the limits of the small side
     # the large side, and that determines the image shape.
@@ -79,11 +80,10 @@ class lyftDataset(utils.Dataset):
     random_idx=0
     def load_images(self,dataset_dir,dataset_type='train'):
         image_paths = os.path.join(dataset_dir,'CameraRGB')
-        # image_paths = os.path.join(dataset_dir,'extraRGB')
         images = os.listdir(image_paths)
 
-        self.add_class("shapes", 1, "road")
-        self.add_class("shapes", 2, "car")
+        self.add_class("lyft_perception_challenge", 1, "road")
+        self.add_class("lyft_perception_challenge", 2, "car")
 
         if dataset_type=='train':
             images = images[:900]
@@ -98,7 +98,7 @@ def load_images(self,dataset_dir,dataset_type='train'):
             # height, width = image.shape[:2]
             print("[image]",os.path.join(image_paths,_image))
             self.add_image(
-                "shapes",
+                "lyft_perception_challenge",
                 image_id=_image,  # use file name as a unique image id
                 path=os.path.join(image_paths,_image))
                 # width=width, height=height)
@@ -109,25 +109,24 @@ def load_image(self, image_id):
         # Load image
         image = skimage.io.imread(self.image_info[image_id]['path'])
         image = cv2.resize(image,(256,256))
+        # If grayscale, convert to RGB for consistency.
         if image.ndim != 3:
             image = skimage.color.gray2rgb(image)
+        # If the image has an alpha channel, remove it for consistency.
         if image.shape[-1] == 4:
             image = image[..., :3]
-        # image = cv2.resize(image, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
         return image
 
     def load_mask(self,image_id):
-        # print(self.random_idx)
         self.random_idx+=1
         image_info = self.image_info[image_id]
-        if image_info["source"] != "shapes":
+        if image_info["source"] != "lyft_perception_challenge":
             print("not shape",image_info["source"])
             return super(self.__class__, self).load_mask(image_id)
         info = self.image_info[image_id]
         mask_label = skimage.io.imread(os.path.join("./Train/CameraSeg",info["id"]))
-        # mask_label = skimage.io.imread(os.path.join("./Train/extraSeg",info["id"]))
         mask = self.process_labels(mask_label[:,:,0])
         mask = cv2.resize(mask,(256,256))
@@ -135,8 +134,7 @@ def load_mask(self,image_id):
         return mask,np.array([1,2], dtype=np.int32)
 
     def process_labels(self,labels):
-
-        # labels_new = np.copy(labels)
+
         labels_new = np.zeros(labels.shape)
         labels_new_car = np.zeros(labels.shape)
@@ -160,9 +158,9 @@ def process_labels(self,labels):
 
     def image_reference(self, image_id):
-        """Return the shapes data of the image."""
+        """Return the lyft_perception_challenge data of the image."""
         info = self.image_info[image_id]
-        if info["source"] == "shapes":
+        if info["source"] == "lyft_perception_challenge":
             return info["id"]
         else:
             super(self.__class__).image_reference(self, image_id)
@@ -179,9 +177,7 @@ def image_reference(self, image_id):
 dataset_val.load_images(RGB_PATH,dataset_type='val')
 dataset_val.prepare()
 
-# dataset_test = lyftDataset()
-# dataset_test.load_images(RGB_PATH,dataset_type='test')
-# dataset_test.prepare()
+
 augmentation = iaa.SomeOf((0, None), [
     iaa.Fliplr(0.5),
     iaa.Flipud(0.5),
@@ -190,25 +186,24 @@ def image_reference(self, image_id):
             iaa.Affine(rotate=270)]),
     iaa.Multiply((0.8, 1.5)),
     iaa.GaussianBlur(sigma=(0.0, 5.0)),
-    # iaa.PiecewiseAffine(scale=(0.01, 0.05)),
    iaa.Affine(scale=(0.5, 1.5)),
    iaa.Affine(scale={"x": (0.5, 1.5), "y": (0.5, 1.5)}),
-    # iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25)
    ])
 
-model = modellib.MaskRCNN(mode="training", config=config,
-                          model_dir=MODEL_DIR)
+model = modellib.MaskRCNN(mode="training",
+                          config=config,
+                          model_dir=MODEL_DIR)
 
+# Uncomment this to load the pretrained COCO weights when training for the first time
 # model.load_weights(COCO_MODEL_PATH, by_name=True,
 #                    exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
 #                             "mrcnn_bbox", "mrcnn_mask"])
 
-# model_path = os.path.join('./', "mask_rcnn_lyft.h5")
+model_path = os.path.join('./', "mask_rcnn_lyft.h5")
 
-# model_path = model.find_last()[1]
 # Load trained weights (fill in path to trained weights here)
 assert model_path != "", "Provide path to trained weights"
@@ -217,14 +212,6 @@ def image_reference(self, image_id):
 
 print("Training ...")
 
-# print(model.get_trainable_layers())
-# exit()
-
-# model.train(dataset_train, dataset_val,
-#             learning_rate=config.LEARNING_RATE,
-#             epochs=20,
-#             augmentation=augmentation,
-#             layers='heads')
 
 # model.train(dataset_train, dataset_val,
 #             learning_rate=config.LEARNING_RATE / 100.0,
@@ -233,10 +220,10 @@ def image_reference(self, image_id):
 #             layers="just_mrcnn_mask")
 
 # model.train(dataset_train, dataset_val,
-#             learning_rate=config.LEARNING_RATE ,
+#             learning_rate=config.LEARNING_RATE,
 #             epochs=20,
 #             augmentation=augmentation,
-#             layers="heads")
+#             layers='heads')
 
 model.train(dataset_train, dataset_val,
             learning_rate=config.LEARNING_RATE/10.0,
@@ -249,63 +236,4 @@ def image_reference(self, image_id):
 
 
 
-# inference
-
-class InferenceConfig(LyftChallengeConfig):
-    GPU_COUNT = 1
-    IMAGES_PER_GPU = 1
-
-inference_config = InferenceConfig()
-
-# Recreate the model in inference mode
-model = modellib.MaskRCNN(mode="inference",
-                          config=inference_config,
-                          model_dir=MODEL_DIR)
-
-# Get path to saved weights
-# Either set a specific path or find last trained weights
-# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
-
-
-# # Test on a random image
-
-RED = (255,0,0)
-GREEN = (0,255,0)
-BLUE = (0,0,255)
-colors = [RED,GREEN,BLUE]
-
-
-def segment_images(original_image):
-    results = model.detect([original_image], verbose=0)
-    r = results[0]
-    f_mask = r['masks']
-    f_class = r["class_ids"]
-
-
-    no_ch = f_mask.shape[2]
-    final_img = np.copy(original_image)
-    for ch in range(no_ch):
-
-        _id = f_class[ch]
-        if _id==1:
-            color_id=0
-        else:
-            color_id=1
-        print('id:',_id)
-        mask_1 = f_mask[:,:,ch]
-        mask1 = np.dstack([mask_1*colors[color_id][0],
-                    mask_1*colors[color_id][1],
-                    mask_1*colors[color_id][2]])
-        final_img = cv2.addWeighted(final_img, 1, mask1.astype(np.uint8), 1, 0)
-    return final_img
-
-# for image_id in range(900,1000):
-
-#     original_image = cv2.imread('./Train/CameraRGB/{}.png'.format(image_id))[:,:,::-1]
-
-#     final_img = segment_images(original_image)
-
-#     cv2.imshow('output', final_img[:,:,::-1])
-#     cv2.waitKey(1)
-
 exit()
\ No newline at end of file
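Taken together, train_mrcnn.py now follows the staged fine-tuning flow described in the README hunk at the top of this diff. Below is a condensed sketch of that schedule, reusing the names defined in train_mrcnn.py (`config`, `MODEL_DIR`, `COCO_MODEL_PATH`, `dataset_train`, `dataset_val`, `augmentation`); the stage-2 `epochs` and `layers='all'` values are assumptions, since the final call's remaining arguments are cut off by the last hunk:

```python
import mrcnn.model as modellib  # matterport Mask_RCNN

# Build the model in training mode.
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

# First run only: start from COCO weights, skipping the output layers
# whose shapes depend on NUM_CLASSES.
model.load_weights(COCO_MODEL_PATH, by_name=True,
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                            "mrcnn_bbox", "mrcnn_mask"])

# Stage 1: train only the randomly initialised head layers.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=20,
            augmentation=augmentation,
            layers='heads')

# Stage 2: fine-tune all layers at a reduced learning rate
# (epochs and layers assumed, see note above).
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10.0,
            epochs=40,
            augmentation=augmentation,
            layers='all')
```

Note that in the matterport API the `epochs` argument is a cumulative target rather than a per-call count, so the stage-2 call continues training from epoch 20 up to epoch 40, which is what makes this kind of staged schedule convenient to express.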