diff --git a/README.md b/README.md index b531ed7..387a51c 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,43 @@ Instead of a single training loop, it was trained multiple times in smaller epoc | 20 | 60 | ![loss](./assets/loss4.png) | ![val_loss](./assets/val_loss4.png) | +For the purpose of this competition, I froze the bounding-box and class-label layers and trained only the mask head by adding two new regexes (`just_mrcnn_mask`, `heads_mask`) to the `layer_regex` dictionary in the `train` method of the model found in `Mask_RCNN/mrcnn/model.py`: + +```python + layer_regex = { + # all layers but the backbone + "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + + # just to train the branch dealing with masks + "just_mrcnn_mask": r"(mrcnn\_mask.*)", + "heads_mask": r"(mrcnn\_mask.*)|(rpn\_.*)|(fpn\_.*)", + + # From a specific Resnet stage and up + "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + + # All layers + "all": ".*", + } +``` + +The following snippet shows the training calls: + +```python +model.train(dataset_train, dataset_val, + learning_rate=config.LEARNING_RATE, + epochs=2, + augmentation=augmentation, + layers="just_mrcnn_mask") + +model.train(dataset_train, dataset_val, + learning_rate=config.LEARNING_RATE/10.0, + epochs=10, + augmentation=augmentation, + layers="heads_mask") +``` + ## Results ``` @@ -149,9 +186,20 @@ Your program runs at 1.703 FPS Car F score: 0.519 | Car Precision: 0.509 | Car Recall: 0.521 | Road F score: 0.961 | Road Precision: 0.970 | Road Recall: 0.926 | Averaged F score: 0.740 ``` +I also observed that the class frequencies were imbalanced: there were fewer samples with cars than with roads. 
This could have been solved by weighting the loss function, but I ran into dimension issues while implementing it. + ## Inference and Submission +### Inference +The inference configuration uses a batch size of only one image. To improve the FPS, a batch of images was taken from the video and passed to the pipeline, but this did not improve the FPS significantly and instead increased the complexity, as the test video had an odd number of frames. + +```python +class InferenceConfig(ShapesConfig): + GPU_COUNT = 1 + IMAGES_PER_GPU = 1 +``` + ### Submission Submission requires files to be encoded in a json. `test_inference.py` contains the inference and submission code. In attempt to increase the FPS, The encode function was replaced with the follows which was shared on the forum ```python @@ -160,8 +208,31 @@ def encode(array): return base64.b64encode(buffer).decode("utf-8") ``` + +## Code Execution +- training on CARLA dataset +```sh +python train_mrcnn.py +``` +- test inference on test_video.mp4 +```sh +python inference.py +``` +- submission script `test_inference.py` +```sh +grader 'python Lyft_challenge/test_inference.py' +``` + +## Requirements +Python 3.4, TensorFlow 1.3, Keras 2.0.8, pycocotools and other dependencies required for [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN) + + +## Acknowledgement +This code heavily uses [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN) by [waleedka](https://github.com/waleedka). Thanks for your contribution. + + + ## Reference -https://github.com/matterport/Mask_RCNN ``` @misc{Charles2013, author = {waleedka et.al}, diff --git a/readData.py b/readData.py deleted file mode 100644 index c537cb8..0000000 --- a/readData.py +++ /dev/null @@ -1,90 +0,0 @@ -#! 
/usr/bin/python3.5 -import os -# import glob -# import cv2 -import sys -import random -import math -import numpy as np -import skimage.io -import matplotlib -import matplotlib.pyplot as plt - - -sys.path.append(os.path.join(os.getcwd(),"./Mask_RCNN/")) - -ROOT_DIR = os.path.abspath("./") - -from mrcnn import utils -import mrcnn.model as modellib -from mrcnn import visualize - -sys.path.append(os.path.join("./Mask_RCNN/", "samples/coco/")) # To find local version -import coco - -MODEL_DIR = os.path.join('./', "logs") - -COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") -# Download COCO trained weights from Releases if needed -if not os.path.exists(COCO_MODEL_PATH): - utils.download_trained_weights(COCO_MODEL_PATH) - - -IMAGE_DIR = os.path.join(ROOT_DIR, "Mask_RCNN/images") - -class InferenceConfig(coco.CocoConfig): - # Set batch size to 1 since we'll be running inference on - # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU - GPU_COUNT = 1 - IMAGES_PER_GPU = 1 - -config = InferenceConfig() -config.display() - -model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config) - -model.load_weights(COCO_MODEL_PATH, by_name=True) - - -class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', - 'bus', 'train', 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', - 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', - 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', - 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', - 'kite', 'baseball bat', 'baseball glove', 'skateboard', - 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', - 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', - 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', - 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', - 
'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', - 'teddy bear', 'hair drier', 'toothbrush'] - - -file_names = next(os.walk(IMAGE_DIR))[2] -print(">>",file_names) - -DATASET_DIR = "./Train" - -RGB_DIR = os.path.join(DATASET_DIR,'CameraRGB') -MASK_DIR = os.path.join(DATASET_DIR,'CameraSeg') - -# files = os.listdir(RGB_DIR) - - -# image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names))) -image = skimage.io.imread(os.path.join(RGB_DIR, "51.png")) - -# Run detection -results = model.detect([image], verbose=1) - -# Visualize results -r = results[0] -visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], - class_names, r['scores']) - - - -