From c06e2d90c0623b91ea6b73124037a3fbacdc7011 Mon Sep 17 00:00:00 2001 From: ameyawagh Date: Sun, 3 Jun 2018 08:51:16 -0400 Subject: [PATCH] README updated, test_inference updated to last working --- README.md | 44 ++++++++++++++++++++++++++++---------------- test_inference.py | 14 +++++++++----- train_mrcnn.py | 2 -- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 268ec94..6bdc6c1 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,53 @@ ---- -![Header](./assets/Udacity_Header.png) + +
-# Lyft Perception Challenge --- +# * Lyft Perception Challenge * -## About the challenge -## Approach - -## Training + +The [Lyft Perception Challenge](https://www.udacity.com/lyft-challenge) in association with Udacity had an image segmentation task where the candidates had to submit their algorithm which could segment road and car pixels as precisely as possible. The challenge started on May 1st, 2018 and went through June 3rd, 2018. +## Approach +Although it was a segmentation problem and did not require instance segmentation, I went ahead with [MASK-RCNN](https://arxiv.org/pdf/1703.06870.pdf) as it was the state-of-the-art algorithm in image segmentation and I was always intrigued to learn about it. +### Mask-RCNN +Mask-RCNN, also known as [Detectron](https://github.com/facebookresearch/Detectron), is a research platform for object detection developed by facebookresearch. It uses Resnet as the backbone. +For this application, Resnet-50 was used by setting `BACKBONE = "resnet50"` in config. 
-| heads Epoch | all Epoch | loss | val_loss | -|:-----------:|:---------:|:----:|:--------:| -| 10 | 40 | ![loss](./assets/loss_40.png) | ![val_loss](./assets/val_loss_40.png) | -| 40 | 100 | ![loss](./assets/loss2.png) | ![val_loss](./assets/val_loss2.png) | -| 10 | 40 | ![loss](./assets/loss3.png) | ![val_loss](./assets/val_loss3.png) | -| 20 | 60 | ![loss](./assets/loss4.png) | ![val_loss](./assets/val_loss4.png) | +## Training +| heads Epoch | all Epoch | loss | val_loss | +|:-----------:|:---------:|:-------------------------------:|:-------------------------------------:| +| 10 | 40 | ![loss](./assets/loss_40.png) | ![val_loss](./assets/val_loss_40.png) | +| 40 | 100 | ![loss](./assets/loss2.png) | ![val_loss](./assets/val_loss2.png) | +| 10 | 40 | ![loss](./assets/loss3.png) | ![val_loss](./assets/val_loss3.png) | +| 20 | 60 | ![loss](./assets/loss4.png) | ![val_loss](./assets/val_loss4.png) | ## Results ``` -Your program runs at 1.730 FPS +Your program runs at 1.703 FPS -Car F score: 0.455 | Car Precision: 0.243 | Car Recall: 0.582 | Road F score: 0.949 | Road Precision: 0.986 | Road Recall: 0.825 | Averaged F score: 0.702 +Car F score: 0.519 | Car Precision: 0.509 | Car Recall: 0.521 | Road F score: 0.961 | Road Precision: 0.970 | Road Recall: 0.926 | Averaged F score: 0.740 ``` ## Inference and Submission +## Submission +Submission requires files to be encoded in a json. `test_inference.py` contains the inference and submission code. 
In attempt to increase the FPS, The encode function was replaced with the follows which was shared on the forum +```python +def encode(array): + retval, buffer = cv2.imencode('.png', array) + return base64.b64encode(buffer).decode("utf-8") +``` + ## Reference https://github.com/matterport/Mask_RCNN ``` @@ -51,6 +62,7 @@ https://github.com/matterport/Mask_RCNN } ``` + ## Author Ameya Wagh [aywagh@wpi.edu](aywagh@wpi.edu) \ No newline at end of file diff --git a/test_inference.py b/test_inference.py index 93c6664..3b7e736 100644 --- a/test_inference.py +++ b/test_inference.py @@ -2,7 +2,7 @@ import numpy as np from PIL import Image from io import BytesIO, StringIO - +import cv2 import os @@ -102,11 +102,15 @@ def segment_image(image_frame): return car_mask,road_mask # Define encoder function +# def encode(array): +# pil_img = Image.fromarray(array) +# buff = BytesIO() +# pil_img.save(buff, format="PNG") +# return base64.b64encode(buff.getvalue()).decode("utf-8") + def encode(array): - pil_img = Image.fromarray(array) - buff = BytesIO() - pil_img.save(buff, format="PNG") - return base64.b64encode(buff.getvalue()).decode("utf-8") + retval, buffer = cv2.imencode('.png', array) + return base64.b64encode(buffer).decode("utf-8") video = skvideo.io.vread(file) diff --git a/train_mrcnn.py b/train_mrcnn.py index 5a57f45..a7e9283 100644 --- a/train_mrcnn.py +++ b/train_mrcnn.py @@ -75,8 +75,6 @@ class LyftChallengeConfig(Config): - - class lyftDataset(utils.Dataset): random_idx=0 def load_images(self,dataset_dir,dataset_type='train'):