From 91fec7c24c7bc2a7efaf965d31a36003ff91ac64 Mon Sep 17 00:00:00 2001 From: Gueren Sanford Date: Tue, 9 Apr 2024 16:52:55 -0500 Subject: [PATCH 1/8] Working object detection with visualizer. --- .gitignore | 1 + Dockerfile | 2 +- data/perception/configs/complex_yolov4.cfg | 1160 +++++++++++++++++ .../complex_yolo/__init__.py | 0 .../complex_yolo/darknet2pytorch.py | 451 +++++++ .../complex_yolo/darknet_utils.py | 270 ++++ .../complex_yolo/model_utils.py | 90 ++ .../complex_yolo/yolo_layer.py | 253 ++++ .../lidar_detection_model.py | 156 +++ .../lidar_detection_node.py | 127 ++ .../object_detector_3d/lidar_objdet3d_node.py | 2 +- .../lidar_utils/__init__.py | 0 .../cal_intersection_rotated_boxes.py | 171 +++ .../lidar_utils/evaluation_utils.py | 357 +++++ .../lidar_utils/iou_rotated_boxes_utils.py | 216 +++ .../lidar_utils/kitti_bev_utils.py | 210 +++ .../lidar_utils/kitti_config.py | 66 + .../lidar_utils/torch_utils.py | 27 + src/perception/object_detector_3d/package.xml | 1 + src/perception/object_detector_3d/setup.py | 3 +- .../visualizer/object_visualizer_node.py | 21 +- 21 files changed, 3571 insertions(+), 13 deletions(-) create mode 100644 data/perception/configs/complex_yolov4.cfg create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/__init__.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/__init__.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py diff --git a/.gitignore b/.gitignore index 7fa512d79..c68149a2d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ docs/_site meters_to_gnss_degrees.py data/maps/grand_loop/carlaMapAdjust3.xodr data/maps/grand_loop/grand_loop_medium.pcd +complex_yolov4_mse_loss.pth *.bt *.pyc trace* diff --git a/Dockerfile b/Dockerfile index 627eb3aa8..e70fc21df 100755 --- a/Dockerfile +++ b/Dockerfile @@ -177,7 +177,7 @@ RUN pip3 install \ # Scientific Computing - used widely scipy \ # - shapely==2.0.0 \ + shapely==2.0.2 \ # simple-pid \ # diff --git a/data/perception/configs/complex_yolov4.cfg b/data/perception/configs/complex_yolov4.cfg new file mode 100644 index 000000000..132e6a4e9 --- /dev/null +++ b/data/perception/configs/complex_yolov4.cfg @@ -0,0 +1,1160 @@ +[net] +batch=64 +subdivisions=8 +# Training +#width=512 +#height=512 +width=608 +height=608 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 
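+# training-time data augmentation (saturation / exposure / hue)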
+saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0013 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + 
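+# residual unit: 1x1 conv -> 3x3 conv -> shortcut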
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 54 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 
+pad=1 +filters=30 +activation=linear + + +[yolo] +mask = 0,1,2 +#anchors = 11,14,-3.14, 11,14,0, 11,14,3.14, 11,25,-3.14, 11,25,0, 11,25,3.14, 23,51,-3.14, 23,51,0, 23,51,3.14 +anchors = 11, 15, 0, 10, 24, 0, 11, 25, 0, 23, 49, 0, 23, 55, 0, 24, 53, 0, 24, 60, 0, 27, 63, 0, 29, 74, 0 +classes=3 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1, -16 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=30 +activation=linear + + +[yolo] +mask = 3,4,5 +#anchors = 11,14,-3.14, 11,14,0, 11,14,3.14, 11,25,-3.14, 11,25,0, 11,25,3.14, 23,51,-3.14, 23,51,0, 23,51,3.14 +anchors = 11, 15, 0, 10, 24, 0, 11, 25, 0, 23, 49, 0, 23, 55, 0, 24, 53, 0, 24, 60, 0, 27, 63, 0, 29, 74, 0 +classes=3 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=leaky + +[route] +layers = -1, -37 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=30 +activation=linear + + +[yolo] +mask = 6,7,8 +#anchors = 11,14,-3.14, 11,14,0, 11,14,3.14, 11,25,-3.14, 11,25,0, 11,25,3.14, 23,51,-3.14, 23,51,0, 23,51,3.14 +anchors = 11, 15, 0, 10, 24, 0, 11, 25, 0, 23, 49, 0, 23, 55, 0, 24, 53, 0, 24, 60, 0, 27, 63, 0, 29, 74, 0 +classes=3 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/__init__.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py new file mode 100644 index 000000000..4113c2bd3 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py @@ -0,0 
+1,451 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 +""" + +import sys +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +sys.path.append('../') + +from object_detector_3d.complex_yolo.yolo_layer import YoloLayer +from object_detector_3d.complex_yolo.darknet_utils import parse_cfg, print_cfg, load_fc, load_conv_bn, load_conv +from object_detector_3d.lidar_utils.torch_utils import to_cpu + + +class Mish(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x * (torch.tanh(F.softplus(x))) + return x + + +class MaxPoolDark(nn.Module): + def __init__(self, size=2, stride=1): + super(MaxPoolDark, self).__init__() + self.size = size + self.stride = stride + + def forward(self, x): + ''' + darknet output_size = (input_size + p - k) / s +1 + p : padding = k - 1 + k : size + s : stride + torch output_size = (input_size + 2*p -k) / s +1 + p : padding = k//2 + ''' + p = self.size // 2 + if ((x.shape[2] - 1) // self.stride) != ((x.shape[2] + 2 * p - self.size) // self.stride): + padding1 = (self.size - 1) // 2 + padding2 = padding1 + 1 + else: + padding1 = (self.size - 1) // 2 + padding2 = padding1 + if ((x.shape[3] - 1) // self.stride) != ((x.shape[3] + 2 * p - self.size) // self.stride): + padding3 = (self.size - 1) // 2 + padding4 = padding3 + 1 + else: + padding3 = (self.size - 1) // 2 + padding4 = padding3 + x = F.max_pool2d(F.pad(x, (padding3, padding4, padding1, padding2), mode='replicate'), + self.size, stride=self.stride) + return x + + +class Upsample_expand(nn.Module): + def __init__(self, stride=2): + super(Upsample_expand, self).__init__() + self.stride = stride + + def forward(self, x): + stride = self.stride + assert (x.data.dim() == 4) + B = x.data.size(0) + C = x.data.size(1) + H = x.data.size(2) + W = x.data.size(3) + ws = stride + hs = stride + x = x.view(B, C, H, 1, W, 1).expand(B, C, H, stride, W, stride).contiguous().view(B, C, H * stride, W * stride) + return x + + +class Upsample_interpolate(nn.Module): + def __init__(self, stride): + super(Upsample_interpolate, self).__init__() + self.stride = stride + + def forward(self, x): + x_numpy = x.cpu().detach().numpy() + H = x_numpy.shape[2] + W = x_numpy.shape[3] + + H = H * self.stride + W = W * self.stride + + out = F.interpolate(x, size=(H, W), mode='nearest') + return out + + +class Reorg(nn.Module): + def __init__(self, stride=2): + super(Reorg, self).__init__() + self.stride = stride + + def forward(self, x): + stride = self.stride + assert (x.data.dim() == 4) + B = x.data.size(0) + C = x.data.size(1) + H = x.data.size(2) + W = x.data.size(3) + assert (H % stride == 0) + assert (W % stride == 0) + ws = stride + hs = stride + x = x.view(B, C, H / hs, hs, W / ws, ws).transpose(3, 4).contiguous() + x = x.view(B, C, H / hs * W / ws, hs * ws).transpose(2, 3).contiguous() + x = x.view(B, C, hs * ws, H / hs, W / ws).transpose(1, 2).contiguous() + x = x.view(B, hs * ws * C, H / hs, W / ws) + return x + + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + super(GlobalAvgPool2d, self).__init__() + + def forward(self, x): + N = x.data.size(0) + C = x.data.size(1) + H = x.data.size(2) + W = x.data.size(3) + x = F.avg_pool2d(x, (H, W)) + x = x.view(N, C) + return x + + +# for route and shortcut +class EmptyModule(nn.Module): + def __init__(self): + super(EmptyModule, self).__init__() + + def forward(self, x): + return 
x + + +# support route shortcut and reorg +class Darknet(nn.Module): + def __init__(self, cfgfile, use_giou_loss): + super(Darknet, self).__init__() + self.use_giou_loss = use_giou_loss + self.blocks = parse_cfg(cfgfile) + self.width = int(self.blocks[0]['width']) + self.height = int(self.blocks[0]['height']) + + self.models = self.create_network(self.blocks) # merge conv, bn,leaky + self.yolo_layers = [layer for layer in self.models if layer.__class__.__name__ == 'YoloLayer'] + + self.loss = self.models[len(self.models) - 1] + + self.header = torch.IntTensor([0, 0, 0, 0]) + self.seen = 0 + + def forward(self, x, targets=None): + # batch_size, c, h, w + img_size = x.size(2) + ind = -2 + self.loss = None + outputs = dict() + loss = 0. + yolo_outputs = [] + for block in self.blocks: + ind = ind + 1 + # if ind > 0: + # return x + + if block['type'] == 'net': + continue + elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']: + x = self.models[ind](x) + outputs[ind] = x + elif block['type'] == 'route': + layers = block['layers'].split(',') + layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] + if len(layers) == 1: + if 'groups' not in block.keys() or int(block['groups']) == 1: + x = outputs[layers[0]] + outputs[ind] = x + else: + groups = int(block['groups']) + group_id = int(block['group_id']) + _, b, _, _ = outputs[layers[0]].shape + x = outputs[layers[0]][:, b // groups * group_id:b // groups * (group_id + 1)] + outputs[ind] = x + elif len(layers) == 2: + x1 = outputs[layers[0]] + x2 = outputs[layers[1]] + x = torch.cat((x1, x2), 1) + outputs[ind] = x + elif len(layers) == 4: + x1 = outputs[layers[0]] + x2 = outputs[layers[1]] + x3 = outputs[layers[2]] + x4 = outputs[layers[3]] + x = torch.cat((x1, x2, x3, x4), 1) + outputs[ind] = x + else: + print("rounte number > 2 ,is {}".format(len(layers))) + + elif block['type'] == 'shortcut': + from_layer = int(block['from']) + activation = block['activation'] + from_layer = from_layer if from_layer > 0 else from_layer + ind + x1 = outputs[from_layer] + x2 = outputs[ind - 1] + x = x1 + x2 + if activation == 'leaky': + x = F.leaky_relu(x, 0.1, inplace=True) + elif activation == 'relu': + x = F.relu(x, inplace=True) + outputs[ind] = x + elif block['type'] == 'yolo': + x, layer_loss = self.models[ind](x, targets, img_size, self.use_giou_loss) + loss += layer_loss + yolo_outputs.append(x) + elif block['type'] == 'cost': + continue + else: + print('unknown type %s' % (block['type'])) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def print_network(self): + print_cfg(self.blocks) + + def create_network(self, blocks): + models = nn.ModuleList() + + prev_filters = 3 + out_filters = [] + prev_stride = 1 + out_strides = [] + conv_id = 0 + for block in blocks: + if block['type'] == 'net': + prev_filters = int(block['channels']) + continue + elif block['type'] == 'convolutional': + conv_id = conv_id + 1 + batch_normalize = int(block['batch_normalize']) + filters = int(block['filters']) + kernel_size = int(block['size']) + stride = int(block['stride']) + is_pad = int(block['pad']) + pad = (kernel_size - 1) // 2 if is_pad else 0 + activation = block['activation'] + model = nn.Sequential() + if batch_normalize: + model.add_module('conv{0}'.format(conv_id), + nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False)) + model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters)) + # 
model.add_module('bn{0}'.format(conv_id), BN2d(filters)) + else: + model.add_module('conv{0}'.format(conv_id), + nn.Conv2d(prev_filters, filters, kernel_size, stride, pad)) + if activation == 'leaky': + model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True)) + elif activation == 'relu': + model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True)) + elif activation == 'mish': + model.add_module('mish{0}'.format(conv_id), Mish()) + else: + print("[INFO] No error, the convolution haven't activate {}".format(activation)) + + prev_filters = filters + out_filters.append(prev_filters) + prev_stride = stride * prev_stride + out_strides.append(prev_stride) + models.append(model) + elif block['type'] == 'maxpool': + pool_size = int(block['size']) + stride = int(block['stride']) + if stride == 1 and pool_size % 2: + # You can use Maxpooldark instead, here is convenient to convert onnx. + # Example: [maxpool] size=3 stride=1 + model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=pool_size // 2) + elif stride == pool_size: + # You can use Maxpooldark instead, here is convenient to convert onnx. + # Example: [maxpool] size=2 stride=2 + model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=0) + else: + model = MaxPoolDark(pool_size, stride) + out_filters.append(prev_filters) + prev_stride = stride * prev_stride + out_strides.append(prev_stride) + models.append(model) + elif block['type'] == 'avgpool': + model = GlobalAvgPool2d() + out_filters.append(prev_filters) + models.append(model) + elif block['type'] == 'softmax': + model = nn.Softmax() + out_strides.append(prev_stride) + out_filters.append(prev_filters) + models.append(model) + elif block['type'] == 'cost': + if block['_type'] == 'sse': + model = nn.MSELoss(size_average=True) + elif block['_type'] == 'L1': + model = nn.L1Loss(size_average=True) + elif block['_type'] == 'smooth': + model = nn.SmoothL1Loss(size_average=True) + out_filters.append(1) + out_strides.append(prev_stride) + models.append(model) + elif block['type'] == 'reorg': + stride = int(block['stride']) + prev_filters = stride * stride * prev_filters + out_filters.append(prev_filters) + prev_stride = prev_stride * stride + out_strides.append(prev_stride) + models.append(Reorg(stride)) + elif block['type'] == 'upsample': + stride = int(block['stride']) + out_filters.append(prev_filters) + prev_stride = prev_stride // stride + out_strides.append(prev_stride) + + models.append(Upsample_expand(stride)) + # models.append(Upsample_interpolate(stride)) + + elif block['type'] == 'route': + layers = block['layers'].split(',') + ind = len(models) + layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] + if len(layers) == 1: + if 'groups' not in block.keys() or int(block['groups']) == 1: + prev_filters = out_filters[layers[0]] + prev_stride = out_strides[layers[0]] + else: + prev_filters = out_filters[layers[0]] // int(block['groups']) + prev_stride = out_strides[layers[0]] // int(block['groups']) + elif len(layers) == 2: + assert (layers[0] == ind - 1 or layers[1] == ind - 1) + prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + prev_stride = out_strides[layers[0]] + elif len(layers) == 4: + assert (layers[0] == ind - 1) + prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + \ + out_filters[layers[3]] + prev_stride = out_strides[layers[0]] + else: + print("route error!!!") + + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(EmptyModule()) + 
elif block['type'] == 'shortcut': + ind = len(models) + prev_filters = out_filters[ind - 1] + out_filters.append(prev_filters) + prev_stride = out_strides[ind - 1] + out_strides.append(prev_stride) + models.append(EmptyModule()) + elif block['type'] == 'connected': + filters = int(block['output']) + if block['activation'] == 'linear': + model = nn.Linear(prev_filters, filters) + elif block['activation'] == 'leaky': + model = nn.Sequential( + nn.Linear(prev_filters, filters), + nn.LeakyReLU(0.1, inplace=True)) + elif block['activation'] == 'relu': + model = nn.Sequential( + nn.Linear(prev_filters, filters), + nn.ReLU(inplace=True)) + prev_filters = filters + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(model) + elif block['type'] == 'yolo': + anchor_masks = [int(i) for i in block['mask'].split(',')] + anchors = [float(i) for i in block['anchors'].split(',')] + anchors = [(anchors[i], anchors[i + 1], math.sin(anchors[i + 2]), math.cos(anchors[i + 2])) for i in + range(0, len(anchors), 3)] + anchors = [anchors[i] for i in anchor_masks] + + num_classes = int(block['classes']) + self.num_classes = num_classes + scale_x_y = float(block['scale_x_y']) + ignore_thresh = float(block['ignore_thresh']) + + yolo_layer = YoloLayer(num_classes=num_classes, anchors=anchors, stride=prev_stride, + scale_x_y=scale_x_y, ignore_thresh=ignore_thresh) + + out_filters.append(prev_filters) + out_strides.append(prev_stride) + models.append(yolo_layer) + else: + print('unknown type %s' % (block['type'])) + + return models + + def load_weights(self, weightfile): + fp = open(weightfile, 'rb') + header = np.fromfile(fp, count=5, dtype=np.int32) + self.header = torch.from_numpy(header) + self.seen = self.header[3] + buf = np.fromfile(fp, dtype=np.float32) + fp.close() + + start = 0 + ind = -2 + for block in self.blocks: + if start >= buf.size: + break + ind = ind + 1 + if block['type'] == 'net': + continue + elif block['type'] == 'convolutional': + model = self.models[ind] + batch_normalize = int(block['batch_normalize']) + if batch_normalize: + start = load_conv_bn(buf, start, model[0], model[1]) + else: + start = load_conv(buf, start, model[0]) + elif block['type'] == 'connected': + model = self.models[ind] + if block['activation'] != 'linear': + start = load_fc(buf, start, model[0]) + else: + start = load_fc(buf, start, model) + elif block['type'] == 'maxpool': + pass + elif block['type'] == 'reorg': + pass + elif block['type'] == 'upsample': + pass + elif block['type'] == 'route': + pass + elif block['type'] == 'shortcut': + pass + elif block['type'] == 'yolo': + pass + elif block['type'] == 'avgpool': + pass + elif block['type'] == 'softmax': + pass + elif block['type'] == 'cost': + pass + else: + print('unknown type %s' % (block['type'])) diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py new file mode 100644 index 000000000..5500fb057 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py @@ -0,0 +1,270 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 +""" + +import sys + +import torch + +sys.path.append('../') +from object_detector_3d.lidar_utils.torch_utils import convert2cpu + +__all__ = ['parse_cfg', 'print_cfg', 'load_conv', 'load_conv_bn', 'save_conv', 'save_conv_bn', 
'load_fc', 'save_fc'] + + +def parse_cfg(cfgfile): + blocks = [] + fp = open(cfgfile, 'r') + block = None + line = fp.readline() + while line != '': + line = line.rstrip() + if line == '' or line[0] == '#': + line = fp.readline() + continue + elif line[0] == '[': + if block: + blocks.append(block) + block = dict() + block['type'] = line.lstrip('[').rstrip(']') + # set default value + if block['type'] == 'convolutional': + block['batch_normalize'] = 0 + else: + key, value = line.split('=') + key = key.strip() + if key == 'type': + key = '_type' + value = value.strip() + block[key] = value + line = fp.readline() + + if block: + blocks.append(block) + fp.close() + return blocks + + +def print_cfg(blocks): + print('layer filters size input output') + prev_width = 416 + prev_height = 416 + prev_filters = 3 + out_filters = [] + out_widths = [] + out_heights = [] + ind = -2 + for block in blocks: + ind = ind + 1 + if block['type'] == 'net': + prev_width = int(block['width']) + prev_height = int(block['height']) + continue + elif block['type'] == 'convolutional': + filters = int(block['filters']) + kernel_size = int(block['size']) + stride = int(block['stride']) + is_pad = int(block['pad']) + pad = (kernel_size - 1) // 2 if is_pad else 0 + width = (prev_width + 2 * pad - kernel_size) // stride + 1 + height = (prev_height + 2 * pad - kernel_size) // stride + 1 + print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, + height, filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'maxpool': + pool_size = int(block['size']) + stride = int(block['stride']) + width = prev_width // stride + height = prev_height // stride + print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, + filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'avgpool': + width = 1 + height = 1 + print('%5d %-6s %3d x %3d x%4d -> %3d' % ( + ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'softmax': + print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'cost': + print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'reorg': + stride = int(block['stride']) + filters = stride * stride * prev_filters + width = prev_width // stride + height = prev_height // stride + print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'upsample': + stride = int(block['stride']) + filters = 
prev_filters + width = prev_width * stride + height = prev_height * stride + print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( + ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) + prev_width = width + prev_height = height + prev_filters = filters + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'route': + layers = block['layers'].split(',') + layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] + if len(layers) == 1: + print('%5d %-6s %d' % (ind, 'route', layers[0])) + prev_width = out_widths[layers[0]] + prev_height = out_heights[layers[0]] + prev_filters = out_filters[layers[0]] + elif len(layers) == 2: + print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) + prev_width = out_widths[layers[0]] + prev_height = out_heights[layers[0]] + assert (prev_width == out_widths[layers[1]]) + assert (prev_height == out_heights[layers[1]]) + prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + elif len(layers) == 4: + print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3])) + prev_width = out_widths[layers[0]] + prev_height = out_heights[layers[0]] + assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]]) + assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]]) + prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[ + layers[3]] + else: + print("route error !!! {} {} {}".format(sys._getframe().f_code.co_filename, + sys._getframe().f_code.co_name, sys._getframe().f_lineno)) + + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] in ['region', 'yolo']: + print('%5d %-6s' % (ind, 'detection')) + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'shortcut': + from_id = int(block['from']) + from_id = from_id if from_id > 0 else from_id + ind + print('%5d %-6s %d' % (ind, 'shortcut', from_id)) + prev_width = out_widths[from_id] + prev_height = out_heights[from_id] + prev_filters = out_filters[from_id] + out_widths.append(prev_width) + out_heights.append(prev_height) + out_filters.append(prev_filters) + elif block['type'] == 'connected': + filters = int(block['output']) + print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) + prev_filters = filters + out_widths.append(1) + out_heights.append(1) + out_filters.append(prev_filters) + else: + print('unknown type %s' % (block['type'])) + + +def load_conv(buf, start, conv_model): + num_w = conv_model.weight.numel() + num_b = conv_model.bias.numel() + conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) + start = start + num_w + return start + + +def save_conv(fp, conv_model): + if conv_model.bias.is_cuda: + convert2cpu(conv_model.bias.data).numpy().tofile(fp) + convert2cpu(conv_model.weight.data).numpy().tofile(fp) + else: + conv_model.bias.data.numpy().tofile(fp) + conv_model.weight.data.numpy().tofile(fp) + + +def load_conv_bn(buf, start, conv_model, bn_model): + num_w = conv_model.weight.numel() + num_b = bn_model.bias.numel() + bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + 
bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) + start = start + num_w + return start + + +def save_conv_bn(fp, conv_model, bn_model): + if bn_model.bias.is_cuda: + convert2cpu(bn_model.bias.data).numpy().tofile(fp) + convert2cpu(bn_model.weight.data).numpy().tofile(fp) + convert2cpu(bn_model.running_mean).numpy().tofile(fp) + convert2cpu(bn_model.running_var).numpy().tofile(fp) + convert2cpu(conv_model.weight.data).numpy().tofile(fp) + else: + bn_model.bias.data.numpy().tofile(fp) + bn_model.weight.data.numpy().tofile(fp) + bn_model.running_mean.numpy().tofile(fp) + bn_model.running_var.numpy().tofile(fp) + conv_model.weight.data.numpy().tofile(fp) + + +def load_fc(buf, start, fc_model): + num_w = fc_model.weight.numel() + num_b = fc_model.bias.numel() + fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) + start = start + num_b + fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])) + start = start + num_w + return start + + +def save_fc(fp, fc_model): + fc_model.bias.data.numpy().tofile(fp) + fc_model.weight.data.numpy().tofile(fp) + + +if __name__ == '__main__': + import sys + + blocks = parse_cfg('cfg/yolo.cfg') + if len(sys.argv) == 2: + blocks = parse_cfg(sys.argv[1]) + print_cfg(blocks) diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py new file mode 100644 index 000000000..0b4f3668b --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py @@ -0,0 +1,90 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.06.18 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: utils functions that use for model +""" + +import sys + +import torch + +sys.path.append('../') + +from object_detector_3d.complex_yolo.darknet2pytorch import Darknet + + +def create_model(configs): + """Create model based on architecture name""" + if (configs.arch == 'darknet') and (configs.cfgfile is not None): + print('using darknet') + model = Darknet(cfgfile=configs.cfgfile, use_giou_loss=configs.use_giou_loss) + else: + assert False, 'Undefined model backbone' + + return model + + +def get_num_parameters(model): + """Count number of trained parameters of the model""" + if hasattr(model, 'module'): + num_parameters = sum(p.numel() for p in model.module.parameters() if p.requires_grad) + else: + num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + + return num_parameters + + +def make_data_parallel(model, configs): + if configs.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
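+        # configs is expected to provide gpu_idx, ngpus_per_node, batch_size, and num_workers here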
+ if configs.gpu_idx is not None: + torch.cuda.set_device(configs.gpu_idx) + model.cuda(configs.gpu_idx) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + configs.batch_size = int(configs.batch_size / configs.ngpus_per_node) + configs.num_workers = int((configs.num_workers + configs.ngpus_per_node - 1) / configs.ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[configs.gpu_idx]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif configs.gpu_idx is not None: + torch.cuda.set_device(configs.gpu_idx) + model = model.cuda(configs.gpu_idx) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + model = torch.nn.DataParallel(model).cuda() + + return model + + +if __name__ == '__main__': + import argparse + + from torchsummary import summary + from easydict import EasyDict as edict + + parser = argparse.ArgumentParser(description='Complexer YOLO Implementation') + parser.add_argument('-a', '--arch', type=str, default='darknet', metavar='ARCH', + help='The name of the model architecture') + parser.add_argument('--cfgfile', type=str, default='../config/cfg/complex_yolov4.cfg', metavar='PATH', + help='The path for cfgfile (only for darknet)') + + configs = edict(vars(parser.parse_args())) + + configs.device = torch.device('cuda:1') + + model = create_model(configs).to(device=configs.device) + sample_input = torch.randn((1, 3, 608, 608)).to(device=configs.device) + # summary(model.cuda(), (3, 608, 608)) + output = model(sample_input, targets=None) + print(output.size()) diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py new file mode 100644 index 000000000..34854da3c --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py @@ -0,0 +1,253 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.07.05 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: This script for the yolo layer + +# Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 +# Refer: https://github.com/VCasecnikovs/Yet-Another-YOLOv4-Pytorch +""" + +import sys + +import torch +import torch.nn as nn +import torch.nn.functional as F + +sys.path.append('../') + +from object_detector_3d.lidar_utils.torch_utils import to_cpu +from object_detector_3d.lidar_utils.iou_rotated_boxes_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ + get_polygons_areas_fix_xy + + +class YoloLayer(nn.Module): + """Yolo layer""" + + def __init__(self, num_classes, anchors, stride, scale_x_y, ignore_thresh): + super(YoloLayer, self).__init__() + # Update the attributions when parsing the cfg during create the darknet + self.num_classes = num_classes + self.anchors = anchors + self.num_anchors = len(anchors) + self.stride = stride + self.scale_x_y = scale_x_y + self.ignore_thresh = ignore_thresh + + self.noobj_scale = 100 + self.obj_scale = 1 + self.lgiou_scale = 3.54 + self.leular_scale = 3.54 + self.lobj_scale = 64.3 + self.lcls_scale = 37.4 + + 
self.seen = 0 + # Initialize dummy variables + self.grid_size = 0 + self.img_size = 0 + self.metrics = {} + + def compute_grid_offsets(self, grid_size): + self.grid_size = grid_size + g = self.grid_size + self.stride = self.img_size / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).view([1, 1, g, g]) + self.grid_y = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).t().view([1, 1, g, g]) + self.scaled_anchors = torch.tensor( + [(a_w / self.stride, a_h / self.stride, im, re) for a_w, a_h, im, re in self.anchors], device=self.device, + dtype=torch.float) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + # Pre compute polygons and areas of anchors + self.scaled_anchors_polygons, self.scaled_anchors_areas = get_polygons_areas_fix_xy(self.scaled_anchors) + + def build_targets(self, pred_boxes, pred_cls, target, anchors): + """ Built yolo targets to compute loss + :param out_boxes: [num_samples or batch, num_anchors, grid_size, grid_size, 6] + :param pred_cls: [num_samples or batch, num_anchors, grid_size, grid_size, num_classes] + :param target: [num_boxes, 8] + :param anchors: [num_anchors, 4] + :return: + """ + nB, nA, nG, _, nC = pred_cls.size() + n_target_boxes = target.size(0) + + # Create output tensors on "device" + obj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.uint8) + noobj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=1, device=self.device, dtype=torch.uint8) + class_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + iou_scores = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + tx = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + ty = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + tw = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + th = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + tim = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + tre = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) + tcls = torch.full(size=(nB, nA, nG, nG, nC), fill_value=0, device=self.device, dtype=torch.float) + tconf = obj_mask.float() + giou_loss = torch.tensor([0.], device=self.device, dtype=torch.float) + + if n_target_boxes > 0: # Make sure that there is at least 1 box + b, target_labels = target[:, :2].long().t() + target_boxes = torch.cat((target[:, 2:6] * nG, target[:, 6:8]), dim=-1) # scale up x, y, w, h + + gxy = target_boxes[:, :2] + gwh = target_boxes[:, 2:4] + gimre = target_boxes[:, 4:6] + + targets_polygons, targets_areas = get_polygons_areas_fix_xy(target_boxes[:, 2:6]) + # Get anchors with best iou + ious = iou_rotated_boxes_targets_vs_anchors(self.scaled_anchors_polygons, self.scaled_anchors_areas, + targets_polygons, targets_areas) + best_ious, best_n = ious.max(0) + + gx, gy = gxy.t() + gw, gh = gwh.t() + gim, gre = gimre.t() + gi, gj = gxy.long().t() + # Set masks + obj_mask[b, best_n, gj, gi] = 1 + noobj_mask[b, best_n, gj, gi] = 0 + + # Set noobj mask to zero where iou exceeds ignore threshold + for i, anchor_ious in enumerate(ious.t()): + noobj_mask[b[i], anchor_ious > self.ignore_thresh, 
gj[i], gi[i]] = 0 + + # Coordinates + tx[b, best_n, gj, gi] = gx - gx.floor() + ty[b, best_n, gj, gi] = gy - gy.floor() + # Width and height + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) + # Im and real part + tim[b, best_n, gj, gi] = gim + tre[b, best_n, gj, gi] = gre + + # One-hot encoding of label + tcls[b, best_n, gj, gi, target_labels] = 1 + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() + ious, giou_loss = iou_pred_vs_target_boxes(pred_boxes[b, best_n, gj, gi], target_boxes, + GIoU=self.use_giou_loss) + iou_scores[b, best_n, gj, gi] = ious + if self.reduction == 'mean': + giou_loss /= n_target_boxes + tconf = obj_mask.float() + + return iou_scores, giou_loss, class_mask, obj_mask.type(torch.bool), noobj_mask.type(torch.bool), \ + tx, ty, tw, th, tim, tre, tcls, tconf + + def forward(self, x, targets=None, img_size=608, use_giou_loss=False): + """ + :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size] + :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw)) + :param img_size: default 608 + :return: + """ + self.img_size = img_size + self.use_giou_loss = use_giou_loss + self.device = x.device + num_samples, _, _, grid_size = x.size() + + prediction = x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size) + prediction = prediction.permute(0, 1, 3, 4, 2).contiguous() + # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7] + + # Get outputs + pred_x = torch.sigmoid(prediction[..., 0]) + pred_y = torch.sigmoid(prediction[..., 1]) + pred_w = prediction[..., 2] # Width + pred_h = prediction[..., 3] # Height + pred_im = prediction[..., 4] # angle imaginary part + pred_re = prediction[..., 5] # angle real part + pred_conf = torch.sigmoid(prediction[..., 6]) # Conf + pred_cls = torch.sigmoid(prediction[..., 7:]) # Cls pred. 
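+        # Per-anchor channel layout: x, y, w, h, im (sin yaw), re (cos yaw), objectness, then class scores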
+ + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size) + + # Add offset and scale with anchors + # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6] + pred_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float) + pred_boxes[..., 0] = pred_x + self.grid_x + pred_boxes[..., 1] = pred_y + self.grid_y + pred_boxes[..., 2] = torch.exp(pred_w).clamp(max=1E3) * self.anchor_w + pred_boxes[..., 3] = torch.exp(pred_h).clamp(max=1E3) * self.anchor_h + pred_boxes[..., 4] = pred_im + pred_boxes[..., 5] = pred_re + + output = torch.cat(( + pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride, + pred_boxes[..., 4:6].view(num_samples, -1, 2), + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), dim=-1) + # output size: [num_samples, num boxes, 7 + num_classes] + + if targets is None: + return output, 0 + else: + self.reduction = 'mean' + iou_scores, giou_loss, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets( + pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors) + + loss_x = F.mse_loss(pred_x[obj_mask], tx[obj_mask], reduction=self.reduction) + loss_y = F.mse_loss(pred_y[obj_mask], ty[obj_mask], reduction=self.reduction) + loss_w = F.mse_loss(pred_w[obj_mask], tw[obj_mask], reduction=self.reduction) + loss_h = F.mse_loss(pred_h[obj_mask], th[obj_mask], reduction=self.reduction) + loss_im = F.mse_loss(pred_im[obj_mask], tim[obj_mask], reduction=self.reduction) + loss_re = F.mse_loss(pred_re[obj_mask], tre[obj_mask], reduction=self.reduction) + loss_im_re = (1. - torch.sqrt(pred_im[obj_mask] ** 2 + pred_re[obj_mask] ** 2)) ** 2 # as tim^2 + tre^2 = 1 + loss_im_re_red = loss_im_re.sum() if self.reduction == 'sum' else loss_im_re.mean() + loss_eular = loss_im + loss_re + loss_im_re_red + + loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask], tconf[obj_mask], reduction=self.reduction) + loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask], tconf[noobj_mask], reduction=self.reduction) + loss_cls = F.binary_cross_entropy(pred_cls[obj_mask], tcls[obj_mask], reduction=self.reduction) + + if self.use_giou_loss: + loss_obj = loss_conf_obj + loss_conf_noobj + total_loss = giou_loss * self.lgiou_scale + loss_eular * self.leular_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale + else: + loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj + total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_obj + loss_cls + + # Metrics (store loss values using tensorboard) + cls_acc = 100 * class_mask[obj_mask].mean() + conf_obj = pred_conf[obj_mask].mean() + conf_noobj = pred_conf[noobj_mask].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "iou_score": to_cpu(iou_scores[obj_mask].mean()).item(), + 'giou_loss': to_cpu(giou_loss).item(), + 'loss_x': to_cpu(loss_x).item(), + 'loss_y': to_cpu(loss_y).item(), + 'loss_w': to_cpu(loss_w).item(), + 'loss_h': to_cpu(loss_h).item(), + 'loss_eular': to_cpu(loss_eular).item(), + 
'loss_im': to_cpu(loss_im).item(), + 'loss_re': to_cpu(loss_re).item(), + "loss_obj": to_cpu(loss_obj).item(), + "loss_cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item() + } + + return output, total_loss diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py new file mode 100644 index 000000000..ec0c2b34f --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py @@ -0,0 +1,156 @@ +""" +Package: object_detector_3d +Filename: lidar_detection_model.py +Author: Gueren Sanford +Email: guerensanford@gmail.com +Copyright: 2021, Nova UTD +License: MIT License +Use this class to make object detections with LiDAR data in the format +[x,y,z,intensity]. We use the model Complex Yolo to make the +predictions, which requires a birds eye view map of the LiDAR data. +With the predictions, feed it into the postprocess to filter out and +format the results. +""" + +# Python Imports +import random +import numpy as np +import ros2_numpy as rnp +import torch + +# Local Import +from object_detector_3d.complex_yolo.darknet2pytorch import Darknet +from object_detector_3d.lidar_utils import kitti_bev_utils +from object_detector_3d.lidar_utils.evaluation_utils import \ + rescale_boxes, post_processing_v2 +import object_detector_3d.lidar_utils.kitti_config as cnf + +# Message definitions +from geometry_msgs.msg import Point +from sensor_msgs.msg import PointCloud2 +from navigator_msgs.msg import BoundingBox3D, Object3D, Object3DArray + +config_file = './data/perception/configs/complex_yolov4.cfg' +model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth' + +class LidarDetectionModel(): + def __init__(self, device: torch.device, use_giou_loss: bool = True): + """! Initializes the node. + @param device[torch.device] The device the model will run on + @param use_giou_loss[bool] Use generalized intersection over + union as a loss function + """ + self.img_size = 608 # Determined by the birds eye view setup + self.device = device + + # Setup for the Complex Yolo model + self.model = Darknet(cfgfile=config_file, + use_giou_loss=use_giou_loss) + self.model.load_state_dict(torch.load(model_path, + map_location=self.device)) + self.model.to(device=self.device) + + def preprocess(self, lidar_msg: PointCloud2): + """! Converts ROS LiDAR message into the bird's eye view input + tensor for the model. + @param lidar_msg[PointCloud2] The ros2 lidar dmessage data + formatted as a PointCloud2 message. + @return torch.Tensor A 608 x 608 birds eye view image of the + point cloud data. Red = Intensity, Green = Height, Blue = + Density + """ + + # Point cloud byte stream -> np array with all fields and dtypes + lidar_np = rnp.numpify(lidar_msg) + # Reduces the vertical res from 128 to 64 + lidar_np = lidar_np[::2] # matches Kitti dataset + # Combines necessary lidar data into an array [n, n, n, n]] + lidar_pcd = np.array([ # Avoids np array dtype issues + lidar_np['x'].flatten(), + lidar_np['y'].flatten(), + lidar_np['z'].flatten(), + lidar_np['reflectivity'].flatten()]) + # Tranforms array to shape [(x y z r), n] + lidar_pcd = np.transpose(lidar_pcd) + lidar_pcd[:, 3] /= 255.0 # Normalizes refl. 
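+        # The BEV encoder below crops points to cnf.boundary and rasterizes intensity/height/density into a 608x608 map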
+ + # Removes the points outside birds eye view (bev) map + reduced_lidar = kitti_bev_utils.removePoints(lidar_pcd, + cnf.boundary) + # Turns the point cloud into a bev rgb map + rgb_map = kitti_bev_utils.makeBVFeature(reduced_lidar, + cnf.DISCRETIZATION, cnf.boundary) + + return rgb_map + + def predict(self, input_bev: torch.Tensor): + """! Uses the tensor from the preprocess fn to return the + bounding boxes for the objects. + @param input_bev[torch.Tensor] The result of the preprocess + function. + @return torch.Tensor The output of the model in the shape + [1, boxes, x y w l classes], where the classes are Car, + Pedestrian, Cyclist, Van, Person_sitting. + """ + # The model outputs [1, boxes, x y w l classes] + return self.model(input_bev) + + def postprocess(self, rgb_map: torch.Tensor, + predictions: torch.Tensor, conf_thresh: int, + nms_thresh: int): + """! + NEEDS OPTIMIZATION + + @param input_bev[torch.Tensor] The result of the preprocess + function. + @param predictions[torch.Tensor] The output of the model in + the shape [1, boxes, x y w l classes], where the classes + are Car, Pedestrian, Cyclist, Van, Person_sitting. + @param conf_thresh[int] The mininum confidence value accepted + for bounding boxes. + @param nms_thresh[int] The maximum accepted intersection over + union value for bounding boxes. + @return navigator_msgs.msg.Object3DArray A ros2 message ready + to be published. Before publishing, the header needs to be + attached. + """ + # Returns detections with shape: (x, y, w, l, im, re, object_conf, class_score, class_pred) + detections = post_processing_v2(predictions, conf_thresh=conf_thresh, + nms_thresh=nms_thresh)[0] + + # Skip if no detections made + if detections is None: + return None + + predictions = [] + for x, y, w, l, im, re, cls_conf, *_, cls_pred in detections: + predictions.append([cls_pred, cls_conf, x / self.img_size, y / self.img_size, + w / self.img_size, l / self.img_size, im, re]) + + # Returns shape [c, c_conf, x, y, z, w, l, h, yaw] + predictions = kitti_bev_utils.inverse_yolo_target(np.array(predictions), cnf.boundary) # IMPROVE + + # Turns the predictions into array of Object3D msg types + object_3d_array = Object3DArray() + for prediction in predictions: + # Defines the custom ros2 msgs + bounding_box = BoundingBox3D() + object_3d = Object3D() + + # Need x, y, z, x_size, y_size, z_size, yaw + bounding_box.coordinates = prediction[2:] + + # Returns the corners in the order specified by BoundingBox3D msg + corners_3d = kitti_bev_utils.get_corners_3d(*bounding_box.coordinates) # IMPROVE get_corners + # Fills the Point corners array + for i, corner_3d in enumerate(corners_3d): + bounding_box.corners[i] = Point(x=corner_3d[0], y=corner_3d[1], z=corner_3d[2]) + + object_3d.label = (int(prediction[0]) - 1) % 3 # changes label to match format of message + object_3d.id = random.randint(0, 2**16-1) + object_3d.confidence_score = prediction[1] + object_3d.bounding_box = bounding_box + + object_3d_array.objects.append(object_3d) + + return object_3d_array \ No newline at end of file diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py new file mode 100644 index 000000000..311270cc1 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py @@ -0,0 +1,127 @@ +""" +Package: object_detector_3d +Filename: lidar_detection_node.py +Author: Gueren Sanford +Email: guerensanford@gmail.com +Copyright: 2021, Nova 
UTD
+License: MIT License
+Subscribes to filtered LiDAR point clouds, runs the Complex YOLO detection
+model on them, and publishes the detected objects as an Object3DArray message.
+"""
+
+# Python Imports
+import torch, math
+import numpy as np
+from time import time # DEBUG
+
+# Local Import
+from object_detector_3d.lidar_detection_model import LidarDetectionModel
+
+# Ros Imports
+import rclpy
+import ros2_numpy as rnp
+from rclpy.node import Node
+import sensor_msgs_py.point_cloud2 as pc2
+
+# Message definitions
+from rosgraph_msgs.msg import Clock
+from builtin_interfaces.msg import Time
+from sensor_msgs.msg import PointCloud2
+from navigator_msgs.msg import Object3DArray
+
+class LidarDetectionNode(Node):
+
+    def __init__(self):
+        """! Initializes the node."""
+        super().__init__("lidar_detection_node")
+
+        # Paths to pretrained model and config
+        model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth'
+        config_file = './data/perception/configs/complex_yolov4.cfg'
+
+        # Declare default ROS2 node parameters
+        self.declare_parameter('device', 'cuda:1') # cpu | cuda:{gpu num}
+        self.declare_parameter('conf_thresh', 0.4) # 0.0 - 1.0
+        self.declare_parameter('nms_thresh', 0.2) # 0.0 - 1.0
+        self.declare_parameter('down_sample', False)
+
+        # Get ROS2 parameters
+        self.device = torch.device(self.get_parameter('device') \
+            .get_parameter_value().string_value)
+        self.conf_thresh = self.get_parameter('conf_thresh') \
+            .get_parameter_value().double_value
+        self.nms_thresh = self.get_parameter('nms_thresh') \
+            .get_parameter_value().double_value
+        self.down_sample = self.get_parameter('down_sample') \
+            .get_parameter_value().bool_value
+
+        # Instantiates the model
+        self.model = LidarDetectionModel(self.device)
+        self.clock = Clock()
+
+        # Subscribes to filtered lidar
+        self.lidar_sub = self.create_subscription(
+            PointCloud2, '/lidar', self.lidar_callback, 10)
+        # Subscribes to clock for headers
+        self.clock_sub = self.create_subscription(
+            Clock, '/clock', self.clock_cb, 10)
+
+        # Instantiates publishers
+        self.objects3d_pub = self.create_publisher(
+            Object3DArray, 'detected/objects3d', 10)
+
+    def clock_cb(self, msg):
+        """!
+        Updates the clock for message headers.
+        @param msg[Clock] The clock message.
+        """
+        self.stamp = Time()
+        self.stamp.sec = msg.clock.sec
+        self.stamp.nanosec = msg.clock.nanosec
+
+    def lidar_callback(self, lidar_msg: PointCloud2):
+        """!
+        Runs the detection model on the incoming point cloud and publishes
+        the resulting detections as an Object3DArray message.
+        @param lidar_msg[PointCloud2] The filtered lidar point cloud.
+ """ + + # 608x608 birds eye view rgb map of points + rgb_map = self.model.preprocess(lidar_msg) + # Turned into a tensor for the model + input_bev = torch.tensor(rgb_map, device=self.device).float() \ + .unsqueeze(0) + + # The model outputs [1, boxes, x y w l classes] + outputs = self.model.predict(input_bev) + + objecst3d_array = self.model.postprocess(rgb_map, outputs, + self.conf_thresh, self.nms_thresh) + + # Return if no detections made + if objecst3d_array is None: + return + + # Attaches the header to the message + objecst3d_array.header.stamp = self.stamp + objecst3d_array.header.frame_id = lidar_msg.header.frame_id + + # Publishes the Object3DArray msg + self.objects3d_pub.publish(objecst3d_array) + +def main(args=None): + rclpy.init(args=args) + + lidar_detection_node = LidarDetectionNode() + + rclpy.spin(lidar_detection_node) + + # Destroy the node explicitly + # (optional - otherwise it will be done automatically + # when the garbage collector destroys the node object) + lidar_detection_node.destroy_node() + rclpy.shutdown() + +if __name__ == '__main__': + main() + + + diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py b/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py index c5c6daac8..723aa40a4 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py @@ -50,7 +50,7 @@ def __init__(self): def lidar_callback(self, lidar_msg: PointCloud2): pcd = rnp.numpify(lidar_msg) - pcd = pcd[::2] # this reduces the rings to 64, it appears to work when running against the parade bag + pcd = pcd[::2] # this reduces the vertical res to 64, it appears to work when running against the parade bag pcd = np.array([ pcd['x'].flatten(), pcd['y'].flatten(), pcd['z'].flatten(), pcd['reflectivity'].flatten()]).T pcd[:, 3] /= 255.0 pcd[:, 2] += self.translate_height diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/__init__.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py new file mode 100644 index 000000000..e79bf9a89 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py @@ -0,0 +1,171 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.07.20 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: This script for intersection calculation of rotated boxes (on GPU) + +Refer from # https://stackoverflow.com/questions/44797713/calculate-the-area-of-intersection-of-two-rotated-rectangles-in-python?noredirect=1&lq=1 +""" + +import torch + + +class Line: + # ax + by + c = 0 + def __init__(self, p1, p2): + """ + + Args: + p1: (x, y) + p2: (x, y) + """ + self.a = p2[1] - p1[1] + self.b = p1[0] - p2[0] + self.c = p2[0] * p1[1] - p2[1] * p1[0] # cross + self.device = p1.device + + def cal_values(self, pts): + return self.a * pts[:, 0] + self.b * pts[:, 1] + self.c + + def find_intersection(self, other): + # See e.g. 
https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates + if not isinstance(other, Line): + return NotImplemented + w = self.a * other.b - self.b * other.a + return torch.tensor([(self.b * other.c - self.c * other.b) / w, (self.c * other.a - self.a * other.c) / w], + device=self.device) + + +def intersection_area(rect1, rect2): + """Calculate the inter + + Args: + rect1: vertices of the rectangles (4, 2) + rect2: vertices of the rectangles (4, 2) + + Returns: + + """ + + # Use the vertices of the first rectangle as, starting vertices of the intersection polygon. + intersection = rect1 + + # Loop over the edges of the second rectangle + roll_rect2 = torch.roll(rect2, -1, dims=0) + for p, q in zip(rect2, roll_rect2): + if len(intersection) <= 2: + break # No intersection + + line = Line(p, q) + + # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), + # any point p with line(p) > 0 is on the "outside". + # Loop over the edges of the intersection polygon, + # and determine which part is inside and which is outside. + new_intersection = [] + line_values = line.cal_values(intersection) + roll_intersection = torch.roll(intersection, -1, dims=0) + roll_line_values = torch.roll(line_values, -1, dims=0) + for s, t, s_value, t_value in zip(intersection, roll_intersection, line_values, roll_line_values): + if s_value <= 0: + new_intersection.append(s) + if s_value * t_value < 0: + # Points are on opposite sides. + # Add the intersection of the lines to new_intersection. + intersection_point = line.find_intersection(Line(s, t)) + new_intersection.append(intersection_point) + + if len(new_intersection) > 0: + intersection = torch.stack(new_intersection) + else: + break + + # Calculate area + if len(intersection) <= 2: + return 0. 
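+    # At least 3 vertices remain, so the area of the clipped polygon is
+    # computed below with the shoelace formula in PolyArea2D.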
+ + return PolyArea2D(intersection) + + +def PolyArea2D(pts): + roll_pts = torch.roll(pts, -1, dims=0) + area = (pts[:, 0] * roll_pts[:, 1] - pts[:, 1] * roll_pts[:, 0]).sum().abs() * 0.5 + return area + + +if __name__ == "__main__": + import cv2 + import numpy as np + from shapely.geometry import Polygon + + + def cvt_box_2_polygon(box): + """ + :param array: an array of shape [num_conners, 2] + :return: a shapely.geometry.Polygon object + """ + # use .buffer(0) to fix a line polygon + # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera + return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) + + + def get_corners_torch(x, y, w, l, yaw): + device = x.device + bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) + cos_yaw = torch.cos(yaw) + sin_yaw = torch.sin(yaw) + # front left + bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bev_corners + + + # Show convex in an image + + img_size = 300 + img = np.zeros((img_size, img_size, 3)) + img = cv2.resize(img, (img_size, img_size)) + + box1 = torch.tensor([100, 100, 40, 10, np.pi / 2], dtype=torch.float).cuda() + box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() + + box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) + box1_polygon = cvt_box_2_polygon(box1_conners) + box1_area = box1_polygon.area + + box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) + box2_polygon = cvt_box_2_polygon(box2_conners) + box2_area = box2_polygon.area + + intersection = box2_polygon.intersection(box1_polygon).area + union = box1_area + box2_area - intersection + iou = intersection / (union + 1e-16) + + print('Shapely- box1_area: {:.2f}, box2_area: {:.2f}, inter: {:.2f}, iou: {:.4f}'.format(box1_area, box2_area, + intersection, iou)) + + print('intersection from intersection_area(): {}'.format(intersection_area(box1_conners, box2_conners))) + + img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) + img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) + + while True: + cv2.imshow('img', img) + if cv2.waitKey(0) & 0xff == 27: + break diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py new file mode 100644 index 000000000..e378b8d18 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py @@ -0,0 +1,357 @@ +from __future__ import division +import sys +import tqdm + +import torch +import numpy as np +from shapely.geometry import Polygon + +sys.path.append('../') + +import object_detector_3d.lidar_utils.kitti_bev_utils as bev_utils + + +def cvt_box_2_polygon(box): + """ + :param box: an array of shape [4, 2] + :return: a shapely.geometry.Polygon object + """ + # use .buffer(0) to fix a line polygon + # more infor: 
https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera + return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) + + +def compute_iou_nms(idx_self, idx_other, polygons, areas): + """Calculates IoU of the given box with the array of the given boxes. + box: a polygon + boxes: a vector of polygons + Note: the areas are passed in rather than calculated here for + efficiency. Calculate once in the caller to avoid duplicate work. + """ + # Calculate intersection areas + ious = [] + box1 = polygons[idx_self] + for idx in idx_other: + box2 = polygons[idx] + intersection = box1.intersection(box2).area + iou = intersection / (areas[idx] + areas[idx_self] - intersection + 1e-12) + ious.append(iou) + + return np.array(ious, dtype=np.float32) + + +def load_classes(path): + """ + Loads class labels at 'path' + """ + fp = open(path, "r") + names = fp.read().split("\n")[:-1] + return names + + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + + return boxes + + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in tqdm.tqdm(unique_classes, desc="Computing AP"): + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + # Arguments + recall: The recall curve (list). 
+ precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def get_batch_statistics_rotated_bbox(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :6] + pred_scores = output[:, 6] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + if len(annotations) > 0: + target_labels = annotations[:, 0] + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = iou_rotated_single_vs_multi_boxes_cpu(pred_box, target_boxes).max(dim=0) + + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + + return batch_metrics + + +def iou_rotated_single_vs_multi_boxes_cpu(single_box, multi_boxes): + """ + :param pred_box: Numpy array + :param target_boxes: Numpy array + :return: + """ + + s_x, s_y, s_w, s_l, s_im, s_re = single_box + s_area = s_w * s_l + s_yaw = np.arctan2(s_im, s_re) + s_conners = bev_utils.get_corners(s_x, s_y, s_w, s_l, s_yaw) + s_polygon = cvt_box_2_polygon(s_conners) + + m_x, m_y, m_w, m_l, m_im, m_re = multi_boxes.transpose(1, 0) + targets_areas = m_w * m_l + m_yaw = np.arctan2(m_im, m_re) + m_boxes_conners = get_corners_vectorize(m_x, m_y, m_w, m_l, m_yaw) + m_boxes_polygons = [cvt_box_2_polygon(box_) for box_ in m_boxes_conners] + + ious = [] + for m_idx in range(multi_boxes.shape[0]): + intersection = s_polygon.intersection(m_boxes_polygons[m_idx]).area + iou_ = intersection / (s_area + targets_areas[m_idx] - intersection + 1e-16) + ious.append(iou_) + + return torch.tensor(ious, dtype=torch.float) + + +def get_corners_vectorize(x, y, w, l, yaw): + """bev image coordinates format - vectorization + + :param x, y, w, l, yaw: [num_boxes,] + :return: num_boxes x (x,y) of 4 conners + """ + bbox2 = np.zeros((x.shape[0], 4, 2), dtype=np.float32) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + + # front left + bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 3, 1] = y + w / 2 
* sin_yaw + l / 2 * cos_yaw + + return bbox2 + + +def nms_cpu(boxes, confs, nms_thresh=0.5): + """ + :param boxes: [num, 6] + :param confs: [num, num_classes] + :param nms_thresh: + :param min_mode: + :return: + """ + # order of reduce confidence (high --> low) + order = confs.argsort()[::-1] + + x, y, w, l, im, re = boxes.transpose(1, 0) + yaw = np.arctan2(im, re) + boxes_conners = get_corners_vectorize(x, y, w, l, yaw) + boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] # 4 vertices of the box + boxes_areas = w * l + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + keep.append(idx_self) + over = compute_iou_nms(idx_self, idx_other, boxes_polygons, boxes_areas) + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(outputs, conf_thresh=0.95, nms_thresh=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. + Returns detections with shape: + (x, y, w, l, im, re, object_conf, class_score, class_pred) + """ + if type(outputs).__name__ != 'ndarray': + outputs = outputs.numpy() + # outputs shape: (batch_size, 22743, 10) + batch_size = outputs.shape[0] + # box_array: [batch, num, 6] + box_array = outputs[:, :, :6] + + # confs: [batch, num, num_classes] + confs = outputs[:, :, 6:7] * outputs[:, :, 7:] + obj_confs = outputs[:, :, 6] + + # [batch, num, num_classes] --> [batch, num] + max_conf = np.max(confs, axis=2) + max_id = np.argmax(confs, axis=2) + + bboxes_batch = [None for _ in range(batch_size)] + + for i in range(batch_size): + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_obj_confs = obj_confs[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + keep = nms_cpu(l_box_array, l_max_conf, nms_thresh=nms_thresh) + + if (keep.size > 0): + l_box_array = l_box_array[keep, :] + l_obj_confs = l_obj_confs[keep].reshape(-1, 1) + l_max_conf = l_max_conf[keep].reshape(-1, 1) + l_max_id = l_max_id[keep].reshape(-1, 1) + bboxes_batch[i] = np.concatenate((l_box_array, l_obj_confs, l_max_conf, l_max_id), axis=-1) + return bboxes_batch + + +def post_processing_v2(prediction, conf_thresh=0.95, nms_thresh=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. 
+ Returns detections with shape: + (x, y, w, l, im, re, object_conf, class_score, class_pred) + """ + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + image_pred = image_pred[image_pred[:, 6] >= conf_thresh] + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Object confidence times class confidence + score = image_pred[:, 6] * image_pred[:, 7:].max(dim=1)[0] + # Sort by it + image_pred = image_pred[(-score).argsort()] + class_confs, class_preds = image_pred[:, 7:].max(dim=1, keepdim=True) + detections = torch.cat((image_pred[:, :7].float(), class_confs.float(), class_preds.float()), dim=1) + # Perform non-maximum suppression + keep_boxes = [] + while detections.size(0): + # large_overlap = rotated_bbox_iou(detections[0, :6].unsqueeze(0), detections[:, :6], 1.0, False) > nms_thres # not working + large_overlap = iou_rotated_single_vs_multi_boxes_cpu(detections[0, :6], detections[:, :6]) > nms_thresh + label_match = detections[0, -1] == detections[:, -1] + # Indices of boxes with lower confidence scores, large IOUs and matching labels + invalid = large_overlap & label_match + weights = detections[invalid, 6:7] + # Merge overlapping bboxes by order of confidence + detections[0, :6] = (weights * detections[invalid, :6]).sum(0) / weights.sum() + keep_boxes += [detections[0]] + detections = detections[~invalid] + if len(keep_boxes) > 0: + output[image_i] = torch.stack(keep_boxes) + + return output diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py new file mode 100644 index 000000000..4f7d8c77c --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py @@ -0,0 +1,216 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.07.20 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: This script for iou calculation of rotated boxes (on GPU) + +""" + +from __future__ import division +import sys + +import torch +from shapely.geometry import Polygon +from scipy.spatial import ConvexHull + +sys.path.append('../') + +from object_detector_3d.lidar_utils.cal_intersection_rotated_boxes import intersection_area, PolyArea2D + + +def cvt_box_2_polygon(box): + """ + :param array: an array of shape [num_conners, 2] + :return: a shapely.geometry.Polygon object + """ + # use .buffer(0) to fix a line polygon + # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera + return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) + + +def get_corners_vectorize(x, y, w, l, yaw): + """bev image coordinates format - vectorization + + :param x, y, w, l, yaw: [num_boxes,] + :return: num_boxes x (x,y) of 4 conners + """ + device = x.device + bbox2 = torch.zeros((x.size(0), 4, 2), device=device, dtype=torch.float) + cos_yaw = torch.cos(yaw) + sin_yaw = torch.sin(yaw) + + # front left + bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 1, 1] = y - w / 2 * sin_yaw 
- l / 2 * cos_yaw + + # rear right + bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bbox2 + + +def get_polygons_areas_fix_xy(boxes, fix_xy=100.): + """ + Args: + box: (num_boxes, 4) --> w, l, im, re + """ + device = boxes.device + n_boxes = boxes.size(0) + x = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) + y = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) + w, l, im, re = boxes.t() + yaw = torch.atan2(im, re) + boxes_conners = get_corners_vectorize(x, y, w, l, yaw) + boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] + boxes_areas = w * l + + return boxes_polygons, boxes_areas + + +def iou_rotated_boxes_targets_vs_anchors(anchors_polygons, anchors_areas, targets_polygons, targets_areas): + device = anchors_areas.device + num_anchors = len(anchors_areas) + num_targets_boxes = len(targets_areas) + + ious = torch.zeros(size=(num_anchors, num_targets_boxes), device=device, dtype=torch.float) + + for a_idx in range(num_anchors): + for tg_idx in range(num_targets_boxes): + intersection = anchors_polygons[a_idx].intersection(targets_polygons[tg_idx]).area + iou = intersection / (anchors_areas[a_idx] + targets_areas[tg_idx] - intersection + 1e-16) + ious[a_idx, tg_idx] = iou + + return ious + + +def iou_pred_vs_target_boxes(pred_boxes, target_boxes, GIoU=False, DIoU=False, CIoU=False): + assert pred_boxes.size() == target_boxes.size(), "Unmatch size of pred_boxes and target_boxes" + device = pred_boxes.device + n_boxes = pred_boxes.size(0) + + t_x, t_y, t_w, t_l, t_im, t_re = target_boxes.t() + t_yaw = torch.atan2(t_im, t_re) + t_conners = get_corners_vectorize(t_x, t_y, t_w, t_l, t_yaw) + t_areas = t_w * t_l + + p_x, p_y, p_w, p_l, p_im, p_re = pred_boxes.t() + p_yaw = torch.atan2(p_im, p_re) + p_conners = get_corners_vectorize(p_x, p_y, p_w, p_l, p_yaw) + p_areas = p_w * p_l + + ious = [] + giou_loss = torch.tensor([0.], device=device, dtype=torch.float) + # Thinking to apply vectorization this step + for box_idx in range(n_boxes): + p_cons, t_cons = p_conners[box_idx], t_conners[box_idx] + if not GIoU: + p_poly, t_poly = cvt_box_2_polygon(p_cons), cvt_box_2_polygon(t_cons) + intersection = p_poly.intersection(t_poly).area + else: + intersection = intersection_area(p_cons, t_cons) + + p_area, t_area = p_areas[box_idx], t_areas[box_idx] + union = p_area + t_area - intersection + iou = intersection / (union + 1e-16) + + if GIoU: + convex_conners = torch.cat((p_cons, t_cons), dim=0) + hull = ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output + convex_conners = convex_conners[hull.vertices] + convex_area = PolyArea2D(convex_conners) + giou_loss += 1. - (iou - (convex_area - union) / (convex_area + 1e-16)) + else: + giou_loss += 1. 
- iou + + if DIoU or CIoU: + raise NotImplementedError + + ious.append(iou) + + return torch.tensor(ious, device=device, dtype=torch.float), giou_loss + + +if __name__ == "__main__": + import cv2 + import numpy as np + + + def get_corners_torch(x, y, w, l, yaw): + device = x.device + bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) + cos_yaw = torch.cos(yaw) + sin_yaw = torch.sin(yaw) + # front left + bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bev_corners + + + # Show convex in an image + + img_size = 300 + img = np.zeros((img_size, img_size, 3)) + img = cv2.resize(img, (img_size, img_size)) + + box1 = torch.tensor([100, 100, 60, 10, 0.5], dtype=torch.float).cuda() + box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() + + box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) + box1_polygon = cvt_box_2_polygon(box1_conners) + box1_area = box1_polygon.area + + box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) + box2_polygon = cvt_box_2_polygon(box2_conners) + box2_area = box2_polygon.area + + intersection = box2_polygon.intersection(box1_polygon).area + union = box1_area + box2_area - intersection + iou = intersection / (union + 1e-16) + + convex_conners = torch.cat((box1_conners, box2_conners), dim=0) + hull = ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output + convex_conners = convex_conners[hull.vertices] + convex_polygon = cvt_box_2_polygon(convex_conners) + convex_area = convex_polygon.area + giou_loss = 1. 
- (iou - (convex_area - union) / (convex_area + 1e-16)) + + print( + 'box1_area: {:.2f}, box2_area: {:.2f}, intersection: {:.2f}, iou: {:.4f}, convex_area: {:.4f}, giou_loss: {}'.format( + box1_area, box2_area, intersection, iou, convex_area, giou_loss)) + + print('intersection_area: {}'.format(intersection_area(box1_conners, box2_conners))) + print('convex_area using PolyArea2D: {}'.format(PolyArea2D(convex_conners))) + + img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) + img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) + img = cv2.polylines(img, [convex_conners.cpu().numpy().astype(np.int)], True, (0, 0, 255), 2) + + while True: + cv2.imshow('img', img) + if cv2.waitKey(0) & 0xff == 27: + break diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py new file mode 100644 index 000000000..bc765999b --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py @@ -0,0 +1,210 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 +""" + +import math +import sys + +import cv2 +import numpy as np + +sys.path.append('../') + +import object_detector_3d.lidar_utils.kitti_config as cnf + + +def removePoints(PointCloud, BoundaryCond): + # Boundary condition + minX = BoundaryCond['minX'] + maxX = BoundaryCond['maxX'] + minY = BoundaryCond['minY'] + maxY = BoundaryCond['maxY'] + minZ = BoundaryCond['minZ'] + maxZ = BoundaryCond['maxZ'] + + # Remove the point out of range x,y,z + mask = np.where((PointCloud[:, 0] >= minX) & (PointCloud[:, 0] <= maxX) & (PointCloud[:, 1] >= minY) & ( + PointCloud[:, 1] <= maxY) & (PointCloud[:, 2] >= minZ) & (PointCloud[:, 2] <= maxZ)) + PointCloud = PointCloud[mask] + + PointCloud[:, 2] = PointCloud[:, 2] - minZ + + return PointCloud + + +def makeBVFeature(PointCloud_, Discretization, bc): + Height = cnf.BEV_HEIGHT + 1 + Width = cnf.BEV_WIDTH + 1 + + # Discretize Feature Map + PointCloud = np.copy(PointCloud_) + PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / Discretization)) + PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / Discretization) + Width / 2) + + # sort-3times + indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0])) + PointCloud = PointCloud[indices] + + # Height Map + heightMap = np.zeros((Height, Width)) + + _, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True) + PointCloud_frac = PointCloud[indices] + # some important problem is image coordinate is (y,x), not (x,y) + max_height = float(np.abs(bc['maxZ'] - bc['minZ'])) + heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height + + # Intensity Map & DensityMap + intensityMap = np.zeros((Height, Width)) + densityMap = np.zeros((Height, Width)) + + _, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True) + PointCloud_top = PointCloud[indices] + + normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) + + intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] + densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts + + RGB_Map = np.zeros((3, Height - 1, Width - 1)) + RGB_Map[2, :, :] = 
densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # r_map + RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # g_map + RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # b_map + + return RGB_Map + + +def read_labels_for_bevbox(objects): + bbox_selected = [] + for obj in objects: + if obj.cls_id != -1: + bbox = [] + bbox.append(obj.cls_id) + bbox.extend([obj.t[0], obj.t[1], obj.t[2], obj.h, obj.w, obj.l, obj.ry]) + bbox_selected.append(bbox) + + if len(bbox_selected) == 0: + labels = np.zeros((1, 8), dtype=np.float32) + noObjectLabels = True + else: + labels = np.array(bbox_selected, dtype=np.float32) + noObjectLabels = False + + return labels, noObjectLabels + + +# bev image coordinates format +def get_corners(x, y, w, l, yaw): + bev_corners = np.zeros((4, 2), dtype=np.float32) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + # front left + bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bev_corners + +# bev image coordinates format +def get_corners_3d(x, y, z, w, l, h, yaw): + bev_corners = np.zeros((8, 3), dtype=np.float64) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + + # front right + bev_corners[[0,1], 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[[0,1], 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + # front left + bev_corners[[2,3], 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[[2,3], 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear right + bev_corners[[4,5], 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[[4,5], 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear left + bev_corners[[6,7], 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[[6,7], 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # top corners + bev_corners[[1,2,5,6], 2] = z + h + + # bottom corners + bev_corners[[0,3,4,7], 2] = z + + return bev_corners + +def build_yolo_target(labels): + bc = cnf.boundary + target = [] + for i in range(labels.shape[0]): + cl, x, y, z, h, w, l, yaw = labels[i] + # ped and cyc labels are very small, so lets add some factor to height/width + l = l + 0.3 + w = w + 0.3 + yaw = np.pi * 2 - yaw + if (bc["minX"] < x < bc["maxX"]) and (bc["minY"] < y < bc["maxY"]): + y1 = (y - bc["minY"]) / (bc["maxY"] - bc["minY"]) # we should put this in [0,1], so divide max_size 80 m + x1 = (x - bc["minX"]) / (bc["maxX"] - bc["minX"]) # we should put this in [0,1], so divide max_size 40 m + w1 = w / (bc["maxY"] - bc["minY"]) + l1 = l / (bc["maxX"] - bc["minX"]) + target.append([cl, y1, x1, w1, l1, math.sin(float(yaw)), math.cos(float(yaw))]) + + return np.array(target, dtype=np.float32) + + +def inverse_yolo_target(targets, bc): + labels = [] + for t in targets: + c, c_conf, y, x, l, w, im, re = t + z, h = -0.55, 1.5 + if c == 1: + h = 1.8 + elif c == 2: + h = 1.4 + + y = y * (bc["maxY"] - bc["minY"]) + bc["minY"] + x = x * (bc["maxX"] - bc["minX"]) + bc["minX"] + w = w * (bc["maxY"] - bc["minY"]) + l = l * (bc["maxX"] - bc["minX"]) + w -= 0.3 + l -= 0.3 + labels.append([c, c_conf, x, y, z, w, l, h, - np.arctan2(im, re) - 2 * np.pi]) + + return 
np.array(labels) + + +# send parameters in bev image coordinates format +def drawRotatedBox(img, x, y, w, l, yaw, color, text): + bev_corners = get_corners(x, y, w, l, yaw) + corners_int = bev_corners.reshape(-1, 1, 2).astype(int) + cv2.polylines(img, [corners_int], True, color, 2) + corners_int = corners_int.reshape(-1, 2) + cv2.line(img, (corners_int[0, 0], corners_int[0, 1]), (corners_int[3, 0], corners_int[3, 1]), (255, 255, 0), 2) + cv2.putText(img, text, (corners_int[0, 0], corners_int[0, 1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) + + +def draw_box_in_bev(rgb_map, target): + for j in range(50): + if (np.sum(target[j, 1:]) == 0): continue + cls_id = int(target[j][0]) + x = target[j][1] * cnf.BEV_WIDTH + y = target[j][2] * cnf.BEV_HEIGHT + w = target[j][3] * cnf.BEV_WIDTH + l = target[j][4] * cnf.BEV_HEIGHT + yaw = np.arctan2(target[j][5], target[j][6]) + drawRotatedBox(rgb_map, x, y, w, l, yaw, cnf.colors[cls_id]) diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py new file mode 100644 index 000000000..8a257af0e --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py @@ -0,0 +1,66 @@ +import numpy as np + +class_list = ["Car", "Pedestrian", "Cyclist"] + +CLASS_NAME_TO_ID = { + 'Car': 0, + 'Pedestrian': 1, + 'Cyclist': 2, + 'Van': 0, + 'Person_sitting': 1, +} + +# Front side (of vehicle) Point Cloud boundary for BEV +boundary = { + "minX": 0, + "maxX": 50, + "minY": -25, + "maxY": 25, + "minZ": -1.23, #-0.73, #-2.73, # -(height to ground + 1) + "maxZ": 2.77 #3.27 #1.27 +} + +# Back back (of vehicle) Point Cloud boundary for BEV +boundary_back = { + "minX": -50, + "maxX": 0, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +BEV_WIDTH = 608 # across y axis -25m ~ 25m +BEV_HEIGHT = 608 # across x axis 0m ~ 50m + +DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT + +colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0]] + +# Following parameters are calculated as an average from KITTI dataset for simplicity +##################################################################################### +Tr_velo_to_cam = np.array([ + [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], + [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02], + [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], + [0, 0, 0, 1] +]) + +# cal mean from train set +R0 = np.array([ + [0.99992475, 0.00975976, -0.00734152, 0], + [-0.0097913, 0.99994262, -0.00430371, 0], + [0.00729911, 0.0043753, 0.99996319, 0], + [0, 0, 0, 1] +]) + +P2 = np.array([[719.787081, 0., 608.463003, 44.9538775], + [0., 719.787081, 174.545111, 0.1066855], + [0., 0., 1., 3.0106472e-03], + [0., 0., 0., 0] + ]) + +R0_inv = np.linalg.inv(R0) +Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam) +P2_inv = np.linalg.pinv(P2) +##################################################################################### diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py new file mode 100644 index 000000000..b5e6684b9 --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py @@ -0,0 +1,27 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.07.05 +# email: 
nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: some utilities of torch (conversion) +----------------------------------------------------------------------------------- +# Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 +""" + +import torch + +__all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu'] + + +def convert2cpu(gpu_matrix): + return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) + + +def convert2cpu_long(gpu_matrix): + return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) + + +def to_cpu(tensor): + return tensor.detach().cpu() diff --git a/src/perception/object_detector_3d/package.xml b/src/perception/object_detector_3d/package.xml index 39567d1e9..d9f3267fd 100644 --- a/src/perception/object_detector_3d/package.xml +++ b/src/perception/object_detector_3d/package.xml @@ -10,6 +10,7 @@ rclpy ros2_numpy sensor_msgs + torch geometry_msgs navigator_msgs diff --git a/src/perception/object_detector_3d/setup.py b/src/perception/object_detector_3d/setup.py index 32005a8ac..b2252c0b4 100644 --- a/src/perception/object_detector_3d/setup.py +++ b/src/perception/object_detector_3d/setup.py @@ -20,7 +20,8 @@ tests_require=['pytest'], entry_points={ 'console_scripts': [ - 'lidar_object_detector_3d_node = object_detector_3d.lidar_objdet3d_node:main' + 'lidar_object_detector_3d_node = object_detector_3d.lidar_objdet3d_node:main', + 'lidar_detection_node = object_detector_3d.lidar_detection_node:main', ], }, ) diff --git a/src/perception/visualizer/visualizer/object_visualizer_node.py b/src/perception/visualizer/visualizer/object_visualizer_node.py index 1e1aefd1a..35cd58cf6 100644 --- a/src/perception/visualizer/visualizer/object_visualizer_node.py +++ b/src/perception/visualizer/visualizer/object_visualizer_node.py @@ -18,7 +18,7 @@ def __init__(self): self.subscription = self.create_subscription( msg_type = Object3DArray, - topic = 'objdet3d_tracked', + topic = '/detected/objects3d', # NEEDS MORE CHOICES callback = self.visualize_objects, qos_profile = 1 ) @@ -27,48 +27,50 @@ def __init__(self): def visualize_objects(self, msg: Object3DArray): - #self.get_logger().info(f"{msg.header}") + # Creates an array of bounding boxes for rviz marker_array = MarkerArray() for object in msg.objects: - + # Creates bounding box for object box = Marker() box.header.frame_id = msg.header.frame_id box.header.stamp = self.get_clock().now().to_msg() box.id = object.id - box.type = 5 + box.type = Marker.LINE_LIST box.color.r, box.color.g, box.color.b = LABEL_TO_COLOR[object.label] box.color.a = 1.0 - box.scale.x = 0.10 + box.scale.x = 0.10 # width of the line box.lifetime = Duration(seconds=0.2).to_msg() # should be removed when object.id exists box.ns = "object_bounding_box" + # Creates 12 lines for the bounding box for i in range(4): # this should do 0-1, 1-2, 2-3, 3-4 src = object.bounding_box.corners[i] dst = object.bounding_box.corners[(i+1) % 4] box.points.append(src) - box.points.append(dst) + box.points.append(dst) # needs 2 points per line # this should do 4-5, 5-6, 6-7, 7-4 src = object.bounding_box.corners[i+4] dst = object.bounding_box.corners[((i+1) % 4) + 4] box.points.append(src) - box.points.append(dst) + box.points.append(dst) # needs 2 points per line # this should do 0-4, 1-5, 2-6, 3-7 src = object.bounding_box.corners[i] dst = object.bounding_box.corners[i+4] box.points.append(src) - box.points.append(dst) + box.points.append(dst) # needs 2 points per line marker_array.markers.append(box) + # 
Creates the label for box tag = Marker() tag.header.frame_id = msg.header.frame_id tag.header.stamp = self.get_clock().now().to_msg() tag.id = object.id - tag.type = 9 + tag.type = Marker.TEXT_VIEW_FACING tag.color.r, tag.color.g, tag.color.b = (1.0, 1.0, 1.0) tag.color.a = 1.0 tag.scale.z = 0.5 @@ -84,7 +86,6 @@ def visualize_objects(self, msg: Object3DArray): self.visualization_publisher.publish(marker_array) - def main(args=None): rclpy.init(args=args) From 767930ebfa08dbe4f966c64a4de992ede8244028 Mon Sep 17 00:00:00 2001 From: Gueren Sanford Date: Tue, 9 Apr 2024 21:48:54 -0500 Subject: [PATCH 2/8] Decoupling ComplexYOLOv4 files --- .../complex_yolo/model_utils.py | 90 ----- .../complex_yolov4/README.md | 5 + .../__init__.py | 0 .../model}/__init__.py | 0 .../model}/darknet2pytorch.py | 6 +- .../model}/darknet_utils.py | 2 +- .../model/model_utils.py} | 157 ++++---- .../model}/yolo_layer.py | 4 +- .../complex_yolov4/utils/__init__.py | 0 .../complex_yolov4/utils/evaluation_utils.py | 114 ++++++ .../utils}/kitti_bev_utils.py | 66 +--- .../complex_yolov4/utils/kitti_config.py | 36 ++ .../utils}/torch_utils.py | 0 ...ction_model.py => complex_yolov4_model.py} | 16 +- .../lidar_detection_node.py | 11 +- .../cal_intersection_rotated_boxes.py | 171 --------- .../lidar_utils/evaluation_utils.py | 357 ------------------ .../lidar_utils/kitti_config.py | 66 ---- 18 files changed, 254 insertions(+), 847 deletions(-) delete mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md rename src/perception/object_detector_3d/object_detector_3d/{complex_yolo => complex_yolov4}/__init__.py (100%) rename src/perception/object_detector_3d/object_detector_3d/{lidar_utils => complex_yolov4/model}/__init__.py (100%) rename src/perception/object_detector_3d/object_detector_3d/{complex_yolo => complex_yolov4/model}/darknet2pytorch.py (98%) rename src/perception/object_detector_3d/object_detector_3d/{complex_yolo => complex_yolov4/model}/darknet_utils.py (99%) rename src/perception/object_detector_3d/object_detector_3d/{lidar_utils/iou_rotated_boxes_utils.py => complex_yolov4/model/model_utils.py} (62%) rename src/perception/object_detector_3d/object_detector_3d/{complex_yolo => complex_yolov4/model}/yolo_layer.py (98%) create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/__init__.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py rename src/perception/object_detector_3d/object_detector_3d/{lidar_utils => complex_yolov4/utils}/kitti_bev_utils.py (65%) create mode 100644 src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py rename src/perception/object_detector_3d/object_detector_3d/{lidar_utils => complex_yolov4/utils}/torch_utils.py (100%) rename src/perception/object_detector_3d/object_detector_3d/{lidar_detection_model.py => complex_yolov4_model.py} (91%) delete mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py delete mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py delete mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py 
b/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py deleted file mode 100644 index 0b4f3668b..000000000 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/model_utils.py +++ /dev/null @@ -1,90 +0,0 @@ -""" -# -*- coding: utf-8 -*- ------------------------------------------------------------------------------------ -# Author: Nguyen Mau Dung -# DoC: 2020.06.18 -# email: nguyenmaudung93.kstn@gmail.com ------------------------------------------------------------------------------------ -# Description: utils functions that use for model -""" - -import sys - -import torch - -sys.path.append('../') - -from object_detector_3d.complex_yolo.darknet2pytorch import Darknet - - -def create_model(configs): - """Create model based on architecture name""" - if (configs.arch == 'darknet') and (configs.cfgfile is not None): - print('using darknet') - model = Darknet(cfgfile=configs.cfgfile, use_giou_loss=configs.use_giou_loss) - else: - assert False, 'Undefined model backbone' - - return model - - -def get_num_parameters(model): - """Count number of trained parameters of the model""" - if hasattr(model, 'module'): - num_parameters = sum(p.numel() for p in model.module.parameters() if p.requires_grad) - else: - num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) - - return num_parameters - - -def make_data_parallel(model, configs): - if configs.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. - if configs.gpu_idx is not None: - torch.cuda.set_device(configs.gpu_idx) - model.cuda(configs.gpu_idx) - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - configs.batch_size = int(configs.batch_size / configs.ngpus_per_node) - configs.num_workers = int((configs.num_workers + configs.ngpus_per_node - 1) / configs.ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[configs.gpu_idx]) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - model = torch.nn.parallel.DistributedDataParallel(model) - elif configs.gpu_idx is not None: - torch.cuda.set_device(configs.gpu_idx) - model = model.cuda(configs.gpu_idx) - else: - # DataParallel will divide and allocate batch_size to all available GPUs - model = torch.nn.DataParallel(model).cuda() - - return model - - -if __name__ == '__main__': - import argparse - - from torchsummary import summary - from easydict import EasyDict as edict - - parser = argparse.ArgumentParser(description='Complexer YOLO Implementation') - parser.add_argument('-a', '--arch', type=str, default='darknet', metavar='ARCH', - help='The name of the model architecture') - parser.add_argument('--cfgfile', type=str, default='../config/cfg/complex_yolov4.cfg', metavar='PATH', - help='The path for cfgfile (only for darknet)') - - configs = edict(vars(parser.parse_args())) - - configs.device = torch.device('cuda:1') - - model = create_model(configs).to(device=configs.device) - sample_input = torch.randn((1, 3, 608, 608)).to(device=configs.device) - # summary(model.cuda(), (3, 608, 608)) - output = model(sample_input, targets=None) - print(output.size()) diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md 
b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md new file mode 100644 index 000000000..e3fc7368b --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md @@ -0,0 +1,5 @@ +# Complex YOLOv4 + +The PyTorch Implementation based on YOLOv4 of the paper: [Complex-YOLO: Real-time 3D Object Detection on Point Clouds](https://arxiv.org/pdf/1803.06199.pdf) + +Most files are taken (some modified) from the GitHub Repo: [https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master](https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master) \ No newline at end of file diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/__init__.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolo/__init__.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/__init__.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/lidar_utils/__init__.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py index 4113c2bd3..71e97e217 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet2pytorch.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py @@ -14,9 +14,9 @@ sys.path.append('../') -from object_detector_3d.complex_yolo.yolo_layer import YoloLayer -from object_detector_3d.complex_yolo.darknet_utils import parse_cfg, print_cfg, load_fc, load_conv_bn, load_conv -from object_detector_3d.lidar_utils.torch_utils import to_cpu +from object_detector_3d.complex_yolov4.model.yolo_layer import YoloLayer +from object_detector_3d.complex_yolov4.model.darknet_utils import parse_cfg, print_cfg, load_fc, load_conv_bn, load_conv +from object_detector_3d.complex_yolov4.utils.torch_utils import to_cpu class Mish(nn.Module): diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py similarity index 99% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py index 5500fb057..a5eb6cdff 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/darknet_utils.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py @@ -9,7 +9,7 @@ import torch sys.path.append('../') -from object_detector_3d.lidar_utils.torch_utils import convert2cpu +from object_detector_3d.complex_yolov4.utils.torch_utils import convert2cpu __all__ = ['parse_cfg', 'print_cfg', 'load_conv', 'load_conv_bn', 'save_conv', 'save_conv_bn', 'load_fc', 'save_fc'] 
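For reference, a minimal usage sketch of the relocated ComplexYOLOv4 modules after this refactor (illustrative only, not part of the patch): the import paths, config/checkpoint locations, 608x608 BEV input size, and default thresholds are taken from elsewhere in this patch, and it assumes post_processing_v2 remains in the trimmed utils/evaluation_utils.py.

import torch
from object_detector_3d.complex_yolov4.model.darknet2pytorch import Darknet
from object_detector_3d.complex_yolov4.utils.evaluation_utils import post_processing_v2

# Paths as used by the detection node earlier in this patch
config_file = './data/perception/configs/complex_yolov4.cfg'
model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth'

model = Darknet(cfgfile=config_file, use_giou_loss=True)
model.load_state_dict(torch.load(model_path, map_location='cpu'))
model.eval()

# Dummy 3-channel (intensity/height/density) birds eye view input
dummy_bev = torch.zeros((1, 3, 608, 608))
with torch.no_grad():
    outputs = model(dummy_bev)  # [1, num boxes, x y w l im re conf + class scores]
detections = post_processing_v2(outputs, conf_thresh=0.4, nms_thresh=0.2)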
diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/model_utils.py similarity index 62% rename from src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/model_utils.py index 4f7d8c77c..15a55f261 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/iou_rotated_boxes_utils.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/model_utils.py @@ -9,7 +9,6 @@ """ -from __future__ import division import sys import torch @@ -18,7 +17,85 @@ sys.path.append('../') -from object_detector_3d.lidar_utils.cal_intersection_rotated_boxes import intersection_area, PolyArea2D +class Line: + # ax + by + c = 0 + def __init__(self, p1, p2): + """ + + Args: + p1: (x, y) + p2: (x, y) + """ + self.a = p2[1] - p1[1] + self.b = p1[0] - p2[0] + self.c = p2[0] * p1[1] - p2[1] * p1[0] # cross + self.device = p1.device + + def cal_values(self, pts): + return self.a * pts[:, 0] + self.b * pts[:, 1] + self.c + + def find_intersection(self, other): + # See e.g. https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates + if not isinstance(other, Line): + return NotImplemented + w = self.a * other.b - self.b * other.a + return torch.tensor([(self.b * other.c - self.c * other.b) / w, (self.c * other.a - self.a * other.c) / w], + device=self.device) + +def PolyArea2D(pts): + roll_pts = torch.roll(pts, -1, dims=0) + area = (pts[:, 0] * roll_pts[:, 1] - pts[:, 1] * roll_pts[:, 0]).sum().abs() * 0.5 + return area + +def intersection_area(rect1, rect2): + """Calculate the inter + + Args: + rect1: vertices of the rectangles (4, 2) + rect2: vertices of the rectangles (4, 2) + + Returns: + + """ + + # Use the vertices of the first rectangle as, starting vertices of the intersection polygon. + intersection = rect1 + + # Loop over the edges of the second rectangle + roll_rect2 = torch.roll(rect2, -1, dims=0) + for p, q in zip(rect2, roll_rect2): + if len(intersection) <= 2: + break # No intersection + + line = Line(p, q) + + # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), + # any point p with line(p) > 0 is on the "outside". + # Loop over the edges of the intersection polygon, + # and determine which part is inside and which is outside. + new_intersection = [] + line_values = line.cal_values(intersection) + roll_intersection = torch.roll(intersection, -1, dims=0) + roll_line_values = torch.roll(line_values, -1, dims=0) + for s, t, s_value, t_value in zip(intersection, roll_intersection, line_values, roll_line_values): + if s_value <= 0: + new_intersection.append(s) + if s_value * t_value < 0: + # Points are on opposite sides. + # Add the intersection of the lines to new_intersection. + intersection_point = line.find_intersection(Line(s, t)) + new_intersection.append(intersection_point) + + if len(new_intersection) > 0: + intersection = torch.stack(new_intersection) + else: + break + + # Calculate area + if len(intersection) <= 2: + return 0. 
+ + return PolyArea2D(intersection) def cvt_box_2_polygon(box): @@ -139,78 +216,4 @@ def iou_pred_vs_target_boxes(pred_boxes, target_boxes, GIoU=False, DIoU=False, C ious.append(iou) - return torch.tensor(ious, device=device, dtype=torch.float), giou_loss - - -if __name__ == "__main__": - import cv2 - import numpy as np - - - def get_corners_torch(x, y, w, l, yaw): - device = x.device - bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) - cos_yaw = torch.cos(yaw) - sin_yaw = torch.sin(yaw) - # front left - bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw - bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw - - # rear left - bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw - bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw - - # rear right - bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw - bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw - - # front right - bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw - bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw - - return bev_corners - - - # Show convex in an image - - img_size = 300 - img = np.zeros((img_size, img_size, 3)) - img = cv2.resize(img, (img_size, img_size)) - - box1 = torch.tensor([100, 100, 60, 10, 0.5], dtype=torch.float).cuda() - box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() - - box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) - box1_polygon = cvt_box_2_polygon(box1_conners) - box1_area = box1_polygon.area - - box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) - box2_polygon = cvt_box_2_polygon(box2_conners) - box2_area = box2_polygon.area - - intersection = box2_polygon.intersection(box1_polygon).area - union = box1_area + box2_area - intersection - iou = intersection / (union + 1e-16) - - convex_conners = torch.cat((box1_conners, box2_conners), dim=0) - hull = ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output - convex_conners = convex_conners[hull.vertices] - convex_polygon = cvt_box_2_polygon(convex_conners) - convex_area = convex_polygon.area - giou_loss = 1. 
- (iou - (convex_area - union) / (convex_area + 1e-16)) - - print( - 'box1_area: {:.2f}, box2_area: {:.2f}, intersection: {:.2f}, iou: {:.4f}, convex_area: {:.4f}, giou_loss: {}'.format( - box1_area, box2_area, intersection, iou, convex_area, giou_loss)) - - print('intersection_area: {}'.format(intersection_area(box1_conners, box2_conners))) - print('convex_area using PolyArea2D: {}'.format(PolyArea2D(convex_conners))) - - img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) - img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) - img = cv2.polylines(img, [convex_conners.cpu().numpy().astype(np.int)], True, (0, 0, 255), 2) - - while True: - cv2.imshow('img', img) - if cv2.waitKey(0) & 0xff == 27: - break + return torch.tensor(ious, device=device, dtype=torch.float), giou_loss \ No newline at end of file diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py index 34854da3c..3777b0d45 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolo/yolo_layer.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py @@ -19,8 +19,8 @@ sys.path.append('../') -from object_detector_3d.lidar_utils.torch_utils import to_cpu -from object_detector_3d.lidar_utils.iou_rotated_boxes_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ +from object_detector_3d.complex_yolov4.utils.torch_utils import to_cpu +from object_detector_3d.complex_yolov4.model.model_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ get_polygons_areas_fix_xy diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/__init__.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py new file mode 100644 index 000000000..0949e4c3c --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py @@ -0,0 +1,114 @@ +import sys +import torch +import numpy as np +from shapely.geometry import Polygon + +sys.path.append('../') + +from object_detector_3d.complex_yolov4.utils.kitti_bev_utils import get_corners + + +def cvt_box_2_polygon(box): + """ + :param box: an array of shape [4, 2] + :return: a shapely.geometry.Polygon object + """ + # use .buffer(0) to fix a line polygon + # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera + return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) + + +def iou_rotated_single_vs_multi_boxes_cpu(single_box, multi_boxes): + """ + :param pred_box: Numpy array + :param target_boxes: Numpy array + :return: + """ + + s_x, s_y, s_w, s_l, s_im, s_re = single_box + s_area = s_w * s_l + s_yaw = np.arctan2(s_im, s_re) + s_conners = get_corners(s_x, s_y, s_w, s_l, s_yaw) + s_polygon = cvt_box_2_polygon(s_conners) + + m_x, m_y, m_w, m_l, 
m_im, m_re = multi_boxes.transpose(1, 0) + targets_areas = m_w * m_l + m_yaw = np.arctan2(m_im, m_re) + m_boxes_conners = get_corners_vectorize(m_x, m_y, m_w, m_l, m_yaw) + m_boxes_polygons = [cvt_box_2_polygon(box_) for box_ in m_boxes_conners] + + ious = [] + for m_idx in range(multi_boxes.shape[0]): + intersection = s_polygon.intersection(m_boxes_polygons[m_idx]).area + iou_ = intersection / (s_area + targets_areas[m_idx] - intersection + 1e-16) + ious.append(iou_) + + return torch.tensor(ious, dtype=torch.float) + + +def get_corners_vectorize(x, y, w, l, yaw): + """bev image coordinates format - vectorization + + :param x, y, w, l, yaw: [num_boxes,] + :return: num_boxes x (x,y) of 4 conners + """ + bbox2 = np.zeros((x.shape[0], 4, 2), dtype=np.float32) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + + # front left + bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bbox2 + + +def post_processing_v2(prediction, conf_thresh=0.95, nms_thresh=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. + Returns detections with shape: + (x, y, w, l, im, re, object_conf, class_score, class_pred) + """ + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + image_pred = image_pred[image_pred[:, 6] >= conf_thresh] + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Object confidence times class confidence + score = image_pred[:, 6] * image_pred[:, 7:].max(dim=1)[0] + # Sort by it + image_pred = image_pred[(-score).argsort()] + class_confs, class_preds = image_pred[:, 7:].max(dim=1, keepdim=True) + detections = torch.cat((image_pred[:, :7].float(), class_confs.float(), class_preds.float()), dim=1) + # Perform non-maximum suppression + keep_boxes = [] + while detections.size(0): + # large_overlap = rotated_bbox_iou(detections[0, :6].unsqueeze(0), detections[:, :6], 1.0, False) > nms_thres # not working + large_overlap = iou_rotated_single_vs_multi_boxes_cpu(detections[0, :6], detections[:, :6]) > nms_thresh + label_match = detections[0, -1] == detections[:, -1] + # Indices of boxes with lower confidence scores, large IOUs and matching labels + invalid = large_overlap & label_match + weights = detections[invalid, 6:7] + # Merge overlapping bboxes by order of confidence + detections[0, :6] = (weights * detections[invalid, :6]).sum(0) / weights.sum() + keep_boxes += [detections[0]] + detections = detections[~invalid] + if len(keep_boxes) > 0: + output[image_i] = torch.stack(keep_boxes) + + return output diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py similarity index 65% rename from src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py 
index bc765999b..603f2a87d 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_bev_utils.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py @@ -4,15 +4,12 @@ # Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 """ -import math import sys - -import cv2 import numpy as np sys.path.append('../') -import object_detector_3d.lidar_utils.kitti_config as cnf +import object_detector_3d.complex_yolov4.utils.kitti_config as cnf def removePoints(PointCloud, BoundaryCond): @@ -76,25 +73,6 @@ def makeBVFeature(PointCloud_, Discretization, bc): return RGB_Map -def read_labels_for_bevbox(objects): - bbox_selected = [] - for obj in objects: - if obj.cls_id != -1: - bbox = [] - bbox.append(obj.cls_id) - bbox.extend([obj.t[0], obj.t[1], obj.t[2], obj.h, obj.w, obj.l, obj.ry]) - bbox_selected.append(bbox) - - if len(bbox_selected) == 0: - labels = np.zeros((1, 8), dtype=np.float32) - noObjectLabels = True - else: - labels = np.array(bbox_selected, dtype=np.float32) - noObjectLabels = False - - return labels, noObjectLabels - - # bev image coordinates format def get_corners(x, y, w, l, yaw): bev_corners = np.zeros((4, 2), dtype=np.float32) @@ -118,7 +96,7 @@ def get_corners(x, y, w, l, yaw): return bev_corners -# bev image coordinates format +# From Nova, converts ComplexYolo 2D boxes into BoundingBox3D ros2 msg def get_corners_3d(x, y, z, w, l, h, yaw): bev_corners = np.zeros((8, 3), dtype=np.float64) cos_yaw = np.cos(yaw) @@ -148,24 +126,6 @@ def get_corners_3d(x, y, z, w, l, h, yaw): return bev_corners -def build_yolo_target(labels): - bc = cnf.boundary - target = [] - for i in range(labels.shape[0]): - cl, x, y, z, h, w, l, yaw = labels[i] - # ped and cyc labels are very small, so lets add some factor to height/width - l = l + 0.3 - w = w + 0.3 - yaw = np.pi * 2 - yaw - if (bc["minX"] < x < bc["maxX"]) and (bc["minY"] < y < bc["maxY"]): - y1 = (y - bc["minY"]) / (bc["maxY"] - bc["minY"]) # we should put this in [0,1], so divide max_size 80 m - x1 = (x - bc["minX"]) / (bc["maxX"] - bc["minX"]) # we should put this in [0,1], so divide max_size 40 m - w1 = w / (bc["maxY"] - bc["minY"]) - l1 = l / (bc["maxX"] - bc["minX"]) - target.append([cl, y1, x1, w1, l1, math.sin(float(yaw)), math.cos(float(yaw))]) - - return np.array(target, dtype=np.float32) - def inverse_yolo_target(targets, bc): labels = [] @@ -186,25 +146,3 @@ def inverse_yolo_target(targets, bc): labels.append([c, c_conf, x, y, z, w, l, h, - np.arctan2(im, re) - 2 * np.pi]) return np.array(labels) - - -# send parameters in bev image coordinates format -def drawRotatedBox(img, x, y, w, l, yaw, color, text): - bev_corners = get_corners(x, y, w, l, yaw) - corners_int = bev_corners.reshape(-1, 1, 2).astype(int) - cv2.polylines(img, [corners_int], True, color, 2) - corners_int = corners_int.reshape(-1, 2) - cv2.line(img, (corners_int[0, 0], corners_int[0, 1]), (corners_int[3, 0], corners_int[3, 1]), (255, 255, 0), 2) - cv2.putText(img, text, (corners_int[0, 0], corners_int[0, 1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1) - - -def draw_box_in_bev(rgb_map, target): - for j in range(50): - if (np.sum(target[j, 1:]) == 0): continue - cls_id = int(target[j][0]) - x = target[j][1] * cnf.BEV_WIDTH - y = target[j][2] * cnf.BEV_HEIGHT - w = target[j][3] * cnf.BEV_WIDTH - l = target[j][4] * cnf.BEV_HEIGHT - yaw = np.arctan2(target[j][5], target[j][6]) - drawRotatedBox(rgb_map, x, y, w, l, yaw, cnf.colors[cls_id]) diff --git 
a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py new file mode 100644 index 000000000..37b24b29a --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py @@ -0,0 +1,36 @@ +class_list = ["Car", "Pedestrian", "Cyclist"] + +CLASS_NAME_TO_ID = { + 'Car': 0, + 'Pedestrian': 1, + 'Cyclist': 2, + 'Van': 0, + 'Person_sitting': 1, +} + +# Front side (of vehicle) Point Cloud boundary for BEV +boundary = { + "minX": 0, + "maxX": 50, + "minY": -25, + "maxY": 25, + "minZ": -1.23, #-0.73, #-2.73, # -(height to ground + 1) + "maxZ": 2.77 #3.27 #1.27 +} + +# Back back (of vehicle) Point Cloud boundary for BEV +boundary_back = { + "minX": -50, + "maxX": 0, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +BEV_WIDTH = 608 # across y axis -25m ~ 25m +BEV_HEIGHT = 608 # across x axis 0m ~ 50m + +DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT + +colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0]] diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/torch_utils.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/lidar_utils/torch_utils.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/torch_utils.py diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py similarity index 91% rename from src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py rename to src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py index ec0c2b34f..bef5ce5ef 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_model.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py @@ -19,21 +19,21 @@ import torch # Local Import -from object_detector_3d.complex_yolo.darknet2pytorch import Darknet -from object_detector_3d.lidar_utils import kitti_bev_utils -from object_detector_3d.lidar_utils.evaluation_utils import \ - rescale_boxes, post_processing_v2 -import object_detector_3d.lidar_utils.kitti_config as cnf +from object_detector_3d.complex_yolov4.model.darknet2pytorch import Darknet +from object_detector_3d.complex_yolov4.utils import kitti_bev_utils +from object_detector_3d.complex_yolov4.utils.evaluation_utils import post_processing_v2 +import object_detector_3d.complex_yolov4.utils.kitti_config as cnf # Message definitions from geometry_msgs.msg import Point from sensor_msgs.msg import PointCloud2 from navigator_msgs.msg import BoundingBox3D, Object3D, Object3DArray +# Paths to pretrained model and config config_file = './data/perception/configs/complex_yolov4.cfg' model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth' -class LidarDetectionModel(): +class ComplexYOLOv4Model(): def __init__(self, device: torch.device, use_giou_loss: bool = True): """! Initializes the node. @param device[torch.device] The device the model will run on @@ -114,9 +114,9 @@ def postprocess(self, rgb_map: torch.Tensor, to be published. Before publishing, the header needs to be attached. 
""" - # Returns detections with shape: (x, y, w, l, im, re, object_conf, class_score, class_pred) + # Filters boxes under conf_thresh and over nms_thresh detections = post_processing_v2(predictions, conf_thresh=conf_thresh, - nms_thresh=nms_thresh)[0] + nms_thresh=nms_thresh)[0] # Returns: (x, y, w, l, im, re, object_conf, class_score, class_pred) # Skip if no detections made if detections is None: diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py index 311270cc1..433d51aac 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py @@ -14,7 +14,7 @@ from time import time # DEBUG # Local Import -from object_detector_3d.lidar_detection_model import LidarDetectionModel +from object_detector_3d.complex_yolov4_model import ComplexYOLOv4Model # Ros Imports import rclpy @@ -34,10 +34,6 @@ def __init__(self): """! Initializes the node.""" super().__init__("lidar_detection_node") - # Paths to pretrained model and config - model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth' - config_file = './data/perception/configs/complex_yolov4.cfg' - # Declare default ROS2 node parameters self.declare_parameter('device', 'cuda:1') # cpu | cuda:{gpu num} self.declare_parameter('conf_thresh', 0.4) # 0.0 - 1.0 @@ -55,8 +51,8 @@ def __init__(self): .get_parameter_value().bool_value # Instantiates the model - self.model = LidarDetectionModel(self.device) - self.clock = Clock() + self.model = ComplexYOLOv4Model(self.device) + self.stamp = Time() # For published msgs # Subcribes to filtered lidar self.lidar_sub = self.create_subscription( @@ -74,7 +70,6 @@ def clock_cb(self, msg): Updates the clock for message headers. @param msg[Clock] The clock message. """ - self.stamp = Time() self.stamp.sec = msg.clock.sec self.stamp.nanosec = msg.clock.nanosec diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py deleted file mode 100644 index e79bf9a89..000000000 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/cal_intersection_rotated_boxes.py +++ /dev/null @@ -1,171 +0,0 @@ -""" -# -*- coding: utf-8 -*- ------------------------------------------------------------------------------------ -# Author: Nguyen Mau Dung -# DoC: 2020.07.20 -# email: nguyenmaudung93.kstn@gmail.com ------------------------------------------------------------------------------------ -# Description: This script for intersection calculation of rotated boxes (on GPU) - -Refer from # https://stackoverflow.com/questions/44797713/calculate-the-area-of-intersection-of-two-rotated-rectangles-in-python?noredirect=1&lq=1 -""" - -import torch - - -class Line: - # ax + by + c = 0 - def __init__(self, p1, p2): - """ - - Args: - p1: (x, y) - p2: (x, y) - """ - self.a = p2[1] - p1[1] - self.b = p1[0] - p2[0] - self.c = p2[0] * p1[1] - p2[1] * p1[0] # cross - self.device = p1.device - - def cal_values(self, pts): - return self.a * pts[:, 0] + self.b * pts[:, 1] + self.c - - def find_intersection(self, other): - # See e.g. 
https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates - if not isinstance(other, Line): - return NotImplemented - w = self.a * other.b - self.b * other.a - return torch.tensor([(self.b * other.c - self.c * other.b) / w, (self.c * other.a - self.a * other.c) / w], - device=self.device) - - -def intersection_area(rect1, rect2): - """Calculate the inter - - Args: - rect1: vertices of the rectangles (4, 2) - rect2: vertices of the rectangles (4, 2) - - Returns: - - """ - - # Use the vertices of the first rectangle as, starting vertices of the intersection polygon. - intersection = rect1 - - # Loop over the edges of the second rectangle - roll_rect2 = torch.roll(rect2, -1, dims=0) - for p, q in zip(rect2, roll_rect2): - if len(intersection) <= 2: - break # No intersection - - line = Line(p, q) - - # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), - # any point p with line(p) > 0 is on the "outside". - # Loop over the edges of the intersection polygon, - # and determine which part is inside and which is outside. - new_intersection = [] - line_values = line.cal_values(intersection) - roll_intersection = torch.roll(intersection, -1, dims=0) - roll_line_values = torch.roll(line_values, -1, dims=0) - for s, t, s_value, t_value in zip(intersection, roll_intersection, line_values, roll_line_values): - if s_value <= 0: - new_intersection.append(s) - if s_value * t_value < 0: - # Points are on opposite sides. - # Add the intersection of the lines to new_intersection. - intersection_point = line.find_intersection(Line(s, t)) - new_intersection.append(intersection_point) - - if len(new_intersection) > 0: - intersection = torch.stack(new_intersection) - else: - break - - # Calculate area - if len(intersection) <= 2: - return 0. 
- - return PolyArea2D(intersection) - - -def PolyArea2D(pts): - roll_pts = torch.roll(pts, -1, dims=0) - area = (pts[:, 0] * roll_pts[:, 1] - pts[:, 1] * roll_pts[:, 0]).sum().abs() * 0.5 - return area - - -if __name__ == "__main__": - import cv2 - import numpy as np - from shapely.geometry import Polygon - - - def cvt_box_2_polygon(box): - """ - :param array: an array of shape [num_conners, 2] - :return: a shapely.geometry.Polygon object - """ - # use .buffer(0) to fix a line polygon - # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera - return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) - - - def get_corners_torch(x, y, w, l, yaw): - device = x.device - bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) - cos_yaw = torch.cos(yaw) - sin_yaw = torch.sin(yaw) - # front left - bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw - bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw - - # rear left - bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw - bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw - - # rear right - bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw - bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw - - # front right - bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw - bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw - - return bev_corners - - - # Show convex in an image - - img_size = 300 - img = np.zeros((img_size, img_size, 3)) - img = cv2.resize(img, (img_size, img_size)) - - box1 = torch.tensor([100, 100, 40, 10, np.pi / 2], dtype=torch.float).cuda() - box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() - - box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) - box1_polygon = cvt_box_2_polygon(box1_conners) - box1_area = box1_polygon.area - - box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) - box2_polygon = cvt_box_2_polygon(box2_conners) - box2_area = box2_polygon.area - - intersection = box2_polygon.intersection(box1_polygon).area - union = box1_area + box2_area - intersection - iou = intersection / (union + 1e-16) - - print('Shapely- box1_area: {:.2f}, box2_area: {:.2f}, inter: {:.2f}, iou: {:.4f}'.format(box1_area, box2_area, - intersection, iou)) - - print('intersection from intersection_area(): {}'.format(intersection_area(box1_conners, box2_conners))) - - img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) - img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) - - while True: - cv2.imshow('img', img) - if cv2.waitKey(0) & 0xff == 27: - break diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py deleted file mode 100644 index e378b8d18..000000000 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/evaluation_utils.py +++ /dev/null @@ -1,357 +0,0 @@ -from __future__ import division -import sys -import tqdm - -import torch -import numpy as np -from shapely.geometry import Polygon - -sys.path.append('../') - -import object_detector_3d.lidar_utils.kitti_bev_utils as bev_utils - - -def cvt_box_2_polygon(box): - """ - :param box: an array of shape [4, 2] - :return: a shapely.geometry.Polygon object - """ - # use .buffer(0) to fix a line polygon - # more infor: 
https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera - return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) - - -def compute_iou_nms(idx_self, idx_other, polygons, areas): - """Calculates IoU of the given box with the array of the given boxes. - box: a polygon - boxes: a vector of polygons - Note: the areas are passed in rather than calculated here for - efficiency. Calculate once in the caller to avoid duplicate work. - """ - # Calculate intersection areas - ious = [] - box1 = polygons[idx_self] - for idx in idx_other: - box2 = polygons[idx] - intersection = box1.intersection(box2).area - iou = intersection / (areas[idx] + areas[idx_self] - intersection + 1e-12) - ious.append(iou) - - return np.array(ious, dtype=np.float32) - - -def load_classes(path): - """ - Loads class labels at 'path' - """ - fp = open(path, "r") - names = fp.read().split("\n")[:-1] - return names - - -def rescale_boxes(boxes, current_dim, original_shape): - """ Rescales bounding boxes to the original shape """ - orig_h, orig_w = original_shape - # The amount of padding that was added - pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) - pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) - # Image height and width after padding is removed - unpad_h = current_dim - pad_y - unpad_w = current_dim - pad_x - # Rescale bounding boxes to dimension of original image - boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w - boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h - boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w - boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h - - return boxes - - -def ap_per_class(tp, conf, pred_cls, target_cls): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (list). - conf: Objectness value from 0-1 (list). - pred_cls: Predicted object classes (list). - target_cls: True object classes (list). - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes = np.unique(target_cls) - - # Create Precision-Recall curve and compute AP for each class - ap, p, r = [], [], [] - for c in tqdm.tqdm(unique_classes, desc="Computing AP"): - i = pred_cls == c - n_gt = (target_cls == c).sum() # Number of ground truth objects - n_p = i.sum() # Number of predicted objects - - if n_p == 0 and n_gt == 0: - continue - elif n_p == 0 or n_gt == 0: - ap.append(0) - r.append(0) - p.append(0) - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum() - tpc = (tp[i]).cumsum() - - # Recall - recall_curve = tpc / (n_gt + 1e-16) - r.append(recall_curve[-1]) - - # Precision - precision_curve = tpc / (tpc + fpc) - p.append(precision_curve[-1]) - - # AP from recall-precision curve - ap.append(compute_ap(recall_curve, precision_curve)) - - # Compute F1 score (harmonic mean of precision and recall) - p, r, ap = np.array(p), np.array(r), np.array(ap) - f1 = 2 * p * r / (p + r + 1e-16) - - return p, r, ap, f1, unique_classes.astype("int32") - - -def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves. - Code originally from https://github.com/rbgirshick/py-faster-rcnn. - # Arguments - recall: The recall curve (list). 
- precision: The precision curve (list). - # Returns - The average precision as computed in py-faster-rcnn. - """ - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([0.0], precision, [0.0])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def get_batch_statistics_rotated_bbox(outputs, targets, iou_threshold): - """ Compute true positives, predicted scores and predicted labels per sample """ - batch_metrics = [] - for sample_i in range(len(outputs)): - - if outputs[sample_i] is None: - continue - - output = outputs[sample_i] - pred_boxes = output[:, :6] - pred_scores = output[:, 6] - pred_labels = output[:, -1] - - true_positives = np.zeros(pred_boxes.shape[0]) - - annotations = targets[targets[:, 0] == sample_i][:, 1:] - if len(annotations) > 0: - target_labels = annotations[:, 0] - detected_boxes = [] - target_boxes = annotations[:, 1:] - - for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): - - # If targets are found break - if len(detected_boxes) == len(annotations): - break - - # Ignore if label is not one of the target labels - if pred_label not in target_labels: - continue - - iou, box_index = iou_rotated_single_vs_multi_boxes_cpu(pred_box, target_boxes).max(dim=0) - - if iou >= iou_threshold and box_index not in detected_boxes: - true_positives[pred_i] = 1 - detected_boxes += [box_index] - batch_metrics.append([true_positives, pred_scores, pred_labels]) - - return batch_metrics - - -def iou_rotated_single_vs_multi_boxes_cpu(single_box, multi_boxes): - """ - :param pred_box: Numpy array - :param target_boxes: Numpy array - :return: - """ - - s_x, s_y, s_w, s_l, s_im, s_re = single_box - s_area = s_w * s_l - s_yaw = np.arctan2(s_im, s_re) - s_conners = bev_utils.get_corners(s_x, s_y, s_w, s_l, s_yaw) - s_polygon = cvt_box_2_polygon(s_conners) - - m_x, m_y, m_w, m_l, m_im, m_re = multi_boxes.transpose(1, 0) - targets_areas = m_w * m_l - m_yaw = np.arctan2(m_im, m_re) - m_boxes_conners = get_corners_vectorize(m_x, m_y, m_w, m_l, m_yaw) - m_boxes_polygons = [cvt_box_2_polygon(box_) for box_ in m_boxes_conners] - - ious = [] - for m_idx in range(multi_boxes.shape[0]): - intersection = s_polygon.intersection(m_boxes_polygons[m_idx]).area - iou_ = intersection / (s_area + targets_areas[m_idx] - intersection + 1e-16) - ious.append(iou_) - - return torch.tensor(ious, dtype=torch.float) - - -def get_corners_vectorize(x, y, w, l, yaw): - """bev image coordinates format - vectorization - - :param x, y, w, l, yaw: [num_boxes,] - :return: num_boxes x (x,y) of 4 conners - """ - bbox2 = np.zeros((x.shape[0], 4, 2), dtype=np.float32) - cos_yaw = np.cos(yaw) - sin_yaw = np.sin(yaw) - - # front left - bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw - bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw - - # rear left - bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw - bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw - - # rear right - bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw - bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw - - # front right - bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw - bbox2[:, 3, 1] = y + w / 2 
* sin_yaw + l / 2 * cos_yaw - - return bbox2 - - -def nms_cpu(boxes, confs, nms_thresh=0.5): - """ - :param boxes: [num, 6] - :param confs: [num, num_classes] - :param nms_thresh: - :param min_mode: - :return: - """ - # order of reduce confidence (high --> low) - order = confs.argsort()[::-1] - - x, y, w, l, im, re = boxes.transpose(1, 0) - yaw = np.arctan2(im, re) - boxes_conners = get_corners_vectorize(x, y, w, l, yaw) - boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] # 4 vertices of the box - boxes_areas = w * l - - keep = [] - while order.size > 0: - idx_self = order[0] - idx_other = order[1:] - keep.append(idx_self) - over = compute_iou_nms(idx_self, idx_other, boxes_polygons, boxes_areas) - inds = np.where(over <= nms_thresh)[0] - order = order[inds + 1] - - return np.array(keep) - - -def post_processing(outputs, conf_thresh=0.95, nms_thresh=0.4): - """ - Removes detections with lower object confidence score than 'conf_thres' and performs - Non-Maximum Suppression to further filter detections. - Returns detections with shape: - (x, y, w, l, im, re, object_conf, class_score, class_pred) - """ - if type(outputs).__name__ != 'ndarray': - outputs = outputs.numpy() - # outputs shape: (batch_size, 22743, 10) - batch_size = outputs.shape[0] - # box_array: [batch, num, 6] - box_array = outputs[:, :, :6] - - # confs: [batch, num, num_classes] - confs = outputs[:, :, 6:7] * outputs[:, :, 7:] - obj_confs = outputs[:, :, 6] - - # [batch, num, num_classes] --> [batch, num] - max_conf = np.max(confs, axis=2) - max_id = np.argmax(confs, axis=2) - - bboxes_batch = [None for _ in range(batch_size)] - - for i in range(batch_size): - argwhere = max_conf[i] > conf_thresh - l_box_array = box_array[i, argwhere, :] - l_obj_confs = obj_confs[i, argwhere, :] - l_max_conf = max_conf[i, argwhere] - l_max_id = max_id[i, argwhere] - - keep = nms_cpu(l_box_array, l_max_conf, nms_thresh=nms_thresh) - - if (keep.size > 0): - l_box_array = l_box_array[keep, :] - l_obj_confs = l_obj_confs[keep].reshape(-1, 1) - l_max_conf = l_max_conf[keep].reshape(-1, 1) - l_max_id = l_max_id[keep].reshape(-1, 1) - bboxes_batch[i] = np.concatenate((l_box_array, l_obj_confs, l_max_conf, l_max_id), axis=-1) - return bboxes_batch - - -def post_processing_v2(prediction, conf_thresh=0.95, nms_thresh=0.4): - """ - Removes detections with lower object confidence score than 'conf_thres' and performs - Non-Maximum Suppression to further filter detections. 
- Returns detections with shape: - (x, y, w, l, im, re, object_conf, class_score, class_pred) - """ - output = [None for _ in range(len(prediction))] - for image_i, image_pred in enumerate(prediction): - # Filter out confidence scores below threshold - image_pred = image_pred[image_pred[:, 6] >= conf_thresh] - # If none are remaining => process next image - if not image_pred.size(0): - continue - # Object confidence times class confidence - score = image_pred[:, 6] * image_pred[:, 7:].max(dim=1)[0] - # Sort by it - image_pred = image_pred[(-score).argsort()] - class_confs, class_preds = image_pred[:, 7:].max(dim=1, keepdim=True) - detections = torch.cat((image_pred[:, :7].float(), class_confs.float(), class_preds.float()), dim=1) - # Perform non-maximum suppression - keep_boxes = [] - while detections.size(0): - # large_overlap = rotated_bbox_iou(detections[0, :6].unsqueeze(0), detections[:, :6], 1.0, False) > nms_thres # not working - large_overlap = iou_rotated_single_vs_multi_boxes_cpu(detections[0, :6], detections[:, :6]) > nms_thresh - label_match = detections[0, -1] == detections[:, -1] - # Indices of boxes with lower confidence scores, large IOUs and matching labels - invalid = large_overlap & label_match - weights = detections[invalid, 6:7] - # Merge overlapping bboxes by order of confidence - detections[0, :6] = (weights * detections[invalid, :6]).sum(0) / weights.sum() - keep_boxes += [detections[0]] - detections = detections[~invalid] - if len(keep_boxes) > 0: - output[image_i] = torch.stack(keep_boxes) - - return output diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py b/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py deleted file mode 100644 index 8a257af0e..000000000 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_utils/kitti_config.py +++ /dev/null @@ -1,66 +0,0 @@ -import numpy as np - -class_list = ["Car", "Pedestrian", "Cyclist"] - -CLASS_NAME_TO_ID = { - 'Car': 0, - 'Pedestrian': 1, - 'Cyclist': 2, - 'Van': 0, - 'Person_sitting': 1, -} - -# Front side (of vehicle) Point Cloud boundary for BEV -boundary = { - "minX": 0, - "maxX": 50, - "minY": -25, - "maxY": 25, - "minZ": -1.23, #-0.73, #-2.73, # -(height to ground + 1) - "maxZ": 2.77 #3.27 #1.27 -} - -# Back back (of vehicle) Point Cloud boundary for BEV -boundary_back = { - "minX": -50, - "maxX": 0, - "minY": -25, - "maxY": 25, - "minZ": -2.73, - "maxZ": 1.27 -} - -BEV_WIDTH = 608 # across y axis -25m ~ 25m -BEV_HEIGHT = 608 # across x axis 0m ~ 50m - -DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT - -colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0]] - -# Following parameters are calculated as an average from KITTI dataset for simplicity -##################################################################################### -Tr_velo_to_cam = np.array([ - [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], - [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02], - [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], - [0, 0, 0, 1] -]) - -# cal mean from train set -R0 = np.array([ - [0.99992475, 0.00975976, -0.00734152, 0], - [-0.0097913, 0.99994262, -0.00430371, 0], - [0.00729911, 0.0043753, 0.99996319, 0], - [0, 0, 0, 1] -]) - -P2 = np.array([[719.787081, 0., 608.463003, 44.9538775], - [0., 719.787081, 174.545111, 0.1066855], - [0., 0., 1., 3.0106472e-03], - [0., 0., 0., 0] - ]) - -R0_inv = np.linalg.inv(R0) -Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam) 
-P2_inv = np.linalg.pinv(P2) -##################################################################################### From 662df8bef017707fa7d929eb57b401959ee18e5a Mon Sep 17 00:00:00 2001 From: Gueren Sanford Date: Thu, 11 Apr 2024 23:23:08 -0500 Subject: [PATCH 3/8] Combined both models into one node and cleaned up the mmdet3d model code. --- .../complex_yolov4/utils/kitti_bev_utils.py | 14 +- .../complex_yolov4_model.py | 64 ++++---- .../lidar_detection_node.py | 48 +++--- .../object_detector_3d/lidar_objdet3d_node.py | 109 ------------- .../object_detector_3d/mmdetection3d_model.py | 154 ++++++++++++++++++ 5 files changed, 215 insertions(+), 174 deletions(-) delete mode 100644 src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py create mode 100644 src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py index 603f2a87d..2cdde31e5 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py @@ -127,20 +127,20 @@ def get_corners_3d(x, y, z, w, l, h, yaw): return bev_corners -def inverse_yolo_target(targets, bc): +def inverse_yolo_target(targets, img_size, bc): labels = [] for t in targets: - c, c_conf, y, x, l, w, im, re = t - z, h = -0.55, 1.5 + y, x, l, w, im, re, c_conf, *_, c = t # swaps x and y + z, h = 0.0, 1.5 if c == 1: h = 1.8 elif c == 2: h = 1.4 - y = y * (bc["maxY"] - bc["minY"]) + bc["minY"] - x = x * (bc["maxX"] - bc["minX"]) + bc["minX"] - w = w * (bc["maxY"] - bc["minY"]) - l = l * (bc["maxX"] - bc["minX"]) + y = (y / img_size) * (bc["maxY"] - bc["minY"]) + bc["minY"] + x = (x / img_size) * (bc["maxX"] - bc["minX"]) + bc["minX"] + w = (w / img_size) * (bc["maxY"] - bc["minY"]) + l = (l / img_size) * (bc["maxX"] - bc["minX"]) w -= 0.3 l -= 0.3 labels.append([c, c_conf, x, y, z, w, l, h, - np.arctan2(im, re) - 2 * np.pi]) diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py index bef5ce5ef..4a334cb63 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py +++ b/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py @@ -1,11 +1,11 @@ """ Package: object_detector_3d -Filename: lidar_detection_model.py +Filename: complex_yolov4_model.py Author: Gueren Sanford Email: guerensanford@gmail.com Copyright: 2021, Nova UTD License: MIT License -Use this class to make object detections with LiDAR data in the format +Use the ComplexYOLOv4Model class to make object detections with LiDAR data in the format [x,y,z,intensity]. We use the model Complex Yolo to make the predictions, which requires a birds eye view map of the LiDAR data. 
With the predictions, feed it into the postprocess to filter out and @@ -18,42 +18,40 @@ import ros2_numpy as rnp import torch -# Local Import +# Model Imports from object_detector_3d.complex_yolov4.model.darknet2pytorch import Darknet from object_detector_3d.complex_yolov4.utils import kitti_bev_utils from object_detector_3d.complex_yolov4.utils.evaluation_utils import post_processing_v2 import object_detector_3d.complex_yolov4.utils.kitti_config as cnf -# Message definitions +# Message Imports from geometry_msgs.msg import Point from sensor_msgs.msg import PointCloud2 from navigator_msgs.msg import BoundingBox3D, Object3D, Object3DArray # Paths to pretrained model and config -config_file = './data/perception/configs/complex_yolov4.cfg' -model_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth' +config_path = './data/perception/configs/complex_yolov4.cfg' +checkpoint_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth' class ComplexYOLOv4Model(): - def __init__(self, device: torch.device, use_giou_loss: bool = True): + def __init__(self, device: torch.device): """! Initializes the node. @param device[torch.device] The device the model will run on - @param use_giou_loss[bool] Use generalized intersection over - union as a loss function """ self.img_size = 608 # Determined by the birds eye view setup self.device = device # Setup for the Complex Yolo model - self.model = Darknet(cfgfile=config_file, - use_giou_loss=use_giou_loss) - self.model.load_state_dict(torch.load(model_path, + self.model = Darknet(cfgfile=config_path, + use_giou_loss=True) # generalized intersection over union + self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device)) self.model.to(device=self.device) def preprocess(self, lidar_msg: PointCloud2): """! Converts ROS LiDAR message into the bird's eye view input - tensor for the model. - @param lidar_msg[PointCloud2] The ros2 lidar dmessage data + tensor for the model. + @param lidar_msg[PointCloud2] The ros2 lidar message data formatted as a PointCloud2 message. @return torch.Tensor A 608 x 608 birds eye view image of the point cloud data. Red = Intensity, Green = Height, Blue = @@ -70,7 +68,7 @@ def preprocess(self, lidar_msg: PointCloud2): lidar_np['y'].flatten(), lidar_np['z'].flatten(), lidar_np['reflectivity'].flatten()]) - # Tranforms array to shape [(x y z r), n] + # Tranforms array to shape [n, (x y z r)] lidar_pcd = np.transpose(lidar_pcd) lidar_pcd[:, 3] /= 255.0 # Normalizes refl. @@ -80,29 +78,29 @@ def preprocess(self, lidar_msg: PointCloud2): # Turns the point cloud into a bev rgb map rgb_map = kitti_bev_utils.makeBVFeature(reduced_lidar, cnf.DISCRETIZATION, cnf.boundary) + # Turned into a tensor for the model + inputs = torch.tensor(rgb_map, device=self.device).float() \ + .unsqueeze(0) - return rgb_map + return inputs - def predict(self, input_bev: torch.Tensor): + def predict(self, inputs: torch.Tensor): """! Uses the tensor from the preprocess fn to return the bounding boxes for the objects. - @param input_bev[torch.Tensor] The result of the preprocess + @param inputs[torch.Tensor] The result of the preprocess function. @return torch.Tensor The output of the model in the shape [1, boxes, x y w l classes], where the classes are Car, Pedestrian, Cyclist, Van, Person_sitting. 
""" # The model outputs [1, boxes, x y w l classes] - return self.model(input_bev) + return self.model(inputs) - def postprocess(self, rgb_map: torch.Tensor, - predictions: torch.Tensor, conf_thresh: int, + def postprocess(self, predictions: torch.Tensor, conf_thresh: int, nms_thresh: int): - """! - NEEDS OPTIMIZATION - - @param input_bev[torch.Tensor] The result of the preprocess - function. + """! Converts the model's predictions into a ROS2 message, and filters + out boxes below the confidence threshold and above the intersection + over union threshold. @param predictions[torch.Tensor] The output of the model in the shape [1, boxes, x y w l classes], where the classes are Car, Pedestrian, Cyclist, Van, Person_sitting. @@ -110,9 +108,8 @@ def postprocess(self, rgb_map: torch.Tensor, for bounding boxes. @param nms_thresh[int] The maximum accepted intersection over union value for bounding boxes. - @return navigator_msgs.msg.Object3DArray A ros2 message ready - to be published. Before publishing, the header needs to be - attached. + @return Object3DArray A ros2 message ready to be published. + Before publishing, the header needs to be attached. """ # Filters boxes under conf_thresh and over nms_thresh detections = post_processing_v2(predictions, conf_thresh=conf_thresh, @@ -122,13 +119,8 @@ def postprocess(self, rgb_map: torch.Tensor, if detections is None: return None - predictions = [] - for x, y, w, l, im, re, cls_conf, *_, cls_pred in detections: - predictions.append([cls_pred, cls_conf, x / self.img_size, y / self.img_size, - w / self.img_size, l / self.img_size, im, re]) - # Returns shape [c, c_conf, x, y, z, w, l, h, yaw] - predictions = kitti_bev_utils.inverse_yolo_target(np.array(predictions), cnf.boundary) # IMPROVE + predictions = kitti_bev_utils.inverse_yolo_target(detections, self.img_size, cnf.boundary) # Turns the predictions into array of Object3D msg types object_3d_array = Object3DArray() @@ -141,7 +133,7 @@ def postprocess(self, rgb_map: torch.Tensor, bounding_box.coordinates = prediction[2:] # Returns the corners in the order specified by BoundingBox3D msg - corners_3d = kitti_bev_utils.get_corners_3d(*bounding_box.coordinates) # IMPROVE get_corners + corners_3d = kitti_bev_utils.get_corners_3d(*bounding_box.coordinates) # Fills the Point corners array for i, corner_3d in enumerate(corners_3d): bounding_box.corners[i] = Point(x=corner_3d[0], y=corner_3d[1], z=corner_3d[2]) diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py index 433d51aac..0774088da 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py +++ b/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py @@ -15,6 +15,7 @@ # Local Import from object_detector_3d.complex_yolov4_model import ComplexYOLOv4Model +from object_detector_3d.mmdetection3d_model import MMDetection3DModel # Ros Imports import rclpy @@ -36,9 +37,9 @@ def __init__(self): # Declare default ROS2 node parameters self.declare_parameter('device', 'cuda:1') # cpu | cuda:{gpu num} - self.declare_parameter('conf_thresh', 0.4) # 0.0 - 1.0 + self.declare_parameter('model', 'mmdetection3d') # mmdetection3d | complex_yolo + self.declare_parameter('conf_thresh', 0.7) # 0.0 - 1.0 self.declare_parameter('nms_thresh', 0.2) # 0.0 - 1.0 - self.declare_parameter('down_sample', False) # Get ROS2 parameters self.device = torch.device(self.get_parameter('device') \ @@ -47,21 +48,26 @@ def 
__init__(self): .get_parameter_value().double_value self.nms_thresh = self.get_parameter('nms_thresh') \ .get_parameter_value().double_value - self.down_sample = self.get_parameter('down_sample') \ - .get_parameter_value().bool_value + model_type = self.get_parameter('model') \ + .get_parameter_value().string_value - # Instantiates the model - self.model = ComplexYOLOv4Model(self.device) - self.stamp = Time() # For published msgs + # Declared for publishing msgs + self.stamp = Time() - # Subcribes to filtered lidar + # Instantiates the model type + if model_type == 'complex_yolo': + self.model = ComplexYOLOv4Model(self.device) + else: + self.model = MMDetection3DModel(str(self.device)) + + # Subscribes to raw lidar data self.lidar_sub = self.create_subscription( PointCloud2, '/lidar', self.lidar_callback, 10) # Subscribes to clock for headers self.clock_sub = self.create_subscription( Clock, '/clock', self.clock_cb, 10) - # Instantiates publishers + # Publishes array of 3D objects self.objects3d_pub = self.create_publisher( Object3DArray, 'detected/objects3d', 10) @@ -70,30 +76,28 @@ def clock_cb(self, msg): Updates the clock for message headers. @param msg[Clock] The clock message. """ + self.stamp.sec = msg.clock.sec self.stamp.nanosec = msg.clock.nanosec def lidar_callback(self, lidar_msg: PointCloud2): - """! - For now, just prints the lidar point cloud and turns it into a numpy array. - @param lidar_msg[PointCloud2] The filtered lidar point cloud. + """! Uses the lidar msg and a 3D object detection model to + form 3D bounding boxes around objects. Then, publishes + the boxes to a 'detected' topic. + @param lidar_msg[PointCloud2] The raw lidar point cloud. """ - # 608x608 birds eye view rgb map of points - rgb_map = self.model.preprocess(lidar_msg) - # Turned into a tensor for the model - input_bev = torch.tensor(rgb_map, device=self.device).float() \ - .unsqueeze(0) - - # The model outputs [1, boxes, x y w l classes] - outputs = self.model.predict(input_bev) + # Values determined by model + inputs = self.model.preprocess(lidar_msg) + predictions = self.model.predict(inputs) - objecst3d_array = self.model.postprocess(rgb_map, outputs, + # Object3DArray ROS2 msg w/ bounding box data + objecst3d_array = self.model.postprocess(predictions, self.conf_thresh, self.nms_thresh) # Return if no detections made if objecst3d_array is None: - return + objecst3d_array = Object3DArray() # Attaches the header to the message objecst3d_array.header.stamp = self.stamp diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py b/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py deleted file mode 100644 index 723aa40a4..000000000 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_objdet3d_node.py +++ /dev/null @@ -1,109 +0,0 @@ -import rclpy -from rclpy.node import Node -from sensor_msgs.msg import PointCloud2 -import ros2_numpy as rnp -import numpy as np -from mmdet3d.apis import init_model, inference_detector -from navigator_msgs.msg import Object3D, Object3DArray -from geometry_msgs.msg import Point -import random - - -CLASS2LABEL = { - 'Pedestrian': 0, - 'Cyclist': 1, - 'Car': 2 -} - - -class LidarObjectDetector3DNode(Node): - - def __init__(self): - super().__init__('lidar_object_detector_3d_node') - - # declare parameters - self.declare_parameter("config_path", "/navigator/data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py") - self.declare_parameter("checkpoint_path", 
"/navigator/data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth") - self.declare_parameter("device", "cuda:0") - self.declare_parameter("translate_height", -1.5) - - # get parameters - config_path = self.get_parameter("config_path").get_parameter_value().string_value - checkpoint_path = self.get_parameter("checkpoint_path").get_parameter_value().string_value - device = self.get_parameter("device").get_parameter_value().string_value - self.translate_height = self.get_parameter("translate_height").get_parameter_value().double_value - - self.model = init_model(config=config_path, checkpoint=checkpoint_path, device=device) - - self.lidar_subscription = self.create_subscription( - msg_type = PointCloud2, - topic = 'lidar', - callback = self.lidar_callback, - qos_profile = 10 - ) - - self.objects_publisher = self.create_publisher( - Object3DArray, - 'objdet3d_raw', - 10 - ) - - def lidar_callback(self, lidar_msg: PointCloud2): - pcd = rnp.numpify(lidar_msg) - pcd = pcd[::2] # this reduces the vertical res to 64, it appears to work when running against the parade bag - pcd = np.array([ pcd['x'].flatten(), pcd['y'].flatten(), pcd['z'].flatten(), pcd['reflectivity'].flatten()]).T - pcd[:, 3] /= 255.0 - pcd[:, 2] += self.translate_height - - results, _ = inference_detector(model=self.model, pcds=pcd) - bounding_boxes = results.pred_instances_3d.bboxes_3d - bbox_corners = results.pred_instances_3d.bboxes_3d.corners.cpu().numpy() - labels = results.pred_instances_3d.labels_3d - scores = results.pred_instances_3d.scores_3d - - object_instance_array = Object3DArray() - for bounding_box, corners, label, score in zip(bounding_boxes, bbox_corners, labels, scores): - object_instance = Object3D() - - x, y, z, x_size, y_size, z_size, yaw = bounding_box - object_instance.bounding_box.coordinates[0] = float(x) - object_instance.bounding_box.coordinates[1] = float(y) - object_instance.bounding_box.coordinates[2] = float(z) - self.translate_height - object_instance.bounding_box.coordinates[3] = float(x_size) - object_instance.bounding_box.coordinates[4] = float(y_size) - object_instance.bounding_box.coordinates[5] = float(z_size) - object_instance.bounding_box.coordinates[6] = float(yaw) - - corners[:,2] -= self.translate_height - x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0 = corners - object_instance.bounding_box.corners[3] = Point(x=float(x0y0z0[0]), y=float(x0y0z0[1]), z=float(x0y0z0[2])) - object_instance.bounding_box.corners[2] = Point(x=float(x0y0z1[0]), y=float(x0y0z1[1]), z=float(x0y0z1[2])) - object_instance.bounding_box.corners[1] = Point(x=float(x0y1z1[0]), y=float(x0y1z1[1]), z=float(x0y1z1[2])) - object_instance.bounding_box.corners[0] = Point(x=float(x0y1z0[0]), y=float(x0y1z0[1]), z=float(x0y1z0[2])) - object_instance.bounding_box.corners[7] = Point(x=float(x1y0z0[0]), y=float(x1y0z0[1]), z=float(x1y0z0[2])) - object_instance.bounding_box.corners[6] = Point(x=float(x1y0z1[0]), y=float(x1y0z1[1]), z=float(x1y0z1[2])) - object_instance.bounding_box.corners[5] = Point(x=float(x1y1z1[0]), y=float(x1y1z1[1]), z=float(x1y1z1[2])) - object_instance.bounding_box.corners[4] = Point(x=float(x1y1z0[0]), y=float(x1y1z0[1]), z=float(x1y1z0[2])) - - object_instance.label = CLASS2LABEL[self.model.dataset_meta['classes'][label]] - object_instance.confidence_score = float(score) - object_instance.id = random.randint(0, 2**16-1) - object_instance_array.objects.append(object_instance) - - object_instance_array.header.stamp = 
lidar_msg.header.stamp - object_instance_array.header.frame_id = lidar_msg.header.frame_id - self.objects_publisher.publish(object_instance_array) - - -def main(args=None): - rclpy.init(args=args) - - lidar_objdet3d_node = LidarObjectDetector3DNode() - rclpy.spin(lidar_objdet3d_node) - - lidar_objdet3d_node.destroy_node() - rclpy.shutdown() - - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py b/src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py new file mode 100644 index 000000000..e868cc94e --- /dev/null +++ b/src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py @@ -0,0 +1,154 @@ +""" +Package: object_detector_3d +Filename: mmdetection3d_model.py +Author: Gueren Sanford, Ragib Arnab +Email: guerensanford@gmail.com +Copyright: 2021, Nova UTD +License: MIT License +Use the MMDetection3D class to make object detections with LiDAR data in the format +[x,y,z,intensity]. We use the model MMDetection3D with PointPillars to make the +predictions. With the predictions, feed it into the postprocess to filter out and +format the results. +""" + +# Python Imports +import random +import numpy as np +import ros2_numpy as rnp +from mmcv.ops import nms_rotated + +# Model Imports +from mmdet3d.apis import init_model, inference_detector +from mmdet3d.structures.det3d_data_sample import Det3DDataSample + +# Message Imports +from geometry_msgs.msg import Point +from sensor_msgs.msg import PointCloud2 +from navigator_msgs.msg import BoundingBox3D, Object3D, Object3DArray + +# Paths to pretrained model and config +config_path = './data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py' +checkpoint_path = './data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth' + +# dict of class/label association +CLASS2LABEL = { 'Pedestrian': 0, 'Cyclist': 1, 'Car': 2 } + +class MMDetection3DModel(): + def __init__(self, device: str): + """! Initializes the node. + @param device[str] The device the model will run on + """ + + # Declare model attributes + self.translate_height = -1.5 + self.device = device + + # Setup for the Complex Yolo model + self.model = init_model(config=config_path, checkpoint=checkpoint_path, device=device) + + def preprocess(self, lidar_msg: PointCloud2): + """! Converts ROS LiDAR message into a numpy for the model. + @param lidar_msg[PointCloud2] The ros2 lidar message data + formatted as a PointCloud2 message. + @return numpy.ndarray An array of point cloud data in the shape + [n, (x, y, z, refl)] + """ + + # Point cloud byte stream -> np array with all fields and dtypes + lidar_np = rnp.numpify(lidar_msg) + + # Reduces the vertical res from 128 to 64 + lidar_np = lidar_np[::2] # matches Kitti dataset + + # Combines necessary lidar data into an array [n, n, n, n]] + lidar_pcd = np.array([ # Avoids np array dtype issues + lidar_np['x'].flatten(), + lidar_np['y'].flatten(), + lidar_np['z'].flatten(), + lidar_np['reflectivity'].flatten()]) + + # Tranforms array to shape [n, (x y z r)] + lidar_pcd = np.transpose(lidar_pcd) + lidar_pcd[:, 3] /= 255.0 # Normalizes refl. + lidar_pcd[:, 2] += self.translate_height + + return lidar_pcd + + def predict(self, inputs: np.ndarray): + """! Uses the tensor from the preprocess fn to return the + bounding boxes for the objects. + @param inputs[Det3DDataSample] The result of the preprocess + function. 
+ @return Det3DDataSample You will see a list of Det3DDataSample, + and the predictions are in the pred_instances_3d, indicating + the detected bounding boxes, labels, and scores. The classes + are Car, Pedestrian, Cyclist, Van, Person_sitting. + """ + # Outputs predictions in format of Det3DDataSample class + predictions, _ = inference_detector(model=self.model, pcds=inputs) + + return predictions + + def postprocess(self, predictions: Det3DDataSample, conf_thresh: int, + nms_thresh: int): + """! Converts the model's predictions into a ROS2 message, and filters + out boxes below the confidence threshold and above the intersection + over union threshold. + @param predictions[Det3DDataSample] The output from the ppredict + function. + @param conf_thresh[int] The mininum confidence value accepted + for bounding boxes. + @param nms_thresh[int] The maximum accepted intersection over + union value for bounding boxes. + @return Object3DArray A ros2 message ready to be published. + Before publishing, the header needs to be attached. + """ + + bounding_boxes = predictions.pred_instances_3d.bboxes_3d + bbox_corners = predictions.pred_instances_3d.bboxes_3d.corners + labels = predictions.pred_instances_3d.labels_3d + scores = predictions.pred_instances_3d.scores_3d + detections = [bounding_boxes.tensor.double(), bbox_corners, labels, scores] + + # Filters out scores less than the conficdence threshold + indices = scores >= conf_thresh + detections = [tensor[indices] for tensor in detections] + + # Skip if no detections made + if len(detections[0]) == 0: + return None + + # Filters out boxes overlapping above nms threshold + bounding_boxes2d = detections[0][:,[0,1,3,4,6]] # [center_x, center_y, width, height, rotation] + _, indices = nms_rotated(bounding_boxes2d, detections[3], iou_threshold=nms_thresh) + detections = [tensor[indices] for tensor in detections] + + # Turns the detections into array of Object3D msg types + object_3d_array = Object3DArray() + for bounding_box, corners, label, score in zip(*detections): + # Defines the custom ros2 msgs + bounding_box_3d = BoundingBox3D() + object_3d = Object3D() + + # Shifts height of boxe down + bounding_box[2] -= self.translate_height + corners[:,2] -= self.translate_height + + # Coordinates needs x, y, z, x_size, y_size, z_size, yaw + bounding_box_3d.coordinates = bounding_box.double().cpu().numpy() + + # Converts detection corners into BoundingBox3D msg format + index = 3 + for corner in corners: + bounding_box_3d.corners[index % 8] = Point( + x=float(corner[0]), y=float(corner[1]), z=float(corner[2])) + index -= 1 + + object_3d.label = int(label) + object_3d.confidence_score = float(score) + object_3d.id = random.randint(0, 2**16-1) + object_3d.bounding_box = bounding_box_3d + + object_3d_array.objects.append(object_3d) + + return object_3d_array \ No newline at end of file From 4f5b38d3119d2c07bafbb3dd38c35b715c49b240 Mon Sep 17 00:00:00 2001 From: Gueren Sanford Date: Mon, 15 Apr 2024 15:51:25 -0500 Subject: [PATCH 4/8] Modified documentation and launch files. 
--- data/navigator_default.rviz | 91 ++++++++++++++++--- docs/Perception/lidar_detection.md | 77 ++++++++++++++++ .../{lidar.md => lidar_processor.md} | 0 docs/Perception/object_detector_3d.md | 34 ------- docs/Perception/static_occupancy_node.md | 6 +- launches/launch.object3d.py | 73 +++++++++++++++ launches/launch.perception.py | 17 +++- launches/object_detection_tracking.launch.py | 56 ------------ param/perception/lidar_objdet3d_params.yaml | 5 - .../lidar_detection}/__init__.py | 0 .../lidar_detection}/complex_yolov4/README.md | 0 .../complex_yolov4/__init__.py | 0 .../complex_yolov4/model/__init__.py | 0 .../complex_yolov4/model/darknet2pytorch.py | 6 +- .../complex_yolov4/model/darknet_utils.py | 2 +- .../complex_yolov4/model/model_utils.py | 0 .../complex_yolov4/model/yolo_layer.py | 4 +- .../complex_yolov4/utils/__init__.py | 0 .../complex_yolov4/utils/evaluation_utils.py | 2 +- .../complex_yolov4/utils/kitti_bev_utils.py | 2 +- .../complex_yolov4/utils/kitti_config.py | 0 .../complex_yolov4/utils/torch_utils.py | 0 .../lidar_detection}/complex_yolov4_model.py | 15 ++- .../lidar_detection}/lidar_detection_node.py | 38 +++++--- .../lidar_detection}/mmdetection3d_model.py | 8 +- .../package.xml | 5 +- .../resource/lidar_detection} | 0 src/perception/lidar_detection/setup.cfg | 4 + .../setup.py | 9 +- .../multi_object_tracker_3d/mot3d_node.py | 4 +- src/perception/object_detector_3d/setup.cfg | 4 - .../object_detector_3d/test/test_copyright.py | 25 ----- .../object_detector_3d/test/test_flake8.py | 25 ----- .../object_detector_3d/test/test_pep257.py | 23 ----- .../visualizer/object_visualizer_node.py | 9 +- 35 files changed, 308 insertions(+), 236 deletions(-) create mode 100644 docs/Perception/lidar_detection.md rename docs/Perception/{lidar.md => lidar_processor.md} (100%) delete mode 100644 docs/Perception/object_detector_3d.md create mode 100644 launches/launch.object3d.py delete mode 100644 launches/object_detection_tracking.launch.py delete mode 100644 param/perception/lidar_objdet3d_params.yaml rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/__init__.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/README.md (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/__init__.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/model/__init__.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/model/darknet2pytorch.py (98%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/model/darknet_utils.py (99%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/model/model_utils.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/model/yolo_layer.py (98%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/utils/__init__.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/utils/evaluation_utils.py (98%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/utils/kitti_bev_utils.py (98%) rename 
src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/utils/kitti_config.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4/utils/torch_utils.py (100%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/complex_yolov4_model.py (91%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/lidar_detection_node.py (72%) rename src/perception/{object_detector_3d/object_detector_3d => lidar_detection/lidar_detection}/mmdetection3d_model.py (97%) rename src/perception/{object_detector_3d => lidar_detection}/package.xml (85%) rename src/perception/{object_detector_3d/resource/object_detector_3d => lidar_detection/resource/lidar_detection} (100%) create mode 100644 src/perception/lidar_detection/setup.cfg rename src/perception/{object_detector_3d => lidar_detection}/setup.py (66%) delete mode 100644 src/perception/object_detector_3d/setup.cfg delete mode 100644 src/perception/object_detector_3d/test/test_copyright.py delete mode 100644 src/perception/object_detector_3d/test/test_flake8.py delete mode 100644 src/perception/object_detector_3d/test/test_pep257.py diff --git a/data/navigator_default.rviz b/data/navigator_default.rviz index 6f017558d..745aadb83 100644 --- a/data/navigator_default.rviz +++ b/data/navigator_default.rviz @@ -8,8 +8,9 @@ Panels: - /Routes Paths1 - /Cameras1 - /Grids1 + - /3D Objects1 Splitter Ratio: 0.5 - Tree Height: 960 + Tree Height: 423 - Class: rviz_common/Selection Name: Selection - Class: rviz_common/Tool Properties @@ -62,20 +63,53 @@ Visualization Manager: Expand Link Details: false Expand Tree: false Link Tree Style: Links in Alphabetic Order + arduino_imu: + Alpha: 1 + Show Axes: false + Show Trail: false base_link: Alpha: 1 Show Axes: false Show Trail: false Value: true - hero: + gnss: Alpha: 1 Show Axes: false Show Trail: false - model_link: + imu: + Alpha: 1 + Show Axes: false + Show Trail: false + lidar_left: + Alpha: 1 + Show Axes: false + Show Trail: false + Value: true + lidar_right: Alpha: 1 Show Axes: false Show Trail: false Value: true + model_link: + Alpha: 1 + Show Axes: false + Show Trail: false + os_sensor: + Alpha: 1 + Show Axes: false + Show Trail: false + radar: + Alpha: 1 + Show Axes: false + Show Trail: false + zed2_camera_center: + Alpha: 1 + Show Axes: false + Show Trail: false + zed2_left_camera_optical_frame: + Alpha: 1 + Show Axes: false + Show Trail: false Mass Properties: Inertia: false Mass: false @@ -237,9 +271,9 @@ Visualization Manager: Enabled: true Invert Rainbow: false Max Color: 255; 255; 255 - Max Intensity: 0.9825243353843689 + Max Intensity: 5699 Min Color: 0; 73; 112 - Min Intensity: 0.7125484347343445 + Min Intensity: 1 Name: Filtered LiDAR Position Transformer: XYZ Selectable: true @@ -537,6 +571,35 @@ Visualization Manager: Reliability Policy: Reliable Value: /radar/radar_viz Value: false + - Class: rviz_common/Group + Displays: + - Class: rviz_default_plugins/MarkerArray + Enabled: true + Name: Detected + Namespaces: + object_bounding_box: true + object_tag: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /viz/detected/objects3d + Value: true + - Class: rviz_default_plugins/MarkerArray + Enabled: false + Name: Tracked + Namespaces: + {} + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: 
Reliable + Value: /viz/tracked/objects3d + Value: false + Enabled: true + Name: 3D Objects Enabled: true Global Options: Background Color: 222; 222; 222 @@ -583,25 +646,25 @@ Visualization Manager: Views: Current: Class: rviz_default_plugins/Orbit - Distance: 40.92072296142578 + Distance: 17.278823852539062 Enable Stereo Rendering: Stereo Eye Separation: 0.05999999865889549 Stereo Focal Distance: 1 Swap Stereo Eyes: false Value: false Focal Point: - X: -1.3190577030181885 - Y: 0.0719301849603653 - Z: 0 + X: 5.467041015625 + Y: -0.561253547668457 + Z: 5.697730541229248 Focal Shape Fixed Size: true Focal Shape Size: 0.05000000074505806 Invert Z Axis: false Name: Current View Near Clip Distance: 0.009999999776482582 - Pitch: 0.649796724319458 + Pitch: 0.474796861410141 Target Frame: Value: Orbit (rviz_default_plugins) - Yaw: 2.9573850631713867 + Yaw: 3.142385721206665 Saved: ~ Window Geometry: Birds Eye: @@ -616,16 +679,16 @@ Window Geometry: collapsed: false Displays: collapsed: false - Height: 1373 + Height: 836 Hide Left Dock: false Hide Right Dock: true - QMainWindow State: 000000ff00000000fd000000040000000000000156000003fbfc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005c00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001c8000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afc0000003b000003fb000000c700fffffffa000000010100000002fb0000000c00430061006d0065007200610000000000ffffffff0000000000000000fb000000100044006900730070006c0061007900730100000000000001560000015600fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000022c00000290fc0200000007fb0000000c00430061006d006500720061010000003b0000022f0000000000000000fb0000001400430065006e0074006500720020005200470042000000019b000006fb0000000000000000fb0000001000530065006d0061006e00740069006300000001d9000001b00000000000000000fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003b00000290000000a000fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b20000000000000000fb0000001800460072006f006e0074002000430061006d0065007200610000000000ffffffff000000000000000000000002000003c0000000d9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000088000000106fc0100000013fb0000001000430061006d0065007200610020003301000000000000020b0000006f00fffffffb0000001000430061006d006500720061002000300100000211000002110000006f00fffffffb0000001000430061006d0065007200610020003201000004280000021b0000006f00fffffffb0000001000430061006d006500720061002000310100000649000002370000006f00fffffffb00000012004200690072006400730020004500790065000000000000000bea0000006c00fffffffb00000014004200690072006400270073002000450079006501000000000000031e0000000000000000fb000000100052004700420020004c006500660074010000032a000002e00000000000000000fb00000014005200470042002000430065006e0074006500720100000616000002d50000000000000000fb0000001200520047004200200052006900670068007401000008f7000002f30000000000000000fb0000001a00530065006d0061006e0074006900630020004c0065006600740000000000000002780000000000000000fb0000001c00530065006d00610
06e00740069006300200052006900670068007400000001de000001dc0000000000000000fb000000100052004700420020004c00650066007401000002d3000001dd0000000000000000fc000004b6000000ea0000000000fffffffa000000000200000002fb0000001c00530065006d0061006e0074006900630020005200690067006800740000000000ffffffff0000000000000000fb0000001200520047004200200052006900670068007401000008a2000001f50000000000000000fb00000014004200690072006400270073002000450079006500000004d1000001250000000000000000fb0000000a004400650070007400680000000000000007800000000000000000fb0000000800540069006d00650100000000000004420000000000000000fb0000000800540069006d00650100000000000004500000000000000000fb0000000a0049006d0061006700650000000000ffffffff0000000000000000fb0000000a0049006d0061006700650000000000ffffffff000000000000000000000724000003fb00000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + QMainWindow State: 000000ff00000000fd000000040000000000000156000001e2fc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005c00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001c8000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afc0000003b000001e2000000c700fffffffa000000010100000002fb0000000c00430061006d0065007200610000000000ffffffff0000000000000000fb000000100044006900730070006c0061007900730100000000000001560000015600fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000022c00000290fc0200000007fb0000000c00430061006d006500720061010000003b0000022f0000000000000000fb0000001400430065006e0074006500720020005200470042000000019b000006fb0000000000000000fb0000001000530065006d0061006e00740069006300000001d9000001b00000000000000000fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003b00000290000000a000fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b20000000000000000fb0000001800460072006f006e0074002000430061006d0065007200610000000000ffffffff000000000000000000000002000003c0000000d9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000005f600000106fc0100000013fb0000001000430061006d0065007200610020003301000000000000016d0000006f00fffffffb0000001000430061006d006500720061002000300100000173000001720000006f00fffffffb0000001000430061006d0065007200610020003201000002eb000001790000006f00fffffffb0000001000430061006d00650072006100200031010000046a0000018c0000006f00fffffffb00000012004200690072006400730020004500790065000000000000000bea0000006c00fffffffb00000014004200690072006400270073002000450079006501000000000000031e0000000000000000fb000000100052004700420020004c006500660074010000032a000002e00000000000000000fb00000014005200470042002000430065006e0074006500720100000616000002d50000000000000000fb0000001200520047004200200052006900670068007401000008f7000002f30000000000000000fb0000001a00530065006d0061006e0074006900630020004c0065006600740000000000000002780000000000000000fb0000001c00530065006d0061006e00740069006300200052006900670068007400000001de000001dc0000000000000000fb000000100052004700420020004c00650066007401000002d3000001dd0000000000000000fc0000
04b6000000ea0000000000fffffffa000000000200000002fb0000001c00530065006d0061006e0074006900630020005200690067006800740000000000ffffffff0000000000000000fb0000001200520047004200200052006900670068007401000008a2000001f50000000000000000fb00000014004200690072006400270073002000450079006500000004d1000001250000000000000000fb0000000a004400650070007400680000000000000007800000000000000000fb0000000800540069006d00650100000000000004420000000000000000fb0000000800540069006d00650100000000000004500000000000000000fb0000000a0049006d0061006700650000000000ffffffff0000000000000000fb0000000a0049006d0061006700650000000000ffffffff00000000000000000000049a000001e200000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 Selection: collapsed: false Tool Properties: collapsed: false Views: collapsed: true - Width: 2176 + Width: 1526 X: 74 Y: 27 diff --git a/docs/Perception/lidar_detection.md b/docs/Perception/lidar_detection.md new file mode 100644 index 000000000..bc3bcf281 --- /dev/null +++ b/docs/Perception/lidar_detection.md @@ -0,0 +1,77 @@ +--- +layout: default +title: LiDAR Detection Node +nav_order: 6 +parent: Perception +--- + +# Lidar 3D Object Detector Node +{: .no_toc } + +*Maintained by Gueren Sanford and Ragib Arnab* + +## Overview +The LiDAR Detection Node performs 3D object detection on LiDAR pointclouds. The detection must be done using raw pointcloud data to optimize the detection rate. The node has two different methods for detection: + +### Complex YOLOv4 +The `complex_yolov4_model` turns pointcloud data into a birdseye view map of the points. The model uses these 2D images to form 3D detections. The implementation was adpated from [this](https://github.com/maudzung/Complex-YOLOv4-Pytorch) GitHub repo. Look at `complex_yolov4_model` for an example of how to integrate + organize a custom neural network into navigator. More information: +- Average Hz: + - 20.5 +- Optimal confidence threshold: + - 0.9 +- Pros: + - Higher accuracy + - Many detections +- Cons: + - More hallucinations + +### MMDetection3D +The `mmdetection3d_model` uses pointcloud data to form 3D detections. This implementation uses the [MMDetection3D](https://mmdetection3d.readthedocs.io/en/latest/) API for its model intialization and inferencing. The API supports a variety of LiDAR vehicle detection models, which can be switched by changing the `config_path` and `checkpoint_path`. More information: +- Average Hz: + - 23.5 +- Optimal confidence threshold: + - 0.4 +- Pros: + - More reliable + - Interchangable model files +- Cons: + - Low confidence scores + +--- + +### Parameters: + +- **device** *str* + - The device the model will run on. Choices: + - "cuda:0" (DEFAULT) + - "cuda:{NUMBER}" +- **model** *str* + - The type of model making the detections. Choices: + - "mmdetection3d" (DEFAULT) + - "complex_yolo" +- **conf_thresh** *float* + - The mininum confidence value accepted for bounding boxes. Choices: + - 0.7 (DEFAULT) + - 0.0 to 1.0 +- **nms_thresh** *float* + - The maximum accepted intersection accepted for bounding boxes. Choices: + - 0.2 (DEFAULT) + - 0.0 to 1.0 + +--- + +### In: + +- **/lidar** [*PointCloud2*](https://docs.ros2.org/latest/api/sensor_msgs/msg/PointCloud.html) + - Receives raw LiDAR point cloud data. + + +### Out: + +- **/detected/objects3d** [*Object3DArray*](../messages.md#object3darray) + - Publishes an array of 3D objects. 
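+
+---
+
+### Example: consuming the detections
+
+The following is a minimal, illustrative subscriber for the `/detected/objects3d` output of this node. It is not part of the package; it only assumes a sourced navigator workspace where `navigator_msgs` is available. The `label` and `confidence_score` fields are filled in by the detection models described above.
+
+```python
+import rclpy
+from rclpy.node import Node
+from navigator_msgs.msg import Object3DArray
+
+
+class DetectionListener(Node):
+    def __init__(self):
+        super().__init__('detection_listener')
+        # Same topic and queue depth the detector publishes with
+        self.sub = self.create_subscription(
+            Object3DArray, '/detected/objects3d', self.callback, 10)
+
+    def callback(self, msg: Object3DArray):
+        for obj in msg.objects:
+            self.get_logger().info(
+                f'label={obj.label} score={obj.confidence_score:.2f}')
+
+
+def main(args=None):
+    rclpy.init(args=args)
+    rclpy.spin(DetectionListener())
+    rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
+```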
+ +--- + +### lidar_callback(lidar_msg: PointCloud2) +Uses the lidar msg and a 3D object detection model to form 3D bounding boxes around objects. Then, publishes the boxes to a 'detected' topic. diff --git a/docs/Perception/lidar.md b/docs/Perception/lidar_processor.md similarity index 100% rename from docs/Perception/lidar.md rename to docs/Perception/lidar_processor.md diff --git a/docs/Perception/object_detector_3d.md b/docs/Perception/object_detector_3d.md deleted file mode 100644 index 2000f9b5d..000000000 --- a/docs/Perception/object_detector_3d.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -layout: default -title: 3D Object Detector -nav_order: 6 -parent: Perception ---- - -# Lidar 3D Object Detector Node -{: .no_toc } - -*Maintained by Ragib Arnab* - -## Overview -A ROS node which performs 3D object detection on LiDAR data using [MMDetection3D](https://github.com/open-mmlab/mmdetection3d) API. - -Any LiDAR object detection models supported by that package can be used by this node. To use a model, specify the path to the config and checkpoint in the parameters file `/navigator/param/perception/lidar_objdet3d_params` and use that when launching this node. In this project, configs and checkpoints are stored under `/navigator/data/perception` in their respective directories. - ---- - -### In: - -- **/lidar** [*PointCloud2*](https://docs.ros2.org/latest/api/sensor_msgs/msg/PointCloud.html) - - Receives raw LiDAR point cloud data. - - -### Out: - -- **/objdet3d_raw** [*Object3DArray*](../messages.md#object3darray) - - Publishes an array of 3D objects. - ---- - -### lidar_callback(lidar_msg: PointCloud2) -Takes in LiDAR input, performs 3D object detection using [MMDetection3D](https://github.com/open-mmlab/mmdetection3d) API, and publishes the results as an `Object3DArray` message. diff --git a/docs/Perception/static_occupancy_node.md b/docs/Perception/static_occupancy_node.md index c088c070e..67bae30ba 100644 --- a/docs/Perception/static_occupancy_node.md +++ b/docs/Perception/static_occupancy_node.md @@ -12,11 +12,11 @@ parent: Perception ## Overview The `StaticOccupancyNode` is responsible for generating static occupancy and mass grids based on ground-segmented pointclouds provided by the -[`ground_segmentation_node`](https://github.com/jpahm/navigator/blob/documentation/docs/Perception/ground_segmentation_node.md). +[`ground_segmentation_node`](https://nova-utd.github.io/navigator/Perception/ground_segmentation.html). These grids are then consumed by the -[`junction_manager`](https://github.com/jpahm/navigator/blob/documentation/docs/Planning/junction_manager.md) +[`junction_manager`](https://nova-utd.github.io/navigator/Planning/junction_manager.html) and the -[`grid_summation_node`](https://github.com/jpahm/navigator/blob/documentation/docs/Planning/grid_summation_node.md) +[`grid_summation_node`](https://nova-utd.github.io/navigator/Planning/grid_summation.html) for planning purposes. 
The **occupancy grid** is generated using diff --git a/launches/launch.object3d.py b/launches/launch.object3d.py new file mode 100644 index 000000000..cb7317726 --- /dev/null +++ b/launches/launch.object3d.py @@ -0,0 +1,73 @@ +from os import path + +from launch import LaunchDescription +from launch_ros.actions import Node + + +launch_path = path.realpath(__file__) +launch_dir = path.join(path.dirname(launch_path), '..') +print(launch_dir) + +def generate_launch_description(): + + launch_path = path.realpath(__file__) + launch_dir = path.dirname(launch_path) + param_dir = path.join(launch_dir, "param") + data_dir = path.join(launch_dir, "data") + + complex_yolo_model = Node( + package = 'lidar_detection', + executable = 'lidar_detection_node', + name = 'lidar_detection_node', + parameters=[ + {'model': 'complex_yolo'}, + {'device': 'cuda:0'}, + {'conf_thresh': 0.9} + ], + ) + + mmdetection3d_model = Node( + package = 'lidar_detection', + executable = 'lidar_detection_node', + name = 'lidar_detection_node', + parameters=[ + {'model': 'mmdetection3d'}, + {'device': 'cuda:1'}, + {'conf_thresh': 0.4} + ], + ) + + object_viz_deteced_node = Node( + package='visualizer', + executable='object_visualizer_node', + name='object_visualizer_node', + parameters=[ + {'topic': '/detected/objects3d'} + ] + ) + + multi_object_tracker_3d_node = Node( + package = 'multi_object_tracker_3d', + executable = 'multi_object_tracker_3d_node', + name = 'multi_object_tracker_3d_node', + parameters=[ + (path.join(param_dir, "perception/mot3d_params.yaml")), + ], + ) + + object_viz_tracked_node = Node( + package='visualizer', + executable='object_visualizer_node', + name='object_visualizer_node', + parameters=[ + {'topic': '/tracked/objects3d'} + ] + ) + + return LaunchDescription([ + complex_yolo_model, + mmdetection3d_model, + object_viz_deteced_node, + multi_object_tracker_3d_node, + object_viz_tracked_node, + ]) diff --git a/launches/launch.perception.py b/launches/launch.perception.py index 199d89f56..3b4ea30b1 100644 --- a/launches/launch.perception.py +++ b/launches/launch.perception.py @@ -7,14 +7,29 @@ sys.path.append(path.abspath('/navigator/')) from launches.launch_node_definitions import * - +from launches.utils import ( + err_fatal, + try_get_launch_description_from_include, + IncludeError, +) def generate_launch_description(): + # Include perception launch file and extract launch entities for reuse. 
+ try: + object3d_launch_description = try_get_launch_description_from_include( + "launches/launch.object3d.py" + ) + except IncludeError as e: + err_fatal(e) + + object3d_launch_entities = object3d_launch_description.describe_sub_entities() + return LaunchDescription([ # image_segmentation, # semantic_projection, ground_seg, static_grid, + # *object3d_launch_entities # prednet_inference, # driveable_area, ]) \ No newline at end of file diff --git a/launches/object_detection_tracking.launch.py b/launches/object_detection_tracking.launch.py deleted file mode 100644 index d4afb989c..000000000 --- a/launches/object_detection_tracking.launch.py +++ /dev/null @@ -1,56 +0,0 @@ -from os import path - -from launch import LaunchDescription -from launch_ros.actions import Node - - -launch_path = path.realpath(__file__) -launch_dir = path.join(path.dirname(launch_path), '..') -print(launch_dir) - -def generate_launch_description(): - - launch_path = path.realpath(__file__) - launch_dir = path.dirname(launch_path) - param_dir = path.join(launch_dir, "param") - data_dir = path.join(launch_dir, "data") - - lidar_object_detection_3d_node = Node( - package = 'object_detector_3d', - executable = 'lidar_object_detector_3d_node', - name = 'lidar_object_detector_3d_node', - parameters=[ - (path.join(param_dir, "perception/lidar_objdet3d_params.yaml")), - ], - ) - - multi_object_tracker_3d_node = Node( - package = 'multi_object_tracker_3d', - executable = 'multi_object_tracker_3d_node', - name = 'multi_object_tracker_3d_node', - parameters=[ - (path.join(param_dir, "perception/mot3d_params.yaml")), - ], - ) - - object_visualizer_node = Node( - package='visualizer', - executable='object_visualizer_node', - name='object_visualizer_node', - ) - - rviz = Node( - package='rviz2', - namespace='', - executable='rviz2', - name='rviz2', - arguments=['-d' + path.join(data_dir, 'object_detection_tracking.rviz')], - respawn=True - ) - - return LaunchDescription([ - lidar_object_detection_3d_node, - multi_object_tracker_3d_node, - object_visualizer_node, - rviz - ]) diff --git a/param/perception/lidar_objdet3d_params.yaml b/param/perception/lidar_objdet3d_params.yaml deleted file mode 100644 index 83033d625..000000000 --- a/param/perception/lidar_objdet3d_params.yaml +++ /dev/null @@ -1,5 +0,0 @@ -/**: - ros__parameters: - config_path: "/navigator/data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py" - checkpoint_path: "/navigator/data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth" - device: "cuda:0" diff --git a/src/perception/object_detector_3d/object_detector_3d/__init__.py b/src/perception/lidar_detection/lidar_detection/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/__init__.py rename to src/perception/lidar_detection/lidar_detection/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md b/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/README.md rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/__init__.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/__init__.py rename to 
src/perception/lidar_detection/lidar_detection/complex_yolov4/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/__init__.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/__init__.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/model/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet2pytorch.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet2pytorch.py index 71e97e217..662e572b1 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet2pytorch.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet2pytorch.py @@ -14,9 +14,9 @@ sys.path.append('../') -from object_detector_3d.complex_yolov4.model.yolo_layer import YoloLayer -from object_detector_3d.complex_yolov4.model.darknet_utils import parse_cfg, print_cfg, load_fc, load_conv_bn, load_conv -from object_detector_3d.complex_yolov4.utils.torch_utils import to_cpu +from lidar_detection.complex_yolov4.model.yolo_layer import YoloLayer +from lidar_detection.complex_yolov4.model.darknet_utils import parse_cfg, print_cfg, load_fc, load_conv_bn, load_conv +from lidar_detection.complex_yolov4.utils.torch_utils import to_cpu class Mish(nn.Module): diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet_utils.py similarity index 99% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet_utils.py index a5eb6cdff..63715a8ff 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/darknet_utils.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/darknet_utils.py @@ -9,7 +9,7 @@ import torch sys.path.append('../') -from object_detector_3d.complex_yolov4.utils.torch_utils import convert2cpu +from lidar_detection.complex_yolov4.utils.torch_utils import convert2cpu __all__ = ['parse_cfg', 'print_cfg', 'load_conv', 'load_conv_bn', 'save_conv', 'save_conv_bn', 'load_fc', 'save_fc'] diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/model_utils.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/model_utils.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/model_utils.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/model/model_utils.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/yolo_layer.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/model/yolo_layer.py index 3777b0d45..82cd969ea 100644 --- 
a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/model/yolo_layer.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/model/yolo_layer.py @@ -19,8 +19,8 @@ sys.path.append('../') -from object_detector_3d.complex_yolov4.utils.torch_utils import to_cpu -from object_detector_3d.complex_yolov4.model.model_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ +from lidar_detection.complex_yolov4.utils.torch_utils import to_cpu +from lidar_detection.complex_yolov4.model.model_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ get_polygons_areas_fix_xy diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/__init__.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/__init__.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/__init__.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/__init__.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/evaluation_utils.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/evaluation_utils.py index 0949e4c3c..2980295ef 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/evaluation_utils.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/evaluation_utils.py @@ -5,7 +5,7 @@ sys.path.append('../') -from object_detector_3d.complex_yolov4.utils.kitti_bev_utils import get_corners +from lidar_detection.complex_yolov4.utils.kitti_bev_utils import get_corners def cvt_box_2_polygon(box): diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/kitti_bev_utils.py similarity index 98% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/kitti_bev_utils.py index 2cdde31e5..82f6aaca1 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_bev_utils.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/kitti_bev_utils.py @@ -9,7 +9,7 @@ sys.path.append('../') -import object_detector_3d.complex_yolov4.utils.kitti_config as cnf +import lidar_detection.complex_yolov4.utils.kitti_config as cnf def removePoints(PointCloud, BoundaryCond): diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/kitti_config.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/kitti_config.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/kitti_config.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/torch_utils.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/torch_utils.py similarity index 100% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4/utils/torch_utils.py rename to 
src/perception/lidar_detection/lidar_detection/complex_yolov4/utils/torch_utils.py diff --git a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py similarity index 91% rename from src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py rename to src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py index 4a334cb63..faffc03c6 100644 --- a/src/perception/object_detector_3d/object_detector_3d/complex_yolov4_model.py +++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py @@ -1,5 +1,5 @@ """ -Package: object_detector_3d +Package: lidar_detection Filename: complex_yolov4_model.py Author: Gueren Sanford Email: guerensanford@gmail.com @@ -19,10 +19,10 @@ import torch # Model Imports -from object_detector_3d.complex_yolov4.model.darknet2pytorch import Darknet -from object_detector_3d.complex_yolov4.utils import kitti_bev_utils -from object_detector_3d.complex_yolov4.utils.evaluation_utils import post_processing_v2 -import object_detector_3d.complex_yolov4.utils.kitti_config as cnf +from lidar_detection.complex_yolov4.model.darknet2pytorch import Darknet +from lidar_detection.complex_yolov4.utils import kitti_bev_utils +from lidar_detection.complex_yolov4.utils.evaluation_utils import post_processing_v2 +import lidar_detection.complex_yolov4.utils.kitti_config as cnf # Message Imports from geometry_msgs.msg import Point @@ -93,7 +93,6 @@ def predict(self, inputs: torch.Tensor): [1, boxes, x y w l classes], where the classes are Car, Pedestrian, Cyclist, Van, Person_sitting. """ - # The model outputs [1, boxes, x y w l classes] return self.model(inputs) def postprocess(self, predictions: torch.Tensor, conf_thresh: int, @@ -104,9 +103,9 @@ def postprocess(self, predictions: torch.Tensor, conf_thresh: int, @param predictions[torch.Tensor] The output of the model in the shape [1, boxes, x y w l classes], where the classes are Car, Pedestrian, Cyclist, Van, Person_sitting. - @param conf_thresh[int] The mininum confidence value accepted + @param conf_thresh[float] The mininum confidence value accepted for bounding boxes. - @param nms_thresh[int] The maximum accepted intersection over + @param nms_thresh[float] The maximum accepted intersection over union value for bounding boxes. @return Object3DArray A ros2 message ready to be published. Before publishing, the header needs to be attached. diff --git a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py b/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py similarity index 72% rename from src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py rename to src/perception/lidar_detection/lidar_detection/lidar_detection_node.py index 0774088da..8b282b050 100644 --- a/src/perception/object_detector_3d/object_detector_3d/lidar_detection_node.py +++ b/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py @@ -1,29 +1,28 @@ """ -Package: object_detector_3d +Package: lidar_detection Filename: lidar_detection_node.py Author: Gueren Sanford Email: guerensanford@gmail.com Copyright: 2021, Nova UTD License: MIT License -Description of what this file does, what inputs it takes and what it outputs or accomplishes +Subscribes to raw lidar pointcloud data and publishes an array 3D object predictions. The +model for the predictions can be chosen using the model parameter. 
The predictions can be +filtered using the node's prediction confidence threshold and the non-maximum supression +threshold (limits the intersection of boxes). """ # Python Imports -import torch, math -import numpy as np -from time import time # DEBUG +import torch # Local Import -from object_detector_3d.complex_yolov4_model import ComplexYOLOv4Model -from object_detector_3d.mmdetection3d_model import MMDetection3DModel +from lidar_detection.complex_yolov4_model import ComplexYOLOv4Model +from lidar_detection.mmdetection3d_model import MMDetection3DModel # Ros Imports import rclpy -import ros2_numpy as rnp from rclpy.node import Node -import sensor_msgs_py.point_cloud2 as pc2 -# Message definitions +# Message Imports from rosgraph_msgs.msg import Clock from builtin_interfaces.msg import Time from sensor_msgs.msg import PointCloud2 @@ -32,14 +31,23 @@ class LidarDetectionNode(Node): def __init__(self): - """! Initializes the node.""" + """! Initializes the node. + @param device[str] The device the model will run on. Choices: + 'cpu' (DEFAULT) | 'cuda:{NUMBER}' + @param model[str] The type of model making the detections. Choices: + 'mmdetection3d' (DEFAULT) | 'complex_yolo' + @param conf_thresh[float] The mininum confidence value accepted + for bounding boxes. Choices: 0.7 (DEFAULT) | 0.0 - 1.0 + @param nms_thresh[float] The maximum accepted intersection accepted + for bounding boxes. Choices 0.2 (DEFAULT) | 0.0 - 1.0 + """ super().__init__("lidar_detection_node") # Declare default ROS2 node parameters - self.declare_parameter('device', 'cuda:1') # cpu | cuda:{gpu num} - self.declare_parameter('model', 'mmdetection3d') # mmdetection3d | complex_yolo - self.declare_parameter('conf_thresh', 0.7) # 0.0 - 1.0 - self.declare_parameter('nms_thresh', 0.2) # 0.0 - 1.0 + self.declare_parameter('device', 'cuda:0') + self.declare_parameter('model', 'mmdetection3d') + self.declare_parameter('conf_thresh', 0.7) + self.declare_parameter('nms_thresh', 0.2) # Get ROS2 parameters self.device = torch.device(self.get_parameter('device') \ diff --git a/src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py similarity index 97% rename from src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py rename to src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py index e868cc94e..34d293dac 100644 --- a/src/perception/object_detector_3d/object_detector_3d/mmdetection3d_model.py +++ b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py @@ -1,5 +1,5 @@ """ -Package: object_detector_3d +Package: lidar_detection Filename: mmdetection3d_model.py Author: Gueren Sanford, Ragib Arnab Email: guerensanford@gmail.com @@ -15,11 +15,11 @@ import random import numpy as np import ros2_numpy as rnp -from mmcv.ops import nms_rotated # Model Imports from mmdet3d.apis import init_model, inference_detector from mmdet3d.structures.det3d_data_sample import Det3DDataSample +from mmcv.ops import nms_rotated # Message Imports from geometry_msgs.msg import Point @@ -96,9 +96,9 @@ def postprocess(self, predictions: Det3DDataSample, conf_thresh: int, over union threshold. @param predictions[Det3DDataSample] The output from the ppredict function. - @param conf_thresh[int] The mininum confidence value accepted + @param conf_thresh[float] The mininum confidence value accepted for bounding boxes. 
- @param nms_thresh[int] The maximum accepted intersection over + @param nms_thresh[float] The maximum accepted intersection over union value for bounding boxes. @return Object3DArray A ros2 message ready to be published. Before publishing, the header needs to be attached. diff --git a/src/perception/object_detector_3d/package.xml b/src/perception/lidar_detection/package.xml similarity index 85% rename from src/perception/object_detector_3d/package.xml rename to src/perception/lidar_detection/package.xml index d9f3267fd..8f202c1d2 100644 --- a/src/perception/object_detector_3d/package.xml +++ b/src/perception/lidar_detection/package.xml @@ -1,9 +1,10 @@ - object_detector_3d - 0.0.0 + lidar_detection + 1.0.0 TODO: Package description + Gueren Sanford Ragib Arnab TODO: License declaration diff --git a/src/perception/object_detector_3d/resource/object_detector_3d b/src/perception/lidar_detection/resource/lidar_detection similarity index 100% rename from src/perception/object_detector_3d/resource/object_detector_3d rename to src/perception/lidar_detection/resource/lidar_detection diff --git a/src/perception/lidar_detection/setup.cfg b/src/perception/lidar_detection/setup.cfg new file mode 100644 index 000000000..24db5ee2e --- /dev/null +++ b/src/perception/lidar_detection/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/lidar_detection +[install] +install_scripts=$base/lib/lidar_detection diff --git a/src/perception/object_detector_3d/setup.py b/src/perception/lidar_detection/setup.py similarity index 66% rename from src/perception/object_detector_3d/setup.py rename to src/perception/lidar_detection/setup.py index b2252c0b4..f5cef5b41 100644 --- a/src/perception/object_detector_3d/setup.py +++ b/src/perception/lidar_detection/setup.py @@ -1,6 +1,6 @@ from setuptools import find_packages, setup -package_name = 'object_detector_3d' +package_name = 'lidar_detection' setup( name=package_name, @@ -13,15 +13,14 @@ ], install_requires=['setuptools'], zip_safe=True, - maintainer='Ragib Arnab', - maintainer_email='ragib.arnab@gmail.com', + maintainer=['Gueren Sanford', 'Ragib Arnab'], + maintainer_email=['guerensanford@gmail.com', 'ragib.arnab@gmail.com'], description='TODO: Package description', license='TODO: License declaration', tests_require=['pytest'], entry_points={ 'console_scripts': [ - 'lidar_object_detector_3d_node = object_detector_3d.lidar_objdet3d_node:main', - 'lidar_detection_node = object_detector_3d.lidar_detection_node:main', + 'lidar_detection_node = lidar_detection.lidar_detection_node:main', ], }, ) diff --git a/src/perception/multi_object_tracker_3d/multi_object_tracker_3d/mot3d_node.py b/src/perception/multi_object_tracker_3d/multi_object_tracker_3d/mot3d_node.py index d47b37ad7..527b003a7 100644 --- a/src/perception/multi_object_tracker_3d/multi_object_tracker_3d/mot3d_node.py +++ b/src/perception/multi_object_tracker_3d/multi_object_tracker_3d/mot3d_node.py @@ -78,14 +78,14 @@ def __init__(self): self.objects_subscription = self.create_subscription( msg_type = Object3DArray, - topic = 'objdet3d_raw', + topic = '/detected/objects3d', callback = self.detection_callback, qos_profile = 10 ) self.tracked_objects_publisher = self.create_publisher( msg_type = Object3DArray, - topic = 'objdet3d_tracked', + topic = '/tracked/objects3d', qos_profile = 10 ) diff --git a/src/perception/object_detector_3d/setup.cfg b/src/perception/object_detector_3d/setup.cfg deleted file mode 100644 index 653e0b48a..000000000 --- a/src/perception/object_detector_3d/setup.cfg +++ /dev/null @@ 
-1,4 +0,0 @@ -[develop] -script_dir=$base/lib/object_detector_3d -[install] -install_scripts=$base/lib/object_detector_3d diff --git a/src/perception/object_detector_3d/test/test_copyright.py b/src/perception/object_detector_3d/test/test_copyright.py deleted file mode 100644 index 97a39196e..000000000 --- a/src/perception/object_detector_3d/test/test_copyright.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_copyright.main import main -import pytest - - -# Remove the `skip` decorator once the source file(s) have a copyright header -@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') -@pytest.mark.copyright -@pytest.mark.linter -def test_copyright(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found errors' diff --git a/src/perception/object_detector_3d/test/test_flake8.py b/src/perception/object_detector_3d/test/test_flake8.py deleted file mode 100644 index 27ee1078f..000000000 --- a/src/perception/object_detector_3d/test/test_flake8.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2017 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_flake8.main import main_with_errors -import pytest - - -@pytest.mark.flake8 -@pytest.mark.linter -def test_flake8(): - rc, errors = main_with_errors(argv=[]) - assert rc == 0, \ - 'Found %d code style errors / warnings:\n' % len(errors) + \ - '\n'.join(errors) diff --git a/src/perception/object_detector_3d/test/test_pep257.py b/src/perception/object_detector_3d/test/test_pep257.py deleted file mode 100644 index b234a3840..000000000 --- a/src/perception/object_detector_3d/test/test_pep257.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from ament_pep257.main import main
-import pytest
-
-
-@pytest.mark.linter
-@pytest.mark.pep257
-def test_pep257():
-    rc = main(argv=['.', 'test'])
-    assert rc == 0, 'Found code style errors / warnings'
diff --git a/src/perception/visualizer/visualizer/object_visualizer_node.py b/src/perception/visualizer/visualizer/object_visualizer_node.py
index 35cd58cf6..810af988a 100644
--- a/src/perception/visualizer/visualizer/object_visualizer_node.py
+++ b/src/perception/visualizer/visualizer/object_visualizer_node.py
@@ -16,14 +16,19 @@ class ObjectVisualizerNode(Node):
 
     def __init__(self):
         super().__init__('object_visualizer_node')
 
+        # Declare ROS2 parameters
+        self.declare_parameter('topic', '/tracked/objects3d') # or /detected/objects3d
+        sub_topic = self.get_parameter('topic').get_parameter_value().string_value
+        pub_topic = sub_topic if sub_topic[0] != '/' else sub_topic[1:]
+
         self.subscription = self.create_subscription(
             msg_type = Object3DArray,
-            topic = '/detected/objects3d', # NEEDS MORE CHOICES
+            topic = sub_topic,
             callback = self.visualize_objects,
             qos_profile = 1
         )
 
-        self.visualization_publisher = self.create_publisher(MarkerArray, 'objdet3d_viz', 10)
+        self.visualization_publisher = self.create_publisher(MarkerArray, f"viz/{pub_topic}", 10)
 
     def visualize_objects(self, msg: Object3DArray):

From 56c4742b0b9cca8fae378a7667c3fd96ae1e163c Mon Sep 17 00:00:00 2001
From: Gueren Sanford
Date: Mon, 15 Apr 2024 18:35:40 -0500
Subject: [PATCH 5/8] Instructions added

---
 .../lidar_detection/complex_yolov4/README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md b/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
index e3fc7368b..8dc45a3ca 100644
--- a/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
+++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
@@ -2,4 +2,10 @@
 
 The PyTorch Implementation based on YOLOv4 of the paper: [Complex-YOLO: Real-time 3D Object Detection on Point Clouds](https://arxiv.org/pdf/1803.06199.pdf)
 
-Most files are taken (some modified) from the GitHub Repo: [https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master](https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master)
\ No newline at end of file
+Most files are taken (some modified) from the GitHub Repo: [https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master](https://github.com/maudzung/Complex-YOLOv4-Pytorch/tree/master)
+
+## Installation and Setup
+
+Follow the instructions below to ensure the model is set up properly:
+1. Click [here](https://drive.google.com/drive/folders/16zuyjh0c7iiWRSNKQY7CnzXotecYN5vc) and download the `complex_yolov4_mse_loss.pth` file
+2. Move `complex_yolov4_mse_loss.pth` into the directory `navigator/data/perception/checkpoints/complex_yolov4_mse_loss.pth`
\ No newline at end of file

From 70672dad59179c8b60d4024abcff55495f18919b Mon Sep 17 00:00:00 2001
From: Kevin Ge
Date: Thu, 18 Apr 2024 21:04:04 -0500
Subject: [PATCH 6/8] fix: switch MMDetection to cuda:0 because OBC only has 1 GPU (device 0)

---
 launches/launch.object3d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/launches/launch.object3d.py b/launches/launch.object3d.py
index cb7317726..91bb1ff9d 100644
--- a/launches/launch.object3d.py
+++ b/launches/launch.object3d.py
@@ -32,7 +32,7 @@ def generate_launch_description():
         name = 'lidar_detection_node',
         parameters=[
             {'model': 'mmdetection3d'},
-            {'device': 'cuda:1'},
+            {'device': 'cuda:0'},
             {'conf_thresh': 0.4}
         ],
     )

From e64ae38ae07fd2ca4e34b30880f25c19a3419c2b Mon Sep 17 00:00:00 2001
From: Gueren Sanford
Date: Mon, 22 Apr 2024 12:44:08 -0500
Subject: [PATCH 7/8] Minor changes before the PR.

---
 ...60e_kitti-3d-3class_20220301_150306-37dc2420.pth | Bin
 launches/launch.object3d.py                         | 6 ++----
 .../lidar_detection/complex_yolov4/README.md        | 2 +-
 .../lidar_detection/complex_yolov4_model.py         | 2 +-
 .../lidar_detection/mmdetection3d_model.py          | 2 +-
 5 files changed, 5 insertions(+), 7 deletions(-)
 rename data/perception/{checkpoints => models}/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth (100%)

diff --git a/data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth b/data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth
similarity index 100%
rename from data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth
rename to data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth
diff --git a/launches/launch.object3d.py b/launches/launch.object3d.py
index cb7317726..52c0ab49a 100644
--- a/launches/launch.object3d.py
+++ b/launches/launch.object3d.py
@@ -50,9 +50,7 @@ def generate_launch_description():
         package = 'multi_object_tracker_3d',
         executable = 'multi_object_tracker_3d_node',
         name = 'multi_object_tracker_3d_node',
-        parameters=[
-            (path.join(param_dir, "perception/mot3d_params.yaml")),
-        ],
+        parameters=[],
     )
 
     object_viz_tracked_node = Node(
@@ -66,7 +64,7 @@ def generate_launch_description():
 
     return LaunchDescription([
         complex_yolo_model,
-        mmdetection3d_model,
+        # mmdetection3d_model,
         object_viz_deteced_node,
         multi_object_tracker_3d_node,
         object_viz_tracked_node,
diff --git a/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md b/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
index 8dc45a3ca..da52fff3d 100644
--- a/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
+++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4/README.md
@@ -8,4 +8,4 @@ Most files are taken (some modified) from the GitHub Repo: [https://github.com/m
 
 Follow the instructions below to ensure the model is set up properly:
 1. Click [here](https://drive.google.com/drive/folders/16zuyjh0c7iiWRSNKQY7CnzXotecYN5vc) and download the `complex_yolov4_mse_loss.pth` file
-2. Move `complex_yolov4_mse_loss.pth` into the directory `navigator/data/perception/checkpoints/complex_yolov4_mse_loss.pth`
\ No newline at end of file
+2. Move `complex_yolov4_mse_loss.pth` into the directory `navigator/data/perception/models/complex_yolov4_mse_loss.pth`
\ No newline at end of file
diff --git a/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py b/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py
index faffc03c6..d13426839 100644
--- a/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py
+++ b/src/perception/lidar_detection/lidar_detection/complex_yolov4_model.py
@@ -31,7 +31,7 @@
 
 # Paths to pretrained model and config
 config_path = './data/perception/configs/complex_yolov4.cfg'
-checkpoint_path = './data/perception/checkpoints/complex_yolov4_mse_loss.pth'
+checkpoint_path = './data/perception/models/complex_yolov4_mse_loss.pth'
 
 class ComplexYOLOv4Model():
     def __init__(self, device: torch.device):
diff --git a/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
index 34d293dac..0333a7001 100644
--- a/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
+++ b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
@@ -28,7 +28,7 @@
 
 # Paths to pretrained model and config
 config_path = './data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py'
-checkpoint_path = './data/perception/checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth'
+checkpoint_path = './data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth'
 
 # dict of class/label association
 CLASS2LABEL = { 'Pedestrian': 0, 'Cyclist': 1, 'Car': 2 }

From aca167a40467167d904da5333d6598de1a93f1ba Mon Sep 17 00:00:00 2001
From: Gueren Sanford
Date: Mon, 29 Apr 2024 12:25:19 -0500
Subject: [PATCH 8/8] Updated lidar_detection_node params

- Added checkpoint_path and config_path to the params of LidarDetectionNode
- Fixed MMDetection3DModel class to take checkpoint and config arguments
---
 .../lidar_detection/lidar_detection_node.py | 10 +++++++++-
 .../lidar_detection/mmdetection3d_model.py  | 12 ++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py b/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py
index 8b282b050..9396f862a 100644
--- a/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py
+++ b/src/perception/lidar_detection/lidar_detection/lidar_detection_node.py
@@ -48,6 +48,10 @@ def __init__(self):
         self.declare_parameter('model', 'mmdetection3d')
         self.declare_parameter('conf_thresh', 0.7)
         self.declare_parameter('nms_thresh', 0.2)
+        self.declare_parameter('config_path', # Paths to pretrained model and config
+            './data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py')
+        self.declare_parameter('checkpoint_path',
+            './data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth')
 
         # Get ROS2 parameters
         self.device = torch.device(self.get_parameter('device') \
@@ -58,6 +62,10 @@ def __init__(self):
             .get_parameter_value().double_value
         model_type = self.get_parameter('model') \
             .get_parameter_value().string_value
+        config_path = self.get_parameter('config_path') \
+            .get_parameter_value().string_value
+        checkpoint_path = self.get_parameter('checkpoint_path') \
+            .get_parameter_value().string_value
 
         # Declared for publishing msgs
         self.stamp = Time()
@@ -66,7 +74,7 @@ def __init__(self):
         if model_type == 'complex_yolo':
             self.model = ComplexYOLOv4Model(self.device)
         else:
-            self.model = MMDetection3DModel(str(self.device))
+            self.model = MMDetection3DModel(config_path, checkpoint_path, str(self.device))
 
         # Subscribes to raw lidar data
         self.lidar_sub = self.create_subscription(
diff --git a/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
index 0333a7001..788233b5d 100644
--- a/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
+++ b/src/perception/lidar_detection/lidar_detection/mmdetection3d_model.py
@@ -26,17 +26,17 @@
 from sensor_msgs.msg import PointCloud2
 from navigator_msgs.msg import BoundingBox3D, Object3D, Object3DArray
 
-# Paths to pretrained model and config
-config_path = './data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py'
-checkpoint_path = './data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth'
-
 # dict of class/label association
 CLASS2LABEL = { 'Pedestrian': 0, 'Cyclist': 1, 'Car': 2 }
 
 class MMDetection3DModel():
-    def __init__(self, device: str):
+    def __init__(self, config_path: str, checkpoint_path: str, device: str):
         """! Initializes the node.
-        @param device[str] The device the model will run on
+        @param config_path[str] The config file for MMDetection3D, must use
+            the config for the specific checkpoint.
+        @param checkpoint_path[str] The checkpoint file for MMDetection3D.
+            Check their API for the list of checkpoint files.
+        @param device[str] The device the model will run on.
         """
 
         # Declare model attributes
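
The sketch below shows one way the parameters introduced in these patches could be wired together in a launch file. It is a minimal illustration under stated assumptions, not part of the patch series: the package and executable names ('lidar_detection' / 'lidar_detection_node' and 'visualizer' / 'object_visualizer_node') are assumed, since the diffs only show node names and parameters; the parameter names, paths, and values come from the patched lidar_detection_node.py, object_visualizer_node.py, and launch.object3d.py.

```python
# Hypothetical launch sketch; package/executable names are assumptions, parameters are from the patches.
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    # LidarDetectionNode: the 'config_path' and 'checkpoint_path' parameters added in
    # PATCH 8/8 default to the MMDetection3D PointPillars files, so overriding them is
    # only needed when swapping in a different config/checkpoint pair.
    lidar_detection_node = Node(
        package = 'lidar_detection',           # assumed package name
        executable = 'lidar_detection_node',   # assumed executable name
        name = 'lidar_detection_node',
        parameters=[
            {'model': 'mmdetection3d'},
            {'device': 'cuda:0'},              # OBC only has 1 GPU (device 0)
            {'conf_thresh': 0.4},
            {'config_path': './data/perception/configs/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py'},
            {'checkpoint_path': './data/perception/models/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth'},
        ],
    )

    # ObjectVisualizerNode: the 'topic' parameter selects the subscription, and the
    # markers are republished on 'viz/<topic without the leading slash>'.
    object_viz_detected_node = Node(
        package = 'visualizer',                # assumed package name
        executable = 'object_visualizer_node', # assumed executable name
        name = 'object_visualizer_node',
        parameters=[
            {'topic': '/detected/objects3d'},  # markers appear on 'viz/detected/objects3d'
        ],
    )

    return LaunchDescription([
        lidar_detection_node,
        object_viz_detected_node,
    ])
```

Left at its default of '/tracked/objects3d', the visualizer would instead subscribe to the tracker output and publish its markers on 'viz/tracked/objects3d'.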