diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9d12d21..362ef5c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,6 +1,8 @@
 name: Test
 on:
   push:
+    branches:
+      - main
 
 jobs:
   test:
diff --git a/codecov.yml b/codecov.yml
index 85dc922..5df9b32 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,4 +1,3 @@
 ignore:
-  - "spatialyze/legacy"
   - "spatialyze/video_processor"
   - "**/__init__.py"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index e614aeb..d9f1f95 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,9 +104,6 @@ exclude = 'spatialyze/video_processor/modules'
 [tool.pyright]
 pythonVersion = '3.10'
 ignore = [
-    'spatialyze/legacy/*',
-    'spatialyze/trackers/object_tracker_yolov4_deepsort.py',
-    'spatialyze/trackers/object_tracker_yolov5_deepsort.py',
     'spatialyze/video_processor/modules',
     'spatialyze/video_processor/stages/detection_estimation',
     'spatialyze/video_processor/stages/segment_trajectory',
diff --git a/spatialyze/data_types/views/__init__.py b/spatialyze/data_types/views/__init__.py
deleted file mode 100644
index e12c94f..0000000
--- a/spatialyze/data_types/views/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from .camera_view import CameraView
-from .location_view import LocationView
-from .metadata_view import MetadataView
-from .trajectory_view import TrajectoryView
-from .view import View
-
-metadata_view = MetadataView()
-
-__all__ = ["View", "CameraView", "LocationView", "TrajectoryView", "metadata_view"]
diff --git a/spatialyze/data_types/views/camera_view.py b/spatialyze/data_types/views/camera_view.py
deleted file mode 100644
index 03b729f..0000000
--- a/spatialyze/data_types/views/camera_view.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from .view import View
-
-
-class CameraView(View):
-    camera_id = "cameraId"
-    frame_id = "frameId"
-    frame_num = "frameNum"
-    file_name = "fileName"
-    camera_translation = "cameraTranslation"
-    camera_rotation = "cameraRotation"
-    camera_intrinsic = "cameraIntrinsic"
-    ego_translation = "egoTranslation"
-    ego_rotation = "egoRotation"
-    timestamp = "timestamp"
-    camera_heading = "cameraHeading"
-    ego_heading = "egoHeading"
-    table_name = "Cameras"
-
-    def __init__(self):
-        super().__init__(self.table_name)
-        self.default = True
diff --git a/spatialyze/data_types/views/location_view.py b/spatialyze/data_types/views/location_view.py
deleted file mode 100644
index 6f74d23..0000000
--- a/spatialyze/data_types/views/location_view.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from .view import View
-
-
-class LocationView(View):
-    location = "trajBbox"
-    timestamp = "timestamp"
-    table_name = "General_Bbox"
-
-    def __init__(self):
-        super().__init__(self.table_name)
-        self.default = True
diff --git a/spatialyze/data_types/views/metadata_view.py b/spatialyze/data_types/views/metadata_view.py
deleted file mode 100644
index c4a5c28..0000000
--- a/spatialyze/data_types/views/metadata_view.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from .camera_view import CameraView
-from .location_view import LocationView
-from .trajectory_view import TrajectoryView
-from .view import View
-
-
-class MetadataView(View):
-    view_name = "metadata_view"
-    object_id = TrajectoryView.object_id
-    object_type = TrajectoryView.object_type
-    color = TrajectoryView.color
-    trajectory = TrajectoryView.trajectory
-    location = LocationView.location
-    timestamp = LocationView.timestamp
-    view_map = {
-        object_id: TrajectoryView,
-        object_type: TrajectoryView,
-        color: TrajectoryView,
-        trajectory: TrajectoryView,
-        location: LocationView,
-    }
-
-    def __init__(self):
-        super().__init__(self.view_name)
-        self.default = True
-        self.trajectory_view = TrajectoryView()
-        self.location_view = LocationView()
-        self.camera_view = CameraView()
-
-    def map_view(self, column_key: str):
-        if self.view_map[column_key] == TrajectoryView:
-            return self.trajectory_view
-        elif self.view_map[column_key] == LocationView:
-            return self.location_view
-        else:
-            return self.camera_view
-
-    def resolve_key(self, column_key: str):
-        return (
-            self.trajectory_view.resolve_key(column_key)
-            or self.location_view.resolve_key(column_key)
-            or self.camera_view.resolve_key(column_key)
-            or column_key
-        )
diff --git a/spatialyze/data_types/views/trajectory_view.py b/spatialyze/data_types/views/trajectory_view.py
deleted file mode 100644
index 9c57a9e..0000000
--- a/spatialyze/data_types/views/trajectory_view.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from .view import View
-
-
-class TrajectoryView(View):
-    object_id = "itemId"
-    object_type = "objectType"
-    color = "color"
-    trajectory = "trajCentroids"
-    traj = "trajCentroids"
-    heading = "itemHeadings"
-    table_name = "Item_General_Trajectory"
-
-    def __init__(self):
-        super().__init__(self.table_name)
-        self.default = True
diff --git a/spatialyze/data_types/views/view.py b/spatialyze/data_types/views/view.py
deleted file mode 100644
index b12ff10..0000000
--- a/spatialyze/data_types/views/view.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from typing import Any
-
-
-class View:
-    def __init__(self, view_name: str):
-        self.view_name: str = view_name
-        self.default: bool = False
-
-    def from_context(self, context: Any):
-        self.context = context
-
-    def resolve_key(self, column_key: str):
-        if column_key in self.__class__.__dict__:
-            return self.__class__.__dict__[column_key]
-        else:
-            return None
-
-    def contain(self, column_key: str):
-        return column_key in self.__dict__.keys()
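The view classes deleted above implement a small column-name resolution chain: each `View` subclass maps friendly attribute names to SQL column names, and `MetadataView.resolve_key` tries each member view in turn before falling back to the key itself. A minimal behavior sketch, assuming the classes exactly as deleted above (this ran against the tree before this change):

```python
# Behavior sketch of the removed views API (pre-change tree).
from spatialyze.data_types.views import metadata_view

assert metadata_view.resolve_key("trajectory") == "trajCentroids"  # via TrajectoryView
assert metadata_view.resolve_key("location") == "trajBbox"         # via LocationView
assert metadata_view.resolve_key("camera_id") == "cameraId"        # via CameraView
assert metadata_view.resolve_key("fileName") == "fileName"         # no match: falls through unchanged
```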
diff --git a/spatialyze/trackers/__init__.py b/spatialyze/trackers/__init__.py
deleted file mode 100644
index d33c933..0000000
--- a/spatialyze/trackers/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .object_tracker_yolov4_deepsort import yolov4_deepsort_video_track
-from .object_tracker_yolov5_deepsort import yolov5_deepsort_video_track
-
-__all__ = ["yolov4_deepsort_video_track", "yolov5_deepsort_video_track"]
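Both removed entry points shared one contract: take a video (or an options object) and return a `Dict[str, TrackedObject]` keyed by `"<class name>-<track id>"`. A hypothetical call sketch (the video path is a placeholder), assuming the package as it existed before this deletion:

```python
# Hypothetical usage of the removed spatialyze.trackers package.
from spatialyze.trackers import yolov4_deepsort_video_track

tracks = yolov4_deepsort_video_track("driving.mp4")  # placeholder video path
for item_id, obj in tracks.items():
    # e.g. "car-12": the detection DeepSORT assigned track id 12.
    print(item_id, len(obj.bboxes), obj.frame_num[:5])
```

The yolov5 variant took a `YoloV5Opt` options dataclass instead of a bare path, as the module deleted below shows.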
diff --git a/spatialyze/trackers/object_tracker_yolov4_deepsort.py b/spatialyze/trackers/object_tracker_yolov4_deepsort.py
deleted file mode 100644
index 3bf1e11..0000000
--- a/spatialyze/trackers/object_tracker_yolov4_deepsort.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import os
-
-# comment out below line to enable tensorflow logging outputs
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-import sys
-
-sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../yolov4-deepsort"))
-
-import tensorflow as tf
-
-physical_devices = tf.config.experimental.list_physical_devices("GPU")
-if len(physical_devices) > 0:
-    tf.config.experimental.set_memory_growth(physical_devices[0], True)
-from dataclasses import dataclass
-from typing import Dict
-
-# from absl import app, flags, logging
-# from absl.flags import FLAGS
-import core.utils as utils
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-from core.config import cfg
-
-# deep sort imports
-from deep_sort import nn_matching, preprocessing
-from deep_sort.detection import Detection
-from deep_sort.tracker import Tracker
-
-# from PIL import Image
-from tensorflow.compat.v1 import ConfigProto, InteractiveSession
-from tensorflow.python.saved_model import tag_constants
-from tools import generate_detections as gdet
-
-from ..data_types import BoundingBox, TrackedObject
-
-
-@dataclass
-class Flags:
-    framework: str
-    weights: str
-    size: int
-    tiny: bool
-    model: str
-    iou: float
-    score: float
-    dont_show: bool
-    info: bool
-    count: bool
-
-
-FLAGS = Flags(
-    framework="tf",
-    weights=os.path.join(
-        os.path.dirname(os.path.realpath(__file__)), "../yolov4-deepsort/checkpoints/yolov4-416"
-    ),
-    size=416,
-    tiny=True,
-    model="yolov4",
-    iou=0.45,
-    score=0.50,
-    dont_show=True,
-    info=False,
-    count=False,
-)
-
-# flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt')
-# flags.DEFINE_string('weights', './checkpoints/yolov4-416',
-#                     'path to weights file')
-# flags.DEFINE_integer('size', 416, 'resize images to')
-# flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
-# flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
-# flags.DEFINE_float('iou', 0.45, 'iou threshold')
-# flags.DEFINE_float('score', 0.50, 'score threshold')
-# flags.DEFINE_boolean('dont_show', False, 'dont show video output')
-# flags.DEFINE_boolean('info', False, 'show detailed info of tracked objects')
-# flags.DEFINE_boolean('count', False, 'count objects being tracked on screen')
-
-# load standard tensorflow saved model
-saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
-infer = saved_model_loaded.signatures["serving_default"]
-
-
-def yolov4_deepsort_video_track(video_file: str):
-    # Definition of the parameters
-    max_cosine_distance = 0.4
-    nn_budget = None
-    nms_max_overlap = 1.0
-
-    # initialize deep sort
-
-    model_filename = os.path.join(
-        os.path.dirname(os.path.realpath(__file__)),
-        "../yolov4-deepsort/model_data/mars-small128.pb",
-    )
-    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
-    # calculate cosine distance metric
-    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
-    # initialize tracker
-    tracker = Tracker(metric)
-
-    # load configuration for object detector
-    config = ConfigProto()
-    config.gpu_options.allow_growth = True
-    # TODO: when to use session
-    # session = InteractiveSession(config=config)
-    InteractiveSession(config=config)
-    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
-    input_size = 416
-
-    formatted_result: Dict[str, TrackedObject] = {}
-    cap = cv2.VideoCapture(video_file)
-    frame_num = 0
-    # while video is running
-    while cap.isOpened():
-        # Capture frame-by-frame
-        ret, frame = cap.read()
-        if ret:
-            # TODO: when to use image
-            # image = Image.fromarray(frame)
-            frame_num += 1
-            # print('Frame #: ', frame_num)
-            # TODO: when to use frame_size
-            # frame_size = frame.shape[:2]
-            image_data = cv2.resize(frame, (input_size, input_size))
-            image_data = image_data / 255.0
-            image_data = image_data[np.newaxis, ...].astype(np.float32)
-            # start_time = time.time()
-
-            batch_data = tf.constant(image_data)
-            pred_bbox = infer(batch_data)
-            for key, value in pred_bbox.items():
-                boxes = value[:, :, 0:4]
-                pred_conf = value[:, :, 4:]
-
-            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
-                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
-                scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
-                max_output_size_per_class=50,
-                max_total_size=50,
-                iou_threshold=FLAGS.iou,
-                score_threshold=FLAGS.score,
-            )
-
-            # convert data to numpy arrays and slice out unused elements
-            num_objects = valid_detections.numpy()[0]
-            bboxes = boxes.numpy()[0]
-            bboxes = bboxes[0 : int(num_objects)]
-            scores = scores.numpy()[0]
-            scores = scores[0 : int(num_objects)]
-            classes = classes.numpy()[0]
-            classes = classes[0 : int(num_objects)]
-
-            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
-            original_h, original_w, _ = frame.shape
-            bboxes = utils.format_boxes(bboxes, original_h, original_w)
-
-            # store all predictions in one parameter for simplicity when calling functions
-            pred_bbox = [bboxes, scores, classes, num_objects]
-
-            # read in all class names from config
-            class_names: Dict[int, str] = utils.read_class_names(cfg.YOLO.CLASSES)
-
-            # by default allow all classes in .names file
-            allowed_classes = list(class_names.values())
-
-            # custom allowed classes (uncomment line below to customize tracker for only people)
-            # allowed_classes = ['person']
-
-            # loop through objects and use class index to get class name, allow only classes in allowed_classes list
-            _names = []
-            deleted_indx = []
-            for i in range(num_objects):
-                class_indx = int(classes[i])
-                class_name = class_names[class_indx]
-                if class_name not in allowed_classes:
-                    deleted_indx.append(i)
-                else:
-                    _names.append(class_name)
-            names = np.array(_names)
-            if FLAGS.count:
-                cv2.putText(
-                    frame,
-                    "Objects being tracked: {}".format(len(names)),
-                    (5, 35),
-                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
-                    2,
-                    (0, 255, 0),
-                    2,
-                )
-                print("Objects being tracked: {}".format(len(names)))
-            # delete detections that are not in allowed_classes
-            bboxes = np.delete(bboxes, deleted_indx, axis=0)
-            scores = np.delete(scores, deleted_indx, axis=0)
-
-            # encode yolo detections and feed to tracker
-            features = encoder(frame, bboxes)
-            detections = [
-                Detection(bbox, score, class_name, feature)
-                for bbox, score, class_name, feature in zip(bboxes, scores, names, features)
-            ]
-
-            # initialize color map
-            cmap = plt.get_cmap("tab20b")
-            # TODO: when to use colors
-            # colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
-            [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
-
-            # run non-maxima supression
-            boxs = np.array([d.tlwh for d in detections])
-            scores = np.array([d.confidence for d in detections])
-            classes = np.array([d.class_name for d in detections])
-            indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
-            detections = [detections[i] for i in indices]
-
-            # Call the tracker
-            tracker.predict()
-            tracker.update(detections)
-
-            # update tracks
-            # current_bboxes = []
-            # current_labels = []
-
-            for track in tracker.tracks:
-                if not track.is_confirmed() or track.time_since_update > 1:
-                    continue
-                bbox = track.to_tlbr()
-                class_name = track.get_class()
-                # current_bboxes.append([[int(bbox[0]), int(bbox[1])], [int(bbox[2]), int(bbox[3])]])
-                # current_labels.append(class_name)
-                item_id = f"{class_name}-{str(track.track_id)}"
-                if item_id not in formatted_result:
-                    formatted_result[item_id] = TrackedObject(class_name)
-
-                formatted_result[item_id].bboxes.append(
-                    BoundingBox(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
-                )
-                formatted_result[item_id].frame_num.append(frame_num)
-
-        else:
-            break
-    print("# of tracked items:", len(formatted_result))
-    return formatted_result
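Stripped of the TensorFlow and DeepSORT specifics, the core of the loop deleted above (and of the yolov5 module deleted below) is the per-frame accumulation of confirmed tracks into `TrackedObject`s. A self-contained sketch of that step; `BoundingBox` and `TrackedObject` here are simplified stand-ins for the real `spatialyze.data_types` classes:

```python
from dataclasses import dataclass, field
from typing import Dict, List


# Simplified stand-ins for spatialyze.data_types.BoundingBox / TrackedObject.
@dataclass
class BoundingBox:
    x1: int
    y1: int
    x2: int
    y2: int


@dataclass
class TrackedObject:
    object_type: str
    bboxes: List[BoundingBox] = field(default_factory=list)
    frame_num: List[int] = field(default_factory=list)


def accumulate(
    results: Dict[str, TrackedObject],
    class_name: str,
    track_id: int,
    bbox: BoundingBox,
    frame_num: int,
) -> None:
    # One entry per (class, track id) pair; a confirmed track appends its
    # bounding box and frame number for every frame it survives.
    item_id = f"{class_name}-{track_id}"
    if item_id not in results:
        results[item_id] = TrackedObject(class_name)
    results[item_id].bboxes.append(bbox)
    results[item_id].frame_num.append(frame_num)
```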
diff --git a/spatialyze/trackers/object_tracker_yolov5_deepsort.py b/spatialyze/trackers/object_tracker_yolov5_deepsort.py
deleted file mode 100644
index 1ffa824..0000000
--- a/spatialyze/trackers/object_tracker_yolov5_deepsort.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import os
-import sys
-
-CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.append(os.path.join(CURRENT_DIR, "../yolov5-deepsort/yolov5/"))
-sys.path.append(os.path.join(CURRENT_DIR, "../yolov5-deepsort/"))
-
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Union
-
-import torch
-from deep_sort_pytorch.deep_sort import DeepSort
-from deep_sort_pytorch.utils.parser import get_config
-from yolov5.models.experimental import attempt_load
-from yolov5.utils.datasets import LoadImages
-from yolov5.utils.downloads import attempt_download
-from yolov5.utils.general import (
-    check_img_size,
-    non_max_suppression,
-    scale_coords,
-    xyxy2xywh,
-)
-from yolov5.utils.torch_utils import select_device
-
-from ..data_types import BoundingBox, TrackedObject
-
-
-@dataclass
-class YoloV5Opt:
-    source: str
-    yolo_weights: str = os.path.join(CURRENT_DIR, "../yolov5-deepsort/yolov5/weights/yolov5s.pt")
-    deep_sort_weights: str = os.path.join(
-        CURRENT_DIR, "../yolov5-deepsort/deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7"
-    )
-    # output: str = 'inference/output'
-    img_size: Union[Tuple[int, int], int] = 640
-    conf_thres: float = 0.4
-    iou_thres: float = 0.5
-    # fourcc: str = 'mp4v'
-    device: str = ""
-    # show_vid: bool = False
-    # save_vid: bool = False
-    # save_txt: bool = False
-    classes: Optional[List[int]] = None
-    agnostic_nms: bool = False
-    augment: bool = False
-    # evaluate: bool = False
-    config_deepsort: str = os.path.join(
-        CURRENT_DIR, "../yolov5-deepsort/deep_sort_pytorch/configs/deep_sort.yaml"
-    )
-
-
-def detect(opt: YoloV5Opt):
-    source, yolo_weights, deep_sort_weights, imgsz = (
-        opt.source,
-        opt.yolo_weights,
-        opt.deep_sort_weights,
-        opt.img_size,
-    )
-
-    crop = BoundingBox(0, 0, 100, 100)
-
-    # initialize deepsort
-    cfg = get_config()
-    cfg.merge_from_file(opt.config_deepsort)
-    attempt_download(deep_sort_weights, repo="mikel-brostrom/Yolov5_DeepSort_Pytorch")
-    deepsort = DeepSort(
-        deep_sort_weights,
-        max_dist=cfg.DEEPSORT.MAX_DIST,
-        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
-        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
-        max_age=cfg.DEEPSORT.MAX_AGE,
-        n_init=cfg.DEEPSORT.N_INIT,
-        nn_budget=cfg.DEEPSORT.NN_BUDGET,
-        use_cuda=True,
-    )
-
-    # Initialize
-    device = select_device(opt.device)
-
-    half = device.type != "cpu"  # half precision only supported on CUDA
-    # Load model
-    model = attempt_load(yolo_weights, map_location=device)  # load FP32 model
-    stride = int(model.stride.max())  # model stride
-    imgsz = check_img_size(imgsz, s=stride)  # check img_size
-    names = model.module.names if hasattr(model, "module") else model.names  # get class names
-    if half:
-        model.half()  # to FP16
-
-    dataset = LoadImages(source, img_size=imgsz, stride=stride)
-
-    # Get names and colors
-    names = model.module.names if hasattr(model, "module") else model.names
-
-    # Run inference
-    # if device.type != "cpu":
-    #     _, img, _, _ = dataset[0]
-    #     h, w = img.shape[1:]
-
-    #     # crop image
-    #     x1, y1, x2, y2 = [
-    #         int(v / 100.0)
-    #         for v in [
-    #             w * crop.x1,
-    #             h * crop.y1,
-    #             w * crop.x2,
-    #             h * crop.y2,
-    #         ]
-    #     ]
-
-    #     img = img[:, y1:y2, x1:x2]
-    #     model(
-    #         torch.zeros(1, 3, img.shape[1], img.shape[2])
-    #         .to(device)
-    #         .type_as(next(model.parameters()))
-    #     )  # run once
-
-    formatted_result: Dict[str, TrackedObject] = {}
-    for frame_idx, (_, img, im0s, _, _) in enumerate(dataset):
-        h, w = img.shape[1:]
-
-        # crop image
-        x1, y1, x2, y2 = [
-            int(v / 100.0)
-            for v in [
-                w * crop.x1,
-                h * crop.y1,
-                w * crop.x2,
-                h * crop.y2,
-            ]
-        ]
-        img = img[:, y1:y2, x1:x2]
-
-        img = torch.from_numpy(img).to(device)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if img.ndimension() == 3:
-            img = img.unsqueeze(0)
-
-        # Inference
-        pred = model(img, augment=opt.augment)[0]
-
-        # Apply NMS
-        pred = non_max_suppression(
-            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms
-        )
-
-        # Process detections
-        for det in pred:  # detections per image
-            if det is None or not len(det):
-                deepsort.increment_ages()
-                continue
-
-            # add padding from cropped frame
-            det[:, :4] += torch.tensor([[x1, y1, x1, y1]]).to(device)
-
-            # Rescale boxes from img_size to im0 size
-            det[:, :4] = scale_coords(
-                (h, w),
-                det[:, :4],
-                im0s.shape,
-            ).round()
-
-            xywhs = xyxy2xywh(det[:, 0:4])
-            confs = det[:, 4]
-            clss = det[:, 5]
-
-            # pass detections to deepsort
-            outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0s)
-
-            # collect result bounding boxes
-            for output in outputs:
-                y1, x1, y2, x2, id, c = [int(o) for o in output]
-                bboxes = BoundingBox(x1, y1, x2, y2)
-                item_id = f"{names[c]}-{str(id)}"
-
-                if item_id not in formatted_result:
-                    formatted_result[item_id] = TrackedObject(object_type=names[c])
-
-                formatted_result[item_id].bboxes.append(bboxes)
-                formatted_result[item_id].frame_num.append(frame_idx)
-
-    return formatted_result
-
-
-def yolov5_deepsort_video_track(opt: YoloV5Opt):
-    with torch.no_grad():
-        return detect(opt)
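The yolov5 path deleted above crops each frame by percentage before inference (the hard-coded `BoundingBox(0, 0, 100, 100)` is a no-op full-frame crop) and shifts detections back into full-frame coordinates afterwards. A self-contained sketch of that coordinate round-trip; the helper names are hypothetical:

```python
import torch


def crop_frame(img: torch.Tensor, crop_pct: tuple[int, int, int, int]) -> tuple[torch.Tensor, int, int]:
    # img is CHW; crop_pct is (x1, y1, x2, y2) in percent of frame size,
    # mirroring the BoundingBox(0, 0, 100, 100) default in the deleted code.
    _, h, w = img.shape
    px1, py1, px2, py2 = crop_pct
    x1, y1 = int(w * px1 / 100.0), int(h * py1 / 100.0)
    x2, y2 = int(w * px2 / 100.0), int(h * py2 / 100.0)
    return img[:, y1:y2, x1:x2], x1, y1


def uncrop_boxes(det: torch.Tensor, x1: int, y1: int) -> torch.Tensor:
    # Detections were produced on the cropped frame, so shift the xyxy
    # boxes back into full-frame coordinates before rescaling/tracking.
    det = det.clone()
    det[:, :4] += torch.tensor([x1, y1, x1, y1], dtype=det.dtype)
    return det
```

With the default (0, 0, 100, 100) crop both helpers are identities, which matches the effective behavior of the deleted code.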