diff --git a/.env b/.env index b38ceb3..b1c70b4 100644 --- a/.env +++ b/.env @@ -1 +1 @@ -mamba activate spatialyze \ No newline at end of file +mamba activate spatialyze-ablation \ No newline at end of file diff --git a/environment.yml b/environment.yml index 1d2d81b..26b9abd 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: spatialyze +name: spatialyze-ablation channels: - conda-forge diff --git a/playground/run-ablation.ipynb b/playground/run-ablation.ipynb index 8ec0c55..2a29aaa 100644 --- a/playground/run-ablation.ipynb +++ b/playground/run-ablation.ipynb @@ -172,6 +172,7 @@ "from spatialyze.video_processor.stages.tracking_3d.from_tracking_2d_and_road import FromTracking2DAndRoad\n", "from spatialyze.video_processor.stages.tracking_3d.from_tracking_2d_and_depth import FromTracking2DAndDepth\n", "from spatialyze.video_processor.stages.tracking_3d.tracking_3d import Tracking3DResult, Tracking3D\n", + "from spatialyze.video_processor.stages.tracking_3d.from_tracking_2d_and_detection_3d import FromTracking2DAndDetection3D as FromT2DAndD3D\n", "\n", "from spatialyze.video_processor.stages.segment_trajectory import SegmentTrajectory\n", "from spatialyze.video_processor.stages.segment_trajectory.construct_segment_trajectory import SegmentPoint\n", @@ -362,7 +363,7 @@ " # names = set(sampled_scenes)\n", " filtered_videos = [\n", " n for n in videos\n", - " if n[6:10] in names # and 'FRONT' in n # and n.endswith('FRONT')\n", + " if n[6:10] in names and 'FRONT' in n # and n.endswith('FRONT')\n", " ]\n", " N = len(filtered_videos)\n", " print('# of filtered videos:', N)\n", @@ -420,14 +421,14 @@ "\n", " # Ingest Trackings\n", " ego_meta = frames.interpolated_frames\n", - " sortmeta = FromTracking2DAndRoad.get(output)\n", + " sortmeta = Tracking3D.get(output)\n", " assert sortmeta is not None\n", " segment_trajectories = FromTracking3D.get(output)\n", " tracks = get_tracks(sortmeta, ego_meta, segment_trajectories)\n", " for obj_id, track in tracks.items():\n", " trajectory = format_trajectory(name, obj_id, track)\n", " if trajectory:\n", - " insert_trajectory(database, *trajectory)\n", + " insert_trajectory(database, *trajectory[0])\n", "\n", " # Ingest Camera\n", " accs: 'ACameraConfig' = []\n", @@ -569,9 +570,6 @@ "\n", " # Object Filter\n", " if object_filter:\n", - " # if isinstance(object_filter, bool):\n", - " # object_filter = ['car', 'truck']\n", - " # TODO: filter objects based on predicate\n", " pipeline.add_filter(ObjectTypeFilter(predicate=predicate))\n", "\n", " # 3D Detection\n", @@ -592,10 +590,11 @@ " cache=ss_cache,\n", " ))\n", "\n", - " if geo_depth:\n", - " pipeline.add_filter(FromTracking2DAndRoad())\n", - " else:\n", - " pipeline.add_filter(FromTracking2DAndDepth())\n", + " pipeline.add_filter(FromT2DAndD3D())\n", + " # if geo_depth:\n", + " # pipeline.add_filter(FromTracking2DAndRoad())\n", + " # else:\n", + " # pipeline.add_filter(FromTracking2DAndDepth())\n", "\n", " # Segment Trajectory\n", " # pipeline.add_filter(FromTracking3D())\n", @@ -902,9 +901,8 @@ }, "outputs": [], "source": [ - "tests = ['de', 'optde', 'noopt', 'inview', 'objectfilter', 'geo', 'opt']\n", - "# tests = ['de', 'optde']\n", - "# tests = ['de']\n", + "tests = ['optde', 'de', 'noopt', 'inview', 'objectfilter', 'geo', 'opt']\n", + "tests = ['de', 'noopt', 'inview', 'objectfilter']\n", "# random.shuffle(tests)\n", "\n", "for _test in tests:\n", diff --git a/playground/run-all-local b/playground/run-all-local new file mode 100644 index 0000000..55878f3 --- /dev/null +++ b/playground/run-all-local @@ -0,0 +1,10 @@ +#!/bin/bash + +tmux new-session -d -s run-test -n run-test-local || echo 'hi' +tmux send-keys -t run-test-local 'docker container start mobilitydb' Enter +tmux send-keys -t run-test-local 'cd ~/Documents/spatialyze' Enter +tmux send-keys -t run-test-local 'rm -rf ./outputs/run/*' Enter +tmux send-keys -t run-test-local 'rm -rf ./run-ablation.py' Enter +tmux send-keys -t run-test-local 'python ./spatialyze/utils/ingest_road.py "./data/scenic/road-network/boston-seaport"' Enter +tmux send-keys -t run-test-local 'jupyter nbconvert --to python ./playground/run-ablation.ipynb && mv playground/run-ablation.py .' Enter +tmux send-keys -t run-test-local 'python run-ablation.py' Enter \ No newline at end of file diff --git a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_depth.py b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_depth.py index a663cfe..cc3eafa 100644 --- a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_depth.py +++ b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_depth.py @@ -43,7 +43,7 @@ def _run(self, payload: "Payload") -> "tuple[bitarray | None, dict[str, list] | t.detection_id, t.object_id, point_from_camera, - point, + tuple(point.tolist()), t.bbox_left, t.bbox_top, t.bbox_w, diff --git a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_detection_3d.py b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_detection_3d.py index b3bfbc7..9a48cd4 100644 --- a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_detection_3d.py +++ b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_detection_3d.py @@ -6,7 +6,7 @@ from .tracking_3d import Metadatum, Tracking3D, Tracking3DResult -class FromTracking2DAndDetection2D(Tracking3D): +class FromTracking2DAndDetection3D(Tracking3D): def _run(self, payload: "Payload") -> "tuple[bitarray | None, dict[str, list] | None]": metadata: "list[Metadatum]" = [] trajectories: "dict[int, list[Tracking3DResult]]" = {} @@ -34,19 +34,18 @@ def _run(self, payload: "Payload") -> "tuple[bitarray | None, dict[str, list] | detection_map = { did: (det, p, pfc) for det, p, pfc, did - in zip(dets, points, points_from_camera, dids) + in zip(dets, points.tolist(), points_from_camera.tolist(), dids) } trackings3d: "dict[int, Tracking3DResult]" = {} for object_id, t in tracking.items(): did = t.detection_id det, p, pfc = detection_map[did] - xfc, yxc, zxc = pfc.tolist() trackings3d[object_id] = Tracking3DResult( t.frame_idx, t.detection_id, t.object_id, - (xfc, yxc, zxc), + pfc, p, t.bbox_left, t.bbox_top, diff --git a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_road.py b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_road.py index 34e6f40..dcf8659 100644 --- a/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_road.py +++ b/spatialyze/video_processor/stages/tracking_3d/from_tracking_2d_and_road.py @@ -63,21 +63,16 @@ def _run(self, payload: "Payload"): points = rotated_directions * ts + translation[:, np.newaxis] points_from_camera = rotate(points - translation[:, np.newaxis], rotation.inverse) - for t, oid, point, point_from_camera in zip(_ts, oids, points.T, points_from_camera.T): + for t, oid, point, point_from_camera in zip(_ts, oids, points.T.tolist(), points_from_camera.T.tolist()): assert point_from_camera.shape == (3,) assert isinstance(oid, int) or oid.is_integer() oid = int(oid) - point_from_camera = ( - point_from_camera[0], - point_from_camera[1], - point_from_camera[2], - ) trackings3d[oid] = Tracking3DResult( t.frame_idx, t.detection_id, oid, - point_from_camera, - point, + tuple(point_from_camera), + tuple(point), t.bbox_left, t.bbox_top, t.bbox_w, diff --git a/spatialyze/video_processor/stages/tracking_3d/tracking_3d.py b/spatialyze/video_processor/stages/tracking_3d/tracking_3d.py index e98154c..a4c7859 100644 --- a/spatialyze/video_processor/stages/tracking_3d/tracking_3d.py +++ b/spatialyze/video_processor/stages/tracking_3d/tracking_3d.py @@ -2,9 +2,6 @@ from dataclasses import dataclass from typing import Any, Dict -import numpy as np -import numpy.typing as npt - from ...types import DetectionId, Float3 from ..stage import Stage @@ -15,7 +12,7 @@ class Tracking3DResult: detection_id: DetectionId object_id: float point_from_camera: "Float3" - point: "npt.NDArray[np.floating]" + point: "Float3" bbox_left: float bbox_top: float bbox_w: float diff --git a/spatialyze/world.py b/spatialyze/world.py index 7d222a7..23a50d9 100644 --- a/spatialyze/world.py +++ b/spatialyze/world.py @@ -22,12 +22,10 @@ ) from .video_processor.stages.detection_estimation import DetectionEstimation from .video_processor.stages.in_view.in_view import InView +from .video_processor.stages.stage import Stage from .video_processor.stages.tracking_2d.strongsort import StrongSORT -from .video_processor.stages.tracking_3d.from_tracking_2d_and_depth import ( - FromTracking2DAndDepth, -) -from .video_processor.stages.tracking_3d.from_tracking_2d_and_road import ( - FromTracking2DAndRoad, +from .video_processor.stages.tracking_3d.from_tracking_2d_and_detection_3d import ( + FromTracking2DAndDetection3D, ) from .video_processor.stages.tracking_3d.tracking_3d import Metadatum as T3DMetadatum from .video_processor.stages.tracking_3d.tracking_3d import Tracking3D @@ -107,35 +105,24 @@ def _execute(world: "World", optimization=True): # for gc in world._geogConstructs: # gc.ingest(database) # analyze predicates to generate pipeline - objtypes_filter = ObjectTypeFilter(predicate=world.predicates) + steps: "list[Stage]" = [] + if optimization: + steps.append(InView(distance=50, predicate=world.predicates)) + steps.append(DecodeFrame()) + steps.append(YoloDetection()) if optimization: - pipeline = Pipeline( - [ - DecodeFrame(), - InView(distance=50, predicate=world.predicates), - YoloDetection(), - objtypes_filter, - FromDetection2DAndRoad(), - *( - [DetectionEstimation()] - if all(t in ["car", "truck"] for t in objtypes_filter.types) - else [] - ), - StrongSORT(), - FromTracking2DAndRoad(), - ] - ) + objtypes_filter = ObjectTypeFilter(predicate=world.predicates) + steps.append(objtypes_filter) + steps.append(FromDetection2DAndRoad()) + if all(t in ["car", "truck"] for t in objtypes_filter.types): + steps.append(DetectionEstimation()) else: - pipeline = Pipeline( - [ - DecodeFrame(), - YoloDetection(), - DepthEstimation(), - FromDetection2DAndDepth(), - StrongSORT(), - FromTracking2DAndDepth(), - ] - ) + steps.append(DepthEstimation()) + steps.append(FromDetection2DAndDepth()) + steps.append(StrongSORT()) + steps.append(FromTracking2DAndDetection3D()) + + pipeline = Pipeline(steps) qresults: "dict[str, list[tuple]]" = {} vresults: "dict[str, list[T3DMetadatum]]" = {} diff --git a/tests/workflow/test_optimized_workflow.py b/tests/workflow/test_optimized_workflow.py index 8203790..4911352 100644 --- a/tests/workflow/test_optimized_workflow.py +++ b/tests/workflow/test_optimized_workflow.py @@ -12,7 +12,6 @@ from spatialyze.video_processor.stages.tracking_3d.tracking_3d import Tracking3DResult from spatialyze.world import World, _execute from spatialyze.video_processor.cache import disable_cache -from spatialyze.video_processor.metadata_json_encoder import MetadataJSONEncoder OUTPUT_DIR = './data/pipeline/test-results' @@ -75,7 +74,7 @@ def test_optimized_workflow(): assert tuple(p.detection_id) == tuple(g.detection_id), (p.detection_id, g.detection_id) assert p.object_id == g.object_id, (p.object_id, g.object_id) assert np.allclose(np.array(p.point_from_camera), np.array(g.point_from_camera)), (p.point_from_camera, g.point_from_camera) - assert np.allclose(np.array(p.point.tolist()), np.array(g.point)), (p.point, g.point) + assert np.allclose(np.array(p.point), np.array(g.point)), (p.point, g.point) assert p.bbox_left == g.bbox_left, (p.bbox_left, g.bbox_left) assert p.bbox_top == g.bbox_top, (p.bbox_top, g.bbox_top) assert p.bbox_w == g.bbox_w, (p.bbox_w, g.bbox_w)