diff --git a/.github/workflows/clean-up-and-format.yml b/.github/workflows/clean-up-and-format.yml index de0c76f..20a40df 100644 --- a/.github/workflows/clean-up-and-format.yml +++ b/.github/workflows/clean-up-and-format.yml @@ -29,7 +29,7 @@ jobs: run: ./scripts/commit-and-push.sh "[CI] cleanup python notebooks" format: - name: Format and Lint + name: Format runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -60,7 +60,3 @@ jobs: - name: Commit formatted changes run: ./scripts/commit-and-push.sh "[CI] format" - - - name: Analyze the code with flake8 - if: always() - run: python -m flake8 spatialyze --statistics diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..b827007 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,25 @@ +name: Lint +on: + push: + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + token: ${{ secrets.GH_PAT || github.token }} + + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install Dependencies + run: | + pip install --upgrade pip + pip install flake8 + + - name: Analyze the code with flake8 + run: python -m flake8 spatialyze --statistics diff --git a/.github/workflows/test-video-processor.yml b/.github/workflows/test-video-processor.yml deleted file mode 100644 index c0777bd..0000000 --- a/.github/workflows/test-video-processor.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Test Video Processor -on: - push: - -jobs: - test-video-processor: - name: Test Video Processor - runs-on: ubuntu-latest - services: - mobilitydb: - image: mobilitydb/mobilitydb:14-3.2-1 - ports: - - 25440:5432 - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - token: ${{ secrets.GH_PAT || github.token }} - submodules: recursive - - - name: Setup Micromamba - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: environment.yml - init-shell: none - cache-downloads: false - - - name: Check Python Versions - shell: micromamba-shell {0} - run: | - python --version - python3 --version - which python - which python3 - which pip - which pip3 - which poetry - - - name: Install Dependencies - shell: micromamba-shell {0} - run: poetry install --no-interaction --without dev --with test - - - name: Install lap (Hack) - shell: micromamba-shell {0} - run: | - pip install --upgrade pip - pip install lap - - - name: Check Installed Packages - shell: micromamba-shell {0} - run: pip list - - - name: Extend MobilityDB with User-Defined functions - shell: micromamba-shell {0} - run: | - pushd scripts/pg-extender - python ../generate_pg_extender.py - cat install.sql - psql -h localhost -p 25440 -d mobilitydb -U docker -c "SET client_min_messages TO WARNING;" -c "\i install.sql;" - popd - env: - PGPASSWORD: docker - - - name: Ingest data - shell: micromamba-shell {0} - run: | - python ./scripts/ingest_road.py - env: - AP_PORT: 25440 - - - name: Unit Test - shell: micromamba-shell {0} - run: pytest tests/video_processor - env: - AP_PORT: 25440 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3339377..6284ab5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,8 +3,89 @@ on: push: jobs: - test: - name: Test + test-video-processor: + name: Test Video Processor + runs-on: ubuntu-latest + services: + mobilitydb: + image: mobilitydb/mobilitydb:14-3.2-1 + ports: + - 
25440:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + token: ${{ secrets.GH_PAT || github.token }} + submodules: recursive + + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: environment.yml + init-shell: none + cache-downloads: false + + - name: Check Python Versions + shell: micromamba-shell {0} + run: | + python --version + python3 --version + which python + which python3 + which pip + which pip3 + which poetry + + - name: Install Dependencies + shell: micromamba-shell {0} + run: poetry install --no-interaction --without dev --with test + + - name: Install lap (Hack) + shell: micromamba-shell {0} + run: | + pip install --upgrade pip + pip install lap + + - name: Check Installed Packages + shell: micromamba-shell {0} + run: pip list + + - name: Extend MobilityDB with User-Defined functions + shell: micromamba-shell {0} + run: | + pushd scripts/pg-extender + python ../generate_pg_extender.py + cat install.sql + psql -h localhost -p 25440 -d mobilitydb -U docker -c "SET client_min_messages TO WARNING;" -c "\i install.sql;" + popd + env: + PGPASSWORD: docker + + - name: Ingest data + shell: micromamba-shell {0} + run: | + python ./scripts/ingest_road.py + env: + AP_PORT: 25440 + + - name: Unit Test + shell: micromamba-shell {0} + run: pytest --cov=spatialyze --cov-report=xml tests/video_processor + env: + AP_PORT: 25440 + + - uses: actions/upload-artifact@v3 + with: + name: video-processor-coverage + path: ./coverage.xml + + test-engine-and-interface: + name: Test Engine and Interface runs-on: ubuntu-latest services: mobilitydb: @@ -131,9 +212,60 @@ jobs: AP_PORT_RESET: 25443 AP_PORT_IMPORT: 25444 AP_PORT_SQL: 25445 - - - name: Upload coverage reports to Codecov - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov -t ${{ secrets.CODECOV_TOKEN }} + + - uses: actions/upload-artifact@v3 + with: + name: engine-and-interface-coverage + path: ./coverage.xml + + upload-coverage: + name: Upload Coverage + needs: [test-video-processor, test-engine-and-interface] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + token: ${{ secrets.GH_PAT || github.token }} + + - name: Download Coverage + uses: actions/download-artifact@v3 + + - name: Upload to Codecov + uses: codecov/codecov-action@v3 + with: + verbose: true + + # - name: Download Coverage + # uses: actions/download-artifact@v3 + # with: + # name: video-processor-coverage + # path: ./video-processor-coverage + + # - name: Download Coverage + # uses: actions/download-artifact@v3 + # with: + # name: engine-and-interface-coverage + # path: ./engine-and-interface-coverage + + # - name: Combine Coverage + # shell: micromamba-shell {0} + # run: | + # pip install coverage + # coverage combine ./video-processor-coverage ./engine-and-interface-coverage + # coverage xml + # coverage report + # coverage html + # coverage-badge -o coverage.svg -f + + # - name: Upload Coverage + # uses: actions/upload-artifact@v3 + # with: + # name: coverage + # path: ./coverage.xml + + # - name: Upload Coverage + # uses: actions/upload-artifact@v3 + # with: + # name: coverage-badge + # path: ./coverage.svg \ No newline at end of file diff --git a/README.md b/README.md index f3e4050..414fc13 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,23 @@

Github Actions Test Status
- Github Actions Test Video Processor Status
Github Actions Type Check Status
- Github Actions Type Check Status Codecov Coverage Status
+ Github Actions Type Check Status

diff --git a/codecov.yml b/codecov.yml index 85dc922..1501075 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,4 +1,9 @@ ignore: - - "spatialyze/legacy" - - "spatialyze/video_processor" + - "spatialyze/video_processor/stages/detection_estimation" + - "spatialyze/video_processor/stages/segment_trajectory" + - "spatialyze/video_processor/stages/depth_estimation.py" + - "spatialyze/video_processor/stages/strongsort_with_skip.py" + - "spatialyze/video_processor/utils/preprocess.py" + - "spatialyze/video_processor/utils/process_pipeline.py" + - "spatialyze/video_processor/utils/query_analyzer.py" - "**/__init__.py" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e614aeb..d9f1f95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,9 +104,6 @@ exclude = 'spatialyze/video_processor/modules' [tool.pyright] pythonVersion = '3.10' ignore = [ - 'spatialyze/legacy/*', - 'spatialyze/trackers/object_tracker_yolov4_deepsort.py', - 'spatialyze/trackers/object_tracker_yolov5_deepsort.py', 'spatialyze/video_processor/modules', 'spatialyze/video_processor/stages/detection_estimation', 'spatialyze/video_processor/stages/segment_trajectory', diff --git a/spatialyze/data_types/__init__.py b/spatialyze/data_types/__init__.py index 35f9a86..252deaa 100644 --- a/spatialyze/data_types/__init__.py +++ b/spatialyze/data_types/__init__.py @@ -1,4 +1,3 @@ -from . import views from .bounding_box import BoundingBox from .box import Box from .camera import Camera @@ -14,6 +13,5 @@ "Camera", "TrackedObject", "Trajectory", - "views", "FetchCameraTuple", ] diff --git a/spatialyze/data_types/views/__init__.py b/spatialyze/data_types/views/__init__.py deleted file mode 100644 index e12c94f..0000000 --- a/spatialyze/data_types/views/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .camera_view import CameraView -from .location_view import LocationView -from .metadata_view import MetadataView -from .trajectory_view import TrajectoryView -from .view import View - -metadata_view = MetadataView() - -__all__ = ["View", "CameraView", "LocationView", "TrajectoryView", "metadata_view"] diff --git a/spatialyze/data_types/views/camera_view.py b/spatialyze/data_types/views/camera_view.py deleted file mode 100644 index 03b729f..0000000 --- a/spatialyze/data_types/views/camera_view.py +++ /dev/null @@ -1,21 +0,0 @@ -from .view import View - - -class CameraView(View): - camera_id = "cameraId" - frame_id = "frameId" - frame_num = "frameNum" - file_name = "fileName" - camera_translation = "cameraTranslation" - camera_rotation = "cameraRotation" - camera_intrinsic = "cameraIntrinsic" - ego_translation = "egoTranslation" - ego_rotation = "egoRotation" - timestamp = "timestamp" - camera_heading = "cameraHeading" - ego_heading = "egoHeading" - table_name = "Cameras" - - def __init__(self): - super().__init__(self.table_name) - self.default = True diff --git a/spatialyze/data_types/views/location_view.py b/spatialyze/data_types/views/location_view.py deleted file mode 100644 index 6f74d23..0000000 --- a/spatialyze/data_types/views/location_view.py +++ /dev/null @@ -1,11 +0,0 @@ -from .view import View - - -class LocationView(View): - location = "trajBbox" - timestamp = "timestamp" - table_name = "General_Bbox" - - def __init__(self): - super().__init__(self.table_name) - self.default = True diff --git a/spatialyze/data_types/views/metadata_view.py b/spatialyze/data_types/views/metadata_view.py deleted file mode 100644 index c4a5c28..0000000 --- a/spatialyze/data_types/views/metadata_view.py +++ /dev/null @@ -1,44 
+0,0 @@ -from .camera_view import CameraView -from .location_view import LocationView -from .trajectory_view import TrajectoryView -from .view import View - - -class MetadataView(View): - view_name = "metadata_view" - object_id = TrajectoryView.object_id - object_type = TrajectoryView.object_type - color = TrajectoryView.color - trajectory = TrajectoryView.trajectory - location = LocationView.location - timestamp = LocationView.timestamp - view_map = { - object_id: TrajectoryView, - object_type: TrajectoryView, - color: TrajectoryView, - trajectory: TrajectoryView, - location: LocationView, - } - - def __init__(self): - super().__init__(self.view_name) - self.default = True - self.trajectory_view = TrajectoryView() - self.location_view = LocationView() - self.camera_view = CameraView() - - def map_view(self, column_key: str): - if self.view_map[column_key] == TrajectoryView: - return self.trajectory_view - elif self.view_map[column_key] == LocationView: - return self.location_view - else: - return self.camera_view - - def resolve_key(self, column_key: str): - return ( - self.trajectory_view.resolve_key(column_key) - or self.location_view.resolve_key(column_key) - or self.camera_view.resolve_key(column_key) - or column_key - ) diff --git a/spatialyze/data_types/views/trajectory_view.py b/spatialyze/data_types/views/trajectory_view.py deleted file mode 100644 index 9c57a9e..0000000 --- a/spatialyze/data_types/views/trajectory_view.py +++ /dev/null @@ -1,15 +0,0 @@ -from .view import View - - -class TrajectoryView(View): - object_id = "itemId" - object_type = "objectType" - color = "color" - trajectory = "trajCentroids" - traj = "trajCentroids" - heading = "itemHeadings" - table_name = "Item_General_Trajectory" - - def __init__(self): - super().__init__(self.table_name) - self.default = True diff --git a/spatialyze/data_types/views/view.py b/spatialyze/data_types/views/view.py deleted file mode 100644 index b12ff10..0000000 --- a/spatialyze/data_types/views/view.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Any - - -class View: - def __init__(self, view_name: str): - self.view_name: str = view_name - self.default: bool = False - - def from_context(self, context: Any): - self.context = context - - def resolve_key(self, column_key: str): - if column_key in self.__class__.__dict__: - return self.__class__.__dict__[column_key] - else: - return None - - def contain(self, column_key: str): - return column_key in self.__dict__.keys() diff --git a/spatialyze/legacy/layers.py b/spatialyze/legacy/layers.py deleted file mode 100644 index 6ff0072..0000000 --- a/spatialyze/legacy/layers.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright Niantic 2019. Patent Pending. All rights reserved. -# -# This software is licensed under the terms of the Monodepth2 licence -# which allows for non-commercial use only, the full terms of which are made -# available in the LICENSE file. - -from __future__ import absolute_import, division, print_function - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def disp_to_depth(disp, min_depth, max_depth): - """Convert network's sigmoid output into depth prediction - The formula for this conversion is given in the 'additional considerations' - section of the paper. 
- """ - min_disp = 1 / max_depth - max_disp = 1 / min_depth - scaled_disp = min_disp + (max_disp - min_disp) * disp - depth = 1 / scaled_disp - return scaled_disp, depth - - -def transformation_from_parameters(axisangle, translation, invert=False): - """Convert the network's (axisangle, translation) output into a 4x4 matrix""" - R = rot_from_axisangle(axisangle) - t = translation.clone() - - if invert: - R = R.transpose(1, 2) - t *= -1 - - T = get_translation_matrix(t) - - if invert: - M = torch.matmul(R, T) - else: - M = torch.matmul(T, R) - - return M - - -def get_translation_matrix(translation_vector): - """Convert a translation vector into a 4x4 transformation matrix""" - T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device) - - t = translation_vector.contiguous().view(-1, 3, 1) - - T[:, 0, 0] = 1 - T[:, 1, 1] = 1 - T[:, 2, 2] = 1 - T[:, 3, 3] = 1 - T[:, :3, 3, None] = t - - return T - - -def rot_from_axisangle(vec): - """Convert an axisangle rotation into a 4x4 transformation matrix - (adapted from https://github.com/Wallacoloo/printipi) - Input 'vec' has to be Bx1x3 - """ - angle = torch.norm(vec, 2, 2, True) - axis = vec / (angle + 1e-7) - - ca = torch.cos(angle) - sa = torch.sin(angle) - C = 1 - ca - - x = axis[..., 0].unsqueeze(1) - y = axis[..., 1].unsqueeze(1) - z = axis[..., 2].unsqueeze(1) - - xs = x * sa - ys = y * sa - zs = z * sa - xC = x * C - yC = y * C - zC = z * C - xyC = x * yC - yzC = y * zC - zxC = z * xC - - rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device) - - rot[:, 0, 0] = torch.squeeze(x * xC + ca) - rot[:, 0, 1] = torch.squeeze(xyC - zs) - rot[:, 0, 2] = torch.squeeze(zxC + ys) - rot[:, 1, 0] = torch.squeeze(xyC + zs) - rot[:, 1, 1] = torch.squeeze(y * yC + ca) - rot[:, 1, 2] = torch.squeeze(yzC - xs) - rot[:, 2, 0] = torch.squeeze(zxC - ys) - rot[:, 2, 1] = torch.squeeze(yzC + xs) - rot[:, 2, 2] = torch.squeeze(z * zC + ca) - rot[:, 3, 3] = 1 - - return rot - - -class ConvBlock(nn.Module): - """Layer to perform a convolution followed by ELU""" - - def __init__(self, in_channels, out_channels): - super(ConvBlock, self).__init__() - - self.conv = Conv3x3(in_channels, out_channels) - self.nonlin = nn.ELU(inplace=True) - - def forward(self, x): - out = self.conv(x) - out = self.nonlin(out) - return out - - -class Conv3x3(nn.Module): - """Layer to pad and convolve input""" - - def __init__(self, in_channels, out_channels, use_refl=True): - super(Conv3x3, self).__init__() - - if use_refl: - self.pad = nn.ReflectionPad2d(1) - else: - self.pad = nn.ZeroPad2d(1) - self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) - - def forward(self, x): - out = self.pad(x) - out = self.conv(out) - return out - - -class BackprojectDepth(nn.Module): - """Layer to transform a depth image into a point cloud""" - - def __init__(self, batch_size, height, width): - super(BackprojectDepth, self).__init__() - - self.batch_size = batch_size - self.height = height - self.width = width - - meshgrid = np.meshgrid(range(self.width), range(self.height), indexing="xy") - self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32) - self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords), requires_grad=False) - - self.ones = nn.Parameter( - torch.ones(self.batch_size, 1, self.height * self.width), requires_grad=False - ) - - self.pix_coords = torch.unsqueeze( - torch.stack([self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0 - ) - self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1) - self.pix_coords = nn.Parameter( 
- torch.cat([self.pix_coords, self.ones], 1), requires_grad=False - ) - - def forward(self, depth, inv_K): - cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords) - cam_points = depth.view(self.batch_size, 1, -1) * cam_points - cam_points = torch.cat([cam_points, self.ones], 1) - - return cam_points - - -class Project3D(nn.Module): - """Layer which projects 3D points into a camera with intrinsics K and at position T""" - - def __init__(self, batch_size, height, width, eps=1e-7): - super(Project3D, self).__init__() - - self.batch_size = batch_size - self.height = height - self.width = width - self.eps = eps - - def forward(self, points, K, T): - P = torch.matmul(K, T)[:, :3, :] - - cam_points = torch.matmul(P, points) - - pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps) - pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width) - pix_coords = pix_coords.permute(0, 2, 3, 1) - pix_coords[..., 0] /= self.width - 1 - pix_coords[..., 1] /= self.height - 1 - pix_coords = (pix_coords - 0.5) * 2 - return pix_coords - - -def upsample(x): - """Upsample input tensor by a factor of 2""" - return F.interpolate(x, scale_factor=2, mode="nearest") - - -def get_smooth_loss(disp, img): - """Computes the smoothness loss for a disparity image - The color image is used for edge-aware smoothness - """ - grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:]) - grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :]) - - grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True) - grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True) - - grad_disp_x *= torch.exp(-grad_img_x) - grad_disp_y *= torch.exp(-grad_img_y) - - return grad_disp_x.mean() + grad_disp_y.mean() - - -class SSIM(nn.Module): - """Layer to compute the SSIM loss between a pair of images""" - - def __init__(self): - super(SSIM, self).__init__() - self.mu_x_pool = nn.AvgPool2d(3, 1) - self.mu_y_pool = nn.AvgPool2d(3, 1) - self.sig_x_pool = nn.AvgPool2d(3, 1) - self.sig_y_pool = nn.AvgPool2d(3, 1) - self.sig_xy_pool = nn.AvgPool2d(3, 1) - - self.refl = nn.ReflectionPad2d(1) - - self.C1 = 0.01**2 - self.C2 = 0.03**2 - - def forward(self, x, y): - x = self.refl(x) - y = self.refl(y) - - mu_x = self.mu_x_pool(x) - mu_y = self.mu_y_pool(y) - - sigma_x = self.sig_x_pool(x**2) - mu_x**2 - sigma_y = self.sig_y_pool(y**2) - mu_y**2 - sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y - - SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) - SSIM_d = (mu_x**2 + mu_y**2 + self.C1) * (sigma_x + sigma_y + self.C2) - - return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1) - - -def compute_depth_errors(gt, pred): - """Computation of error metrics between predicted and ground truth depths""" - thresh = torch.max((gt / pred), (pred / gt)) - a1 = (thresh < 1.25).float().mean() - a2 = (thresh < 1.25**2).float().mean() - a3 = (thresh < 1.25**3).float().mean() - - rmse = (gt - pred) ** 2 - rmse = torch.sqrt(rmse.mean()) - - rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 - rmse_log = torch.sqrt(rmse_log.mean()) - - abs_rel = torch.mean(torch.abs(gt - pred) / gt) - - sq_rel = torch.mean((gt - pred) ** 2 / gt) - - return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 diff --git a/spatialyze/legacy/lens.py b/spatialyze/legacy/lens.py deleted file mode 100644 index 30a74d5..0000000 --- a/spatialyze/legacy/lens.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import annotations - -from math import radians -from typing import 
List, Tuple - -import numpy as np - - -class Lens: - def __init__(self, resolution, cam_origin): - """ - Construct a lens for the camera that translates to 3D world coordinates. - - Args: - field_of_view: Angle of field of view of camera - resolution: Tuple of video resolution - cam_origin: Points of where camera is located in the world - skew_factor: (Optional) Float factor to correct shearness of camera - """ - x, y = resolution - self.cam_origin = cam_origin - cam_x, cam_y = cam_origin - - def pixel_to_world(self, pixel_coord: List[float], depth: float): - """ - Translate pixel coordinates to world coordinates. - """ - return None - - def pixels_to_world(self, pixel_coords, depths): - """ - Translate multiple pixel coordinates to world coordinates. - """ - return None - - def world_to_pixel(self, world_coord: List[float], depth: float): - """ - Translate world coordinates to pixel coordinates - """ - return None - - def __eq__(self, other): - return self.__dict__ == other.__dict__ - - -class VRLens(Lens): - def __init__(self, resolution, cam_origin, yaw, roll, pitch): - """ - Construct a lens for the camera that translates to 3D world, spherical - coordinates. - - Args: - field_of_view: Angle of field of view of camera - resolution: Tuple of video resolution - cam_origin: Points of where camera is located in the world - skew_factor: (Optional) Float factor to correct shearness of camera - """ - x, y = resolution - self.cam_origin = cam_origin - cam_x, cam_y, cam_z = cam_origin - - yaw, pitch, roll = np.deg2rad(yaw), np.deg2rad(pitch), np.deg2rad(roll) - # Transformation 1 - # X_1, X_2, X_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), -np.sin(pitch) - - # Y_1 = np.cos(yaw)*np.sin(pitch)*np.sin(roll) - np.sin(yaw)*np.cos(roll) - # Y_2 = np.sin(yaw)*np.sin(pitch)*np.sin(roll) + np.cos(yaw)*np.cos(roll) - # Y_3 = np.cos(pitch)*np.sin(roll) - - # Z_1 = np.cos(yaw)*np.sin(pitch)*np.cos(roll) + np.sin(yaw)*np.sin(roll) - # Z_2 = np.sin(yaw)*np.sin(pitch)*np.cos(roll) - np.cos(yaw)*np.sin(roll) - # Z_3 = np.cos(pitch)*np.cos(roll) - - # self.transform = np.matrix([[X_1, Y_1, Z_1, cam_x], - # [X_2, Y_2, Z_2, cam_y], - # [X_3, Y_3, Z_3, cam_z], - # [0, 0, 0, 1] - # ]) - - # Transformation 2 - # z = yaw, y = pitch, x = roll - # R_1, R_2, R_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), np.sin(pitch) - # R_4 = np.sin(roll)*np.sin(pitch)*np.cos(yaw) - np.cos(roll)*np.sin(yaw) - # R_5 = np.sin(roll)*np.sin(pitch)*np.sin(yaw) + np.cos(roll)*np.cos(yaw) - # R_6 = np.sin(roll)*np.cos(pitch) - # R_7 = np.cos(roll)*np.sin(pitch)*np.cos(yaw) - np.sin(roll)*np.sin(yaw) - # R_8 = np.sin(roll)*np.cos(yaw) + np.cos(roll)*np.sin(pitch)*np.sin(yaw) - # R_9 = np.cos(roll)*np.cos(pitch) - - # self.transform = np.matrix([[R_1, R_2, R_3, cam_x], - # [R_4, R_5, R_6, cam_y], - # [R_7, R_8, R_9, cam_z], - # [0, 0, 0, 1] - # ]) - - # Transformation 3 - # z = yaw, y = pitch, x = roll - # R_1, R_2, R_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), np.sin(pitch) - # R_4 = np.sin(roll)*np.sin(pitch)*np.cos(yaw) - np.cos(roll)*np.sin(yaw) - # R_5 = np.sin(roll)*np.sin(pitch)*np.sin(yaw) + np.cos(roll)*np.cos(yaw) - # R_6 = np.sin(roll)*-np.cos(pitch) - # R_7 = -np.cos(roll)*np.sin(pitch)*np.cos(yaw) - np.sin(roll)*np.sin(yaw) - # R_8 = np.sin(roll)*np.cos(yaw) - np.cos(roll)*np.sin(pitch)*np.sin(yaw) - # R_9 = np.cos(roll)*np.cos(pitch) - - # rotation_mat = np.matrix([[R_1, R_2, R_3], - # [R_4, R_5, R_6], - # [R_7, R_8, R_9]]) - - # cam_org_vec = np.matrix([[cam_x], [cam_y], [cam_z]]) - # 
self.col_vec = np.ravel(rotation_mat @ cam_org_vec) - # col_x, col_y, col_z = self.col_vec - # self.transform = np.matrix([[R_1, R_2, R_3, -col_x], - # [R_4, R_5, R_6, -col_y], - # [R_7, R_8, R_9, -col_z], - # [0, 0, 0, 1] - # ]) - - # Transformation 4 - # X_1, X_2, X_3 = np.cos(pitch)*np.cos(yaw), np.cos(pitch)*np.sin(yaw), -np.sin(pitch) - - # Y_1 = np.cos(yaw)*np.sin(pitch)*np.sin(roll) - np.sin(yaw)*np.cos(roll) - # Y_2 = np.sin(yaw)*np.sin(pitch)*np.sin(roll) + np.cos(yaw)*np.cos(roll) - # Y_3 = np.cos(pitch)*np.sin(roll) - - # Z_1 = np.cos(yaw)*np.sin(pitch)*np.cos(roll) + np.sin(yaw)*np.sin(roll) - # Z_2 = np.sin(yaw)*np.sin(pitch)*np.cos(roll) - np.cos(yaw)*np.sin(roll) - # Z_3 = np.cos(pitch)*np.cos(roll) - - # rotation_mat = np.matrix([[X_1, Y_1, Z_1], - # [X_2, Y_2, Z_2], - # [X_3, Y_3, Z_3]]) - # cam_org_vec = np.matrix([[cam_x], [cam_y], [cam_z]]) - # self.col_vec = np.ravel(rotation_mat @ cam_org_vec) - # col_x, col_y, col_z = self.col_vec - # self.transform = np.matrix([[X_1, Y_1, Z_1, col_x], - # [X_2, Y_2, Z_2, col_y], - # [X_3, Y_3, Z_3, col_z], - # [0, 0, 0, 1] - # ]) - - # Transformation 5 -- Lefthanded rotation matrix - R_1, R_2, R_3 = np.cos(pitch) * np.cos(yaw), np.cos(pitch) * np.sin(yaw), -np.sin(pitch) - R_4 = np.sin(roll) * np.sin(pitch) * np.cos(yaw) - np.cos(roll) * np.sin(yaw) - R_5 = np.sin(roll) * np.sin(pitch) * np.sin(yaw) + np.cos(roll) * np.cos(yaw) - R_6 = np.sin(roll) * np.cos(pitch) - - R_7 = np.cos(roll) * np.sin(pitch) * np.cos(yaw) + np.sin(roll) * np.sin(yaw) - R_8 = np.cos(roll) * np.sin(pitch) * np.sin(yaw) - np.sin(roll) * np.cos(yaw) - R_9 = np.cos(roll) * np.cos(pitch) - - rotation_mat = np.matrix([[R_1, R_2, R_3], [R_4, R_5, R_6], [R_7, R_8, R_9]]) - cam_org_vec = np.matrix(np.array([[cam_x], [cam_y], [cam_z]])) - self.col_vec = np.ravel(rotation_mat @ cam_org_vec) - col_x, col_y, col_z = self.col_vec - self.transform = np.matrix( - [ - [R_1, R_2, R_3, -col_x], - [R_4, R_5, R_6, -col_y], - [R_7, R_8, R_9, -col_z], - [0, 0, 0, 1], - ] - ) - - self.inv_transform = np.linalg.inv(self.transform) - - def pixel_to_world(self, pixel_coord: List[float], depth: float): - """ - Translate pixel coordinates to world coordinates. - """ - x, y = pixel_coord - pixel = np.matrix(np.array([[x], [y], [depth], [0]])) - return self.transform @ pixel - - def pixels_to_world(self, pixel_coords, depths): - """ - Translate multiple pixel coordinates to world coordinates. - """ - x, y = pixel_coords - pixels = np.matrix([x, y, depths, np.ones(len(depths))]) - print(pixels) - return self.transform @ pixels - - def world_to_pixel(self, world_coord: List[float], depth: float): - """ - Translate world coordinates to pixel coordinates - """ - x, y, z, w = world_coord - world_pixel = np.matrix(np.array([[x], [y], [z], [w]])) - return self.inv_transform @ world_pixel - - def world_to_pixels(self, world_coords): - """ - Translate world coordinates to pixel coordinates - """ - x, y, z = world_coords - world_pixel = np.matrix([x, y, z, np.zeros(len(x))]) - return self.inv_transform @ world_pixel - - -class PinholeLens(Lens): - # TODO: (@Vanessa) change all the places where pinhole lens appears and change arguments - def __init__( - self, - resolution: Tuple[float, float], - cam_origin: Tuple[float, float, float], - field_of_view, - skew_factor, - ): - """ - Construct a lens for the camera that translates to 3D world coordinates. 
- - Args: - field_of_view: Angle of field of view of camera - resolution: Tuple of video resolution - cam_origin: Points of where camera is located in the world - skew_factor: (Optional) Float factor to correct shearness of camera - depth: Float of depth of view from the camera - """ - self.fov = field_of_view - x, y = resolution - self.focal_x = (x / 2) / np.tan(radians(field_of_view / 2)) - self.focal_y = (y / 2) / np.tan(radians(field_of_view / 2)) - self.cam_origin = cam_origin - cam_x, cam_y, cam_z = cam_origin - self.alpha = skew_factor - self.inv_transform = np.linalg.inv( - np.matrix([[self.focal_x, self.alpha, cam_x], [0, self.focal_y, cam_y], [0, 0, 1]]) - ) - self.transform = np.matrix( - np.array( - [[self.focal_x, self.alpha, cam_x, 0], [0, self.focal_y, cam_y, 0], [0, 0, 1, 0]] - ) - ) - - def __eq__(self, other): - return ( - isinstance(other, PinholeLens) - and self.fov == other.fov - and self.focal_x == other.focal_x - and self.focal_y == other.focal_y - and self.cam_origin == other.cam_origin - and self.alpha == other.alpha - and (self.inv_transform == other.inv_transform).all() - and (self.transform == other.transform).all() - ) - - def pixel_to_world(self, pixel_coord: List[float], depth: float): - """ - Translate pixel coordinates to world coordinates. - """ - x, y = pixel_coord - pixel = np.matrix(np.array([[x], [y], [depth]])) - return (self.inv_transform @ pixel).flatten().tolist()[0] - - def pixels_to_world(self, pixel_coords, depths): - """ - Translate multiple pixel coordinates to world coordinates. - """ - x, y = pixel_coords - pixels = np.matrix([x, y, depths]) - return self.inv_transform @ pixels - - def world_to_pixel(self, world_coord: List[float], depth: float): - """ - Translate world coordinates to pixel coordinates - """ - x, y, z = world_coord - world_pixel = np.matrix(np.array([[x], [y], [z], [1.0]])) - return self.transform @ world_pixel - - def world_to_pixels(self, world_coords): - """ - Translate world coordinates to pixel coordinates - """ - x, y, z = world_coords - world_pixel = np.matrix([x, y, z, np.ones(len(x))]) - return self.transform @ world_pixel diff --git a/spatialyze/legacy/metadata_context.py b/spatialyze/legacy/metadata_context.py deleted file mode 100644 index 5b2fbce..0000000 --- a/spatialyze/legacy/metadata_context.py +++ /dev/null @@ -1,415 +0,0 @@ -from __future__ import annotations - -import ast -import copy -import os -from typing import Callable, List, Optional - -from decompyle3 import deparse_code2str - -from spatialyze.data_types.views import MetadataView, View, metadata_view -from spatialyze.legacy.metadata_util import ( - COUNT, - Tmax, - Tmin, - common_aggregation, - common_geo, - convert_time, - decompile_filter, - new_decompile_filter, -) - - -class Project: - # TODO: Add checks for names - # Select Node (contains Column Nodes and Aggregate Nodes - # within Column Nodes) - - def __init__(self, root): - self.root = root - self.distinct = False - self.column_nodes = [] - - def append(self, column_node): - self.column_nodes.append(column_node) - - def find(self, column_name): - for column_node in self.column_nodes: - if column_node.column_name == column_name: - return column_node - return None - - def remove(self, column_name): - column_node = self.find(column_name) - self.column_nodes.remove(column_node) - - def is_empty(self): - return len(self.column_nodes) == 0 - - -class Column: - def __init__(self, column_name: str): - self.column_name: str = column_name - self.aggr_nodes: List[Aggregate] = [] - - def 
aggregate(self, func_name: str, parameters: List[str] = [], special_args: List[str] = []): - if func_name in common_aggregation: - if len(special_args) > 0: - agg_node = eval(func_name)(func_name, parameters, special_args) - else: - agg_node = eval(func_name)(func_name, parameters) - else: - agg_node = Aggregate(func_name, parameters) - self.aggr_nodes.append(agg_node) - return self - - def get_coordinates(self): - # self.aggregate("asMFJSON", special_args=["coordinates"]) - self.aggregate("asMFJSON") - - def interval(self, starttime, endtime): - self.aggregate("atPeriodSet", parameters=["'{[%s, %s)}'" % (starttime, endtime)]) - - -class Aggregate: - def __init__(self, func_name: str, parameters: list = []): - self.func_name = func_name - self.parameters = parameters - - -class asMFJSON(Aggregate): - def __init__(self, func_name="asMFJSON", parameters: list = [], interesting_fields=[]): - super().__init__(func_name, parameters) - self.interesting_fields = interesting_fields - - # def function_map(self): - - -class Scan: - def __init__(self, root): - self.view: Optional[View] = None - self.root = root - - def add_view(self, view: View): - self.view = view - - -class Filter: - def __init__(self, root): - self.predicates = [] - self.root = root - - def append(self, predicate): - self.predicates.append(predicate) - predicate.root = self - predicate.decompile() - return self.root.view(use_view=predicate.view_context) - - def is_empty(self): - return len(self.predicates) == 0 - - def get_view(self): - return self.root.scan.view - - -class Predicate: - def __init__(self, predicate: Callable[[int], bool], evaluated_var={}): - self.predicate = predicate - s = deparse_code2str(self.predicate.__code__, out=open(os.devnull, "w")) - self.t = ast.parse(s) - self.evaluated_var = evaluated_var - self.root = None - - def decompile(self): - # assert self.root - ( - self.attribute, - self.operation, - self.comparator, - self.bool_ops, - self.cast_types, - self.view_context, - ) = decompile_filter(self.t, self.evaluated_var, self.root.get_view()) - - def new_decompile(self): - ( - self.attribute, - self.operation, - self.comparator, - self.bool_ops, - self.cast_types, - self.view_context, - ) = new_decompile_filter(self.t, self.evaluated_var, None) - - def get_compile(self): - return self.attribute, self.operation, self.comparator, self.bool_ops, self.cast_types - - -class Group: - def __init__(self, root): - self.group = None - - -class MetadataContext: - """Context Root Node""" - - def __init__(self, single_mode=True): - # Initialize the root, which is itself - self.root = self - self.start_time = None - self.project = Project(self.root) - self.scan = Scan(self.root) - self.filter = Filter(self.root) - self.groupby = None - self.single_mode = single_mode - # self.orderby_nodes = [orderby_node1, orderby_node2...] # we dont need these for now - - def select_column(self, column_key): - """Select a specific column""" - mapped_view = metadata_view.map_view(column_key) - if self.scan.view is None: - self.scan.view = mapped_view - elif ( - self.scan.view.default - and mapped_view.default - and self.scan.view.view_name != mapped_view.view_name - ): - self.scan.view = metadata_view - - view_name = mapped_view.view_name - column_node = Column(view_name + "." 
+ column_key) - self.project.append(column_node) - return column_node - - def delete_column(self, column_name): - """Remove column in column nodes in question""" - self.project.remove(column_name) - - def clear(self): - """Restart a context from scratch""" - self.project = Project(self.root) - self.scan = Scan(self.root) - self.filter = Filter(self.root) - - def get_columns(self, *argv, distinct=False): - if not self.single_mode: - self.project.distinct = distinct - for arg in argv: - arg(self) - return self - else: - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - for arg in argv: - new_context = arg(new_context) - return new_context - - # The following functions would be Apperception commands - def predicate(self, p, evaluated_var={}): - if not self.single_mode: - new_predicate = Predicate(p, evaluated_var) - self.filter.append(new_predicate) - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - - new_predicate = Predicate(p, evaluated_var) - new_context = new_context.filter.append(new_predicate) - return new_context - - def selectkey(self, distinct=False): - if not self.single_mode: - self.project.distinct = distinct - # self.select_column(MetadataView.camera_id) - self.select_column(MetadataView.object_id) - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - - # new_context.select_column(MetadataView.camera_id) - new_context.select_column(MetadataView.object_id) - return new_context - - def get_object_type(self, distinct=False): - if not self.single_mode: - self.project.distinct = distinct - # self.select_column(MetadataView.camera_id) - self.select_column(MetadataView.object_type) - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - - # new_context.select_column(MetadataView.camera_id) - new_context.select_column(MetadataView.object_type) - return new_context - - def get_trajectory(self, time_interval=[], distinct=False): - # TODO: return a proxy type - if not self.single_mode: - self.project.distinct = distinct - traj_column = self.select_column(MetadataView.trajectory) - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.get_coordinates() - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - traj_column = new_context.select_column(MetadataView.trajectory) - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.get_coordinates() - return new_context - - def get_geo(self, time_interval=[], distinct=False): - # TODO: return a proxy type - if not self.single_mode: - self.project.distinct = distinct - for geo_func in common_geo: - new_trajColumn = self.select_column(MetadataView.location) - new_trajColumn.aggregate(geo_func) - - self.interval(time_interval) - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - for geo_func in common_geo: - new_trajColumn = new_context.select_column(MetadataView.location) - new_trajColumn.aggregate(geo_func) - - new_context.interval(time_interval) - return new_context - - def interval(self, time_interval): - # TODO: return a proxy type - start, end = convert_time(self.start_time, time_interval) - if not self.single_mode: - self.predicate(lambda 
obj: Tmin(obj.location) >= start, {"start": "'" + start + "'"}) - self.predicate(lambda obj: Tmax(obj.location) < end, {"end": "'" + end + "'"}) - return self - else: - new_context = self.predicate( - lambda obj: Tmin(obj.location) >= start, {"start": "'" + start + "'"} - ).predicate(lambda obj: Tmax(obj.location) < end, {"end": "'" + end + "'"}) - return new_context - - def get_time(self, distinct=False): - # TODO: return a proxy type - if not self.single_mode: - self.project.distinct = distinct - new_trajColumn = self.select_column(MetadataView.location) - new_trajColumn.aggregate("Tmin") - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - new_trajColumn = new_context.select_column(MetadataView.location) - new_trajColumn.aggregate("Tmin") - return new_context - - def get_distance(self, time_interval=[], distinct=False): - # TODO: return a proxy type - if not self.single_mode: - self.project.distinct = distinct - traj_column = self.select_column(MetadataView.trajectory) - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.aggregate("cumulativeLength") - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.aggregate("cumulativeLength") - return new_context - - def get_speed(self, time_interval=[], distinct=False): - # TODO: return a proxy type - if not self.single_mode: - self.project.distinct = distinct - traj_column = self.select_column(MetadataView.trajectory) - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.aggregate("speed") - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.project.distinct = distinct - traj_column = new_context.select_column(MetadataView.trajectory) - starttime, endtime = convert_time(self.start_time, time_interval) - traj_column.interval(starttime, endtime) - traj_column.aggregate("speed") - return new_context - - def count(self, key): - # make a copy of self first - new_context = copy.deepcopy(self) - - count_map = { - MetadataContext.get_trajectory: "trajCentroids", - MetadataContext.get_time: "Tmin(trajBbox)", - MetadataContext.selectkey: "distinct(cameraId, itemId)", - } - traj_column = new_context.select_column(count_map[key]) - traj_column.aggregate(COUNT) - return new_context - - def group(self, key): - # make a copy of self first - new_context = copy.deepcopy(self) - new_context.groupby = Group(key) - - def view(self, view_name="", use_view=None): - # TODO:Not fully functioned yet - if not self.single_mode: - if use_view: - self.scan.add_view(use_view) - else: - temp_view = View(view_name) - temp_view.context = self - self.scan.add_view(temp_view) - return self - else: - # make a copy of self first - new_context = copy.deepcopy(self) - if use_view: - new_context.scan.add_view(use_view) - else: - temp_view = View(view_name) - temp_view.context = self - new_context.scan.add_view(temp_view) - # need to figure out the return value of the view command; - return new_context - - def join(self, join_view, join_type="", join_condition=""): - # make a copy of self first - new_context = copy.deepcopy(self) - - if join_view.view_name == metadata_view.view_name: - 
new_context.scan.join(metadata_view.trajectory_view) - new_context.scan.join(metadata_view.location_view) - else: - new_context.scan.join(join_view) - - return new_context - - -primarykey = MetadataContext.selectkey -trajectory = MetadataContext.get_trajectory -distance = MetadataContext.get_distance -speed = MetadataContext.get_speed -geometry = MetadataContext.get_geo -object_type = MetadataContext.get_object_type -time = MetadataContext.get_time diff --git a/spatialyze/legacy/metadata_context_executor.py b/spatialyze/legacy/metadata_context_executor.py deleted file mode 100644 index fa55344..0000000 --- a/spatialyze/legacy/metadata_context_executor.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -import psycopg2 - -from spatialyze.data_types.views import View, metadata_view -from spatialyze.legacy.metadata_context import ( - Aggregate, - Column, - Filter, - MetadataContext, - Predicate, - Project, - Scan, - asMFJSON, -) -from spatialyze.legacy.metadata_util import common_aggregation -from spatialyze.utils import join - - -class MetadataContextExecutor: - """Executor class to execute the context input - Essentially translates the context to a SQL query that - the backend and interpret - """ - - def __init__(self, conn, new_context: MetadataContext = None): - if new_context: - self.context(new_context) - self.conn = conn - - def connect_db( - self, host="localhost", user=None, password=None, port=25432, database_name=None - ): - """Connect to the database""" - self.conn = psycopg2.connect( - database=database_name, user=user, password=password, host=host, port=port - ) - - def context(self, new_context: MetadataContext): - self.current_context = new_context - return self - - def visit(self, create_view: bool, view_name: str): - select_query = self.visit_project(self.current_context.project) - from_query = self.visit_scan(self.current_context.scan) - where_query = self.visit_filter(self.current_context.filter) - if create_view: - db_query = ( - "CREATE VIEW " + view_name + " AS " + select_query + from_query + where_query + ";" - ) - print(db_query + "\n") - return "SELECT * FROM " + view_name + ";" - else: - db_query = select_query + from_query + where_query + ";" - print(db_query + "\n") - return db_query - - def visit_project(self, project_node: Project): - select_query: str = "SELECT " - if project_node.distinct: - select_query += "distinct on(itemId) " - if project_node.is_empty(): - return select_query + "* " - for column_node in project_node.column_nodes: - select_query += self.visit_column(column_node) - select_query += ", " - select_query = select_query[:-2] - return select_query - - def visit_scan(self, scan_node: Scan): - from_query: str = " From " - if scan_node.view: - if scan_node.view.default: - if scan_node.view == metadata_view: - from_query += ( - metadata_view.trajectory_view.view_name - + " INNER JOIN " - + metadata_view.location_view.view_name - + " USING(itemId) " - ) - else: - from_query = from_query + scan_node.view.view_name + " " - # for view_node in scan_node.views: - # from_query += self.visit_table(view_node) - # from_query += ", " - # from_query = from_query[:-2] - return from_query - - def visit_filter(self, filter_node: Filter): - where_query = " Where " - if filter_node.is_empty(): - return "" - for predicate_node in filter_node.predicates: - where_query += self.visit_predicate(predicate_node) - where_query += " AND " - where_query = where_query[:-5] - return where_query - - def visit_column(self, column_node: Column): - aggregated = 
column_node.column_name - for aggr_node in column_node.aggr_nodes: - aggregated = translate_aggregation(aggr_node, aggregated) - print(aggregated) - return aggregated - - def visit_table(self, view_node: View): - return view_node.view_name - - def visit_predicate(self, predicate_node: Predicate): - attribute, operation, comparator, bool_ops, cast_types = predicate_node.get_compile() - # assert(len(attribute) == len(operation) == len(comparator) == len(bool_ops) == len(cast_types)) - predicate_query = "" - for i in range(len(attribute)): - attr = attribute[i] - op = operation[i] - comp = comparator[i] - bool_op = bool_ops[i] - # cast_type = cast_types[i] - # cast_str = "::" + cast_type if cast_type != "" else "" - # predicate_query += bool_op + attr + cast_str + op + comp + cast_str - predicate_query += bool_op + attr + op + comp - return predicate_query - - def execute(self, create_view: bool = False, view_name: str = ""): - self.cursor = self.conn.cursor() - self.cursor.execute(self.visit(create_view=create_view, view_name=view_name)) - return np.asarray(self.cursor.fetchall()) - - -def translate_aggregation(aggr_node: Aggregate, aggregated: str): - aggregated = f"{aggr_node.func_name}({join([aggregated, *aggr_node.parameters])})" - - if isinstance(aggr_node, asMFJSON) and aggr_node.func_name in common_aggregation: - if len(aggr_node.interesting_fields) > 0: - interesting_field = aggr_node.interesting_fields[0] - aggregated += f"::json->'{interesting_field}'" - else: - aggregated += "::json" - return aggregated diff --git a/spatialyze/legacy/metadata_tests.py b/spatialyze/legacy/metadata_tests.py deleted file mode 100644 index 9b1f233..0000000 --- a/spatialyze/legacy/metadata_tests.py +++ /dev/null @@ -1,135 +0,0 @@ -import unittest - -import psycopg2 - -from spatialyze.legacy.metadata_context import ( - MetadataContext, - geometry, - primarykey, - time, -) -from spatialyze.legacy.metadata_context_executor import MetadataContextExecutor - -test_context = MetadataContext() - -conn = psycopg2.connect( - database="mobilitydb", user="docker", password="docker", host="localhost", port=5432 -) - -test_executor = MetadataContextExecutor(conn) -# test_executor.connect_db(user="postgres", password="postgres", database_name="postgres") -# Test simple queries using Context class - - -class TestStringMethods(unittest.TestCase): - def test_commands(self): - test_executor.context(test_context.selectkey()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_trajectory()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context( - test_context.get_geo().interval("0001-01-01 00:00:00", "9999-12-31 23:59:59.999999") - ) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_geo()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_time()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_speed()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_distance()) - print(test_executor.execute()) - print("------------------------------------") - - test_executor.context(test_context.get_columns(primarykey, geometry, time)) - print("###### bboxes and times are: ", test_executor.execute()) - 
print("------------------------------------") - - # test_executor.context(test_context.count(MetadataContext.selectkey)) - # print(test_executor.execute()) - # print("------------------------------------") - - def test_usecases(self): - # test_executor.context(test_context.predicate(lambda obj:obj.object_id == "Item_1").get_geo()) - # print(test_executor.execute()) - # print("------------------------------------") - - # This query could be confusing since the user may understand it as getting the trajectory of the objects when they are at the intersection - # but the trajectory is actually an attribute, so it's always the entire trajectory - # If the user really wants to get a certain period of trajectory they have to filter out the timestamps - volume = "stbox 'STBOX Z((1.81788543, 2.17411856, 0),(2.79369985, 3.51919659, 2))'" - filtered_world = test_context.predicate(lambda obj: obj.object_type == "car").predicate( - lambda obj: obj.location in volume, {"volume": volume} - ) - trajectory = filtered_world.get_trajectory(distinct=True) - test_executor.context(trajectory) - print(test_executor.execute()) - print("------------------------------------") - - # to get the video over the entire trajectory(amber case) - test_executor.context(filtered_world.selectkey(distinct=True)) - filtered_ids = test_executor.execute() - print("filtered_IDS are *****:", filtered_ids) - - id_array = [filtered_id[0] for filtered_id in filtered_ids] - entire_video = test_context.predicate( - lambda obj: obj.object_id in id_array, {"id_array": id_array} - ).get_columns(primarykey, geometry, time) - test_executor.context(entire_video) - print(test_executor.execute()) - print("------------------------------------") - - # test_executor.context(test_context.predicate(lambda obj:obj.color == "red").group(get_time).predicate(lambda obj:count(obj) >= 3).get_time()) - # print(test_executor.execute()) - # print("------------------------------------") - - # def test_table_join(self): - # ### Inner Join - # new_meta_context = test_context.selectkey().get_distance().get_speed().view().join(metadata_view) ### create a temporary view without reference - # test_executor.context(new_meta_context.predicate(lambda obj:obj.object_type == 'car')) - # car_newmeta = test_executor.execute() - # print(car_newmeta) - # print("------------------------------------") - - # test_executor.context(new_meta_context.predicate(lambda obj:obj.object_type == 'car').view(view_name="car_view")) - # car_newmeta_view = test_executor.execute() - # print(car_newmeta_view) ### this should be the same result as previous execution - # print("------------------------------------") - - # ### Query from new view - # test_executor.context(test_context.view(use_view = car_newmeta_view).selectkey().get_trajectory().get_speed()) - # print(test_executor.execute()) - # print("------------------------------------") - - # def test_mix(self): - # stbox = "stbox \'STBOX Z((1.81788543, 2.17411856, 0),(2.79369985, 3.51919659, 2))\'" - # proposal_context = test_context.get_trajectory().predicate(lambda obj:obj.object_type == 'car').predicate(lambda obj:obj.location in volume, {"volume":stbox}) - # test_executor.context(proposal_context) - # print(test_executor.execute()) - # print("------------------------------------") - - # test_executor.context(test_context.count(key=MetadataContext.selectkey).predicate(lambda obj: obj.color == "red").group(lambda obj: obj.color)) - # print(test_executor.execute()) - # print("------------------------------------") - - # 
test_executor.context(test_context.get_time().predicate(lambda obj:obj.color == "red" and obj.location in volume and count(obj.id), {"volume":stbox}).group(lambda obj: obj.color)) - # print(test_executor.execute()) - # print("------------------------------------") - # def test_usecases(self): - # TODO: Define use cases here - - -if __name__ == "__main__": - unittest.main() diff --git a/spatialyze/legacy/metadata_util.py b/spatialyze/legacy/metadata_util.py deleted file mode 100644 index 41f040d..0000000 --- a/spatialyze/legacy/metadata_util.py +++ /dev/null @@ -1,262 +0,0 @@ -import ast -import datetime - -from spatialyze.data_types.views import metadata_view - -common_geo = ["Xmin", "Ymin", "Zmin", "Xmax", "Ymax", "Zmax"] -common_aggregation = ["asMFJSON", common_geo] - - -# Map to translate ast comparators to SQL comparators -comparator_map = { - ast.Eq: "==", # pypika takes in python function, so it should be `==` not `=` - ast.NotEq: ">=", - ast.Lt: "<", - ast.LtE: "<=", - ast.Gt: ">", - ast.GtE: ">=", -} - -# Map to translate ast propositions to SQL propositions -propositional_map = {ast.And: "AND", ast.Or: "OR"} - - -def decompile_comparator(comparator, evaluated_var, view): - # print(evaluated_var) - # print(ast.dump(comparator)) - result_comparator = "" - view_context = view - if isinstance(comparator, ast.Call): - func_name = comparator.func.id - result_comparator = func_name + "(" - args = comparator.args - for arg in args: - if isinstance(arg, ast.Attribute): - table_name = arg.value.id - table_attr = arg.attr - view_context, table_name, column_name = resolve_default_view(table_attr, view) - # TODO: else not default - result_comparator += table_name + "." + column_name - elif isinstance(arg, ast.Str): - result_comparator += arg.s - elif isinstance(arg, ast.Name): - if arg.id in evaluated_var: - result_comparator += evaluated_var[arg.id] - else: - result_comparator += arg.id - result_comparator += "," - result_comparator = result_comparator[:-1] + ")" - elif isinstance(comparator, ast.Attribute): - table_name = comparator.value.id - table_attr = comparator.attr - # TODO: if view == None: - # TODO: unresolved, dynamically determine the scan views based on both predicates and select - view_context, table_name, column_name = resolve_default_view(table_attr, view) - result_comparator = table_name + "." 
+ column_name - elif isinstance(comparator, ast.Str): - result_comparator = "'" + comparator.s + "'" - elif isinstance(comparator, ast.Name): - if comparator.id in evaluated_var: - evaluated_variable = evaluated_var[comparator.id] - else: - evaluated_variable = comparator.id - result_comparator = evaluated_variable - else: - print(comparator) - - return result_comparator, view_context - - -def resolve_default_view(attr_name, view): - view_context = view - if view is None: - column_name = metadata_view.trajectory_view.resolve_key(attr_name) - if column_name: - view_context = metadata_view.trajectory_view - else: - column_name = metadata_view.location_view.resolve_key(attr_name) - view_context = metadata_view.location_view - table_name = view_context.view_name - elif view.default: - if view.view_name == "metadata_view": - column_name = view.resolve_key(attr_name) - table_name = view.map_view(column_name).view_name - else: - column_name = view.resolve_key(attr_name) - if not column_name: - view_context = metadata_view - column_name = metadata_view.resolve_key(attr_name) - table_name = metadata_view.map_view(column_name).view_name - else: - table_name = view.view_name - - return view_context, table_name, column_name - - -def decompile_filter(ast_tree, evaluated_var, view): - print(ast.dump(ast_tree)) - attributes = [] - operations = [] - comparators = [] - bool_ops = [""] - cast_types = [] - for ast_node in ast.walk(ast_tree): - module_body = ast_node.body[0] - if isinstance(module_body, ast.Return): - value = module_body.value - # if isinstance(value, ast.BoolOp) - # case where we allow multiple constraints in a single filter, usually for OR - if isinstance(value, ast.Compare): - left = value.left - attribute, left_comebine_view = decompile_comparator(left, evaluated_var, view) - right = value.comparators[0] - comparator, right_combine_view = decompile_comparator(right, evaluated_var, view) - - op = value.ops[0] - if type(op) in comparator_map: - operation = comparator_map[type(op)] - elif isinstance(op, ast.In): - if isinstance(comparator, list): - operation = " IN " - elif isinstance(comparator, str): - operation = "overlap" - - if operation == "overlap": - attribute = "overlap(%s, %s)" % (attribute, comparator) - operation = "=" - comparator = "true" - elif operation == " IN ": - comparator = list_to_str(comparator) - - attributes.append(attribute) - operations.append(operation) - comparators.append(comparator) - - return ( - attributes, - operations, - comparators, - bool_ops, - cast_types, - left_comebine_view or right_combine_view, - ) - - -def new_decompile_filter(ast_tree, evaluated_var, view): - print(ast.dump(ast_tree)) - attributes = [] - operations = [] - comparators = [] - bool_ops = [] - cast_types = [] - - for ast_node in ast.walk(ast_tree): - module_body = ast_node.body[0] - if isinstance(module_body, ast.Return): - value = module_body.value - if isinstance(value, ast.BoolOp): - assert isinstance(value.op, ast.Or) - bool_ops.append("|") - for cmp in value.values: - assert isinstance(cmp, ast.Compare) - - left = cmp.left - attribute, left_comebine_view = decompile_comparator(left, evaluated_var, view) - right = cmp.comparators[0] - comparator, right_combine_view = decompile_comparator( - right, evaluated_var, view - ) - - op = cmp.ops[0] - if type(op) in comparator_map: - operation = comparator_map[type(op)] - elif isinstance(op, ast.In): - if isinstance(comparator, list): - operation = " IN " - elif isinstance(comparator, str): - operation = "overlap" - - if operation == 
"overlap": - attribute = "overlap(%s, %s)" % (attribute, comparator) - operation = "=" - comparator = "true" - elif operation == " IN ": - comparator = list_to_str(comparator) - - attributes.append(attribute) - operations.append(operation) - comparators.append(comparator) - - # case where we allow multiple constraints in a single filter, usually for OR - elif isinstance(value, ast.Compare): - left = value.left - attribute, left_comebine_view = decompile_comparator(left, evaluated_var, view) - right = value.comparators[0] - comparator, right_combine_view = decompile_comparator(right, evaluated_var, view) - - op = value.ops[0] - if type(op) in comparator_map: - operation = comparator_map[type(op)] - elif isinstance(op, ast.In): - if isinstance(comparator, list): - operation = " IN " - elif isinstance(comparator, str): - operation = "overlap" - - if operation == "overlap": - attribute = "overlap(%s, %s)" % (attribute, comparator) - operation = "=" - comparator = "true" - elif operation == " IN ": - comparator = list_to_str(comparator) - - attributes.append(attribute) - operations.append(operation) - comparators.append(comparator) - - return ( - attributes, - operations, - comparators, - bool_ops, - cast_types, - left_comebine_view or right_combine_view, - ) - - -def list_to_str(lst): - result = "(" - for s in lst: - result = result + "'" + s + "'" + "," - result = result[:-1] + ")" - return result - - -def convert_time(start, interval=[]): - if len(interval) == 0: - starttime = str(datetime.datetime.min) - endtime = str(datetime.datetime.max) - else: - starttime = str(start + datetime.timedelta(seconds=interval[0])) - endtime = str(start + datetime.timedelta(seconds=interval[1])) - return starttime, endtime - - -def overlap(stbox1, stbox2): - """Translate the overlap function to psql overlap function""" - return "Overlap(%s, %s)" % (stbox1, stbox2) - - -def Tmin(stbox): - """Translate the Tmin function to psql Tmin function""" - return "Tmin" - - -def Tmax(stbox): - """Translate the Tmax function to psql Tmax function""" - return "Tmax" - - -def COUNT(key): - """SQL Count""" - return "COUNT(%s)" % key diff --git a/spatialyze/legacy/mono_depth_estimator.py b/spatialyze/legacy/mono_depth_estimator.py deleted file mode 100644 index e50f7d6..0000000 --- a/spatialyze/legacy/mono_depth_estimator.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import os - -import monodepth2.networks -import numpy as np -import PIL.Image as pil -import torch -from monodepth2.utils import download_model_if_doesnt_exist -from torchvision import transforms - -from spatialyze.legacy.layers import disp_to_depth - -# Create depth frames for each frame from a video. 
- - -def create_depth_frames(video_byte_array, model_name="mono+stereo_640x192", no_cuda=False): - """Function to predict for a video.""" - assert ( - model_name is not None - ), "You must specify the --model_name parameter; see README.md for an example" - - if torch.cuda.is_available() and not no_cuda: - device = torch.device("cuda") - else: - device = torch.device("cpu") - - download_model_if_doesnt_exist(model_name) - model_path = os.path.join("models", model_name) - print("-> Loading model from ", model_path) - encoder_path = os.path.join(model_path, "encoder.pth") - depth_decoder_path = os.path.join(model_path, "depth.pth") - - # LOADING PRETRAINED MODEL - print(" Loading pretrained encoder") - encoder = monodepth2.networks.ResnetEncoder(18, False) - loaded_dict_enc = torch.load(encoder_path, map_location=device) - - # extract the height and width of image that this model was trained with - feed_height = loaded_dict_enc["height"] - feed_width = loaded_dict_enc["width"] - filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()} - encoder.load_state_dict(filtered_dict_enc) - encoder.to(device) - encoder.eval() - - print(" Loading pretrained decoder") - depth_decoder = monodepth2.networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4)) - - loaded_dict = torch.load(depth_decoder_path, map_location=device) - depth_decoder.load_state_dict(loaded_dict) - - depth_decoder.to(device) - depth_decoder.eval() - - num_frames, original_height, original_width, _ = video_byte_array.shape - disp_map = np.zeros((num_frames, original_height, original_width)) - - # Go through each frame and predict the depth map - for i in range(num_frames): - input_image = pil.fromarray(np.uint8(video_byte_array[i])).convert("RGB") - input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS) - input_image = transforms.ToTensor()(input_image).unsqueeze(0) - - # PREDICTION - input_image = input_image.to(device) - features = encoder(input_image) - outputs = depth_decoder(features) - - disp = outputs[("disp", 0)] - disp_resized = torch.nn.functional.interpolate( - disp, (original_height, original_width), mode="bilinear", align_corners=False - ) - - # Saving numpy file - # Save the resized disp instead - scaled_disp, _ = disp_to_depth(disp_resized.squeeze(), 0.1, 100) - disp_map[i] = scaled_disp.cpu().detach().numpy() - return disp_map diff --git a/spatialyze/legacy/point.py b/spatialyze/legacy/point.py deleted file mode 100644 index 70c20d5..0000000 --- a/spatialyze/legacy/point.py +++ /dev/null @@ -1,11 +0,0 @@ -from dataclasses import dataclass -from typing import Tuple - - -@dataclass -class Point: - point_id: str - object_id: str - coordinate: Tuple[float, float, float] - time: float - point_type: str diff --git a/spatialyze/legacy/scenic_util.py b/spatialyze/legacy/scenic_util.py deleted file mode 100644 index 52c022b..0000000 --- a/spatialyze/legacy/scenic_util.py +++ /dev/null @@ -1,615 +0,0 @@ -import datetime -import json -import os -from typing import Iterable, List, Tuple - -import numpy as np -import pandas as pd -from pyquaternion import Quaternion - -from spatialyze.data_types import Box -from spatialyze.utils import bbox_to_data3d, join - -CREATE_ITEMTRAJ_SQL = """ -CREATE TABLE IF NOT EXISTS Item_General_Trajectory( - itemId TEXT, - objectType TEXT, - frameId TEXT, - color TEXT, - trajCentroids tgeompoint, - largestBbox stbox, - PRIMARY KEY (itemId) -); -""" - -CREATE_BBOXES_SQL = """ -CREATE TABLE IF NOT EXISTS General_Bbox( - itemId TEXT, - 
trajBbox stbox, - FOREIGN KEY(itemId) - REFERENCES Item_General_Trajectory(itemId) -); -""" - -CREATE_CAMERA_SQL = """ -CREATE TABLE IF NOT EXISTS Cameras( - cameraId TEXT, - worldId TEXT, - frameId TEXT, - frameNum Int, - fileName TEXT, - cameraTranslation geometry, - cameraRotation real[4], - cameraIntrinsic real[3][3], - egoTranslation geometry, - egoRotation real[4], - timestamp TEXT -); -""" - - -def fetch_camera_config(scene_name, sample_data): - """ - return - [{ - camera_id: scene name, - frame_id, - frame_num: the frame sequence number - filename: image file name, - camera_translation, - camera_rotation, - camera_intrinsic(since it's a matrix, save as a nested array), - ego_translation, - ego_rotation, - timestamp - }, - ... - ] - """ - camera_config = [] - - # TODO: different camera in one frame has same timestamp for same object - # how to store same scene in different cameras - all_frames = sample_data[ - (sample_data["scene_name"] == scene_name) - # & (sample_data["filename"].str.contains("/CAM_FRONT/", regex=False)) - ] - - for idx, frame in all_frames.iterrows(): - config = {} - config["camera_id"] = scene_name - config["frame_id"] = frame["sample_token"] - config["frame_num"] = frame["frame_order"] - config["filename"] = frame["filename"] - config["camera_translation"] = frame["camera_translation"] - config["camera_rotation"] = frame["camera_rotation"] - config["camera_intrinsic"] = frame["camera_intrinsic"] - config["ego_translation"] = frame["ego_translation"] - config["ego_rotation"] = frame["ego_rotation"] - config["timestamp"] = frame["timestamp"] - camera_config.append(config) - - return camera_config - - -# Create a camera table - - -def create_or_insert_camera_table(conn, world_name, camera): - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - """ - Create and Populate A camera table with the given camera object. - """ - # Doping Cameras table if already exists. 
- cursor.execute("DROP TABLE IF EXISTS Cameras") - # Formal_Scenic_cameras table stands for the formal table which won't be erased - # Test for now - - cursor.execute(CREATE_CAMERA_SQL) - print("Camera Table created successfully........") - insert_camera( - conn, - world_name, - fetch_camera_config(camera.id, camera.object_recognition.sample_data), - ) - return CREATE_CAMERA_SQL - - -# Helper function to insert the camera - - -def insert_camera(conn, world_name, camera_config): - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - values = [] - for config in camera_config: - values.append( - f"""( - '{config['camera_id']}', - '{world_name}', - '{config['frame_id']}', - {config['frame_num']}, - '{config['filename']}', - 'POINT Z ({' '.join(map(str, config['camera_translation']))})', - ARRAY{config['camera_rotation']}, - ARRAY{config['camera_intrinsic']}, - 'POINT Z ({' '.join(map(str, config['ego_translation']))})', - ARRAY{config['ego_rotation']}, - '{config['timestamp']}' - )""" - ) - - cursor.execute( - f""" - INSERT INTO Cameras ( - cameraId, - worldId, - frameId, - frameNum, - fileName, - cameraTranslation, - cameraRotation, - cameraIntrinsic, - egoTranslation, - egoRotation, - timestamp - ) - VALUES {','.join(values)}; - """ - ) - - print("New camera inserted successfully.........") - conn.commit() - - -# create collections in db and set index for quick query - - -def insert_data(data_dir, db): - with open(os.path.join(data_dir, "v1.0-mini", "sample_data.json")) as f: - sample_data_json = json.load(f) - db["sample_data"].insert_many(sample_data_json) - db["sample_data"].create_index("token") - db["sample_data"].create_index("filename") - - with open(os.path.join(data_dir, "v1.0-mini", "attribute.json")) as f: - attribute_json = json.load(f) - db["attribute"].insert_many(attribute_json) - db["attribute"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "calibrated_sensor.json")) as f: - calibrated_sensor_json = json.load(f) - db["calibrated_sensor"].insert_many(calibrated_sensor_json) - db["calibrated_sensor"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "category.json")) as f: - category_json = json.load(f) - db["category"].insert_many(category_json) - db["category"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "ego_pose.json")) as f: - ego_pose_json = json.load(f) - db["ego_pose"].insert_many(ego_pose_json) - db["ego_pose"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "instance.json")) as f: - instance_json = json.load(f) - db["instance"].insert_many(instance_json) - db["instance"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "sample_annotation.json")) as f: - sample_annotation_json = json.load(f) - db["sample_annotation"].insert_many(sample_annotation_json) - db["sample_annotation"].create_index("token") - - with open(os.path.join(data_dir, "v1.0-mini", "frame_num.json")) as f: - frame_num_json = json.load(f) - db["frame_num"].insert_many(frame_num_json) - db["frame_num"].create_index("token") - - -def transform_box(box: Box, camera): - box.translate(-np.array(camera["egoTranslation"])) - box.rotate(Quaternion(camera["egoRotation"]).inverse) - - box.translate(-np.array(camera["cameraTranslation"])) - box.rotate(Quaternion(camera["cameraRotation"]).inverse) - - -# import matplotlib.pyplot as plt -# def overlay_bbox(image, corners): -# frame = cv2.imread(image) -# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) -# for i in 
range(len(corners)): -# current_coner = (corners[0][i], corners[1][i]) -# cv2.circle(frame,tuple([int(current_coner[0]), int(current_coner[1])]),4,(255,0,0),thickness=5) -# plt.rcParams["figure.figsize"] = (20,20) -# plt.figure() -# plt.imshow(frame) -# plt.show() - - -def recognize(scene_name, sample_data, annotation): - """ - return: - annotations: { - object_id: { - bboxes: [[[x1, y1, z1], [x2, y2, z2]], ...] - object_type, - frame_num, - frame_id, - } - ... - } - """ - - annotations = {} - - # TODO: different camera in one frame has same timestamp for same object - # how to store same scene in different cameras - img_files = sample_data[ - (sample_data["scene_name"] == scene_name) - # & (sample_data["filename"].str.contains("/CAM_FRONT/", regex=False)) - ].sort_values(by="frame_order") - - for _, img_file in img_files.iterrows(): - # get bboxes and categories of all the objects appeared in the image file - sample_token = img_file["sample_token"] - frame_num = img_file["frame_order"] - all_annotations = annotation[annotation["sample_token"] == sample_token] - # camera_info = {} - # camera_info['cameraTranslation'] = img_file['camera_translation'] - # camera_info['cameraRotation'] = img_file['camera_rotation'] - # camera_info['cameraIntrinsic'] = np.array(img_file['camera_intrinsic']) - # camera_info['egoRotation'] = img_file['ego_rotation'] - # camera_info['egoTranslation'] = img_file['ego_translation'] - - for _, ann in all_annotations.iterrows(): - item_id = ann["instance_token"] - if item_id not in annotations: - annotations[item_id] = {"bboxes": [], "frame_num": []} - annotations[item_id]["object_type"] = ann["category"] - - box = Box(ann["translation"], ann["size"], Quaternion(ann["rotation"])) - - corners = box.corners() - - # if item_id == '6dd2cbf4c24b4caeb625035869bca7b5': - # # print("corners", corners) - # # transform_box(box, camera_info) - # # print("transformed box: ", box.corners()) - # # corners_2d = box.map_2d(np.array(camera_info['cameraIntrinsic'])) - # corners_2d = transformation(box.center, camera_info) - # print("2d_corner: ", corners_2d) - # overlay_bbox("v1.0-mini/samples/CAM_FRONT/n015-2018-07-24-11-22-45+0800__CAM_FRONT__1532402927612460.jpg", corners_2d) - - bbox = [corners[:, 1], corners[:, 7]] - annotations[item_id]["bboxes"].append(bbox) - annotations[item_id]["frame_num"].append(int(frame_num)) - - print("Recognization done, saving to database......") - return annotations - - -def add_recognized_objs(conn, formatted_result, start_time, default_depth=True): - clean_tables(conn) - for item_id in formatted_result: - object_type = formatted_result[item_id]["object_type"] - recognized_bboxes = np.array(formatted_result[item_id]["bboxes"]) - tracked_cnt = formatted_result[item_id]["frame_num"] - top_left = np.vstack( - (recognized_bboxes[:, 0, 0], recognized_bboxes[:, 0, 1], recognized_bboxes[:, 0, 2]) - ) - # if default_depth: - # top_left_depths = np.ones(len(recognized_bboxes)) - # else: - # top_left_depths = self.__get_depths_of_points(recognized_bboxes[:,0,0], recognized_bboxes[:,0,1]) - - # # Convert bottom right coordinates to world coordinates - bottom_right = np.vstack( - (recognized_bboxes[:, 1, 0], recognized_bboxes[:, 1, 1], recognized_bboxes[:, 1, 2]) - ) - # if default_depth: - # bottom_right_depths = np.ones(len(tracked_cnt)) - # else: - # bottom_right_depths = self.__get_depths_of_points(recognized_bboxes[:,1,0], recognized_bboxes[:,1,1]) - - top_left = np.array(top_left.T) - bottom_right = np.array(bottom_right.T) - obj_traj = [] - for i in 
range(len(top_left)): - current_tl = top_left[i] - current_br = bottom_right[i] - obj_traj.append([current_tl.tolist(), current_br.tolist()]) - - bboxes_to_postgres( - conn, - item_id, - object_type, - "default_color", - start_time, - tracked_cnt, - obj_traj, - type="yolov4", - ) - # bbox_to_tasm() - - -# Insert bboxes to postgres - - -def bboxes_to_postgres( - conn, item_id, object_type, color, start_time, timestamps, bboxes, type="yolov3" -): - if type == "yolov3": - timestamps = range(timestamps) - - converted_bboxes = [bbox_to_data3d(bbox) for bbox in bboxes] - pairs = [] - deltas = [] - for meta_box in converted_bboxes: - pairs.append(meta_box[0]) - deltas.append(meta_box[1:]) - postgres_timestamps = convert_timestamps(start_time, timestamps) - create_or_insert_general_trajectory( - conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs - ) - # print(f"{item_id} saved successfully") - - -# Create general trajectory table -def create_or_insert_general_trajectory( - conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs -): - cursor = conn.cursor() - """ - Create and Populate A Trajectory table using mobilityDB. - Now the timestamp matches, the starting time should be the meta data of the world - Then the timestamp should be the timestamp regarding the world starting time - """ - - # Formal_Scenic_Item_General_Trajectory table stands for the formal table which won't be erased - # Test for now - - cursor.execute(CREATE_ITEMTRAJ_SQL) - cursor.execute( - "CREATE INDEX IF NOT EXISTS traj_idx ON Item_General_Trajectory USING GiST(trajCentroids);" - ) - conn.commit() - # Formal_Scenic_General_Bbox table stands for the formal table which won't be erased - # Test for now - - cursor.execute(CREATE_BBOXES_SQL) - cursor.execute("CREATE INDEX IF NOT EXISTS item_idx ON General_Bbox(itemId);") - cursor.execute("CREATE INDEX IF NOT EXISTS traj_bbox_idx ON General_Bbox USING GiST(trajBbox);") - conn.commit() - # Insert the trajectory of the first item - insert_general_trajectory(conn, item_id, object_type, color, postgres_timestamps, bboxes, pairs) - - -# Insert general trajectory -def insert_general_trajectory( - conn, - item_id: str, - object_type: str, - color: str, - postgres_timestamps: List[str], - bboxes: List[ - List[List[float]] - ], # TODO: should be (float, float, float), (float, float, float))[] - pairs: List[Tuple[float, float, float]], -): - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - - # Inserting bboxes into Bbox table - insert_bbox_trajectories_builder = [] - min_tl = np.full(3, np.inf) - max_br = np.full(3, np.NINF) - - traj_centroids = [] - - for timestamp, (tl, br), current_point in zip(postgres_timestamps, bboxes, pairs): - min_tl = np.minimum(tl, min_tl) - max_br = np.maximum(br, max_br) - - # Insert bbox - insert_bbox_trajectories_builder.append( - f""" - INSERT INTO General_Bbox (itemId, trajBbox) - VALUES ( - '{item_id}', - STBOX 'STBOX ZT( - ({join([*tl, timestamp])}), - ({join([*br, timestamp])}) - )' - ); - """ - ) - - # Construct trajectory - traj_centroids.append(f"POINT Z ({join(current_point, ' ')})@{timestamp}") - - # Insert the item_trajectory separately - insert_trajectory = f""" - INSERT INTO Item_General_Trajectory (itemId, objectType, color, trajCentroids, largestBbox) - VALUES ( - '{item_id}', - '{object_type}', - '{color}', - '{{{', '.join(traj_centroids)}}}', - STBOX 'STBOX Z( - ({join(min_tl)}), - ({join(max_br)}) - )' - ); - """ - - cursor.execute(insert_trajectory) - 
cursor.execute("".join(insert_bbox_trajectories_builder)) - - # Commit your changes in the database - conn.commit() - - -def clean_tables(conn): - cursor = conn.cursor() - cursor.execute("DROP TABLE IF EXISTS General_Bbox;") - cursor.execute("DROP TABLE IF EXISTS Item_General_Trajectory;") - conn.commit() - - -def export_tables(conn): - # create a query to specify which values we want from the database. - s = "SELECT *" - s += " FROM " - s_trajectory = s + "Item_General_Trajectory" - s_bbox = s + "General_Bbox" - s_camera = s + "Cameras" - - # set up our database connection. - db_cursor = conn.cursor() - - # Use the COPY function on the SQL we created above. - SQL_trajectory_output = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(s_trajectory) - SQL_bbox_output = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(s_bbox) - SQL_camera_output = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(s_camera) - - # Set up a variable to store our file path and name. - trajectory_file = "test_trajectory.csv" - with open(trajectory_file, "w") as trajectory_output: - db_cursor.copy_expert(SQL_trajectory_output, trajectory_output) - - bbox_file = "test_bbox.csv" - with open(bbox_file, "w") as bbox_output: - db_cursor.copy_expert(SQL_bbox_output, bbox_output) - - camera_file = "test_camera.csv" - with open(camera_file, "w") as camera_output: - db_cursor.copy_expert(SQL_camera_output, camera_output) - - -def import_tables(conn, data_path): - # # Old Version: - # cur = conn.cursor() - # cur.execute(CREATE_CAMERA_SQL) - # cur.execute(CREATE_ITEMTRAJ_SQL) - # cur.execute(CREATE_BBOXES_SQL) - # conn.commit() - # with open("test_camera.csv", "r") as camera_f: - # cur.copy_expert(file=camera_f, sql="COPY Cameras FROM STDIN CSV HEADER DELIMITER as ','") - # with open("test_trajectory.csv", "r") as trajectory_f: - # cur.copy_expert( - # file=trajectory_f, - # sql="COPY Item_General_Trajectory FROM STDIN CSV HEADER DELIMITER as ','", - # ) - # with open("test_bbox.csv", "r") as bbox_f: - # cur.copy_expert(file=bbox_f, sql="COPY General_Bbox FROM STDIN CSV HEADER DELIMITER as ','") - - # conn.commit() - - # Current Version: - # Import CSV - data_Cameras = pd.read_csv(r"test_camera.csv") - df_Cameras = pd.DataFrame(data_Cameras) - - data_Item_General_Trajectory = pd.read_csv(r"test_trajectory.csv") - df_Item_General_Trajectory = pd.DataFrame(data_Item_General_Trajectory) - - data_General_Bbox = pd.read_csv(r"test_bbox.csv") - df_General_Bbox = pd.DataFrame(data_General_Bbox) - - # Connect to SQL Server - cursor = conn.cursor() - - # Create Table - cursor.execute("DROP TABLE IF EXISTS Cameras CASCADE;") - cursor.execute("DROP TABLE IF EXISTS Item_General_Trajectory CASCADE;") - cursor.execute("DROP TABLE IF EXISTS General_Bbox CASCADE;") - - cursor.execute( - """ - CREATE TABLE Cameras ( - cameraId TEXT, - frameId TEXT, - frameNum Int, - fileName TEXT, - cameraTranslation geometry, - cameraRotation real[4], - cameraIntrinsic real[3][3], - egoTranslation geometry, - egoRotation real[4], - timestamp timestamptz, - cameraHeading real, - egoHeading real - ) - """ - ) - - cursor.execute( - """ - CREATE TABLE Item_General_Trajectory ( - itemId TEXT, - cameraId TEXT, - objectType TEXT, - color TEXT, - trajCentroids tgeompoint, - largestBbox stbox, - itemHeadings tfloat, - PRIMARY KEY (itemId) - ) - """ - ) - - cursor.execute( - """ - CREATE TABLE General_Bbox ( - itemId TEXT, - cameraId TEXT, - trajBbox stbox, - FOREIGN KEY(itemId) - REFERENCES Item_General_Trajectory(itemId) - ) - """ - ) - - # Insert DataFrame to Table - # for 
i,row in irisData.iterrows(): - # sql = "INSERT INTO irisdb.iris VALUES (%s,%s,%s,%s,%s)" - # cursor.execute(sql, tuple(row)) - for i, row in df_Cameras.iterrows(): - cursor.execute( - """ - INSERT INTO Cameras (cameraId, frameId, frameNum, fileName, cameraTranslation, cameraRotation, cameraIntrinsic, egoTranslation, egoRotation, timestamp, cameraHeading, egoHeading) - VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) - """, - tuple(row), - ) - - for i, row in df_Item_General_Trajectory.iterrows(): - cursor.execute( - """ - INSERT INTO Item_General_Trajectory (itemId, cameraId, objectType, color, trajCentroids, largestBbox, itemHeadings) - VALUES (%s,%s,%s,%s,%s,%s,%s) - """, - tuple(row), - ) - - for i, row in df_General_Bbox.iterrows(): - cursor.execute( - """ - INSERT INTO General_Bbox (itemId, cameraId, trajBbox) - VALUES (%s,%s,%s) - """, - tuple(row), - ) - - conn.commit() - - -# Helper function to convert the timestam to the timestamp formula pg-trajectory uses - - -def convert_timestamps(start_time: datetime.datetime, timestamps: Iterable[int]): - return [str(start_time + datetime.timedelta(seconds=t)) for t in timestamps] diff --git a/spatialyze/legacy/tracker.py b/spatialyze/legacy/tracker.py deleted file mode 100644 index 12bf60e..0000000 --- a/spatialyze/legacy/tracker.py +++ /dev/null @@ -1,146 +0,0 @@ -from typing import Callable, Optional - -import cv2 -import numpy as np - - -class Tracker: - def __init__( - self, tracker_type="default", customized_tracker: "Optional[Callable[[], Tracker]]" = None - ): - """ - Constructs a Tracker. - Args: - tracker_type: indicator of whether using customized tracker - customized_tracker: user specified tracker algorithm - """ - self.tracker_type = tracker_type - self.customized_tracker = customized_tracker - - def video_track(self, video_data, bboxes, first_frame): - self.video_data = video_data - if self.tracker_type == "default": - self.tracker = SingleObjectTracker() - return self.tracker.video_track(video_data, bboxes[0], first_frame) - elif self.tracker_type == "multi": - self.tracker = MultiObjectsTracker() - print("boxes at tracker", bboxes) - return self.tracker.video_track(video_data, bboxes, first_frame) - elif self.customized_tracker is not None: - self.tracker = self.customized_tracker() - return self.tracker.video_track(video_data, bboxes, first_frame) - raise Exception() - - def __iter__(self): - return iter(self.tracker) - - def __next__(self): - return next(self.tracker) - - -class SingleObjectTracker(Tracker): - """ - OpenCV Single Object Tracker - https://www.pyimagesearch.com/2018/07/30/opencv-object-tracking/ - """ - - def __init__(self, tracker_type="CSRT"): - """ - Constructs a Tracker. 
- Args: - tracker_type: type of the opencv tracker, default to be "CSRT" - """ - self.tracker = cv2.TrackerCSRT_create() - - def video_track(self, video_data, bbox, first_frame): - self.video_data = video_data - if self.tracker.init(first_frame, bbox): - return iter(self) - else: - return None - - def __iter__(self): - self.video_iter = iter(self.video_data) - self.framect = 0 - return self - - def __next__(self): - frame = next(self.video_iter) - self.framect += 1 - ok, bbox = self.tracker.update(frame) - if ok: - p1 = [int(bbox[0]), int(bbox[1])] - p2 = [int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])] - cv2.rectangle(frame, p1, p2, (255, 255, 255), 2, 2) - else: - p1 = [0, 0] - p2 = [0, 0] - # Tracking failure - cv2.putText( - frame, - "Tracking failure detected", - (100, 80), - cv2.FONT_HERSHEY_SIMPLEX, - 0.75, - (0, 0, 255), - 2, - ) - - # Return the new bounding box and frameidx - return frame, [[p1, p2]], self.framect - - -class MultiObjectsTracker(Tracker): - """ - OpenCV Multi Object Tracker - https://www.pyimagesearch.com/2018/08/06/tracking-multiple-objects-with-opencv/ - """ - - def __init__(self, tracker_type="Multi"): - """ - Constructs a Tracker. - Args: - tracker_type: type of the opencv tracker, default to be "CSRT" - """ - self.trackers = [] - - def video_track(self, video_data, bboxes, first_frame): - # print(bboxes) - self.video_data = video_data - for bbox in bboxes: - tracker = cv2.TrackerCSRT_create() - tracker.init(first_frame, bbox) - self.trackers.append(tracker) - return iter(self) - - def __iter__(self): - self.video_iter = iter(self.video_data) - self.framect = 0 - return self - - def __next__(self): - frame = next(self.video_iter) - self.framect += 1 - tracker_boxes = np.zeros((len(self.trackers), 2, 2)) - for i in range(len(self.trackers)): - current_tracker = self.trackers[i] - ok, bbox = current_tracker.update(frame) - if ok: - p1 = [int(bbox[0]), int(bbox[1])] - p2 = [int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])] - tracker_boxes[i] = np.array([p1, p2]) - # tracker_boxes.append([p1,p2]) - cv2.rectangle(frame, tuple(p1), tuple(p2), (255, 255, 255), 2, 2) - else: - # Tracking failure - cv2.putText( - frame, - "Tracking failure detected, Tracker %d" % i, - (100, 80), - cv2.FONT_HERSHEY_SIMPLEX, - 0.75, - (0, 0, 255), - 2, - ) - - return frame, tracker_boxes, self.framect diff --git a/spatialyze/legacy/video_context.py b/spatialyze/legacy/video_context.py deleted file mode 100644 index 11f1d60..0000000 --- a/spatialyze/legacy/video_context.py +++ /dev/null @@ -1,120 +0,0 @@ -from __future__ import annotations - -import datetime -from dataclasses import dataclass, field -from typing import Any, Dict, Optional - -import psycopg2 - - -@dataclass -class Camera: - def __init__(self, cam_id, point, ratio, video_file, metadata_id, lens): - self.cam_id = cam_id - self.ratio = ratio - self.video_file = video_file - self.metadata_id = metadata_id - self.properties = {} - - # Contain objects that still have yet to be added to the backend - # If user calls recognize, those items will have already been - # stored in the backend. These are reserved for objects that users - # have not added to the camera. 
- self.items = [] - self.object_recognition = None - - def add_item(self, item: Item): - # Add item - self.items.append(item) - - def add_property(self, properties, property_type: str, new_prop): - # TODO: add type annotation - # Add property - self.properties[property_type].append(new_prop) - - # Add a default add_recog_obj = True - def recognize(self, sample_data, annotation): - # Create object recognition node - object_rec_node = ObjectRecognition(sample_data, annotation) - self.object_recognition = object_rec_node - return object_rec_node - - -@dataclass -class Item: - """Item node""" - - item_id: str - item_type: str - location: Any # TODO: what is the type of location? - properties: dict = field(default_factory=dict) # TODO: what is the type of properties? - - -# Object Recognition node -class ObjectRecognition: - def __init__(self, sample_data, annotation): - self.sample_data = sample_data - self.annotation = annotation - self.properties = {} - - def add_properties(self, properties): - self.properties = properties - - -class VideoContext: - def __init__(self, name: str, units): - self.root: VideoContext = self - self.name: str = name - self.units = units - self.camera_nodes: Dict[str, Camera] = {} - self.start_time: datetime.datetime = datetime.datetime(2021, 6, 8, 7, 10, 28) - self.conn: Optional[psycopg2.connection] = None - - def connect_db(self, host="localhost", user=None, password=None, port=5432, database_name=None): - """Connect to the database""" - self.conn = psycopg2.connect( - database=database_name, user=user, password=password, host=host, port=port - ) - - def get_name(self): - return self.name - - def get_units(self): - return self.units - - # Establish camera - def camera(self, scenic_scene_name): - camera_node = self.__get_camera(scenic_scene_name) - if not camera_node: - camera_node = Camera(scenic_scene_name) - self.__add_camera(scenic_scene_name, camera_node) - return camera_node - - def properties(self, cam_id: str, properties, property_type): - camera_node = self.__get_camera(cam_id) - if not camera_node: - return None - - camera_node.add_properties(properties, property_type) - # Display error - - def get_camera(self, cam_id: str): - return self.__get_camera(cam_id) - - def __get_camera(self, cam_id: str): - """Get camera""" - if cam_id in self.camera_nodes.keys(): - return self.camera_nodes[cam_id] - return None - - def __add_camera(self, cam_id: str, camera_node: Camera): - """Add camera""" - self.camera_nodes[cam_id] = camera_node - - def remove_camera(self, cam_id: str): - """Remove camera""" - del self.camera_nodes[cam_id] - - def clear(self): - """Clear""" - self.camera_nodes = {} diff --git a/spatialyze/legacy/video_context_executor.py b/spatialyze/legacy/video_context_executor.py deleted file mode 100644 index 5e55857..0000000 --- a/spatialyze/legacy/video_context_executor.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import TYPE_CHECKING, Any, List, Set - -from spatialyze.legacy.scenic_util import ( - add_recognized_objs, - create_or_insert_camera_table, - recognize, -) -from spatialyze.legacy.video_context import Camera, VideoContext -from spatialyze.legacy.video_util import ( - create_or_insert_world_table, - metadata_to_tasm, - video_data_to_tasm, -) - -if TYPE_CHECKING: - from ..data_types import BoundingBox - - -class VideoContextExecutor: - # TODO: Add checks for Nones - - def __init__(self, conn: Any, new_video_context: VideoContext = None, tasm=None): - if new_video_context: - self.context(new_video_context) - self.conn = conn - self.tasm = 
tasm - - def context(self, video_context: VideoContext): - self.current_context = video_context - return self - - def visit(self): - video_query = self.visit_world() - return video_query - - def visit_world(self): - # Query to store world in database - name, units = self.current_context.name, self.current_context.units - create_or_insert_world_table(self.conn, name, units) - - all_sqls = [] - cameras = self.current_context.camera_nodes - if len(cameras) != 0: - for c in cameras.values(): - camera_sql = self.visit_camera(c) - all_sqls.append(camera_sql) - return all_sqls - - def visit_camera(self, camera_node: Camera): - world_name = self.current_context.name - camera_sql = create_or_insert_camera_table(self.conn, world_name, camera_node) - if camera_node.object_recognition is not None: - self.visit_obj_rec(camera_node, camera_node.object_recognition) - if self.tasm: - video_data_to_tasm(camera_node, camera_node.metadata_id, self.tasm) - return camera_sql - - def visit_obj_rec(self, camera_node, object_rec_node): - cam_id = camera_node.id - - start_time = self.current_context.start_time - - tracking_results = recognize( - cam_id, object_rec_node.sample_data, object_rec_node.annotation - ) - add_recognized_objs(self.conn, tracking_results, start_time) - if self.tasm: - metadata_to_tasm(tracking_results, camera_node.metadata_id, self.tasm) - - def execute(self): - return self.visit() - - -def is_area_recognized(area: "BoundingBox", recognized: Set["BoundingBox"]): - for other in recognized: - if area.is_in(other): - return True - return False - - -def to_recognize_whole_frame(recognition_areas: List["BoundingBox"]): - if len(recognition_areas) == 0: - return False - - area = recognition_areas[0] - if area.is_whole_frame() or ( - area.x1 == 0 and area.y1 == 0 and area.x2 == 100 and area.y2 == 100 - ): - return True - - (y1, x1), (y2, x2) = recognition_areas[0].to_tuples() - for area in recognition_areas[1:]: - if area.is_whole_frame(): - return True - - x1 = min(area.x1, x1, 0) - x2 = max(area.x2, x2, 100) - y1 = min(area.y1, y1, 0) - y2 = max(area.y2, y2, 100) - - return (x2 - x1) * (y2 - y1) >= 100 * 100 / 2.0 diff --git a/spatialyze/legacy/video_util.py b/spatialyze/legacy/video_util.py deleted file mode 100644 index dc28209..0000000 --- a/spatialyze/legacy/video_util.py +++ /dev/null @@ -1,449 +0,0 @@ -from __future__ import annotations - -import datetime -import random -from typing import TYPE_CHECKING, Any, Dict, Optional - -import cv2 -import numpy as np -from typing_extensions import Literal - -if TYPE_CHECKING: - from ..data_types import Lens, TrackedObject - from .tracker import Tracker - -# TODO: add more units -Units = Literal["metrics"] - - -def video_data_to_tasm(video_file, metadata_id, t): - t.store(video_file, metadata_id) - - -def metadata_to_tasm(formatted_result: Dict[str, Any], metadata_id, t): - import tasm - - metadata_info = [] - - def bound_width(x): - return min(max(0, x), 3840) - - def bound_height(y): - return min(max(0, y), 2160) - - for obj, info in formatted_result.items(): - object_type = info.object_type - for bbox, frame in zip(info.bboxes, info.tracked_cnt): - x1 = bound_width(bbox.x1) - y1 = bound_height(bbox.y1) - x2 = bound_width(bbox.x2) - y2 = bound_height(bbox.y2) - if frame < 0 or x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0: - import pdb - - pdb.set_trace() - metadata_info.append(tasm.MetadataInfo(metadata_id, object_type, frame, x1, y1, x2, y2)) - metadata_info.append(tasm.MetadataInfo(metadata_id, obj, frame, x1, y1, x2, y2)) - - 
t.add_bulk_metadata(metadata_info) - - -def create_or_insert_world_table(conn, name, units: Units): - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - """ - Create and Populate A world table with the given world object. - """ - # Doping Worlds table if already exists. - cursor.execute("DROP TABLE IF EXISTS Worlds;") - # Creating table with the first world - sql = """CREATE TABLE IF NOT EXISTS Worlds( - worldId TEXT PRIMARY KEY, - units TEXT - );""" - cursor.execute(sql) - print("Worlds Table created successfully........") - insert_world(conn, name, units) - return sql - - -# Helper function to insert the world - - -def insert_world(conn, name, units): - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - cursor.execute( - """INSERT INTO Worlds (worldId, units) """ + """VALUES (\'%s\', \'%s\');""" % (name, units) - ) - print("New world inserted successfully........") - # Insert the existing cameras of the current world into the camera table - conn.commit() - - -def create_or_insert_camera_table(conn, world_name, camera): - """Create a camera table""" - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - """ - Create and Populate A camera table with the given camera object. - """ - # Creating table with the first camera - sql = "\n".join( - [ - "CREATE TABLE IF NOT EXISTS Cameras(", - " cameraId TEXT,", - " worldId TEXT,", - " ratio real,", - " origin geometry,", - " focalpoints geometry,", - " fov INTEGER,", - " skev_factor real,", - " width integer,", - " height integer", - ");", - ] - ) - cursor.execute(sql) - print("Camera Table created successfully........") - insert_camera(conn, world_name, camera) - return sql - - -def insert_camera(conn, world_name, camera_node): - """Helper function to insert the camera""" - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - lens = camera_node.lens - focal_x = str(lens.focal_x) - focal_y = str(lens.focal_y) - cam_x, cam_y, cam_z = str(lens.cam_origin[0]), str(lens.cam_origin[1]), str(lens.cam_origin[2]) - width, height = camera_node.dimension - cursor.execute( - """INSERT INTO Cameras (cameraId, worldId, ratio, origin, focalpoints, fov, skev_factor, width, height) """ - + """VALUES (\'%s\', \'%s\', %f, \'POINT Z (%s %s %s)\', \'POINT(%s %s)\', %s, %f, %d, %d);""" - % ( - camera_node.cam_id, - world_name, - camera_node.ratio, - cam_x, - cam_y, - cam_z, - focal_x, - focal_y, - lens.fov, - lens.alpha, - width, - height, - ) - ) - print("New camera inserted successfully.........") - conn.commit() - - -def get_video_dimension(video_file: str): - vid: cv2.VideoCapture = cv2.VideoCapture(video_file) - width = vid.get(cv2.CAP_PROP_FRAME_WIDTH) - height = vid.get(cv2.CAP_PROP_FRAME_HEIGHT) - # width and height are floats - return (int(width), int(height)) - - -def recognize( - video_file: str, - recog_algo: str = "", - tracker_type: str = "default", - customized_tracker: Optional[Tracker] = None, -): - """Default object recognition (YOLOv5)""" - from ..trackers import yolov4_deepsort_video_track - from ..trackers.object_tracker_yolov5_deepsort import ( - YoloV5Opt, - yolov5_deepsort_video_track, - ) - - # recognition = item.ItemRecognition(recog_algo = recog_algo, tracker_type = tracker_type, customized_tracker = customized_tracker) - # return recognition.video_item_recognize(video.byte_array) - if recog_algo == "yolov4": - return yolov4_deepsort_video_track(video_file) - else: - # use YoloV5 as default - return 
yolov5_deepsort_video_track(YoloV5Opt(video_file)) - - -def add_recognized_objs( - conn: Any, - lens: Lens, - formatted_result: Dict[str, TrackedObject], - start_time: datetime.datetime, - world_id: str = "default", - properties: dict = {"color": {}}, - default_depth: bool = True, -): - # # TODO: move cleaning to apperception_benchmark.py - # clean_tables(conn) - for item_id in formatted_result: - object_type = formatted_result[item_id].object_type - recognized_bboxes = np.array([bbox.tolist() for bbox in formatted_result[item_id].bboxes]) - tracked_cnt = formatted_result[item_id].frame_num - top_left = np.vstack((recognized_bboxes[:, 0, 0], recognized_bboxes[:, 0, 1])) - if default_depth: - top_left_depths = np.ones(len(recognized_bboxes)) - # else: - # top_left_depths = self.__get_depths_of_points( - # recognized_bboxes[:, 0, 0], recognized_bboxes[:, 0, 1] - # ) - top_left = lens.pixels_to_world(top_left, top_left_depths) - - # Convert bottom right coordinates to world coordinates - bottom_right = np.vstack((recognized_bboxes[:, 1, 0], recognized_bboxes[:, 1, 1])) - if default_depth: - bottom_right_depths = np.ones(len(tracked_cnt)) - # else: - # bottom_right_depths = self.__get_depths_of_points( - # recognized_bboxes[:, 1, 0], recognized_bboxes[:, 1, 1] - # ) - bottom_right = lens.pixels_to_world(bottom_right, bottom_right_depths) - - top_left = np.array(top_left.T) - bottom_right = np.array(bottom_right.T) - obj_traj = [] - for i in range(len(top_left)): - current_tl = top_left[i] - current_br = bottom_right[i] - obj_traj.append([current_tl.tolist(), current_br.tolist()]) - - bbox_to_postgres( - conn, - item_id, - object_type, - "default_color" if item_id not in properties["color"] else properties["color"][item_id], - random.uniform(-10, 10), # heading - start_time, - tracked_cnt, - obj_traj, - world_id, - type="yolov4", - ) - # bbox_to_tasm() - - -def convert_timestamps(start_time, timestamps): - """Helper function to convert the timestam to the timestamp formula pg-trajectory uses""" - return [str(start_time + datetime.timedelta(seconds=t)) for t in timestamps] - - -def bbox_to_data3d(bbox): - """Helper function to convert trajectory to centroids - Compute the center, x, y, z delta of the bbox - """ - tl, br = bbox - x_delta = (br[0] - tl[0]) / 2 - y_delta = (br[1] - tl[1]) / 2 - z_delta = (br[2] - tl[2]) / 2 - center = (tl[0] + x_delta, tl[1] + y_delta, tl[2] + z_delta) - - return center, x_delta, y_delta, z_delta - - -def bbox_to_postgres( - conn, - item_id, - object_type, - color, - heading, - start_time, - timestamps, - bboxes, - world_id="default", - type="yolov3", -): - """Insert bboxes to postgres""" - if type == "yolov3": - timestamps = range(timestamps) - - converted_bboxes = [bbox_to_data3d(bbox) for bbox in bboxes] - pairs = [] - deltas = [] - for meta_box in converted_bboxes: - pairs.append(meta_box[0]) - deltas.append(meta_box[1:]) - postgres_timestamps = convert_timestamps(start_time, timestamps) - create_or_insert_general_trajectory( - conn, item_id, object_type, color, heading, postgres_timestamps, bboxes, pairs, world_id - ) - print(f"{item_id} saved successfully") - - -def clean_tables(conn): - cursor = conn.cursor() - cursor.execute("DROP TABLE IF EXISTS General_Bbox;") - cursor.execute("DROP TABLE IF EXISTS Item_General_Trajectory;") - conn.commit() - - -def create_or_insert_general_trajectory( - conn, - item_id, - object_type, - color, - heading, - postgres_timestamps, - bboxes, - pairs, - world_id="default", -): - """Create general trajectory table""" - # 
Creating a cursor object using the cursor() method - cursor = conn.cursor() - """ - Create and Populate A Trajectory table using mobilityDB. - Now the timestamp matches, the starting time should be the meta data of the world - Then the timestamp should be the timestamp regarding the world starting time - """ - - # Creating table with the first item - create_itemtraj_sql = """CREATE TABLE IF NOT EXISTS Item_General_Trajectory( - itemId TEXT, - objectType TEXT, - color TEXT, - heading REAL, - trajCentroids tgeompoint, - largestBbox stbox, - worldId TEXT, - PRIMARY KEY (itemId) - );""" - cursor.execute(create_itemtraj_sql) - cursor.execute( - "CREATE INDEX IF NOT EXISTS traj_idx ON Item_General_Trajectory USING GiST(trajCentroids);" - ) - conn.commit() - # Creating table with the first item - create_bboxes_sql = """CREATE TABLE IF NOT EXISTS General_Bbox( - itemId TEXT, - trajBbox stbox, - worldId TEXT, - FOREIGN KEY(itemId) - REFERENCES Item_General_Trajectory(itemId) - );""" - cursor.execute(create_bboxes_sql) - cursor.execute("CREATE INDEX IF NOT EXISTS item_idx ON General_Bbox(itemId);") - cursor.execute("CREATE INDEX IF NOT EXISTS traj_bbox_idx ON General_Bbox USING GiST(trajBbox);") - conn.commit() - # Insert the trajectory of the first item - insert_general_trajectory( - conn, item_id, object_type, color, heading, postgres_timestamps, bboxes, pairs, world_id - ) - - -def insert_general_trajectory( - conn, - item_id, - object_type, - color, - heading, - postgres_timestamps, - bboxes, - pairs, - world_id="default", -): - """Insert general trajectory""" - # Creating a cursor object using the cursor() method - cursor = conn.cursor() - # Inserting bboxes into Bbox table - insert_bbox_trajectory = "" - insert_format = ( - "INSERT INTO General_Bbox (itemId, worldId, trajBbox) " - + "VALUES ('%s','%s'," % (item_id + "-" + world_id, world_id) - ) - # Insert the item_trajectory separately - insert_trajectory = ( - "INSERT INTO Item_General_Trajectory (itemId, worldId, objectType, color, heading, trajCentroids, largestBbox) " - + "VALUES ('%s', '%s', '%s', '%s', '%s', " - % (item_id + "-" + world_id, world_id, object_type, color, heading) - ) - traj_centroids = "'{" - min_ltx, min_lty, min_ltz, max_brx, max_bry, max_brz = ( - float("inf"), - float("inf"), - float("inf"), - float("-inf"), - float("-inf"), - float("-inf"), - ) - # max_ltx, max_lty, max_ltz, min_brx, min_bry, min_brz = float('-inf'), float('-inf'), float('-inf'), float('inf'), float('inf'), float('inf') - for i in range(len(postgres_timestamps)): - postgres_timestamp = postgres_timestamps[i] - # Insert bbox - # print(bboxes[i]) - tl, br = bboxes[i] - min_ltx, min_lty, min_ltz, max_brx, max_bry, max_brz = ( - min(tl[0], min_ltx), - min(tl[1], min_lty), - min(tl[2], min_ltz), - max(br[0], max_brx), - max(br[1], max_bry), - max(br[2], max_brz), - ) - # max_ltx, max_lty, max_ltz, min_brx, min_bry, min_brz = max(tl[0], max_ltx), max(tl[1], max_lty), max(tl[2], max_ltz),\ - # min(br[0], min_brx), min(br[1], min_bry), min(br[2], min_brz) - current_bbox_sql = "stbox 'STBOX ZT((%s, %s, %s, %s), (%s, %s, %s, %s))');" % ( - tl[0], - tl[1], - tl[2], - postgres_timestamp, - br[0], - br[1], - br[2], - postgres_timestamp, - ) - insert_bbox_trajectory += insert_format + current_bbox_sql - # Construct trajectory - current_point = pairs[i] - tg_pair_centroid = "POINT Z (%s %s %s)@%s," % ( - str(current_point[0]), - str(current_point[1]), - str(current_point[2]), - postgres_timestamp, - ) - traj_centroids += tg_pair_centroid - traj_centroids = 
traj_centroids[:-1] - traj_centroids += "}', " - insert_trajectory += traj_centroids - insert_trajectory += "stbox 'STBOX Z((%s, %s, %s)," % ( - min_ltx, - min_lty, - min_ltz, - ) + "(%s, %s, %s))'); " % (max_brx, max_bry, max_brz) - # print(insert_trajectory) - cursor.execute(insert_trajectory) - cursor.execute(insert_bbox_trajectory) - # Commit your changes in the database - conn.commit() - - -def merge_trajectory(item_id, new_postgres_timestamps, new_bboxes, new_pairs): - # Fetch the timestamps of the current trajectory from the database - # Filter out the already had timestamp from the new timestamps - # Construct the adding trajectory - # Calling the merge function of mobilitydb - # do the same thing for the bboxes - return - - -def fetch_camera(conn, world_id="default", cam_id=[]): - cursor = conn.cursor() - - if cam_id == []: - query = ( - """SELECT cameraId, ratio, ST_X(origin), ST_Y(origin), ST_Z(origin), ST_X(focalpoints), ST_Y(focalpoints), fov, skev_factor """ - + """FROM Cameras WHERE worldId = \'%s\';""" % world_id - ) - else: - query = ( - """SELECT cameraId, ratio, ST_X(origin), ST_Y(origin), ST_Z(origin), ST_X(focalpoints), ST_Y(focalpoints), fov, skev_factor """ - + """FROM Cameras WHERE cameraId IN (\'%s\') AND worldId = \'%s\';""" - % (",".join(cam_id), world_id) - ) - cursor.execute(query) - return cursor.fetchall() diff --git a/spatialyze/legacy/world_executor.py b/spatialyze/legacy/world_executor.py deleted file mode 100644 index 2c452fb..0000000 --- a/spatialyze/legacy/world_executor.py +++ /dev/null @@ -1,153 +0,0 @@ -import numpy as np -import psycopg2 - -from spatialyze.legacy.metadata_context import geometry, primarykey, time -from spatialyze.legacy.metadata_context_executor import MetadataContextExecutor -from spatialyze.legacy.video_context_executor import VideoContextExecutor -from spatialyze.utils import ( - create_transform_matrix, - datetimes_to_framenums, - fetch_camera, - get_video_roi, - world_to_pixel, -) - - -class WorldExecutor: - def __init__(self, world=None): - if world: - self.create_world(world) - self.tasm = None - - def connect_db( - self, host="localhost", user=None, password=None, port=25432, database_name=None - ): - self.conn = psycopg2.connect( - database=database_name, user=user, password=password, host=host, port=port - ) - - def create_world(self, world): - self.curr_world = world - return self - - def enable_tasm(self): - import tasm - - if not self.tasm: - self.tasm = tasm.TASM() - - def get_camera(self, scene_name, frame_num): - assert self.curr_world, self.conn - cameras = fetch_camera(self.conn, scene_name, frame_num) - # each camera appear like: - ### (cameraId, ratio, origin3d, focalpoints2d, fov, skev_factor) - - return cameras - - def tasm_get_video(self, metadata_results): - # Get the metadata context executing query text, let tasm get video call it - # the tasm would execute the query to get the ids, bboxes and timestamps - # then it can use these to tile the video and get it - cam_nodes = self.curr_world.get_video_cams - tasm = self.curr_world.fetch_tasm() - for cam_node in cam_nodes: - current_metadata_identifier = cam_node.metadata_id - current_video_file = cam_nodes.video_file - tasm.activate_regret_based_tiling(current_video_file, current_metadata_identifier) - for label, timestamps in metadata_results.items(): - tasm.get_video_roi( - f"./output/{label}.mp4", # output path - current_video_file, # name in TASM - current_metadata_identifier, # metadata identifier in TASM - label, # label name - timestamps[0], # first 
frame inclusive - timestamps[-1], # last frame exclusive - ) - tasm.retile_based_on_regret(current_video_file, current_metadata_identifier) - - def get_video(self, metadata_results): - start_time = self.curr_world.VideoContext.start_time - # print("Start time is", start_time) - # The cam nodes are raw data from the database - # TODO: I forget why we used the data from the db instead of directly fetch - # from the world - cam_nodes = self.curr_world.get_video_cams - video_files = [] - for i in range(len(cam_nodes)): - cam_id, ratio, cam_x, cam_y, cam_z, focal_x, focal_y, fov, skew_factor = cam_nodes[i] - cam_video_file = self.curr_world.VideoContext.camera_nodes[cam_id].video_file - - transform_matrix = create_transform_matrix(focal_x, focal_y, cam_x, cam_y, skew_factor) - - for item_id, vals in metadata_results.items(): - world_coords, timestamps = vals - # print("timestamps are", timestamps) - world_coords = np.array(world_coords) - - cam_coords = world_to_pixel(world_coords, transform_matrix) - - vid_times = datetimes_to_framenums(start_time, timestamps) - # print(vid_times) - - vid_fname = ( - "./output/" - + self.curr_world.VideoContext.camera_nodes[cam_id].metadata_id - + item_id - + ".mp4" - ) - # print(vid_fname) - get_video_roi(vid_fname, cam_video_file, cam_coords, vid_times) - video_files.append(vid_fname) - print("output video files", ",".join(video_files)) - return video_files - - def execute(self): - # Edit logic for execution here through checks of whether VideoContext or MetadataContext is being used - video_executor = VideoContextExecutor(self.conn, self.curr_world.VideoContext, self.tasm) - video_executor.execute() - - if self.curr_world.MetadataContext.scan.view is None: - return - - if self.curr_world.GetVideo: - if self.tasm: - metadata_executor = MetadataContextExecutor( - self.conn, self.curr_world.MetadataContext.get_columns(primarykey, time) - ) - metadata_results = video_fetch_reformat_tasm(metadata_executor.execute()) - return self.tasm_get_video(metadata_results) - else: - metadata_executor = MetadataContextExecutor( - self.conn, - self.curr_world.MetadataContext.get_columns(primarykey, geometry, time), - ) - metadata_results = video_fetch_reformat(metadata_executor.execute()) - return self.get_video(metadata_results) - - metadata_executor = MetadataContextExecutor(self.conn, self.curr_world.MetadataContext) - return metadata_executor.execute() - - -def video_fetch_reformat_tasm(fetched_meta): - result = {} - for meta in fetched_meta: - item_id, timestamp = meta[0], meta[1] - if item_id in result: - result[item_id]["tracked_cnt"].append(timestamp) - else: - result[item_id] = {"tracked_cnt": [timestamp]} - - return result - - -def video_fetch_reformat(fetched_meta): - result = {} - for meta in fetched_meta: - item_id, coordinates, timestamp = meta[0], meta[1:-1], meta[-1] - if item_id in result: - result[item_id][0].append(coordinates) - result[item_id][1].append(timestamp) - else: - result[item_id] = [[coordinates], [timestamp]] - - return result diff --git a/spatialyze/trackers/__init__.py b/spatialyze/trackers/__init__.py deleted file mode 100644 index d33c933..0000000 --- a/spatialyze/trackers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .object_tracker_yolov4_deepsort import yolov4_deepsort_video_track -from .object_tracker_yolov5_deepsort import yolov5_deepsort_video_track - -__all__ = ["yolov4_deepsort_video_track", "yolov5_deepsort_video_track"] diff --git a/spatialyze/trackers/object_tracker_yolov4_deepsort.py 
b/spatialyze/trackers/object_tracker_yolov4_deepsort.py deleted file mode 100644 index 3bf1e11..0000000 --- a/spatialyze/trackers/object_tracker_yolov4_deepsort.py +++ /dev/null @@ -1,243 +0,0 @@ -import os - -# comment out below line to enable tensorflow logging outputs -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -import sys - -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../yolov4-deepsort")) - -import tensorflow as tf - -physical_devices = tf.config.experimental.list_physical_devices("GPU") -if len(physical_devices) > 0: - tf.config.experimental.set_memory_growth(physical_devices[0], True) -from dataclasses import dataclass -from typing import Dict - -# from absl import app, flags, logging -# from absl.flags import FLAGS -import core.utils as utils -import cv2 -import matplotlib.pyplot as plt -import numpy as np -from core.config import cfg - -# deep sort imports -from deep_sort import nn_matching, preprocessing -from deep_sort.detection import Detection -from deep_sort.tracker import Tracker - -# from PIL import Image -from tensorflow.compat.v1 import ConfigProto, InteractiveSession -from tensorflow.python.saved_model import tag_constants -from tools import generate_detections as gdet - -from ..data_types import BoundingBox, TrackedObject - - -@dataclass -class Flags: - framework: str - weights: str - size: int - tiny: bool - model: str - iou: float - score: float - dont_show: bool - info: bool - count: bool - - -FLAGS = Flags( - framework="tf", - weights=os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../yolov4-deepsort/checkpoints/yolov4-416" - ), - size=416, - tiny=True, - model="yolov4", - iou=0.45, - score=0.50, - dont_show=True, - info=False, - count=False, -) - -# flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt') -# flags.DEFINE_string('weights', './checkpoints/yolov4-416', -# 'path to weights file') -# flags.DEFINE_integer('size', 416, 'resize images to') -# flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny') -# flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4') -# flags.DEFINE_float('iou', 0.45, 'iou threshold') -# flags.DEFINE_float('score', 0.50, 'score threshold') -# flags.DEFINE_boolean('dont_show', False, 'dont show video output') -# flags.DEFINE_boolean('info', False, 'show detailed info of tracked objects') -# flags.DEFINE_boolean('count', False, 'count objects being tracked on screen') - -# load standard tensorflow saved model -saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) -infer = saved_model_loaded.signatures["serving_default"] - - -def yolov4_deepsort_video_track(video_file: str): - # Definition of the parameters - max_cosine_distance = 0.4 - nn_budget = None - nms_max_overlap = 1.0 - - # initialize deep sort - - model_filename = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "../yolov4-deepsort/model_data/mars-small128.pb", - ) - encoder = gdet.create_box_encoder(model_filename, batch_size=1) - # calculate cosine distance metric - metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) - # initialize tracker - tracker = Tracker(metric) - - # load configuration for object detector - config = ConfigProto() - config.gpu_options.allow_growth = True - # TODO: when to use session - # session = InteractiveSession(config=config) - InteractiveSession(config=config) - STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) - input_size = 416 - - formatted_result: Dict[str, TrackedObject] = {} - cap = 
cv2.VideoCapture(video_file) - frame_num = 0 - # while video is running - while cap.isOpened(): - # Capture frame-by-frame - ret, frame = cap.read() - if ret: - # TODO: when to use image - # image = Image.fromarray(frame) - frame_num += 1 - # print('Frame #: ', frame_num) - # TODO: when to use frame_size - # frame_size = frame.shape[:2] - image_data = cv2.resize(frame, (input_size, input_size)) - image_data = image_data / 255.0 - image_data = image_data[np.newaxis, ...].astype(np.float32) - # start_time = time.time() - - batch_data = tf.constant(image_data) - pred_bbox = infer(batch_data) - for key, value in pred_bbox.items(): - boxes = value[:, :, 0:4] - pred_conf = value[:, :, 4:] - - boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( - boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), - scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), - max_output_size_per_class=50, - max_total_size=50, - iou_threshold=FLAGS.iou, - score_threshold=FLAGS.score, - ) - - # convert data to numpy arrays and slice out unused elements - num_objects = valid_detections.numpy()[0] - bboxes = boxes.numpy()[0] - bboxes = bboxes[0 : int(num_objects)] - scores = scores.numpy()[0] - scores = scores[0 : int(num_objects)] - classes = classes.numpy()[0] - classes = classes[0 : int(num_objects)] - - # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height - original_h, original_w, _ = frame.shape - bboxes = utils.format_boxes(bboxes, original_h, original_w) - - # store all predictions in one parameter for simplicity when calling functions - pred_bbox = [bboxes, scores, classes, num_objects] - - # read in all class names from config - class_names: Dict[int, str] = utils.read_class_names(cfg.YOLO.CLASSES) - - # by default allow all classes in .names file - allowed_classes = list(class_names.values()) - - # custom allowed classes (uncomment line below to customize tracker for only people) - # allowed_classes = ['person'] - - # loop through objects and use class index to get class name, allow only classes in allowed_classes list - _names = [] - deleted_indx = [] - for i in range(num_objects): - class_indx = int(classes[i]) - class_name = class_names[class_indx] - if class_name not in allowed_classes: - deleted_indx.append(i) - else: - _names.append(class_name) - names = np.array(_names) - if FLAGS.count: - cv2.putText( - frame, - "Objects being tracked: {}".format(len(names)), - (5, 35), - cv2.FONT_HERSHEY_COMPLEX_SMALL, - 2, - (0, 255, 0), - 2, - ) - print("Objects being tracked: {}".format(len(names))) - # delete detections that are not in allowed_classes - bboxes = np.delete(bboxes, deleted_indx, axis=0) - scores = np.delete(scores, deleted_indx, axis=0) - - # encode yolo detections and feed to tracker - features = encoder(frame, bboxes) - detections = [ - Detection(bbox, score, class_name, feature) - for bbox, score, class_name, feature in zip(bboxes, scores, names, features) - ] - - # initialize color map - cmap = plt.get_cmap("tab20b") - # TODO: when to use colors - # colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] - [cmap(i)[:3] for i in np.linspace(0, 1, 20)] - - # run non-maxima supression - boxs = np.array([d.tlwh for d in detections]) - scores = np.array([d.confidence for d in detections]) - classes = np.array([d.class_name for d in detections]) - indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) - detections = [detections[i] for i in indices] - - # Call the tracker 
-            tracker.predict()
-            tracker.update(detections)
-
-            # update tracks
-            # current_bboxes = []
-            # current_labels = []
-
-            for track in tracker.tracks:
-                if not track.is_confirmed() or track.time_since_update > 1:
-                    continue
-                bbox = track.to_tlbr()
-                class_name = track.get_class()
-                # current_bboxes.append([[int(bbox[0]), int(bbox[1])], [int(bbox[2]), int(bbox[3])]])
-                # current_labels.append(class_name)
-                item_id = f"{class_name}-{str(track.track_id)}"
-                if item_id not in formatted_result:
-                    formatted_result[item_id] = TrackedObject(class_name)
-
-                formatted_result[item_id].bboxes.append(
-                    BoundingBox(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
-                )
-                formatted_result[item_id].frame_num.append(frame_num)
-
-        else:
-            break
-    print("# of tracked items:", len(formatted_result))
-    return formatted_result
diff --git a/spatialyze/trackers/object_tracker_yolov5_deepsort.py b/spatialyze/trackers/object_tracker_yolov5_deepsort.py
deleted file mode 100644
index 1ffa824..0000000
--- a/spatialyze/trackers/object_tracker_yolov5_deepsort.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import os
-import sys
-
-CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.append(os.path.join(CURRENT_DIR, "../yolov5-deepsort/yolov5/"))
-sys.path.append(os.path.join(CURRENT_DIR, "../yolov5-deepsort/"))
-
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Union
-
-import torch
-from deep_sort_pytorch.deep_sort import DeepSort
-from deep_sort_pytorch.utils.parser import get_config
-from yolov5.models.experimental import attempt_load
-from yolov5.utils.datasets import LoadImages
-from yolov5.utils.downloads import attempt_download
-from yolov5.utils.general import (
-    check_img_size,
-    non_max_suppression,
-    scale_coords,
-    xyxy2xywh,
-)
-from yolov5.utils.torch_utils import select_device
-
-from ..data_types import BoundingBox, TrackedObject
-
-
-@dataclass
-class YoloV5Opt:
-    source: str
-    yolo_weights: str = os.path.join(CURRENT_DIR, "../yolov5-deepsort/yolov5/weights/yolov5s.pt")
-    deep_sort_weights: str = os.path.join(
-        CURRENT_DIR, "../yolov5-deepsort/deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7"
-    )
-    # output: str = 'inference/output'
-    img_size: Union[Tuple[int, int], int] = 640
-    conf_thres: float = 0.4
-    iou_thres: float = 0.5
-    # fourcc: str = 'mp4v'
-    device: str = ""
-    # show_vid: bool = False
-    # save_vid: bool = False
-    # save_txt: bool = False
-    classes: Optional[List[int]] = None
-    agnostic_nms: bool = False
-    augment: bool = False
-    # evaluate: bool = False
-    config_deepsort: str = os.path.join(
-        CURRENT_DIR, "../yolov5-deepsort/deep_sort_pytorch/configs/deep_sort.yaml"
-    )
-
-
-def detect(opt: YoloV5Opt):
-    source, yolo_weights, deep_sort_weights, imgsz = (
-        opt.source,
-        opt.yolo_weights,
-        opt.deep_sort_weights,
-        opt.img_size,
-    )
-
-    crop = BoundingBox(0, 0, 100, 100)
-
-    # initialize deepsort
-    cfg = get_config()
-    cfg.merge_from_file(opt.config_deepsort)
-    attempt_download(deep_sort_weights, repo="mikel-brostrom/Yolov5_DeepSort_Pytorch")
-    deepsort = DeepSort(
-        deep_sort_weights,
-        max_dist=cfg.DEEPSORT.MAX_DIST,
-        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
-        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
-        max_age=cfg.DEEPSORT.MAX_AGE,
-        n_init=cfg.DEEPSORT.N_INIT,
-        nn_budget=cfg.DEEPSORT.NN_BUDGET,
-        use_cuda=True,
-    )
-
-    # Initialize
-    device = select_device(opt.device)
-
-    half = device.type != "cpu"  # half precision only supported on CUDA
-    # Load model
-    model = attempt_load(yolo_weights, map_location=device)  # load FP32 model
-    stride = int(model.stride.max())  # model stride
-    imgsz = check_img_size(imgsz, s=stride)  # check img_size
-    names = model.module.names if hasattr(model, "module") else model.names  # get class names
-    if half:
-        model.half()  # to FP16
-
-    dataset = LoadImages(source, img_size=imgsz, stride=stride)
-
-    # Get names and colors
-    names = model.module.names if hasattr(model, "module") else model.names
-
-    # Run inference
-    # if device.type != "cpu":
-    #     _, img, _, _ = dataset[0]
-    #     h, w = img.shape[1:]
-
-    #     # crop image
-    #     x1, y1, x2, y2 = [
-    #         int(v / 100.0)
-    #         for v in [
-    #             w * crop.x1,
-    #             h * crop.y1,
-    #             w * crop.x2,
-    #             h * crop.y2,
-    #         ]
-    #     ]
-
-    #     img = img[:, y1:y2, x1:x2]
-    #     model(
-    #         torch.zeros(1, 3, img.shape[1], img.shape[2])
-    #         .to(device)
-    #         .type_as(next(model.parameters()))
-    #     )  # run once
-
-    formatted_result: Dict[str, TrackedObject] = {}
-    for frame_idx, (_, img, im0s, _, _) in enumerate(dataset):
-        h, w = img.shape[1:]
-
-        # crop image
-        x1, y1, x2, y2 = [
-            int(v / 100.0)
-            for v in [
-                w * crop.x1,
-                h * crop.y1,
-                w * crop.x2,
-                h * crop.y2,
-            ]
-        ]
-        img = img[:, y1:y2, x1:x2]
-
-        img = torch.from_numpy(img).to(device)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if img.ndimension() == 3:
-            img = img.unsqueeze(0)
-
-        # Inference
-        pred = model(img, augment=opt.augment)[0]
-
-        # Apply NMS
-        pred = non_max_suppression(
-            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms
-        )
-
-        # Process detections
-        for det in pred:  # detections per image
-            if det is None or not len(det):
-                deepsort.increment_ages()
-                continue
-
-            # add padding from cropped frame
-            det[:, :4] += torch.tensor([[x1, y1, x1, y1]]).to(device)
-
-            # Rescale boxes from img_size to im0 size
-            det[:, :4] = scale_coords(
-                (h, w),
-                det[:, :4],
-                im0s.shape,
-            ).round()
-
-            xywhs = xyxy2xywh(det[:, 0:4])
-            confs = det[:, 4]
-            clss = det[:, 5]
-
-            # pass detections to deepsort
-            outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0s)
-
-            # collect result bounding boxes
-            for output in outputs:
-                y1, x1, y2, x2, id, c = [int(o) for o in output]
-                bboxes = BoundingBox(x1, y1, x2, y2)
-                item_id = f"{names[c]}-{str(id)}"
-
-                if item_id not in formatted_result:
-                    formatted_result[item_id] = TrackedObject(object_type=names[c])
-
-                formatted_result[item_id].bboxes.append(bboxes)
-                formatted_result[item_id].frame_num.append(frame_idx)
-
-    return formatted_result
-
-
-def yolov5_deepsort_video_track(opt: YoloV5Opt):
-    with torch.no_grad():
-        return detect(opt)
diff --git a/spatialyze/video_processor/stages/in_view/__init__.py b/spatialyze/video_processor/stages/in_view/__init__.py
index fd7ea9a..e6b00b4 100644
--- a/spatialyze/video_processor/stages/in_view/__init__.py
+++ b/spatialyze/video_processor/stages/in_view/__init__.py
@@ -1,4 +1,3 @@
 from .in_view import InView
-from .in_view_old import InViewOld
 
-__all__ = ["InView", "InViewOld"]
+__all__ = ["InView"]
diff --git a/spatialyze/video_processor/stages/in_view/in_view_old.py b/spatialyze/video_processor/stages/in_view/in_view_old.py
deleted file mode 100644
index e775ae9..0000000
--- a/spatialyze/video_processor/stages/in_view/in_view_old.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from typing import Dict, List, Optional, Tuple
-
-from bitarray import bitarray
-
-from spatialyze.database import database
-
-from ...camera_config import Float3
-from ...payload import Payload
-from ..stage import Stage
-
-
-class InViewOld(Stage):
-    def __init__(self, distance: float, segment_type: str, min_distance=False) -> None:
-        super().__init__()
-        self.distance = distance
-        self.segment_type = segment_type
-        self.min_distance = min_distance
-
-    def _run(self, payload: "Payload") -> "Tuple[Optional[bitarray], Optional[Dict[str, list]]]":
-        keep = bitarray(payload.keep)
-        translations: "List[Float3]" = []
-        headings: "List[float]" = []
-        indices: "List[int]" = []
-        for i, f in enumerate(payload.video):
-            if keep[i]:
-                translations.append(f.ego_translation)
-                headings.append(f.ego_heading)
-                indices.append(i)
-
-        translations_str = ",\n".join(map(_tuple_to_point, translations))
-        headings_str = ",\n".join(map(str, headings))
-        results = database.execute(
-            f"""
-            SELECT
-                {f"minDistance(t, '{self.segment_type}'::text) < {self.distance} AND" if self.min_distance else ""}
-                inView('intersection', h, t, {self.distance}, 35)
-            FROM
-                UNNEST(
-                    ARRAY[{translations_str}],
-                    ARRAY[{headings_str}]
-                ) AS ego(t, h)"""
-        )
-
-        for i, (r,) in enumerate(results):
-            keep[indices[i]] = r
-
-        return keep, None
-
-
-def _tuple_to_point(t: "Float3"):
-    return f"'POINT Z ({' '.join(map(str, t))})'::geometry"