Refactor video decoding and processing logic

orbbec · Sep 14, 2024 · 4cd52c1 · 4cd52c1
1 parent 1d530ab
commit 4cd52c1
Show file tree

Hide file tree

Showing 2 changed files with 110 additions and 92 deletions.
diff --git a/examples/net_device.py b/examples/net_device.py
@@ -1,17 +1,17 @@
 import platform
-import subprocess
-
 import cv2
 import numpy as np
-
-from pyorbbecsdk import (Pipeline, Context, Config, OBSensorType,
-                         OBFormat, OBError)
+import av
+import io
+import threading
+import time
+import pygame
+import os
+from pyorbbecsdk import (Pipeline, Context, Config, OBSensorType, OBFormat, OBError)
 from utils import frame_to_bgr_image
 
 ESC_KEY = 27
 
-# Only Femto Mega and Gemini2 XL support this sample
-
 def get_stream_profile(pipeline, sensor_type, width, height, fmt, fps):
     profile_list = pipeline.get_stream_profile_list(sensor_type)
     try:
@@ -20,40 +20,55 @@ def get_stream_profile(pipeline, sensor_type, width, height, fmt, fps):
         profile = profile_list.get_default_video_stream_profile()
     return profile
 
-
-def decode_h265_frame(color_frame, color_format='hevc'):
-    # This function is only supported on Linux.
-    # and requires ffmpeg to be installed.
-    if color_format == 'h265':
-        color_format = 'hevc'
-    elif color_format == 'h264':
-        color_format = 'h264'  # Actually, this remains unchanged but added for clarity.
-
-    cmd_in = [
-        'ffmpeg',
-        '-f', color_format,
-        '-i', 'pipe:',
-        '-f', 'rawvideo',
-        '-pix_fmt', 'bgr24',
-        'pipe:'
-    ]
-
-    byte_data = color_frame.get_data().tobytes()
-
-    proc = subprocess.Popen(cmd_in, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    out, err = proc.communicate(input=byte_data)
-
-    if proc.returncode != 0:
-        raise ValueError(f'FFmpeg did not run successfully: {err.decode()}')
-    if len(out) == 0:
-        return None
-    decoded_frame = np.frombuffer(out, dtype=np.uint8).reshape(color_frame.get_height(), color_frame.get_width(), 3)
-    return decoded_frame
-
+def decode_h26x_frame(decoder, byte_data):
+    try:
+        packet = av.Packet(byte_data)
+        frames = decoder.decode(packet)
+        for frame in frames:
+            return frame.to_ndarray(format='bgr24')
+    except av.AVError as e:
+        print(f"Decoding error: {e}")
+    return None
+
+class FrameProcessor(threading.Thread):
+    def __init__(self, decoder, display_width, display_height):
+        super().__init__()
+        self.decoder = decoder
+        self.latest_frame = None
+        self.processed_frame = None
+        self.lock = threading.Lock()
+        self.running = True
+        self.daemon = True
+        self.display_width = display_width
+        self.display_height = display_height
+
+    def run(self):
+        while self.running:
+            with self.lock:
+                if self.latest_frame is not None:
+                    color_image = decode_h26x_frame(self.decoder, self.latest_frame)
+                    if color_image is not None:
+                        # Resize the image to the configured display resolution
+                        resized_image = cv2.resize(color_image, (self.display_width, self.display_height))
+                        rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
+                        self.processed_frame = rgb_image
+                    self.latest_frame = None
+            time.sleep(0.001)
+
+    def update_frame(self, frame):
+        with self.lock:
+            self.latest_frame = frame
+
+    def get_processed_frame(self):
+        with self.lock:
+            return self.processed_frame
+
+    def stop(self):
+        self.running = False
 
 def main():
     ctx = Context()
-    ip = input("Enter the ip address of the device (default: 192.168.1.10): ") or "192.168.1.10"
+    ip = input("Enter the IP address of the device (default: 192.168.1.10): ") or "192.168.1.10"
     device = ctx.create_net_device(ip, 8090)
     if device is None:
         print("Failed to create net device")
@@ -62,65 +77,66 @@ def main():
     config = Config()
     pipeline = Pipeline(device)
 
-    # Setup color stream
-    color_profile = get_stream_profile(pipeline, OBSensorType.COLOR_SENSOR, 1280, 0, OBFormat.MJPG, 10)
+    # Set up 4K capture
+    color_profile = get_stream_profile(pipeline, OBSensorType.COLOR_SENSOR, 3840, 2160, OBFormat.H264, 25)
     config.enable_stream(color_profile)
 
-    # Setup depth stream
-    depth_profile = get_stream_profile(pipeline, OBSensorType.DEPTH_SENSOR, 640, 0, OBFormat.Y16, 10)
-    config.enable_stream(depth_profile)
-
     pipeline.start(config)
-    warning_printed = False
 
+    color_codec_name = 'h264' if color_profile.get_format() == OBFormat.H264 else 'hevc'
     try:
-        while True:
-            frames = pipeline.wait_for_frames(100)
-            if not frames:
-                continue
-
-            color_frame = frames.get_color_frame()
-            depth_frame = frames.get_depth_frame()
-
-            if color_frame and color_frame.get_format() in [OBFormat.H265, OBFormat.H264]:
-                if platform.system() == 'Linux':
-                    color_format = 'h265' if color_frame.get_format() == OBFormat.H265 else 'h264'
-                    color_image = decode_h265_frame(color_frame, color_format)
-                else:
-                    if not warning_printed:
-                        print("H264 and H265 are not supported on this system.")
-                        warning_printed = True
-                    color_image = None
-            elif color_frame:
-                color_image = frame_to_bgr_image(color_frame)
-            else:
-                color_image = None
-
-            if depth_frame:
-                depth_data = np.frombuffer(depth_frame.get_data(), dtype=np.uint16).reshape(depth_frame.get_height(),
-                                                                                            depth_frame.get_width())
-                scale = depth_frame.get_depth_scale()
-                depth_data = (depth_data * scale).astype(np.uint16)
-                depth_image = cv2.normalize(depth_data, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
-                depth_image = cv2.applyColorMap(depth_image, cv2.COLORMAP_JET)
-            else:
-                depth_image = None
-
-            if color_image is not None and depth_image is not None:
-                target_size = (640, 480)
-                images_to_show = [img for img in [color_image, depth_image] if img is not None]
-                # Resize each image to 640x480
-                images_to_show = [cv2.resize(img, target_size) for img in images_to_show]
-
-                cv2.imshow("net_device", np.hstack(images_to_show))
-            key = cv2.waitKey(1)
-            if key in [ord('q'), ESC_KEY]:
+        decoder = av.codec.CodecContext.create(color_codec_name, 'r')
+    except av.AVError as e:
+        print(f"Failed to create decoder for {color_codec_name}: {e}")
+        pipeline.stop()
+        return
+
+    # Set display resolution to 720p
+    display_width, display_height = 1280, 720
+    frame_processor = FrameProcessor(decoder, display_width, display_height)
+    frame_processor.start()
+
+    pygame.init()
+    screen = pygame.display.set_mode((display_width, display_height))
+    pygame.display.set_caption("4K Net Device Viewer (720p Display)")
+    clock = pygame.time.Clock()
+
+    running = True
+    try:
+        while running:
+            for event in pygame.event.get():
+                if event.type == pygame.QUIT:
+                    running = False
+                elif event.type == pygame.KEYDOWN:
+                    if event.key == pygame.K_ESCAPE:
+                        running = False
+
+            if not running:
                 break
-    except KeyboardInterrupt:
-        pass
+
+            frames = pipeline.wait_for_frames(100)
+            if frames:
+                color_frame = frames.get_color_frame()
+                if color_frame:
+                    byte_data = color_frame.get_data()
+                    if len(byte_data) > 0:
+                        frame_processor.update_frame(byte_data)
+
+            processed_frame = frame_processor.get_processed_frame()
+            if processed_frame is not None:
+                surf = pygame.surfarray.make_surface(processed_frame.swapaxes(0, 1))
+                screen.blit(surf, (0, 0))
+                pygame.display.flip()
+
+            clock.tick(30)  # Limit to 30 FPS
+
     finally:
+        print("Stopping frame processor...")
+        frame_processor.stop()
+        print("Stopping pipeline...")
         pipeline.stop()
-
+        print("Exiting the program...")
+        os._exit(0)
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/requirements.txt b/requirements.txt
@@ -2,5 +2,7 @@ pybind11==2.11.0
 pybind11-global==2.11.0
 opencv-python
 numpy<2.0  # see https://github.com/orbbec/pyorbbecsdk/issues/47
-plyfile
-open3d
+plyfile # for saving point clouds
+open3d # for point cloud visualization
+av # for H.264/H.265 (HEVC) decoding
+pygame # for visualization