New entrypoint script for spinning up ray and nos-grpc-server
This allows us to set `OMP_NUM_THREADS` correctly
(`ray.init()` does not let us set `OMP_NUM_THREADS` for some reason).

Updates:
 - Updated pixeltable benchmarks with multi-proc enabled

Features:
 - New entrypoint script for spinning up ray and nos-grpc-server
 - Faster server bringup compared to `ray.init()`
 - Allows us to specify the `OMP_NUM_THREADS` correctly
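The env-var handling the new entrypoint relies on can be sketched in isolation. The helper below is hypothetical (not part of NOS); it mirrors the shell idiom `${OMP_NUM_THREADS:-1}` of reading the variable from the environment with a fallback:

```python
def resolve_omp_threads(env):
    # Use OMP_NUM_THREADS from the environment if set, otherwise fall
    # back to 1 -- the same behavior as ${OMP_NUM_THREADS:-1} in sh.
    return env.get("OMP_NUM_THREADS", "1")

print(resolve_omp_threads({}))                          # -> 1
print(resolve_omp_threads({"OMP_NUM_THREADS": "32"}))   # -> 32
```

Setting the variable before the Ray process is launched (rather than inside `ray.init()`) is what lets the value propagate to the worker processes.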
spillai committed Aug 15, 2023
1 parent 530f723 commit d5d0505
Showing 11 changed files with 40 additions and 33 deletions.
1 change: 0 additions & 1 deletion docker-compose.cpu.yml
@@ -9,7 +9,6 @@ services:
args:
- TARGET=cpu
- BASE_IMAGE=python:3.8.10-slim
-command: nos-grpc-server
ports:
- 50051:50051
environment:
1 change: 0 additions & 1 deletion docker-compose.gpu.yml
@@ -9,7 +9,6 @@ services:
args:
- TARGET=gpu
- BASE_IMAGE=nvidia/cuda:11.8.0-base-ubuntu22.04
-command: nos-grpc-server
ports:
- 50051:50051
- 8265:8265
3 changes: 2 additions & 1 deletion docker/Dockerfile
@@ -105,7 +105,8 @@ RUN pip install . --no-deps && \
WORKDIR /app/$PROJECT
ENV NOS_ENV=${PYENV}

-CMD ["nos-grpc-server"]
+ADD scripts/entrypoint.sh .
+CMD ["./entrypoint.sh"]

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Install NOS test environment
3 changes: 1 addition & 2 deletions examples/notebook/inference-client-example.ipynb
@@ -118,7 +118,6 @@
"services:\n",
" nos-server:\n",
" image: autonomi/nos:latest-cpu\n",
-" command: nos-grpc-server\n",
" ports:\n",
" - 50051:50051\n",
" environment:\n",
@@ -133,7 +132,7 @@
" memory: 6G\n",
"```\n",
"\n",
-"We first spin up a `nos-server` service mounting the necessary host directories (`~/.nosd`) and exposing the gRPC port. The command `nos-grpc-server` spins up the gRPC server with the default 50051 port that can be used to send inference requests. The `NOS_HOME` directory is set to `/app/.nos` where all the models and optimization artifacts are stored. This directory is mounted on your host machine at `~/.nosd`. "
+"We first spin up a `nos-server` service mounting the necessary host directories (`~/.nosd`) and exposing the gRPC port. The `NOS_HOME` directory is set to `/app/.nos` where all the models and optimization artifacts are stored. This directory is mounted on your host machine at `~/.nosd`. "
]
},
{
1 change: 0 additions & 1 deletion examples/quickstart/docker-compose.quickstart.yml
@@ -3,7 +3,6 @@ version: "3.8"
services:
nos-server:
image: autonomi/nos:latest-cpu
-command: nos-grpc-server
ports:
- 50051:50051
environment:
2 changes: 1 addition & 1 deletion makefiles/Makefile.mmdet.mk
@@ -19,7 +19,7 @@ docker-build-mmdet-dev:
docker-run-mmdet-grpc-server: docker-build-mmdet-dev
make .docker-run-mmdet TARGET=dev \
DOCKER_ARGS="--gpus all -v $(shell pwd):/nos -v ${HOME}/.nosd:/app/.nos -p 50051:50051 -p 8265:8265" \
-DOCKER_CMD="nos-grpc-server"
+DOCKER_CMD=""

docker-run-mmdet-interactive: docker-build-mmdet-dev
make .docker-run-mmdet TARGET=dev \
15 changes: 4 additions & 11 deletions nos/executors/ray.py
@@ -23,7 +23,7 @@
logger = logging.getLogger(__name__)

NOS_RAY_NS = os.getenv("NOS_RAY_NS", "nos-dev")
-NOS_RAY_RUNTIME_ENV = os.getenv("NOS_RAY_ENV", None)
+NOS_RAY_ENV = os.environ.get("NOS_ENV", os.getenv("CONDA_DEFAULT_ENV", None))
NOS_RAY_OBJECT_STORE_MEMORY = int(os.getenv("NOS_RAY_OBJECT_STORE_MEMORY", 2 * 1024 * 1024 * 1024)) # 2GB
NOS_DASHBOARD_ENABLED = os.getenv("NOS_DASHBOARD_ENABLED", True)

@@ -32,7 +32,7 @@
class RayRuntimeSpec:
namespace: str = NOS_RAY_NS
"""Namespace for Ray runtime."""
-runtime_env: str = NOS_RAY_RUNTIME_ENV
+runtime_env: str = NOS_RAY_ENV
"""Runtime environment for Ray runtime."""


@@ -74,8 +74,6 @@ def init(self, max_attempts: int = 5, timeout: int = 60, retry_interval: int = 5
timeout: Time to wait for Ray to start. Defaults to 60 seconds.
retry_interval: Time to wait between retries. Defaults to 5 seconds.
"""
-level = getattr(logging, LOGGING_LEVEL)

# Ignore predefined RAY_ADDRESS environment variable.
if "RAY_ADDRESS" in os.environ:
del os.environ["RAY_ADDRESS"]
@@ -93,16 +91,10 @@ def init(self, max_attempts: int = 5, timeout: int = 60, retry_interval: int = 5
"[bold green] InferenceExecutor :: Connecting to backend ... [/bold green]"
) as status:
logger.debug(f"Connecting to executor: namespace={self.spec.namespace}")
-assert NOS_DASHBOARD_ENABLED, f"NOS_DASHBOARD_ENABLED={NOS_DASHBOARD_ENABLED}"
ray.init(
address="auto",
namespace=self.spec.namespace,
ignore_reinit_error=True,
-include_dashboard=NOS_DASHBOARD_ENABLED,
-configure_logging=True,
-logging_level=logging.ERROR,
-log_to_driver=level <= logging.ERROR,
-dashboard_host="0.0.0.0" if NOS_DASHBOARD_ENABLED else None,
)
status.stop()
console.print("[bold green] ✓ InferenceExecutor :: Connected to backend. [/bold green]")
@@ -143,10 +135,11 @@ def start(self) -> None:
namespace=self.spec.namespace,
object_store_memory=NOS_RAY_OBJECT_STORE_MEMORY,
ignore_reinit_error=False,
-include_dashboard=False,
+include_dashboard=NOS_DASHBOARD_ENABLED,
configure_logging=True,
logging_level=logging.ERROR,
log_to_driver=level <= logging.ERROR,
+dashboard_host="0.0.0.0" if NOS_DASHBOARD_ENABLED else None,
)
logger.debug(f"Started executor: namespace={self.spec.namespace} (time={time.time() - start_t:.2f}s)")
except ConnectionError as exc:
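The `init(max_attempts, timeout, retry_interval)` signature above implies a retry loop around the connection attempt. A self-contained sketch of that pattern (the real connect step is `ray.init(address="auto", ...)`; here it is a stubbed callable so the sketch runs anywhere):

```python
import time

def connect_with_retries(connect, max_attempts=5, retry_interval=5):
    """Call `connect` until it succeeds, retrying on ConnectionError."""
    for attempt in range(1, max_attempts + 1):
        try:
            return connect()
        except ConnectionError:
            if attempt == max_attempts:
                raise  # exhausted all attempts; surface the error
            time.sleep(retry_interval)

# Stub backend that fails twice before coming up.
attempts = {"n": 0}
def flaky_connect():
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("backend not ready")
    return "connected"

print(connect_with_retries(flaky_connect, retry_interval=0))  # -> connected
```

The parameter names mirror the `init()` signature in the diff; the loop body is an illustrative reconstruction, not the exact NOS implementation.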
4 changes: 2 additions & 2 deletions nos/server/_runtime.py
@@ -30,7 +30,7 @@
NOS_DOCKER_IMAGE_TRT_RUNTIME = f"autonomi/nos:{__version__}-trt-runtime"

NOS_INFERENCE_SERVICE_CONTAINER_NAME = "nos-inference-service"
-NOS_INFERENCE_SERVICE_CMD = "nos-grpc-server"
+NOS_INFERENCE_SERVICE_CMD = ["./entrypoint.sh"]

NOS_SUPPORTED_DEVICES = ("cpu", "cuda", "mps", "neuron")

@@ -45,7 +45,7 @@ class InferenceServiceRuntimeConfig:
name: str = NOS_INFERENCE_SERVICE_CONTAINER_NAME
"""Container name (unique)."""

-command: Union[str, List[str]] = field(default_factory=lambda: [NOS_INFERENCE_SERVICE_CMD])
+command: Union[str, List[str]] = field(default_factory=lambda: NOS_INFERENCE_SERVICE_CMD)
"""Command to run."""

ports: Dict[int, int] = field(default_factory=lambda: {DEFAULT_GRPC_PORT: DEFAULT_GRPC_PORT})
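The `command` field above goes through `dataclasses.field(default_factory=...)` rather than a plain default. A small standalone sketch (a hypothetical `RuntimeConfig`, not the NOS class) of why list-valued dataclass defaults need a factory:

```python
from dataclasses import dataclass, field
from typing import List

@dataclass
class RuntimeConfig:
    # A bare mutable default (command: List[str] = [...]) raises
    # ValueError at class-creation time; default_factory builds a
    # fresh list per instance instead.
    command: List[str] = field(default_factory=lambda: ["./entrypoint.sh"])

a = RuntimeConfig()
b = RuntimeConfig()
a.command.append("--verbose")
print(b.command)  # -> ['./entrypoint.sh']  (b's list is independent of a's)
```

This is also why the diff changes `lambda: [NOS_INFERENCE_SERVICE_CMD]` to `lambda: NOS_INFERENCE_SERVICE_CMD` once the constant itself becomes a list: keeping the old wrapping would have produced a nested list.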
10 changes: 10 additions & 0 deletions scripts/entrypoint.sh
@@ -0,0 +1,10 @@
#!/bin/sh
set -e
set -x

echo "Starting Ray server with OMP_NUM_THREADS=${OMP_NUM_THREADS:-1}..."
# Use OMP_NUM_THREADS from the environment if it is set; otherwise default to 1
OMP_NUM_THREADS=${OMP_NUM_THREADS:-1} ray start --head

echo "Starting NOS server..."
nos-grpc-server
19 changes: 19 additions & 0 deletions tests/integrations/benchmark-pixeltable.md
@@ -51,6 +51,25 @@ Timing records (0.0.7 - 2023-07-14) w/o SHM
```

### CPU benchmarks

```bash
Timing records (0.0.9 - 2023-08-15) - OMP_NUM_THREADS=32 - CPU (0h:04m:32s)
desc elapsed n latency_ms fps
0 noop_294x240 1.13 168 6.73 148.67
1 noop_640x480 1.21 168 7.20 138.84
2 noop_1280x720 4.99 168 29.70 33.67
3 noop_2880x1620 17.31 168 103.04 9.71
4 yolox_medium_294x240 17.43 168 103.75 9.64
5 yolox_medium_640x480 17.17 168 102.20 9.78
6 yolox_medium_1280x720 77.32 168 460.24 2.17
7 yolox_medium_2880x1620 92.55 168 550.89 1.82
8 openai_224x224 3.53 168 21.01 47.59
9 openai_640x480 4.95 168 29.46 33.94
10 openai_1280x720 6.79 168 40.42 24.74
11 openai_2880x1620 16.77 168 99.82 10.02
```


```bash
Timing records (0.0.9 - 2023-08-15) - OMP_NUM_THREADS=1 - CPU (0h:27m:56s)
desc elapsed n latency_ms fps
14 changes: 1 addition & 13 deletions tests/integrations/test_pixeltable.py
@@ -123,19 +123,7 @@ def test_pixeltable_integration():
# RH, RW = 480, 640
for (RW, RH) in [(224, 224)] + BENCHMARK_IMAGE_SHAPES:
t.add_column(pt.Column(f"frame_{RW}x{RH}", computed_with=t.frame.resize((RW, RH))))

-# Insert video files, and compute detections
-t.insert_rows(
-[
-[
-FILENAME,
-]
-for path in VIDEO_FILES
-],
-columns=[
-"video",
-],
-)
+t.insert_rows([VIDEO_FILES], columns=["video"])  # fmt: skip

# Run inference (see acceptance criteria from timing table above)
timing_records = []
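The collapsed `insert_rows` call above passes a list of rows, with each row a list of cell values aligned against `columns`. A plain-Python sketch of that payload shape (no pixeltable dependency; the helper and sample path are illustrative, not pixeltable API):

```python
VIDEO_FILES = ["bangkok.mp4"]  # hypothetical sample video path

def rows_payload(rows, columns):
    """Pair each row (a list of cell values) with the column names."""
    assert all(len(row) == len(columns) for row in rows)
    return [dict(zip(columns, row)) for row in rows]

# t.insert_rows([VIDEO_FILES], columns=["video"]) sends one row whose
# single cell holds the value for the "video" column:
print(rows_payload([VIDEO_FILES], columns=["video"]))
# -> [{'video': 'bangkok.mp4'}]
```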
