diff --git a/docker-compose.cpu.yml b/docker-compose.cpu.yml
index 9559ab90..80558212 100644
--- a/docker-compose.cpu.yml
+++ b/docker-compose.cpu.yml
@@ -9,7 +9,6 @@ services:
       args:
         - TARGET=cpu
         - BASE_IMAGE=python:3.8.10-slim
-    command: nos-grpc-server
     ports:
       - 50051:50051
     environment:
diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml
index 27814b26..5089a8b0 100644
--- a/docker-compose.gpu.yml
+++ b/docker-compose.gpu.yml
@@ -9,7 +9,6 @@ services:
       args:
         - TARGET=gpu
         - BASE_IMAGE=nvidia/cuda:11.8.0-base-ubuntu22.04
-    command: nos-grpc-server
     ports:
       - 50051:50051
       - 8265:8265
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 80d0bc06..0e13c2e4 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -105,7 +105,8 @@ RUN pip install . --no-deps && \
 
 WORKDIR /app/$PROJECT
 ENV NOS_ENV=${PYENV}
-CMD ["nos-grpc-server"]
+ADD scripts/entrypoint.sh .
+CMD ["./entrypoint.sh"]
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>
 # Install NOS test environment
diff --git a/examples/notebook/inference-client-example.ipynb b/examples/notebook/inference-client-example.ipynb
index e412deb7..02fffef0 100644
--- a/examples/notebook/inference-client-example.ipynb
+++ b/examples/notebook/inference-client-example.ipynb
@@ -118,7 +118,6 @@
     "services:\n",
     "  nos-server:\n",
     "    image: autonomi/nos:latest-cpu\n",
-    "    command: nos-grpc-server\n",
     "    ports:\n",
     "    - 50051:50051\n",
     "    environment:\n",
@@ -133,7 +132,7 @@
     "        memory: 6G\n",
     "```\n",
     "\n",
-    "We first spin up a `nos-server` service mounting the necessary host directories (`~/.nosd`) and exposing the gRPC port. The command `nos-grpc-server` spins up the gRPC server with the default 50051 port that can be used to send inference requests. The `NOS_HOME` directory is set to `/app/.nos` where all the models and optimization artifacts are stored. This directory is mounted on your host machine at `~/.nosd`. "
+    "We first spin up a `nos-server` service mounting the necessary host directories (`~/.nosd`) and exposing the gRPC port. The `NOS_HOME` directory is set to `/app/.nos` where all the models and optimization artifacts are stored. This directory is mounted on your host machine at `~/.nosd`. "
    ]
   },
   {
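With the `command: nos-grpc-server` override removed, the compose services and the notebook example now fall back to the image's default `CMD ["./entrypoint.sh"]`, which starts Ray before the gRPC server, so container startup takes a little longer. A minimal readiness probe, as a sketch (standard library only; host and port assumed from the compose files above):

```python
import socket
import time


def wait_for_port(host: str = "localhost", port: int = 50051, timeout: float = 60.0) -> None:
    """Block until host:port accepts TCP connections, or raise after `timeout`."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # A successful connect means the gRPC server behind the port is up.
            with socket.create_connection((host, port), timeout=1.0):
                return
        except OSError:
            time.sleep(0.5)  # entrypoint.sh may still be starting Ray
    raise TimeoutError(f"{host}:{port} not reachable after {timeout:.0f}s")


wait_for_port()  # port 50051, as exposed in the compose files
```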
" ] }, { diff --git a/examples/quickstart/docker-compose.quickstart.yml b/examples/quickstart/docker-compose.quickstart.yml index dcba5384..7c688038 100644 --- a/examples/quickstart/docker-compose.quickstart.yml +++ b/examples/quickstart/docker-compose.quickstart.yml @@ -3,7 +3,6 @@ version: "3.8" services: nos-server: image: autonomi/nos:latest-cpu - command: nos-grpc-server ports: - 50051:50051 environment: diff --git a/makefiles/Makefile.mmdet.mk b/makefiles/Makefile.mmdet.mk index 04199e0f..042d35eb 100644 --- a/makefiles/Makefile.mmdet.mk +++ b/makefiles/Makefile.mmdet.mk @@ -19,7 +19,7 @@ docker-build-mmdet-dev: docker-run-mmdet-grpc-server: docker-build-mmdet-dev make .docker-run-mmdet TARGET=dev \ DOCKER_ARGS="--gpus all -v $(shell pwd):/nos -v ${HOME}/.nosd:/app/.nos -p 50051:50051 -p 8265:8265" \ - DOCKER_CMD="nos-grpc-server" + DOCKER_CMD="" docker-run-mmdet-interactive: docker-build-mmdet-dev make .docker-run-mmdet TARGET=dev \ diff --git a/nos/executors/ray.py b/nos/executors/ray.py index 4a27dffb..f83003f3 100644 --- a/nos/executors/ray.py +++ b/nos/executors/ray.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) NOS_RAY_NS = os.getenv("NOS_RAY_NS", "nos-dev") -NOS_RAY_RUNTIME_ENV = os.getenv("NOS_RAY_ENV", None) +NOS_RAY_ENV = os.environ.get("NOS_ENV", os.getenv("CONDA_DEFAULT_ENV", None)) NOS_RAY_OBJECT_STORE_MEMORY = int(os.getenv("NOS_RAY_OBJECT_STORE_MEMORY", 2 * 1024 * 1024 * 1024)) # 2GB NOS_DASHBOARD_ENABLED = os.getenv("NOS_DASHBOARD_ENABLED", True) @@ -32,7 +32,7 @@ class RayRuntimeSpec: namespace: str = NOS_RAY_NS """Namespace for Ray runtime.""" - runtime_env: str = NOS_RAY_RUNTIME_ENV + runtime_env: str = NOS_RAY_ENV """Runtime environment for Ray runtime.""" @@ -74,8 +74,6 @@ def init(self, max_attempts: int = 5, timeout: int = 60, retry_interval: int = 5 timeout: Time to wait for Ray to start. Defaults to 60 seconds. retry_interval: Time to wait between retries. Defaults to 5 seconds. """ - level = getattr(logging, LOGGING_LEVEL) - # Ignore predefined RAY_ADDRESS environment variable. if "RAY_ADDRESS" in os.environ: del os.environ["RAY_ADDRESS"] @@ -93,16 +91,10 @@ def init(self, max_attempts: int = 5, timeout: int = 60, retry_interval: int = 5 "[bold green] InferenceExecutor :: Connecting to backend ... [/bold green]" ) as status: logger.debug(f"Connecting to executor: namespace={self.spec.namespace}") - assert NOS_DASHBOARD_ENABLED, f"NOS_DASHBOARD_ENABLED={NOS_DASHBOARD_ENABLED}" ray.init( address="auto", namespace=self.spec.namespace, ignore_reinit_error=True, - include_dashboard=NOS_DASHBOARD_ENABLED, - configure_logging=True, - logging_level=logging.ERROR, - log_to_driver=level <= logging.ERROR, - dashboard_host="0.0.0.0" if NOS_DASHBOARD_ENABLED else None, ) status.stop() console.print("[bold green] ✓ InferenceExecutor :: Connected to backend. 
diff --git a/nos/server/_runtime.py b/nos/server/_runtime.py
index b4e11bf3..8b2068ce 100644
--- a/nos/server/_runtime.py
+++ b/nos/server/_runtime.py
@@ -30,7 +30,7 @@
 NOS_DOCKER_IMAGE_TRT_RUNTIME = f"autonomi/nos:{__version__}-trt-runtime"
 
 NOS_INFERENCE_SERVICE_CONTAINER_NAME = "nos-inference-service"
-NOS_INFERENCE_SERVICE_CMD = "nos-grpc-server"
+NOS_INFERENCE_SERVICE_CMD = ["./entrypoint.sh"]
 
 NOS_SUPPORTED_DEVICES = ("cpu", "cuda", "mps", "neuron")
@@ -45,7 +45,7 @@ class InferenceServiceRuntimeConfig:
     name: str = NOS_INFERENCE_SERVICE_CONTAINER_NAME
     """Container name (unique)."""
-    command: Union[str, List[str]] = field(default_factory=lambda: [NOS_INFERENCE_SERVICE_CMD])
+    command: Union[str, List[str]] = field(default_factory=lambda: NOS_INFERENCE_SERVICE_CMD)
     """Command to run."""
 
     ports: Dict[int, int] = field(default_factory=lambda: {DEFAULT_GRPC_PORT: DEFAULT_GRPC_PORT})
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
new file mode 100755
index 00000000..e70c162a
--- /dev/null
+++ b/scripts/entrypoint.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+set -e
+set -x
+
+echo "Starting Ray server with OMP_NUM_THREADS=${OMP_NUM_THREADS:-1}..."
+# Use OMP_NUM_THREADS from the environment if set; otherwise default to 1.
+OMP_NUM_THREADS=${OMP_NUM_THREADS:-1} ray start --head
+
+echo "Starting NOS server..."
+nos-grpc-server
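Since `entrypoint.sh` brings up the Ray head before `nos-grpc-server`, the executor's `init()` path only needs to attach to the already-running cluster, which is why the dashboard and logging arguments moved from `init()` to `start()` above. A sketch of that attach step (the namespace mirrors the `NOS_RAY_NS` default, `nos-dev`):

```python
import ray

# "auto" discovers the head started by `ray start --head` in entrypoint.sh,
# rather than bootstrapping a fresh local cluster.
ray.init(address="auto", namespace="nos-dev", ignore_reinit_error=True)

# Sanity check: the CPUs/GPUs registered with the running head.
print(ray.cluster_resources())
```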
diff --git a/tests/integrations/benchmark-pixeltable.md b/tests/integrations/benchmark-pixeltable.md
index 3d30ecaa..88c456ea 100644
--- a/tests/integrations/benchmark-pixeltable.md
+++ b/tests/integrations/benchmark-pixeltable.md
@@ -51,6 +51,25 @@ Timing records (0.0.7 - 2023-07-14) w/o SHM
 ```
 
 ### CPU benchmarks
+
+```bash
+Timing records (0.0.9 - 2023-08-15) - OMP_NUM_THREADS=32 - CPU (0h:04m:32s)
+                      desc  elapsed    n  latency_ms     fps
+0             noop_294x240     1.13  168        6.73  148.67
+1             noop_640x480     1.21  168        7.20  138.84
+2            noop_1280x720     4.99  168       29.70   33.67
+3           noop_2880x1620    17.31  168      103.04    9.71
+4     yolox_medium_294x240    17.43  168      103.75    9.64
+5     yolox_medium_640x480    17.17  168      102.20    9.78
+6    yolox_medium_1280x720    77.32  168      460.24    2.17
+7   yolox_medium_2880x1620    92.55  168      550.89    1.82
+8           openai_224x224     3.53  168       21.01   47.59
+9           openai_640x480     4.95  168       29.46   33.94
+10         openai_1280x720     6.79  168       40.42   24.74
+11        openai_2880x1620    16.77  168       99.82   10.02
+```
+
+
 ```bash
 Timing records (0.0.9 - 2023-08-15) - OMP_NUM_THREADS=1 - CPU (0h:27m:56s)
                       desc  elapsed    n  latency_ms     fps
diff --git a/tests/integrations/test_pixeltable.py b/tests/integrations/test_pixeltable.py
index 0c882fc0..617a8906 100644
--- a/tests/integrations/test_pixeltable.py
+++ b/tests/integrations/test_pixeltable.py
@@ -123,19 +123,7 @@ def test_pixeltable_integration():
     # RH, RW = 480, 640
     for (RW, RH) in [(224, 224)] + BENCHMARK_IMAGE_SHAPES:
         t.add_column(pt.Column(f"frame_{RW}x{RH}", computed_with=t.frame.resize((RW, RH))))
-
-    # Insert video files, and compute detections
-    t.insert_rows(
-        [
-            [
-                FILENAME,
-            ]
-            for path in VIDEO_FILES
-        ],
-        columns=[
-            "video",
-        ],
-    )
+    t.insert_rows([VIDEO_FILES], columns=["video"])  # fmt: skip
 
     # Run inference (see acceptance criteria from timing table above)
     timing_records = []
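The new benchmark block records the payoff of threading `OMP_NUM_THREADS` through the entrypoint: the same CPU suite drops from 27m56s at `OMP_NUM_THREADS=1` to 4m32s at `OMP_NUM_THREADS=32`. One caveat, sketched below: OpenMP-backed libraries size their thread pools at import time, so the variable must be set before the first import, which is why the entrypoint exports it when launching Ray. `torch` is used here only as an illustrative consumer:

```python
import os

# Must be set before importing numpy/torch; OpenMP reads it at import time.
os.environ.setdefault("OMP_NUM_THREADS", "32")

import torch  # illustrative OpenMP-backed consumer

# Reflects OMP_NUM_THREADS when the variable was set before the import.
print(torch.get_num_threads())
```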