# Support single-container gRPC serving + HTTP proxy (#546)
## Summary

This allows us to deploy a single Docker-image service to clouds like
Fly.io and RunPod.

- Updated the entrypoint to support both gRPC and HTTP serving.
- Updated the default gRPC host to `localhost` to avoid IPv6 issues within Docker.
- Updated the docker-compose files to support single-container gRPC serving + HTTP proxy.
- Updated the default `nos.client.Client()` address to `localhost:50051` for gRPC, so examples no longer need to set it manually (see the sketch below).
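
With the new default, example code can construct a client without any address plumbing. A minimal before/after sketch (the default resolves to `localhost:50051` via `DEFAULT_GRPC_ADDRESS` in `nos/constants.py`):

```python
from nos.client import Client

# Before: every example had to spell out the gRPC address explicitly.
client = Client("[::]:50051")

# After: the no-argument form defaults to "localhost:50051",
# overridable via the NOS_GRPC_HOST / NOS_GRPC_PORT environment variables.
client = Client()
assert client.WaitForServer()
```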

## Related issues

<!-- For example: "Closes #1234" -->

## Checks

- [ ] `make lint`: I've run `make lint` to lint the changes in this PR.
- [ ] `make test`: I've made sure the tests (`make test-cpu` or `make test`) are passing.
- Additional tests:
  - [ ] Benchmark tests (when contributing new models)
  - [ ] GPU/HW tests
spillai authored Mar 8, 2024 · 1 parent 148724f · commit a83e3ae
Showing 22 changed files with 76 additions and 64 deletions.
**README.md** (10 changes: 5 additions & 5 deletions)

````diff
@@ -65,7 +65,7 @@ NOS provides an OpenAI-compatible server with streaming support so that you can
 ```python
 from nos.client import Client
 
-client = Client("[::]:50051")
+client = Client()
 
 model = client.Module("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 response = model.chat(message="Tell me a story of 1000 words with emojis", _stream=True)
@@ -105,7 +105,7 @@ Build MidJourney discord bots in seconds.
 ```python
 from nos.client import Client
 
-client = Client("[::]:50051")
+client = Client()
 
 sdxl = client.Module("stabilityai/stable-diffusion-xl-base-1-0")
 image, = sdxl(prompts=["hippo with glasses in a library, cartoon styling"],
@@ -146,7 +146,7 @@ Build [scalable semantic search of images/videos](https://docs.nos.run/docs/demo
 ```python
 from nos.client import Client
 
-client = Client("[::]:50051")
+client = Client()
 
 clip = client.Module("openai/clip-vit-base-patch32")
 txt_vec = clip.encode_text(texts=["fox jumped over the moon"])
@@ -187,7 +187,7 @@ Perform [real-time audio transcription](./examples/tutorials/04-serving-multiple
 from pathlib import Path
 from nos.client import Client
 
-client = Client("[::]:50051")
+client = Client()
 
 model = client.Module("openai/whisper-small.en")
 with client.UploadFile(Path("audio.wav")) as remote_path:
@@ -225,7 +225,7 @@ Run classical computer-vision tasks in 2 lines of code.
 from pathlib import Path
 from nos.client import Client
 
-client = Client("[::]:50051")
+client = Client()
 
 model = client.Module("yolox/medium")
 response = model(images=[Image.open("image.jpg")])
````
**docs/OVERVIEW.md** (8 changes: 4 additions & 4 deletions)

````diff
@@ -76,7 +76,7 @@ This command pulls and starts the latest GPU docker server with all the NOS good
 ```python
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 sdxl = client.Module("stabilityai/stable-diffusion-xl-base-1-0")
 image, = sdxl(prompts=["fox jumped over the moon"],
@@ -119,7 +119,7 @@ curl \
 ```python
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 clip = client.Module("openai/clip-vit-base-patch32")
 txt_vec = clip.encode_text(texts=["fox jumped over the moon"])
@@ -159,7 +159,7 @@ curl \
 from pathlib import Path
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 model = client.Module("openai/whisper-large-v2")
 with client.UploadFile(Path("audio.wav")) as remote_path:
@@ -197,7 +197,7 @@ curl \
 from pathlib import Path
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 model = client.Module("yolox/medium")
 response = model(images=[Image.open("image.jpg")])
````
**docs/guides/running-inference.md** (2 changes: 1 addition & 1 deletion)

````diff
@@ -16,7 +16,7 @@ Let's start by importing the NOS client and creating an `Client` instance. The c
 from nos.client import Client, TaskType
 
 # Create a client that connects to the server via gRPC (over 50051)
-client = Client("[::]:50051")
+client = Client()
 
 # We provide helper functions to wait for the server to be ready
 # if the server is simultaneously spun up in a separate process.
````
**docs/guides/serving-custom-models.md** (4 changes: 2 additions & 2 deletions)

````diff
@@ -95,7 +95,7 @@ Via the `serve.yaml`, NOS automatically registers the new **WhisperX** model und
 ```python linenums="1" title="client.py"
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 ```
 
 !!!note
@@ -126,7 +126,7 @@ Once the model is served, we can use the client to call the `transcribe` method
 ```python linenums="1" title="client.py"
 from nos.client import Client
-client = Client("[::]:50051")
+client = Client()
 client.WaitForServer() # Wait for the server to start
 model = client.Module("mbain-whisperx")
````
**examples/docker/docker-compose.cpu.yml** (2 changes: 1 addition & 1 deletion)

````diff
@@ -1,7 +1,7 @@
 version: "3.8"
 
 services:
-  nos-server-cpu:
+  server-cpu:
     image: autonomi/nos:latest-cpu
     environment:
       - NOS_HOME=/app/.nos
````
**examples/docker/docker-compose.gpu-with-gateway.yml** (19 changes: 2 additions & 17 deletions)

````diff
@@ -1,33 +1,18 @@
 version: "3.8"
 
 services:
-  nos-http-gateway:
+  server:
     image: autonomi/nos:latest-gpu
-    command: nos-http-server --host 0.0.0.0 --port 8000 --workers 1
+    command: /app/entrypoint.sh --http
     environment:
       - NOS_HOME=/app/.nos
       - NOS_LOGGING_LEVEL=INFO
-      - NOS_GRPC_HOST=nos-server
     volumes:
       - ~/.nosd:/app/.nos
       - /dev/shm:/dev/shm
     ports:
       - 8000:8000
     ipc: host
-    depends_on:
-      - nos-server
-
-  nos-server:
-    image: autonomi/nos:latest-gpu
-    environment:
-      - NOS_HOME=/app/.nos
-      - NOS_LOGGING_LEVEL=INFO
-    volumes:
-      - ~/.nosd:/app/.nos
-      - /dev/shm:/dev/shm
-    ports:
-      - 50051:50051
-    ipc: host
     deploy:
       resources:
         reservations:
````
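
With the gateway folded into the server container, the only published port is the HTTP proxy's 8000. A rough smoke test from the host, assuming the proxy keeps serving an OpenAI-style chat-completions route (the endpoint path and payload below are illustrative assumptions, not taken from this diff):

```python
import requests

# Hypothetical check against the single-container deployment: the compose
# file above publishes only port 8000, so all traffic goes through the
# HTTP proxy. The route and JSON schema are assumptions based on the
# README's description of an OpenAI-compatible server.
response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "messages": [{"role": "user", "content": "Tell me a short story."}],
    },
    timeout=60,
)
response.raise_for_status()
print(response.json())
```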
**examples/docker/docker-compose.gpu.yml** (2 changes: 1 addition & 1 deletion)

````diff
@@ -1,7 +1,7 @@
 version: "3.8"
 
 services:
-  nos-server-gpu:
+  server-gpu:
     image: autonomi/nos:latest-gpu
     environment:
       - NOS_HOME=/app/.nos
````
````diff
@@ -8,7 +8,7 @@ def test_embeddings_inf2_client(model_id):
     from nos.client import Client
 
     # Create a client
-    client = Client("[::]:50051")
+    client = Client()
     assert client.WaitForServer()
 
     # Load the embeddings model
````
**examples/inf2/sdxl/tests/test_sdxl_inf2_client.py** (2 changes: 1 addition & 1 deletion)

````diff
@@ -8,7 +8,7 @@ def test_sdxl_inf2_client(model_id):
     from nos.client import Client
 
     # Create a client
-    client = Client("[::]:50051")
+    client = Client()
     assert client.WaitForServer()
 
     # Load the embeddings model
````
````diff
@@ -10,7 +10,7 @@
 if __name__ == "__main__":
     from typing import List
 
-    client = Client("[::]:50051")
+    client = Client()
     assert client is not None
     assert client.WaitForServer()
     assert client.IsHealthy()
````
````diff
@@ -8,7 +8,7 @@
 
 
 if __name__ == "__main__":
-    client = Client("[::]:50051")
+    client = Client()
     assert client is not None
    assert client.WaitForServer()
 
````
````diff
@@ -5,12 +5,11 @@
 from nos.client import Client
 
 
-GRPC_PORT = 50051
 model_id = "tinyllama-1.1b-chat"
 
 
 if __name__ == "__main__":
-    client = Client(f"[::]:{GRPC_PORT}")
+    client = Client()
     assert client.WaitForServer()
 
     # Load the llama chat model
````
````diff
@@ -17,9 +17,8 @@
     path = Path(args.filename)
 
     # Create a client
-    address = "[::]:50051"
-    print(f"Connecting to client at {address} ...")
-    client = Client(address)
+    print("Connecting to client...")
+    client = Client()
     client.WaitForServer()
 
     # Transcribe with Whisper
````
**nos/cli/predict.py** (3 changes: 2 additions & 1 deletion)

````diff
@@ -19,6 +19,7 @@
 
 from nos.client import Client
 from nos.common.exceptions import ClientException
+from nos.constants import DEFAULT_GRPC_ADDRESS
 
 
 predict_cli = typer.Typer(name="predict", help="NOS gRPC Prediction CLI.", no_args_is_help=True)
@@ -36,7 +37,7 @@ class gRPCConfig:
 @predict_cli.callback()
 def grpc_config(
     ctx: typer.Context,
-    address: str = typer.Option("[::]:50051", "-a", "--address", help="Address of the gRPC server."),
+    address: str = typer.Option(DEFAULT_GRPC_ADDRESS, "-a", "--address", help="Address of the gRPC server."),
 ):
     """Common gRPC options"""
     client = Client(address)
````
**nos/cli/templates/docker-compose.serve.yml.j2** (1 change: 1 addition & 0 deletions)

````diff
@@ -40,6 +40,7 @@ services:
     environment:
       - NOS_HOME=/app/.nos
       - NOS_LOGGING_LEVEL={{ logging_level }}
+      - NOS_GRPC_HOST=[::]
 {%- if env_file|length > 0 %}
     env_file:
 {%- for envf in env_file %}
````
**nos/client/grpc.py** (6 changes: 3 additions & 3 deletions)

````diff
@@ -20,7 +20,7 @@
     ServerReadyException,
 )
 from nos.common.shm import NOS_SHM_ENABLED, SharedMemoryTransportManager
-from nos.constants import DEFAULT_GRPC_PORT, GRPC_MAX_MESSAGE_LENGTH, NOS_PROFILING_ENABLED
+from nos.constants import DEFAULT_GRPC_ADDRESS, GRPC_MAX_MESSAGE_LENGTH, NOS_PROFILING_ENABLED
 from nos.logging import logger
 from nos.protoc import import_module
 from nos.version import __version__
@@ -64,11 +64,11 @@ class Client:
     ```
     """
 
-    def __init__(self, address: str = f"[::]:{DEFAULT_GRPC_PORT}"):
+    def __init__(self, address: str = DEFAULT_GRPC_ADDRESS):
         """Initializes the gRPC client.
 
         Args:
-            address (str): Address for the gRPC server. Defaults to f"[::]:{DEFAULT_GRPC_PORT}".
+            address (str): Address for the gRPC server. Defaults to DEFAULT_GRPC_ADDRESS.
         """
         self.address: str = address
         self._channel: grpc.Channel = None
````
**nos/constants.py** (4 changes: 2 additions & 2 deletions)

````diff
@@ -15,11 +15,11 @@
 NOS_LOG_DIR.mkdir(parents=True, exist_ok=True)
 NOS_TMP_DIR.mkdir(parents=True, exist_ok=True)
 
-DEFAULT_HTTP_HOST = os.getenv("NOS_HTTP_HOST", "127.0.0.1")
+DEFAULT_HTTP_HOST = os.getenv("NOS_HTTP_HOST", "localhost")
 DEFAULT_HTTP_PORT = int(os.getenv("NOS_HTTP_PORT", 8000))
 DEFAULT_HTTP_ADDRESS = f"{DEFAULT_HTTP_HOST}:{DEFAULT_HTTP_PORT}"
 
-DEFAULT_GRPC_HOST = os.getenv("NOS_GRPC_HOST", "[::]")
+DEFAULT_GRPC_HOST = os.getenv("NOS_GRPC_HOST", "localhost")
 DEFAULT_GRPC_PORT = int(os.getenv("NOS_GRPC_PORT", 50051))
 DEFAULT_GRPC_ADDRESS = f"{DEFAULT_GRPC_HOST}:{DEFAULT_GRPC_PORT}"
````
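
Both hosts now resolve from the environment first and fall back to `localhost`, so a deployment can repoint the client or proxy without code changes. A small sketch of the resolution logic (mirroring the constants above; the `nos-server` hostname is just an example for a multi-container setup):

```python
import os

# Mirrors nos/constants.py: the env var wins, otherwise fall back to localhost.
host = os.getenv("NOS_GRPC_HOST", "localhost")
port = int(os.getenv("NOS_GRPC_PORT", 50051))
address = f"{host}:{port}"

print(address)
# -> "localhost:50051" by default
# -> "nos-server:50051" if run with NOS_GRPC_HOST=nos-server, as a
#    multi-container compose file might set it
```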
**nos/test/conftest.py** (18 changes: 9 additions & 9 deletions)

````diff
@@ -1,7 +1,7 @@
 import pytest
 from loguru import logger
 
-from nos.constants import DEFAULT_GRPC_PORT
+from nos.constants import DEFAULT_GRPC_HOST, DEFAULT_GRPC_PORT
 from nos.protoc import import_module
 
 
@@ -62,7 +62,7 @@ async def grpc_server(ray_executor):
         ("grpc.max_receive_message_length", GRPC_MAX_MESSAGE_LENGTH),
     ]
     server = aio.server(options=options)
-    address = f"[::]:{GRPC_TEST_PORT}"
+    address = f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT}"
     nos_service_pb2_grpc.add_InferenceServiceServicer_to_server(InferenceServiceImpl(), server)
     server.add_insecure_port(address)
 
@@ -78,23 +78,23 @@ def grpc_client():
     """Test gRPC client (Port: 50052)."""
     from nos.client import Client
 
-    yield Client(f"[::]:{GRPC_TEST_PORT}")
+    yield Client(f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT}")
 
 
 @pytest.fixture(scope="session")
 def grpc_client_cpu():
     """Test gRPC client to be used with CPU docker runtime (Port: 50053)."""
     from nos.client import Client
 
-    yield Client(f"[::]:{GRPC_TEST_PORT_CPU}")
+    yield Client(f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT_CPU}")
 
 
 @pytest.fixture(scope="session")
 def grpc_client_gpu():
     """Test gRPC client to be used with GPU docker runtime (Port: 50054)."""
     from nos.client import Client
 
-    yield Client(f"[::]:{GRPC_TEST_PORT_GPU}")
+    yield Client(f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT_GPU}")
 
 
 @pytest.fixture(scope="session")
@@ -230,7 +230,7 @@ def local_http_client_with_server(grpc_server):  # noqa: F811
     from nos.server.http._service import app_factory
 
     # Yield the HTTP client once the server is up and initialized
-    with TestClient(app_factory(address=f"[::]:{GRPC_TEST_PORT}")) as _client:
+    with TestClient(app_factory(address=f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT}")) as _client:
         yield _client
 
 
@@ -247,7 +247,7 @@ def http_client_with_cpu_backend(grpc_server_docker_runtime_cpu):  # noqa: F811
     from nos.server.http._service import app_factory
 
     # Yield the HTTP client once the server is up and initialized
-    with TestClient(app_factory(address=f"[::]:{GRPC_TEST_PORT_CPU}")) as _client:
+    with TestClient(app_factory(address=f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT_CPU}")) as _client:
         yield _client
 
 
@@ -264,7 +264,7 @@ def http_client_with_gpu_backend(grpc_server_docker_runtime_gpu):  # noqa: F811
     from nos.server.http._service import app_factory
 
     # Yield the HTTP client once the server is up and initialized
-    with TestClient(app_factory(address=f"[::]:{GRPC_TEST_PORT_GPU}")) as _client:
+    with TestClient(app_factory(address=f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT_GPU}")) as _client:
         yield _client
 
 
@@ -301,7 +301,7 @@ def http_server_with_gpu_backend(grpc_client_with_gpu_backend):  # noqa: F811
 
     def _run_uvicorn_server():
         uvicorn.run(
-            app_factory(address=f"[::]:{GRPC_TEST_PORT_GPU}", env="dev"),
+            app_factory(address=f"{DEFAULT_GRPC_HOST}:{GRPC_TEST_PORT_GPU}", env="dev"),
             host="localhost",
             port=HTTP_TEST_PORT,
             workers=1,
````
**nos/version.py** (2 changes: 1 addition & 1 deletion)

````diff
@@ -1 +1 @@
-__version__ = "0.2.0"
+__version__ = "0.2.1"
````