Add machine-to-machine API authentication example
This PR allows developers to authenticate their requests to a server using
machine-to-machine (M2M) authentication. This is useful when the server is
publicly accessible but requires secure API-key authentication to access the
HTTP services.
spillai committed May 3, 2024
1 parent b0de4df commit 3acbe8e
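
Once the gateway is running with NOS_M2M_API_KEYS set (see the docker-compose example below), clients authenticate by sending one of the configured keys in the X-Api-Key header. A minimal sketch, assuming the gateway listens on localhost:8000 and that sk-test-key-1 is one of the configured keys:

import requests

BASE_URL = "http://localhost:8000"  # hypothetical deployment address
API_KEY = "sk-test-key-1"           # one of the keys in NOS_M2M_API_KEYS

# Authenticated request: the gateway validates the X-Api-Key header.
response = requests.get(f"{BASE_URL}/v1/models", headers={"X-Api-Key": API_KEY})
response.raise_for_status()
print(response.json())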
Showing 10 changed files with 189 additions and 43 deletions.
23 changes: 12 additions & 11 deletions docker/Dockerfile.cpu
@@ -1,5 +1,5 @@
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM debian:buster-slim AS base-cpu

# Setup environment variables
@@ -39,15 +39,16 @@ RUN --mount=type=cache,target=/var/cache/apt \
git \
&& echo "system install complete"

# Install mambaforge, with cache mounting ${CONDA_PKGS_DIRS} for faster builds
# Install miniconda, with cache mounting ${CONDA_PKGS_DIRS} for faster builds
RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
curl -sLo ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
&& chmod +x ~/mambaforge.sh \
&& ~/mambaforge.sh -b -p ${AGIPACK_PATH}/conda \
&& ${AGIPACK_PATH}/conda/bin/mamba init bash \
&& ${AGIPACK_PATH}/conda/bin/mamba config --add channels conda-forge \
&& ${AGIPACK_PATH}/conda/bin/mamba create -n ${AGIPACK_PYENV} python=${PYTHON_VERSION} -y \
&& rm ~/mambaforge.sh
curl -sLo ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-$(uname)-$(uname -m).sh" \
&& chmod +x ~/miniconda.sh \
&& ~/miniconda.sh -b -p ${AGIPACK_PATH}/conda \
&& ${AGIPACK_PATH}/conda/bin/conda init bash \
&& ${AGIPACK_PATH}/conda/bin/conda config --add channels conda-forge \
&& ${AGIPACK_PATH}/conda/bin/conda create -n ${AGIPACK_PYENV} python=${PYTHON_VERSION} -y \
&& ${AGIPACK_PATH}/conda/bin/conda install mamba -y \
&& rm ~/miniconda.sh

# Upgrade pip
RUN pip install --upgrade pip
@@ -110,7 +111,7 @@ ENV RAY_CONDA_HOME=/opt/conda
ENV RAY_ENABLE_MAC_LARGE_OBJECT_STORE=1

# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM base-cpu AS cpu

# Setup working directory
@@ -128,7 +129,7 @@ RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
RUN echo "run commands complete"
CMD ["bash", "-c", "/app/entrypoint.sh"]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM cpu AS test-cpu

# Install additional system packages
23 changes: 12 additions & 11 deletions docker/Dockerfile.gpu
@@ -1,5 +1,5 @@
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base-gpu

# Setup environment variables
@@ -39,15 +39,16 @@ RUN --mount=type=cache,target=/var/cache/apt \
git \
&& echo "system install complete"

# Install mambaforge, with cache mounting ${CONDA_PKGS_DIRS} for faster builds
# Install miniconda, with cache mounting ${CONDA_PKGS_DIRS} for faster builds
RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
curl -sLo ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
&& chmod +x ~/mambaforge.sh \
&& ~/mambaforge.sh -b -p ${AGIPACK_PATH}/conda \
&& ${AGIPACK_PATH}/conda/bin/mamba init bash \
&& ${AGIPACK_PATH}/conda/bin/mamba config --add channels conda-forge \
&& ${AGIPACK_PATH}/conda/bin/mamba create -n ${AGIPACK_PYENV} python=${PYTHON_VERSION} -y \
&& rm ~/mambaforge.sh
curl -sLo ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-$(uname)-$(uname -m).sh" \
&& chmod +x ~/miniconda.sh \
&& ~/miniconda.sh -b -p ${AGIPACK_PATH}/conda \
&& ${AGIPACK_PATH}/conda/bin/conda init bash \
&& ${AGIPACK_PATH}/conda/bin/conda config --add channels conda-forge \
&& ${AGIPACK_PATH}/conda/bin/conda create -n ${AGIPACK_PYENV} python=${PYTHON_VERSION} -y \
&& ${AGIPACK_PATH}/conda/bin/conda install mamba -y \
&& rm ~/miniconda.sh

# Upgrade pip
RUN pip install --upgrade pip
@@ -116,7 +117,7 @@ ENV RAY_CONDA_HOME=/opt/conda
ENV RAY_ENABLE_MAC_LARGE_OBJECT_STORE=1

# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM base-gpu AS gpu

# Setup working directory
@@ -134,7 +135,7 @@ RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
RUN echo "run commands complete"
CMD ["bash", "-c", "/app/entrypoint.sh"]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.2.0).
# Auto-generated by agi-pack (version=0.3.0).
FROM gpu AS test-gpu

# Install additional system packages
3 changes: 2 additions & 1 deletion examples/tutorials/05-serving-with-docker/docker-compose.yml
@@ -8,7 +8,7 @@ services:
- NOS_HOME=/app/.nos
- NOS_LOGGING_LEVEL=INFO
- NOS_GRPC_HOST=nos-grpc-server
- NOS_HTTP_ENV=dev
- NOS_HTTP_ENV=prod
volumes:
- ~/.nosd:/app/.nos
- /dev/shm:/dev/shm
@@ -22,6 +22,7 @@ services:
image: autonomi/nos:latest-gpu
environment:
- NOS_HOME=/app/.nos
- NOS_GRPC_HOST=[::]
- NOS_LOGGING_LEVEL=INFO
volumes:
- ~/.nosd:/app/.nos
34 changes: 34 additions & 0 deletions examples/tutorials/06-serving-with-m2m-auth/docker-compose.yml
@@ -0,0 +1,34 @@
version: "3.8"

services:
nos-http-gateway:
image: autonomi/nos:latest-cpu
command: nos-http-server --host 0.0.0.0 --port 8000 --workers 1
environment:
- NOS_HOME=/app/.nos
- NOS_LOGGING_LEVEL=INFO
- NOS_GRPC_HOST=nos-grpc-server
- NOS_HTTP_ENV=prod
- NOS_M2M_API_KEYS=sk-test-key-1,sk-test-key-2
volumes:
- ~/.nosd:/app/.nos
- /dev/shm:/dev/shm
ports:
- 8000:8000
ipc: host
depends_on:
- nos-grpc-server

nos-grpc-server:
image: autonomi/nos:latest-cpu
environment:
- NOS_HOME=/app/.nos
- NOS_GRPC_HOST=[::]
- NOS_LOGGING_LEVEL=INFO
volumes:
- ~/.nosd:/app/.nos
- /dev/shm:/dev/shm
ports:
- 50051:50051
ipc: host

27 changes: 27 additions & 0 deletions examples/tutorials/06-serving-with-m2m-auth/tests/test_m2m_auth.py
@@ -0,0 +1,27 @@
import requests


if __name__ == "__main__":
BASE_URL = "http://localhost:8000"

# Test health
response = requests.get(f"{BASE_URL}/v1/health")
response.raise_for_status()

# Test model info without authentication
response = requests.get(f"{BASE_URL}/v1/models")
assert response.status_code == 401, "Expected 401 Unauthorized"

# Test model info with invalid authentication
response = requests.get(f"{BASE_URL}/v1/models", headers={"X-Api-Key": "invalid-api-key"})
assert response.status_code == 403, "Expected 403 Forbidden"

# Test model info with valid authentication
response = requests.get(f"{BASE_URL}/v1/models", headers={"X-Api-Key": "sk-test-key-1"})
response.raise_for_status()
assert response.status_code == 200, "Expected 200 OK"

# Test model info with a second valid API key
response = requests.get(f"{BASE_URL}/v1/models", headers={"X-Api-Key": "sk-test-key-2"})
response.raise_for_status()
assert response.status_code == 200, "Expected 200 OK"
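
The same X-Api-Key header also guards the inference routes added later in this diff (chat, infer, infer_file). A hypothetical chat request, assuming the OpenAI-compatible route is mounted at /v1/chat/completions and that the model id below is registered on the server:

import requests

BASE_URL = "http://localhost:8000"
# Hypothetical model id: pick one returned by GET /v1/models.
payload = {
    "model": "example-org/example-chat-model",
    "messages": [{"role": "user", "content": "Hello!"}],
}
response = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    headers={"X-Api-Key": "sk-test-key-1"},
    json=payload,
)
response.raise_for_status()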
3 changes: 2 additions & 1 deletion nos/client/grpc.py
@@ -165,7 +165,8 @@ def WaitForServer(self, timeout: int = 60, retry_interval: int = 5) -> None:
if int(elapsed) > 10:
logger.warning("Waiting for server to start... (elapsed={:.0f}s)".format(time.time() - st))
time.sleep(retry_interval)
raise ServerReadyException("Failed to ping server.")
default_msg = """\n If you are running the server in docker, make sure the server sets `NOS_GRPC_HOST=[::]` and the client sets `NOS_GRPC_HOST=<server-container-name>` in their environment variables."""
raise ServerReadyException(f"Failed to ping server. {default_msg}")

def GetServiceVersion(self) -> str:
"""Get service version.
26 changes: 26 additions & 0 deletions nos/server/http/_exceptions.py
@@ -0,0 +1,26 @@
from fastapi import status
from fastapi.responses import JSONResponse
from loguru import logger
from starlette.requests import Request


async def default_exception_middleware(request: Request, call_next):
try:
return await call_next(request)
except Exception as exc:
base_error_message = f"Internal server error: [method={request.method}], url={request.url}]"
logger.error(f"Internal server error: [method={request.method}, url={request.url}, exc={exc}]")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"message": base_error_message},
)


async def default_exception_handler(request, error):
"""Default exception handler for all routes."""
base_error_message = f"Internal server error: [method={request.method}], url={request.url}]"
logger.error(f"Internal server error: [method={request.method}, url={request.url}, error={error}]")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"message": f"{base_error_message}."},
)
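
These two hooks are registered in app_factory further down in this diff; a minimal standalone sketch of the same wiring, assuming the module path nos/server/http/_exceptions.py shown above:

from fastapi import FastAPI

from nos.server.http._exceptions import default_exception_handler, default_exception_middleware

app = FastAPI()
# Wrap every request so unhandled exceptions become a JSON 500 response.
app.middleware("http")(default_exception_middleware)
# Fallback for exceptions that escape route handlers outside the middleware path.
app.add_exception_handler(Exception, default_exception_handler)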
38 changes: 38 additions & 0 deletions nos/server/http/_security.py
@@ -0,0 +1,38 @@
import os

from fastapi import Depends, HTTPException, Request, status
from fastapi.security import APIKeyHeader
from loguru import logger


valid_m2m_keys = {}
for key in os.getenv("NOS_M2M_API_KEYS", "").split(","):
if len(key) > 0:
logger.debug(f"Adding valid_m2m_keys [key={key}]")
valid_m2m_keys[key] = key
api_key_header = APIKeyHeader(name="X-Api-Key", auto_error=False)


async def validate_m2m_key(request: Request, api_key: str = Depends(api_key_header)) -> bool:
logger.debug(f"validate_m2m_key [api_key={api_key}]")

if not api_key:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Missing X-Api-Key Key header",
)

if api_key not in valid_m2m_keys:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Invalid Machine-to-Machine Key",
)

assert isinstance(api_key, str)
return True


if valid_m2m_keys:
ValidMachineToMachine = Depends(validate_m2m_key)
else:
ValidMachineToMachine = None
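
Because ValidMachineToMachine resolves to None when no keys are configured, the guard is a no-op in unauthenticated deployments. A minimal sketch of how the dependency attaches to a route, assuming NOS_M2M_API_KEYS=sk-test-key-1 is exported before this module is imported:

from fastapi import FastAPI
from fastapi.testclient import TestClient

from nos.server.http._security import ValidMachineToMachine

app = FastAPI()

@app.get("/protected")
def protected(user=ValidMachineToMachine):
    # The dependency raises 401/403 before this body runs when keys are configured.
    return {"ok": True}

client = TestClient(app)
assert client.get("/protected").status_code == 401  # missing header
assert client.get("/protected", headers={"X-Api-Key": "bad-key"}).status_code == 403  # unknown key
assert client.get("/protected", headers={"X-Api-Key": "sk-test-key-1"}).status_code == 200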
53 changes: 35 additions & 18 deletions nos/server/http/_service.py
@@ -22,6 +22,8 @@
from nos.protoc import import_module
from nos.version import __version__

from ._exceptions import default_exception_handler, default_exception_middleware
from ._security import ValidMachineToMachine as ValidM2M
from ._utils import decode_item, encode_item
from .integrations.openai.models import (
ChatCompletionsRequest,
@@ -60,28 +62,14 @@ class InferenceRequest:
class InferenceService:
"""HTTP server application for NOS API."""

version: str = field(default="v1")
"""NOS version."""

address: str = field(default=DEFAULT_GRPC_ADDRESS)
"""gRPC address."""

env: str = field(default=HTTP_ENV)
"""Environment (dev/prod/test)."""

model_config = ConfigDict(arbitrary_types_allowed=True)
"""Model configuration."""

def __post_init__(self):
"""Post initialization."""
self.app = FastAPI(
title="NOS REST API",
description=f"NOS REST API (version={__version__}, api_version={self.version})",
version=self.version,
debug=self.env != "prod",
)
logger.debug(f"Starting NOS REST API (version={__version__}, env={self.env})")

self.client = Client(self.address)
logger.debug(f"Connecting to gRPC server (address={self.client.address})")

@@ -144,12 +132,33 @@ def app_factory(version: str = HTTP_API_VERSION, address: str = DEFAULT_GRPC_ADDRESS
Returns:
(FastAPI) FastAPI application.
"""
nos_app = InferenceService(version=version, address=address, env=env)
app = nos_app.app
from fastapi.middleware.cors import CORSMiddleware

svc = InferenceService(address=address)
logger.info(f"app_factory [env={env}]: Adding CORS middleware ...")
app = FastAPI(
title="NOS REST API",
description=f"NOS REST API (version={__version__}, api_version={version})",
version=version,
debug=env != "prod",
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
logger.debug(f"Starting NOS REST API (version={__version__}, env={env})")

# Add default exception handler
logger.info(f"app_factory [env={env}]: Adding default exception handlers ...")
app.middleware("http")(default_exception_middleware)
app.add_exception_handler(Exception, default_exception_handler)

def get_client() -> Client:
"""Get the inference client."""
return nos_app.client
return svc.client

def unnormalize_id(model_id: str) -> str:
"""Un-normalize the model identifier."""
@@ -192,14 +201,19 @@ def health(client: Client = Depends(get_client)) -> JSONResponse:
@app.get(f"/{version}/models", status_code=status.HTTP_200_OK, response_model=Model)
def models(
client: Client = Depends(get_client),
user: Depends = ValidM2M,
) -> Model:
"""List all available models."""
_model_table = build_model_table(client)
logger.debug(f"Listing models [models={_model_table.values()}]")
return Model(data=list(_model_table.values()))

@app.get(f"/{version}/models/" + "{model:path}", response_model=ChatModel)
def model_info(model: str, client: Client = Depends(get_client)) -> ChatModel:
def model_info(
model: str,
client: Client = Depends(get_client),
user: Depends = ValidM2M,
) -> ChatModel:
"""Get model information."""
_model_table = build_model_table(client)
try:
@@ -211,6 +225,7 @@ def model_info(model: str, client: Client = Depends(get_client)) -> ChatModel:
def chat(
request: ChatCompletionsRequest,
client: Client = Depends(get_client),
user: Depends = ValidM2M,
) -> StreamingResponse:
"""Perform chat completion on the given input data."""
logger.debug(f"Received chat request [model={request.model}, messages={request.messages}]")
@@ -270,6 +285,7 @@ def openai_streaming_generator():
def infer(
request: InferenceRequest,
client: Client = Depends(get_client),
user: Depends = ValidM2M,
) -> JSONResponse:
"""Perform inference on the given input data.
@@ -319,6 +335,7 @@ def infer_file(
file: Optional[UploadFile] = File(None),
url: Optional[str] = Form(None),
client: Client = Depends(get_client),
user: Depends = ValidM2M,
) -> JSONResponse:
"""Perform inference on the given input data using multipart/form-data.
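
Since app_factory now returns a plain FastAPI application with CORS and exception handling pre-wired, it can also be served directly. A hypothetical launch sketch, assuming app_factory's defaults for version and address and the env keyword used above:

import uvicorn

from nos.server.http._service import app_factory

# Hypothetical invocation: mirrors the compose command
# `nos-http-server --host 0.0.0.0 --port 8000 --workers 1`.
app = app_factory(env="prod")
uvicorn.run(app, host="0.0.0.0", port=8000)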
2 changes: 1 addition & 1 deletion nos/version.py
@@ -1 +1 @@
__version__ = "0.2.1"
__version__ = "0.3.0"
