Skip to content

Commit

Permalink
Migrate to pydantic>=2.5, torch==2.1.1, ray[default]>=2.9 (#534)
Browse files Browse the repository at this point in the history
## Summary
- Upgrades pydantic, torch, ray and agi-pack to support the
`pydantic>=2.5` migration.
- Ray must be upgraded to `2.9` or later to work with pydantic v2:
ray-project/ray#39722
- Updates `agi-pack` to a version with pydantic v2 support:
spillai/agi-pack#16
- Migrates `ModelSpec` and other dataclasses to the new pydantic
`field_validator` syntax.


## Related issues

#220 

## Checks

- [x] `make lint`: I've run `make lint` to lint the changes in this PR.
- [x] `make test`: I've made sure the tests (`make test-cpu` or `make
test`) are passing.
- Additional tests:
   - [x] Benchmark tests (when contributing new models)
   - [x] GPU/HW tests
  • Loading branch information
spillai authored Jan 29, 2024
1 parent 2329af3 commit 8153126
Show file tree
Hide file tree
Showing 16 changed files with 112 additions and 88 deletions.
8 changes: 4 additions & 4 deletions docker/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM debian:buster-slim AS base-cpu

# Setup environment variables
Expand Down Expand Up @@ -80,7 +80,7 @@ ADD ./requirements/requirements.server.txt /tmp/requirements.server.txt
RUN echo "running commands"
RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
--mount=type=cache,target=${PIP_CACHE_DIR} \
mamba install pytorch==2.0.1 torchvision>=0.16 torchaudio>=0.10.2 cpuonly -c pytorch
mamba install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 cpuonly -c pytorch
RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
--mount=type=cache,target=${PIP_CACHE_DIR} \
pip config set global.extra-index-url https://download.pytorch.org/whl/cpu
Expand Down Expand Up @@ -110,7 +110,7 @@ ENV RAY_CONDA_HOME=/opt/conda
ENV RAY_ENABLE_MAC_LARGE_OBJECT_STORE=1

# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM base-cpu AS cpu

# Setup working directory
Expand All @@ -128,7 +128,7 @@ RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
RUN echo "run commands complete"
CMD ["bash", "-c", "/app/entrypoint.sh"]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM cpu AS test-cpu

# Install additional system packages
Expand Down
11 changes: 6 additions & 5 deletions docker/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base-gpu

# Setup environment variables
Expand Down Expand Up @@ -58,8 +58,9 @@ RUN pip install --upgrade pip
RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
mamba install -yv \
accelerate>0.18.0 \
pytorch==2.0.1 \
torchvision \
pytorch==2.1.1 \
torchaudio==2.1.1 \
torchvision==0.16.1 \
pytorch-cuda=11.8 \
cudatoolkit=11.8 \
-c pytorch -c nvidia \
Expand Down Expand Up @@ -115,7 +116,7 @@ ENV RAY_CONDA_HOME=/opt/conda
ENV RAY_ENABLE_MAC_LARGE_OBJECT_STORE=1

# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM base-gpu AS gpu

# Setup working directory
Expand All @@ -133,7 +134,7 @@ RUN --mount=type=cache,target=${CONDA_PKGS_DIRS} \
RUN echo "run commands complete"
CMD ["bash", "-c", "/app/entrypoint.sh"]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>
# Auto-generated by agi-pack (version=0.1.19).
# Auto-generated by agi-pack (version=0.2.0).
FROM gpu AS test-gpu

# Install additional system packages
Expand Down
2 changes: 1 addition & 1 deletion docker/agibuild.cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ images:
- ./scripts/entrypoint.sh:/app/entrypoint.sh
- ./requirements/requirements.server.txt:/tmp/requirements.server.txt
run:
- mamba install pytorch==2.0.1 torchvision>=0.16 torchaudio>=0.10.2 cpuonly -c pytorch
- mamba install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 cpuonly -c pytorch
- pip config set global.extra-index-url https://download.pytorch.org/whl/cpu
- pip install -r /tmp/requirements.server.txt && rm -rf /tmp/requirements.server.txt
- mamba install -y -c conda-forge x264=='1!161.3030' ffmpeg=4.3.2
Expand Down
5 changes: 3 additions & 2 deletions docker/agibuild.gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ images:
- git
conda:
- accelerate>0.18.0
- pytorch==2.0.1
- torchvision
- pytorch==2.1.1
- torchaudio==2.1.1
- torchvision==0.16.1
- pytorch-cuda=11.8
- cudatoolkit=11.8
- -c pytorch -c nvidia
Expand Down
5 changes: 2 additions & 3 deletions nos/common/runtime.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import importlib
from typing import Any, Dict, List

from pydantic.dataclasses import dataclass
from pydantic import BaseModel


def is_package_available(name: str) -> bool:
Expand All @@ -24,8 +24,7 @@ def is_torch_neuronx_available():
return is_package_available("torch_neuronx")


@dataclass
class RuntimeEnv:
class RuntimeEnv(BaseModel):
conda: Dict[str, Any]
"""Conda environment specification."""

Expand Down
98 changes: 58 additions & 40 deletions nos/common/spec.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import copy
import inspect
import math
import re
from dataclasses import asdict, field
from dataclasses import field
from functools import cached_property
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, get_args, get_origin

import humanize
from pydantic import validator
from pydantic import BaseModel, Field, field_validator
from pydantic.dataclasses import dataclass

from nos.common.cloudpickle import dumps, loads
Expand Down Expand Up @@ -140,8 +139,7 @@ def AnnotatedParameter(
return ObjectTypeInfo(annotation, parameter)


@dataclass
class FunctionSignature:
class FunctionSignature(BaseModel):
"""Function signature that fully describes the remote-model to be executed
including `inputs`, `outputs`, `func_or_cls` to be executed,
initialization `args`/`kwargs`."""
Expand All @@ -151,22 +149,42 @@ class FunctionSignature:
method: str
"""Class method name. (e.g. forward, __call__ etc)"""

init_args: Tuple[Any, ...] = field(default_factory=tuple)
init_args: Tuple[Any, ...] = Field(default_factory=tuple)
"""Arguments to initialize the model instance."""
init_kwargs: Dict[str, Any] = field(default_factory=dict)
init_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to initialize the model instance."""

parameters: Dict[str, Any] = field(init=False)
parameters: Dict[str, Any] = Field(init=False, default_factory=dict)
"""Input function signature (as returned by inspect.signature)."""
return_annotation: Any = field(init=False)
return_annotation: Any = Field(init=False, default=None)
"""Output / return function signature (as returned by inspect.signature)."""

input_annotations: Dict[str, Any] = field(default_factory=dict)
input_annotations: Dict[str, Any] = Field(default_factory=dict)
"""Mapping of input keyword arguments to dtypes."""
output_annotations: Union[Any, Dict[str, Any], None] = field(default=None)
output_annotations: Union[Any, Dict[str, Any], None] = Field(default=None)
"""Mapping of output names to dtypes."""

def __post_init__(self):
def __init__(
self,
func_or_cls: Callable,
method: str,
init_args: Tuple[Any, ...] = (),
init_kwargs: Dict[str, Any] = {}, # noqa: B006
parameters: Dict[str, Any] = {}, # noqa: B006
return_annotation: Any = None,
input_annotations: Dict[str, Any] = {}, # noqa: B006
output_annotations: Union[Any, Dict[str, Any], None] = None,
):
super().__init__(
func_or_cls=func_or_cls,
method=method,
init_args=init_args,
init_kwargs=init_kwargs,
parameters=parameters,
return_annotation=return_annotation,
input_annotations=input_annotations,
output_annotations=output_annotations,
)
if not callable(self.func_or_cls):
raise ValueError(f"Invalid function/class provided, func_or_cls={self.func_or_cls}.")

Expand All @@ -181,10 +199,6 @@ def __post_init__(self):
self.return_annotation = sig.return_annotation
logger.debug(f"Function signature [method={self.method}, sig={sig}].")

def __repr__(self) -> str:
"""Return the function signature representation."""
return f"FunctionSignature({asdict(self)})"

@staticmethod
def validate(inputs: Dict[str, Any], sig: Dict[str, Any]) -> Dict[str, Any]:
"""Validate the input dict against the defined signature (input or output)."""
Expand All @@ -196,14 +210,16 @@ def validate(inputs: Dict[str, Any], sig: Dict[str, Any]) -> Dict[str, Any]:
# TODO (spillai): Validate input types and shapes.
return inputs

@validator("init_args", pre=True)
@field_validator("init_args", mode="before")
@classmethod
def _validate_init_args(cls, init_args: Union[Tuple[Any, ...], Any]) -> Tuple[Any, ...]:
"""Validate the initialization arguments."""
# TODO (spillai): Check the function signature of the func_or_cls class and validate
# the init_args against the signature.
return init_args

@validator("init_kwargs", pre=True)
@field_validator("init_kwargs", mode="before")
@classmethod
def _validate_init_kwargs(cls, init_kwargs: Dict[str, Any]) -> Dict[str, Any]:
"""Validate the initialization keyword arguments."""
# TODO (spillai): Check the function signature of the func_or_cls class and validate
Expand Down Expand Up @@ -249,21 +265,20 @@ def get_outputs_spec(self) -> Dict[str, Union[ObjectTypeInfo, Dict[str, ObjectTy
return AnnotatedParameter(self.output_annotations)


@dataclass
class ModelResources:
class ModelResources(BaseModel):
"""Model resources (device/host memory etc)."""

runtime: str = field(default="auto")
runtime: str = Field(default="auto")
"""Runtime type (cpu, gpu, trt, etc).
See `nos.server._runtime.InferenceServiceRuntime` for the list of supported runtimes.
"""
cpus: float = 0
cpus: float = Field(default=0.0)
"""Number of CPUs (defaults to 0 CPUs)."""
memory: Union[None, int, str] = field(default=0)
memory: Union[None, int, str] = Field(default=0)
"""Host / CPU memory"""
device: str = field(default="auto")
device: str = Field(default="auto")
"""Device identifier (nvidia-2080, nvidia-4090, apple-m2, etc)."""
device_memory: Union[int, str] = field(default="auto")
device_memory: Union[int, str] = Field(default="auto")
"""Device / GPU memory."""

def __repr__(self) -> str:
Expand All @@ -277,7 +292,7 @@ def __repr__(self) -> str:
f"""memory={memory}, device_memory={device_memory})"""
)

@validator("runtime")
@field_validator("runtime")
def _validate_runtime(cls, runtime: str) -> str:
"""Validate the runtime."""
from nos.server._runtime import InferenceServiceRuntime
Expand All @@ -287,7 +302,7 @@ def _validate_runtime(cls, runtime: str) -> str:
raise ValueError(f"Invalid runtime, runtime={runtime}.")
return runtime

@validator("cpus")
@field_validator("cpus")
def _validate_cpus(cls, cpus: Union[float, str]) -> float:
"""Validate the number of CPUs."""
if isinstance(cpus, str):
Expand All @@ -299,7 +314,7 @@ def _validate_cpus(cls, cpus: Union[float, str]) -> float:
raise ValueError(err_msg)
return cpus

@validator("memory")
@field_validator("memory")
def _validate_memory(cls, memory: Union[int, str]) -> int:
"""Validate the host memory."""
if memory is None:
Expand All @@ -314,7 +329,7 @@ def _validate_memory(cls, memory: Union[int, str]) -> int:
raise ValueError(err_msg)
return memory

@validator("device")
@field_validator("device")
def _validate_device(cls, device: str) -> str:
"""Validate the device."""
if device.startswith("nvidia-"):
Expand All @@ -327,7 +342,7 @@ def _validate_device(cls, device: str) -> str:
raise ValueError(f"Invalid device, device={device}.")
return device

@validator("device_memory")
@field_validator("device_memory")
def _validate_device_memory(cls, device_memory: Union[int, str]) -> Union[int, Literal["auto"]]:
"""Validate the device memory."""
if isinstance(device_memory, str) and device_memory != "auto":
Expand Down Expand Up @@ -473,7 +488,7 @@ class ModelSpecMetadata:
"""Model identifier."""
method: str
"""Model method name."""
task: TaskType = None
task: Union[TaskType, None] = field(default=None)
"""Task type (e.g. image_embedding, image_generation, object_detection_2d, etc)."""

def __repr__(self) -> str:
Expand Down Expand Up @@ -503,8 +518,7 @@ def profile(self) -> Dict[str, Any]:
return {}


@dataclass
class ModelSpec:
class ModelSpec(BaseModel):
"""Model specification for the registry.
ModelSpec captures all the relevant information for
Expand All @@ -513,15 +527,20 @@ class ModelSpec:

id: str
"""Model identifier."""
signature: Dict[str, FunctionSignature] = field(default_factory=dict)
signature: Dict[str, FunctionSignature] = Field(default_factory=dict)
"""Model function signatures to export (method -> FunctionSignature)."""
runtime_env: RuntimeEnv = None
runtime_env: Union[RuntimeEnv, None] = Field(default=None)
"""Runtime environment with custom packages."""

def __init__(
self, id: str, signature: Dict[str, FunctionSignature] = {}, runtime_env: RuntimeEnv = None # noqa: B006
):
super().__init__(id=id, signature=signature, runtime_env=runtime_env)

def __repr__(self):
return f"""ModelSpec(id={self.id}, methods=({', '.join(list(self.signature.keys()))}), tasks=({', '.join([str(self.task(m)) for m in self.signature])}))"""

@validator("id", pre=True)
@field_validator("id", mode="before")
def _validate_id(cls, id: str) -> str:
"""Validate the model identifier."""
regex = re.compile(r"^[a-zA-Z0-9\/._-]+$") # allow alphanumerics, `/`, `.`, `_`, and `-`
Expand All @@ -531,7 +550,7 @@ def _validate_id(cls, id: str) -> str:
)
return id

@validator("signature", pre=True)
@field_validator("signature", mode="before")
def _validate_signature(
cls, signature: Union[FunctionSignature, Dict[str, FunctionSignature]], **kwargs: Dict[str, Any]
) -> Dict[str, FunctionSignature]:
Expand Down Expand Up @@ -705,7 +724,7 @@ def from_cls(
for method in methods:
# Add the function signature
sig = FunctionSignature(
func_or_cls,
func_or_cls=func_or_cls,
method=method,
init_args=init_args,
init_kwargs=init_kwargs,
Expand All @@ -718,14 +737,13 @@ def from_cls(
spec = cls(
model_id,
signature=signature,
metadata=metadata,
runtime_env=runtime_env,
)
return spec

def _to_proto(self) -> nos_service_pb2.GenericResponse:
"""Convert the model spec to proto."""
spec = copy.deepcopy(self)
spec: ModelSpec = loads(dumps(self, protocol=-1))
# Note (spillai): We only serialize the input/output
# signatures and method of the spec. Notably, the
# `func_or_cls` attribute is not serialized to avoid
Expand Down
Loading

0 comments on commit 8153126

Please sign in to comment.