diff --git a/_internal/build_model_spec_metadata.py b/_internal/build_model_spec_metadata.py
new file mode 100644
index 00000000..853a015c
--- /dev/null
+++ b/_internal/build_model_spec_metadata.py
@@ -0,0 +1,21 @@
+from nos import hub
+from nos.common import TaskType
+from nos.common.spec import ModelResources, ModelSpec, ModelSpecMetadata, _metadata_path
+from nos.logging import logger
+
+
+spec: ModelSpec = hub.load_spec(task=TaskType.IMAGE_EMBEDDING, model_name="openai/clip")
+spec._metadata = ModelSpecMetadata(
+    name=spec.name,
+    task=spec.task,
+    resources={
+        "cpu": ModelResources(runtime="cpu", device="cpu", device_memory=2 * 1024**3, cpus=2),
+        "gpu": ModelResources(runtime="gpu", device="cuda", device_memory=2 * 1024**3, cpus=1),
+    },
+)
+
+path = _metadata_path(spec)
+if not path.exists():
+    path.parent.mkdir(parents=True, exist_ok=True)
+    spec._metadata.to_json(path)
+    logger.info(f"Saved metadata to {path}")
diff --git a/nos/common/spec.py b/nos/common/spec.py
index 5e3bc407..eb32ccc6 100644
--- a/nos/common/spec.py
+++ b/nos/common/spec.py
@@ -5,9 +5,9 @@ from functools import cached_property
 import json
-from dataclasses import field
+from dataclasses import asdict, field
 from functools import cached_property
-from typing import Any, Callable, Dict, List, MutableSet, Optional, Tuple, Type, Union, get_args, get_origin
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, get_args, get_origin
 
 from pydantic import validator
 from pydantic.dataclasses import dataclass
 
 
@@ -16,7 +16,7 @@
 from nos.common.exceptions import NosInputValidationException
 from nos.common.tasks import TaskType
 from nos.common.types import Batch, EmbeddingSpec, ImageSpec, ImageT, TensorSpec, TensorT  # noqa: F401
-from nos.constants import NOS_PATH
+from nos.constants import NOS_MODELS_DIR
 from nos.logging import logger
 from nos.protoc import import_module
 
@@ -210,14 +210,28 @@ def from_packages(cls, packages: List[str]) -> Dict[str, Any]:
 class ModelResources:
     """Model resources (device/host memory etc)."""
 
+    runtime: str = "cpu"
+    """Runtime type (cpu, gpu, trt-runtime, etc).
+    See `nos.server._runtime.InferenceServiceRuntime` for the list of supported runtimes.
+    """
     device: str = "cpu"
     """Device type (cpu, cuda, mps, neuron, etc)."""
     device_memory: Union[int, str] = field(default=512 * 1024**2)
     """Device memory (defaults to 512 MB)."""
     cpus: float = 0
     """Number of CPUs (defaults to 0 CPUs)."""
-    memory: Union[int, str] = field(default=32 * 1024**2)
-    """Host memory (defaults to 32 MB)"""
+    memory: Union[int, str] = field(default=256 * 1024**2)
+    """Host memory (defaults to 256 MB)."""
+
+    @validator("runtime")
+    def _validate_runtime(cls, runtime: str) -> str:
+        """Validate the runtime."""
+        from nos.server._runtime import InferenceServiceRuntime
+
+        # Check that the runtime is one of the supported runtimes.
+        if runtime not in InferenceServiceRuntime.configs.keys():
+            raise ValueError(f"Invalid runtime, runtime={runtime}.")
+        return runtime
 
     @validator("device")
     def _validate_device(cls, device: str) -> str:
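Review note: the new `runtime` field and `_validate_runtime` gate every `ModelResources` at construction time, and the host-memory floor moves from 32 MB to 256 MB. A quick sanity-check sketch of the intended behavior, assuming a checkout where `InferenceServiceRuntime.configs` does not define a `tpu` runtime (the supported set lives in `nos.server._runtime`), and recalling that pydantic surfaces validator failures as `ValidationError`:

```python
from pydantic import ValidationError

from nos.common.spec import ModelResources

# Valid profile: 2 GB of device memory on the gpu runtime.
gpu = ModelResources(runtime="gpu", device="cuda", device_memory=2 * 1024**3, cpus=1)

# Unsupported runtime: rejected by `_validate_runtime`.
try:
    ModelResources(runtime="tpu")  # assumption: "tpu" is not in InferenceServiceRuntime.configs
except ValidationError as e:
    print(e)  # Invalid runtime, runtime=tpu.

# Host memory below the new 256 MB floor: rejected by `_validate_memory`.
try:
    ModelResources(memory=128 * 1024**2)
except ValidationError as e:
    print(e)
```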
@@ -236,8 +250,8 @@ def _validate_device_memory(cls, device_memory: Union[int, str]) -> int:
         if isinstance(device_memory, str):
             raise NotImplementedError()
 
-        if device_memory <= 32 * 1024**2 or device_memory > 128 * 1024**3:
-            err_msg = f"Invalid device memory provided, device_memory={device_memory / 1024**2} MB. Provide a value between 32 MB and 128 GB."
+        if device_memory < 256 * 1024**2 or device_memory > 128 * 1024**3:
+            err_msg = f"Invalid device memory provided, device_memory={device_memory / 1024**2} MB. Provide a value between 256 MB and 128 GB."
             logger.error(err_msg)
             raise ValueError(err_msg)
         return device_memory
@@ -260,8 +274,8 @@ def _validate_memory(cls, memory: Union[int, str]) -> int:
         if isinstance(memory, str):
             raise NotImplementedError()
 
-        if memory <= 32 * 1024**2 or memory > 128 * 1024**3:
-            err_msg = f"Invalid device memory provided, memory={memory / 1024**2} MB. Provide a value between 32 MB and 128 GB."
+        if memory < 256 * 1024**2 or memory > 128 * 1024**3:
+            err_msg = f"Invalid host memory provided, memory={memory / 1024**2} MB. Provide a value between 256 MB and 128 GB."
             logger.error(err_msg)
             raise ValueError(err_msg)
         return memory
@@ -278,21 +292,31 @@ class ModelSpecMetadata:
     """Model identifier."""
     task: TaskType
     """Task type (e.g. image_embedding, image_generation, object_detection_2d, etc)."""
-    runtime: set
-    """Runtimes supported (e.g. cpu, gpu, trt, etc).
-    See `nos.server._runtime.InferenceServiceRuntime` for the list of supported runtimes.
-    """
-    resources: ModelResources
+    resources: Dict[str, ModelResources] = field(default_factory=dict)
     """Model resource limits (device/host memory, etc)."""
+    """Key is the runtime type (cpu, gpu, trt-runtime, etc)."""
 
-    @validator("runtime")
-    def _validate_runtime(cls, runtime: str) -> str:
-        """Validate the runtime."""
-        from nos.server._runtime import InferenceServiceRuntime
+    def __repr__(self) -> str:
+        return f"""ModelSpecMetadata(name={self.name}, task={self.task}, """ f"""resources={self.resources})"""
 
-        if runtime not in InferenceServiceRuntime.configs:
-            raise ValueError(f"Invalid runtime, runtime={runtime}.")
-        return runtime
+    def to_json(self, filename: str) -> Dict[str, Any]:
+        """Convert the model spec to json."""
+        specd = asdict(self)
+        with open(filename, "w") as f:
+            json.dump(specd, f, indent=4)
+        return specd
+
+    @classmethod
+    def from_json(cls, filename: str) -> "ModelSpecMetadata":
+        """Convert the model spec from json."""
+        with open(filename, "r") as f:
+            specd = json.load(f)
+        return cls(**specd)
+
+
+def _metadata_path(spec: "ModelSpec") -> str:
+    """Return the metadata path for a model."""
+    return NOS_MODELS_DIR / f"metadata/{spec.id}/metadata.json"
 
 
 @dataclass
@@ -311,22 +335,28 @@ class ModelSpec:
     """Model function signature."""
     runtime_env: RuntimeEnv = None
     """Runtime environment with custom packages."""
-    _metadata: ModelSpecMetadata = None
+    _metadata: ModelSpecMetadata = field(init=False, default=None)
     """Model specification metadata. The contents of the metadata
     (profiles, metrics, etc) are specified in a separate file."""
 
+    class Config:
+        """Custom configuration to keep _metadata private for now."""
+
+        underscore_attrs_are_private = True
+
     def __repr__(self):
-        return f"""ModeSpec(name={self.name}, task={self.task})""" f"""\n {self.signature}"""
+        return f"""ModelSpec(name={self.name}, task={self.task})""" f"""\n {self.signature}"""
 
     @cached_property
     def metadata(self) -> ModelSpecMetadata:
         try:
-            path = NOS_PATH / f"data/models/{self.id}/metadata.json"
-            with open(str(path), "r") as f:
-                metadata = ModelSpecMetadata(**json.load(f))
+            path = _metadata_path(self)
+            if not path.exists():
+                raise FileNotFoundError(f"Model metadata not found. [path={path}]")
+            metadata = ModelSpecMetadata.from_json(str(path))
             logger.info(f"Loaded model metadata [name={self.name}, path={path}, metadata={metadata}]")
-        except FileNotFoundError:
-            logger.warning(f"Model metadata not found. [path={path}]")
+        except Exception as e:
+            logger.warning(f"Failed to load model metadata [name={self.name}, e={e}]")
             metadata = None
         return metadata
diff --git a/nos/common/tasks.py b/nos/common/tasks.py
index 57244d91..09a632be 100644
--- a/nos/common/tasks.py
+++ b/nos/common/tasks.py
@@ -1,7 +1,7 @@
 from enum import Enum
 
 
-class TaskType(Enum):
+class TaskType(str, Enum):
     """Task types."""
 
     OBJECT_DETECTION_2D = "object_detection_2d"
diff --git a/nos/data/spec.py b/nos/data/spec.py
deleted file mode 100644
index 65f34e67..00000000
--- a/nos/data/spec.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from nos import hub
-from nos.common import TaskType
-from nos.common.spec import ModelResources, ModelSpec, ModelSpecMetadata
-
-
-# path = NOS_PATH / f"data/models/{id}/metadata.json"
-spec: ModelSpec = hub.load_spec(task=TaskType.IMAGE_EMBEDDING, model_name="openai/clip")
-spec._metadata = ModelSpecMetadata(
-    name=spec.name,
-    task=spec.task,
-    runtime={"cpu", "gpu", "trt"},
-    resources=ModelResources(device="cuda", device_memory=2 * 1024**3, cpus=1),
-)
-import pdb; pdb.set_trace()
diff --git a/nos/managers/model.py b/nos/managers/model.py
index b93ee837..1f09d376 100644
--- a/nos/managers/model.py
+++ b/nos/managers/model.py
@@ -90,9 +90,6 @@ class ModelHandle:
 
     def __post_init__(self):
         """Initialize the actor handles."""
-        import pdb
-
-        pdb.set_trace()
         self._actors = [self.get_actor(self.spec) for _ in range(self.num_replicas)]
         self._actor_pool = ray.util.ActorPool(self._actors)
         self._results_queue_size = self.num_replicas