From 711dfc86b7a7abbca266190d763238ad64dc1e70 Mon Sep 17 00:00:00 2001 From: Michael Honaker Date: Tue, 28 May 2024 10:49:43 -0400 Subject: [PATCH] Address Review Comments Signed-off-by: Michael Honaker --- .../model_management/directory_model_sizer.py | 21 ++++++++++++------- .../runtime/model_management/model_manager.py | 16 +++++++------- caikit/runtime/names.py | 5 +++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/caikit/runtime/model_management/directory_model_sizer.py b/caikit/runtime/model_management/directory_model_sizer.py index 22f51fe79..1b5d8c733 100644 --- a/caikit/runtime/model_management/directory_model_sizer.py +++ b/caikit/runtime/model_management/directory_model_sizer.py @@ -33,7 +33,11 @@ class DirectoryModelSizer(ModelSizerBase): """DirectoryModelSizer. This class calculates a models size based on the - size of the files in the model directory""" + size of the files in the model directory + + ! Note: It caches the size of the directory after first sizing which can cause + race conditions in certain situations. + """ name = "DIRECTORY" @@ -52,15 +56,18 @@ def get_model_size(self, model_id, local_model_path, model_type) -> int: Returns: The estimated size in bytes of memory that would be used by loading this model """ - # Cache model's size - if local_model_path not in self.model_directory_size: - self.model_directory_size[local_model_path] = self.__get_directory_size( - model_id, local_model_path - ) + # Return the cached model size if one exists + if model_size := self.model_directory_size.get(local_model_path): + return model_size - return self.model_directory_size[local_model_path] + # Calculate the model size and add it to the cache. This uses last in + # methodology so that the most recent size is used during parallel access + dir_size = self.__get_directory_size(model_id, local_model_path) + self.model_directory_size[local_model_path] = dir_size + return dir_size def __get_directory_size(self, model_id, local_model_path) -> int: + """Get the size of a directory""" try: if os.path.isdir(local_model_path): # Walk the directory to size all files diff --git a/caikit/runtime/model_management/model_manager.py b/caikit/runtime/model_management/model_manager.py index 0e89d15d4..5d7242dc9 100644 --- a/caikit/runtime/model_management/model_manager.py +++ b/caikit/runtime/model_management/model_manager.py @@ -42,6 +42,11 @@ from caikit.runtime.model_management.loaded_model import LoadedModel from caikit.runtime.model_management.model_loader_base import ModelLoaderBase from caikit.runtime.model_management.model_sizer_base import ModelSizerBase +from caikit.runtime.names import ( + DEFAULT_LOADER_NAME, + DEFAULT_SIZER_NAME, + LOCAL_MODEL_TYPE, +) from caikit.runtime.types.caikit_runtime_exception import CaikitRuntimeException log = alog.use_channel("MODEL-MANAGR") @@ -65,9 +70,6 @@ "Summary of the duration (in seconds) of loadModel RPCs", ["model_type"], ) -LOCAL_MODEL_TYPE = "LOCAL" -DEFAULT_LOADER_NAME = "default" -DEFAULT_SIZER_NAME = "default" class ModelManager: # pylint: disable=too-many-instance-attributes @@ -79,8 +81,6 @@ class ModelManager: # pylint: disable=too-many-instance-attributes __model_size_gauge_lock = threading.Lock() - _LOCAL_MODEL_TYPE = "standalone-model" - ## Construction ## @classmethod @@ -461,7 +461,7 @@ def retrieve_model(self, model_id: str) -> ModuleBase: loaded_model = self.load_model( model_id=model_id, local_model_path=local_model_path, - model_type=self._LOCAL_MODEL_TYPE, + model_type=LOCAL_MODEL_TYPE, wait=True, retries=get_config().runtime.lazy_load_retries, ) @@ -538,7 +538,7 @@ def deploy_model( return self.load_model( model_id=model_id, local_model_path=model_dir, - model_type=self._LOCAL_MODEL_TYPE, + model_type=LOCAL_MODEL_TYPE, **kwargs, ) @@ -643,7 +643,7 @@ def _local_models_dir_sync(self, wait: bool = False, load: bool = True): self.load_model( model_id, model_path, - self._LOCAL_MODEL_TYPE, + LOCAL_MODEL_TYPE, wait=False, retries=get_config().runtime.lazy_load_retries, ) diff --git a/caikit/runtime/names.py b/caikit/runtime/names.py index 3bdac38e1..71e333105 100644 --- a/caikit/runtime/names.py +++ b/caikit/runtime/names.py @@ -49,6 +49,11 @@ log = alog.use_channel("RNTM-NAMES") +################################# Model Management Names ####################### +LOCAL_MODEL_TYPE = "standalone-model" +DEFAULT_LOADER_NAME = "default" +DEFAULT_SIZER_NAME = "default" + ################################# Service Names ################################