diff --git a/examples/runtime/README.md b/examples/runtime/README.md
index eb1ae93..8388da9 100644
--- a/examples/runtime/README.md
+++ b/examples/runtime/README.md
@@ -34,9 +34,9 @@ python3 run_train_and_inference.py
 
 This will create a gRPC client to call `.train()` on the object detector module, which is currently a stub. Note that the training parameters are not actually being used here and are only present for illustration purposes to show how args are passed. The result of the train job is simply the same model provided as the base, exported under a new model ID, `new_model`.
 
-Then, an inference request is sent to the _original_ model, which calls `.run()`, to produce the prediction that is logged by the script.
+Then, an inference request is sent to the newly loaded model, which calls `.run()`, to produce the prediction that is logged by the script.
 
-NOTE: it would be a good idea to show how to load and run the new model in this demo, but since loading is generally handled by kserve/model mesh, for now we just hit the old one.
+NOTE: in order to hit the new model, we need to set `runtime.lazy_load_local_models=True` in the runtime config, which by default will sync the local model dir with the in-memory runtime (i.e., load models that have been added and unload models that have been deleted) every 10 seconds. If inference fails, try setting the log level to `debug2` and verify that you see the runtime polling for new models periodically.
 
 ## A Deeper Look
 
diff --git a/examples/runtime/common.py b/examples/runtime/common.py
index ef4276b..64daa91 100644
--- a/examples/runtime/common.py
+++ b/examples/runtime/common.py
@@ -42,5 +42,7 @@
 # Example model ID to use when creating a small model at model dir init time
 DEMO_MODEL_ID = "my_model"
 
+# New model that we are going to train and run an inference call on
+NEW_MODEL_ID = "new_model"
 
 RUNTIME_PORT = 8085
diff --git a/examples/runtime/run_train_and_inference.py b/examples/runtime/run_train_and_inference.py
index 648c52d..c11ea08 100644
--- a/examples/runtime/run_train_and_inference.py
+++ b/examples/runtime/run_train_and_inference.py
@@ -11,20 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Runs a sample train [currently a stub], and exports the trained model under a new ID.
+"""Runs a sample train [currently a stub], and exports the trained model under a new ID,
+then hits it with an inference request.
 """
 
 # Standard
-from pathlib import Path
 from time import sleep
 import os
-import sys
 
 # Third Party
 from common import (
     DEMO_MODEL_ID,
     MODELS_DIR,
-    TRAINING_DATA_DIR,
+    NEW_MODEL_ID,
     TRAINING_IMG_DIR,
     TRAINING_LABELS_FILE,
 )
@@ -73,9 +72,6 @@
 import grpc
 import numpy as np
 
-# First Party
-from caikit.interfaces.vision import data_model as caikit_dm
-
 ### build the training request
 
 # Training params; the only thing that changes between newer/older versions of caikit is that
@@ -98,7 +94,7 @@ def get_train_request():
         )
     }
     return odt_request_pb2.ObjectDetectionTaskTransformersObjectDetectorTrainRequest(
-        model_name="new_model", **train_param_dict
+        model_name=NEW_MODEL_ID, **train_param_dict
     )
 
 
@@ -157,16 +153,20 @@ def get_inference_request():
     print(response)
     print("*" * 30)
 
-    sleep(5)
+    # The train command is basically an alias to save here - by default, if lazy_load_local_models
+    # is True in the module config, we sync new models from the model dir every
+    # lazy_load_poll_period_seconds, which by default is 10 seconds. So 15 should be plenty of time
+    # for the new_model to export and load.
+    sleep(15)
 
     inference_stub = computervisionservice_pb2_grpc.ComputerVisionServiceStub(
         channel=channel
    )
-    # NOTE: This just hits the old model, since normally the loading would be handled by something
-    # like kserve/model mesh. But it might be more helpful to show how to manually load the model
-    # and hit it here, just for reference.
+    # NOTE: if this fails, make sure lazy_load_local_models is true in the config.
+    # If needed, increase the log.level in the runtime config; setting the level to
+    # debug2 or higher should show polling of the local model dir, load calls, etc.
     response = inference_stub.ObjectDetectionTaskPredict(
-        get_inference_request(), metadata=[("mm-model-id", DEMO_MODEL_ID)], timeout=1
+        get_inference_request(), metadata=[("mm-model-id", NEW_MODEL_ID)], timeout=1
     )
     print("*" * 30)
     print("RESPONSE from INFERENCE gRPC\n")
diff --git a/runtime_config.yaml b/runtime_config.yaml
index 2cfa699..e298455 100644
--- a/runtime_config.yaml
+++ b/runtime_config.yaml
@@ -6,7 +6,7 @@ jvm_options: []
 runtime:
   library: caikit_computer_vision
   local_models_dir: models
-  lazy_load_local_models": True
+  lazy_load_local_models: True
   grpc:
     enabled: True
   http:
@@ -21,6 +21,8 @@ runtime:
 
 log:
   formatter: pretty # optional: log formatter is set to json by default
+  # By default the log level is info - you can override it as shown below.
+  # level: debug2
 
 model_management:
   finders: