Update runtime demo to hit new model
Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
alex-jw-brooks committed Feb 20, 2024
1 parent b52c510 commit 8e2c7f7
Showing 3 changed files with 17 additions and 15 deletions.
4 changes: 2 additions & 2 deletions examples/runtime/README.md
@@ -34,9 +34,9 @@ python3 run_train_and_inference.py

This will create a gRPC client to call `.train()` on the object detector module, which is currently a stub. Note that the training parameters are not actually used here; they are present only to illustrate how args are passed. The result of the train job is simply the same model provided as the base, exported under a new model ID, `new_model`.
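The core of that train call looks roughly like the sketch below; the request type is taken from `run_train_and_inference.py` (diff below), while `training_stub` and its method name are hypothetical placeholders for the demo's generated training service:

```python
# Sketch only: odt_request_pb2 is generated by the demo; training_stub and its
# method name are hypothetical placeholders (generated names vary by version).
train_request = odt_request_pb2.ObjectDetectionTaskTransformersObjectDetectorTrainRequest(
    model_name="new_model",  # ID the trained model is exported under
    **train_param_dict,      # illustrative params; not actually used by the stub
)
train_response = training_stub.ObjectDetectionTaskTransformersObjectDetectorTrain(
    train_request
)
```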

Then, an inference request is sent to the _original_ model, which calls `.run()` to produce the prediction that is logged by the script.
Then, an inference request is sent to the newly loaded model, which calls `.run()` to produce the prediction that is logged by the script.
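For reference, here is a minimal sketch of that inference call, assuming the demo's generated gRPC stubs and the `get_inference_request()` helper from `run_train_and_inference.py` (shown in the diff below); it only succeeds once the new model has loaded (see the NOTE below):

```python
# Sketch only: computervisionservice_pb2_grpc and get_inference_request come
# from the demo's generated/companion modules, not from this snippet.
import grpc

from common import NEW_MODEL_ID, RUNTIME_PORT

channel = grpc.insecure_channel(f"localhost:{RUNTIME_PORT}")
inference_stub = computervisionservice_pb2_grpc.ComputerVisionServiceStub(
    channel=channel
)
# The mm-model-id metadata header routes the request to the newly trained model
response = inference_stub.ObjectDetectionTaskPredict(
    get_inference_request(), metadata=[("mm-model-id", NEW_MODEL_ID)], timeout=1
)
print(response)
```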

NOTE: it would be a good idea to show how to load and run the new model in this demo, but since loading is generally handled by kserve/model mesh, for now we just hit the old one.
NOTE: in order to hit the new model, we need to set `runtime.lazy_load_local_models=True` in the runtime config; by default this syncs the local model dir with the in-memory runtime (i.e., loads models that have been added and unloads models that have been deleted) every 10 seconds. If inference fails, try setting the log level to `debug2` and ensure that you see the runtime polling for new models periodically.
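As a sketch, those settings could be applied as config overrides; `caikit.configure` and the exact key names here are assumptions based on the NOTE above, so double-check them against your caikit version:

```python
# Sketch only: enable lazy loading so the runtime polls the local model dir.
# The entrypoint and key names are assumptions; verify against caikit's docs.
import caikit

caikit.configure(
    config_dict={
        "runtime": {
            "lazy_load_local_models": True,       # sync local model dir
            "lazy_load_poll_period_seconds": 10,  # default poll interval
        },
        "log": {"level": "debug2"},  # optional: surfaces the polling logs
    }
)
```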

## A Deeper Look

2 changes: 2 additions & 0 deletions examples/runtime/common.py
@@ -42,5 +42,7 @@

# Example model ID to use when creating a small model at model dir init time
DEMO_MODEL_ID = "my_model"
# New model that we are going to train and run an inference call on
NEW_MODEL_ID = "new_model"

RUNTIME_PORT = 8085
26 changes: 13 additions & 13 deletions examples/runtime/run_train_and_inference.py
@@ -11,20 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a sample train [currently a stub], and exports the trained model under a new ID.
"""Runs a sample train [currently a stub], and exports the trained model under a new ID,
then hits it with an inference request.
"""

# Standard
from pathlib import Path
from time import sleep
import os
import sys

# Third Party
from common import (
DEMO_MODEL_ID,
MODELS_DIR,
TRAINING_DATA_DIR,
NEW_MODEL_ID,
TRAINING_IMG_DIR,
TRAINING_LABELS_FILE,
)
@@ -73,9 +72,6 @@
import grpc
import numpy as np

# First Party
from caikit.interfaces.vision import data_model as caikit_dm


### build the training request
# Training params; the only thing that changes between newer/older versions of caikit is that
@@ -98,7 +94,7 @@ def get_train_request():
)
}
return odt_request_pb2.ObjectDetectionTaskTransformersObjectDetectorTrainRequest(
model_name="new_model", **train_param_dict
model_name=NEW_MODEL_ID, **train_param_dict
)


@@ -157,16 +153,20 @@ def get_inference_request():
print(response)
print("*" * 30)

sleep(5)
# The train command is basically an alias to save here - by default, if lazy_load_local_models
# is True in the runtime config, we sync new models from the model dir every
# lazy_load_poll_period_seconds, which by default is 10 seconds. So 15 should be plenty of time
# for the new_model to export and load.
sleep(15)

inference_stub = computervisionservice_pb2_grpc.ComputerVisionServiceStub(
channel=channel
)
# NOTE: This just hits the old model, since normally the loading would be handled by something
# like kserve/model mesh. But it might be more helpful to show how to manually load the model
# and hit it here, just for reference.
# NOTE: if this fails, make sure lazy_load_local_models is true in the config.
# If needed, increase the log.level in the runtime config; setting level to
# debug2 or higher should show polling of the local model dir, load calls, etc.
response = inference_stub.ObjectDetectionTaskPredict(
get_inference_request(), metadata=[("mm-model-id", DEMO_MODEL_ID)], timeout=1
get_inference_request(), metadata=[("mm-model-id", NEW_MODEL_ID)], timeout=1
)
print("*" * 30)
print("RESPONSE from INFERENCE gRPC\n")
