Add input byte size validation tests for shape tensors and reformat-free tensors
pskiran1 committed Jul 21, 2024
1 parent 3183f8f commit dcbc28c
Showing 4 changed files with 155 additions and 1 deletion.
72 changes: 72 additions & 0 deletions qa/L0_input_validation/input_validation_test.py
@@ -35,6 +35,7 @@
import numpy as np
import tritonclient.grpc as tritongrpcclient
from tritonclient.utils import InferenceServerException, np_to_triton_dtype
import tritonclient.utils.shared_memory as shm


class InputValTest(unittest.TestCase):
@@ -211,6 +212,77 @@ def get_input_array(input_size, np_dtype):
err_str,
)

def test_wrong_input_shape_tensor_size(self):
def inference_helper(model_name, batch_size=1):
triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
if batch_size > 1:
dummy_input_data = np.random.rand(batch_size, 32, 32).astype(np.float32)
else:
dummy_input_data = np.random.rand(32, 32).astype(np.float32)
shape_tensor_data = np.asarray([4, 4], dtype=np.int32)

# Pass an incorrect input byte size for the shape tensor.
# Use shared memory to bypass the shape check in the client library.
input_byte_size = (shape_tensor_data.size - 1) * np.dtype(np.int32).itemsize
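# The two int32 values occupy 8 bytes; registering one element short
# (4 bytes) should make the server report a byte size mismatch.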

input_shm_handle = shm.create_shared_memory_region(
"INPUT0_SHM",
"/INPUT0_SHM",
input_byte_size,
)
shm.set_shared_memory_region(
input_shm_handle,
[
shape_tensor_data,
],
)
triton_client.register_system_shared_memory(
"INPUT0_SHM",
"/INPUT0_SHM",
input_byte_size,
)

inputs = [
tritongrpcclient.InferInput(
"DUMMY_INPUT0",
dummy_input_data.shape,
np_to_triton_dtype(np.float32),
),
tritongrpcclient.InferInput(
"INPUT0",
shape_tensor_data.shape,
np_to_triton_dtype(np.int32),
),
]
inputs[0].set_data_from_numpy(dummy_input_data)
inputs[1].set_shared_memory("INPUT0_SHM", input_byte_size)

outputs = [
tritongrpcclient.InferRequestedOutput("DUMMY_OUTPUT0"),
tritongrpcclient.InferRequestedOutput("OUTPUT0"),
]

try:
# Perform inference
with self.assertRaises(InferenceServerException) as e:
triton_client.infer(
model_name=model_name, inputs=inputs, outputs=outputs
)
err_str = str(e.exception)
correct_input_byte_size = (
shape_tensor_data.size * np.dtype(np.int32).itemsize
)
self.assertIn(
f"input byte size mismatch for input 'INPUT0' for model '{model_name}'. Expected {correct_input_byte_size}, got {input_byte_size}",
err_str,
)
finally:
shm.destroy_shared_memory_region(input_shm_handle)
triton_client.unregister_system_shared_memory("INPUT0_SHM")

inference_helper(model_name="plan_nobatch_zero_1_float32_int32")
inference_helper(model_name="plan_zero_1_float32_int32", batch_size=8)


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions qa/L0_input_validation/test.sh
@@ -123,6 +123,8 @@ dynamic_batching {
EOL

cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
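# Shape tensor models exercised by the new test_wrong_input_shape_tensor_size case.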
cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.

SERVER_ARGS="--model-repository=`pwd`/models"
run_server
2 changes: 1 addition & 1 deletion qa/L0_trt_reformat_free/test.sh
@@ -75,7 +75,7 @@ if [ $? -ne 0 ]; then
cat $CLIENT_LOG
RET=1
else
check_test_results $TEST_RESULT_FILE 4
check_test_results $TEST_RESULT_FILE 6
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
80 changes: 80 additions & 0 deletions qa/L0_trt_reformat_free/trt_reformat_free_test.py
@@ -37,6 +37,7 @@
import test_util as tu
import tritonclient.http as tritonhttpclient
import tritonclient.utils.shared_memory as shm
from tritonclient.utils import InferenceServerException


def div_up(a, b):
@@ -141,6 +142,41 @@ def test_nobatch_chw2_input(self):
"OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
)

def test_wrong_nobatch_chw2_input(self):
model_name = "plan_nobatch_CHW2_LINEAR_float16_float16_float16"
input_np = np.arange(26, dtype=np.float16).reshape((13, 2, 1))

# Use shared memory to bypass the shape check in the client library, because
# for a non-linear format tensor the data buffer is padded, so the data
# byte size may not match what is calculated from the tensor shape.
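# For CHW2 the channel dimension (13) is padded up to 14, so the server
# expects 14 * 2 * 1 * 2 = 56 bytes while this linear buffer holds only
# 13 * 2 * 1 * 2 = 52 bytes.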
inputs = []
inputs.append(tritonhttpclient.InferInput("INPUT0", [13, 2, 1], "FP16"))
# Send the original size input instead of the reformatted size input.
self.add_reformat_free_data_as_shared_memory("input0", inputs[-1], input_np)

inputs.append(tritonhttpclient.InferInput("INPUT1", [13, 2, 1], "FP16"))
# Send the original size input instead of the reformatted size input.
self.add_reformat_free_data_as_shared_memory("input1", inputs[-1], input_np)

outputs = []
outputs.append(
tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
)
outputs.append(
tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
)

with self.assertRaises(InferenceServerException) as e:
self.triton_client.infer(
model_name=model_name, inputs=inputs, outputs=outputs
)

err_str = str(e.exception)
self.assertIn(
"input byte size mismatch for input 'INPUT0' for model 'plan_nobatch_CHW2_LINEAR_float16_float16_float16'. Expected 56, got 52",
err_str,
)

def test_chw2_input(self):
model_name = "plan_CHW2_LINEAR_float16_float16_float16"
for bs in [1, 8]:
@@ -186,6 +222,50 @@ def test_chw2_input(self):
"OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
)

def test_wrong_chw2_input(self):
model_name = "plan_CHW2_LINEAR_float16_float16_float16"
for bs in [1, 8]:
input_np = np.arange(26 * bs, dtype=np.float16).reshape((bs, 13, 2, 1))

# Use shared memory to bypass the shape check in the client library,
# because for a non-linear format tensor the data buffer is padded,
# so the data byte size may not match what is calculated from the
# tensor shape.
inputs = []
inputs.append(tritonhttpclient.InferInput("INPUT0", [bs, 13, 2, 1], "FP16"))
# Send the original size input instead of the reformatted size input.
self.add_reformat_free_data_as_shared_memory(
"input0" + str(bs), inputs[-1], input_np
)

inputs.append(tritonhttpclient.InferInput("INPUT1", [bs, 13, 2, 1], "FP16"))
# Send the original size input instead of the reformatted size input.
self.add_reformat_free_data_as_shared_memory(
"input1" + str(bs), inputs[-1], input_np
)

outputs = []
outputs.append(
tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
)
outputs.append(
tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
)

with self.assertRaises(InferenceServerException) as e:
self.triton_client.infer(
model_name=model_name, inputs=inputs, outputs=outputs
)
err_str = str(e.exception)
# reformatted input size - (bs, 14, 2, 1) * size(float16)
expected_size = bs * 28 * 2
# original input size - (bs, 13, 2, 1) * size(float16)
received_size = bs * 26 * 2
self.assertIn(
f"input byte size mismatch for input 'INPUT0' for model 'plan_CHW2_LINEAR_float16_float16_float16'. Expected {expected_size}, got {received_size}",
err_str,
)

def test_nobatch_chw32_input(self):
model_name = "plan_nobatch_CHW32_LINEAR_float32_float32_float32"
input_np = np.arange(26, dtype=np.float32).reshape((13, 2, 1))

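For reference, the 56-versus-52 byte figures asserted by the reformat-free tests come from CHW2 padding of the channel dimension (13 padded up to 14, as the in-test comments note). Below is a minimal sketch of that arithmetic, reusing the rounding idea behind the test file's div_up helper; the helper body here is an assumption, since only its name appears in this diff.

import numpy as np

def div_up(a, b):
    # Assumed behavior: ceil-divide a by b (e.g. div_up(13, 2) == 7).
    return (a + b - 1) // b

c, h, w = 13, 2, 1
itemsize = np.dtype(np.float16).itemsize  # 2 bytes per element

# Bytes actually sent: the linear (unpadded) layout of a (13, 2, 1) FP16 tensor.
received = c * h * w * itemsize                  # 52

# Bytes the server expects for CHW2: the channel dim padded to a multiple of 2.
expected = div_up(c, 2) * 2 * h * w * itemsize   # 14 * 2 * 1 * 2 = 56

print(expected, received)  # 56 52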