diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
index 49ac51dd3a..60004b9008 100755
--- a/qa/L0_input_validation/input_validation_test.py
+++ b/qa/L0_input_validation/input_validation_test.py
@@ -35,6 +35,7 @@
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
 from tritonclient.utils import InferenceServerException, np_to_triton_dtype
+import tritonclient.utils.shared_memory as shm
 
 
 class InputValTest(unittest.TestCase):
@@ -211,6 +212,77 @@ def get_input_array(input_size, np_dtype):
                 err_str,
             )
 
+    def test_wrong_input_shape_tensor_size(self):
+        def inference_helper(model_name, batch_size=1):
+            triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            if batch_size > 1:
+                dummy_input_data = np.random.rand(batch_size, 32, 32).astype(np.float32)
+            else:
+                dummy_input_data = np.random.rand(32, 32).astype(np.float32)
+            shape_tensor_data = np.asarray([4, 4], dtype=np.int32)
+
+            # Pass incorrect input byte size data for shape tensor.
+            # Use shared memory to bypass the shape check in client library.
+            input_byte_size = (shape_tensor_data.size - 1) * np.dtype(np.int32).itemsize
+
+            input_shm_handle = shm.create_shared_memory_region(
+                "INPUT0_SHM",
+                "/INPUT0_SHM",
+                input_byte_size,
+            )
+            shm.set_shared_memory_region(
+                input_shm_handle,
+                [
+                    shape_tensor_data,
+                ],
+            )
+            triton_client.register_system_shared_memory(
+                "INPUT0_SHM",
+                "/INPUT0_SHM",
+                input_byte_size,
+            )
+
+            inputs = [
+                tritongrpcclient.InferInput(
+                    "DUMMY_INPUT0",
+                    dummy_input_data.shape,
+                    np_to_triton_dtype(np.float32),
+                ),
+                tritongrpcclient.InferInput(
+                    "INPUT0",
+                    shape_tensor_data.shape,
+                    np_to_triton_dtype(np.int32),
+                ),
+            ]
+            inputs[0].set_data_from_numpy(dummy_input_data)
+            inputs[1].set_shared_memory("INPUT0_SHM", input_byte_size)
+
+            outputs = [
+                tritongrpcclient.InferRequestedOutput("DUMMY_OUTPUT0"),
+                tritongrpcclient.InferRequestedOutput("OUTPUT0"),
+            ]
+
+            try:
+                # Perform inference
+                with self.assertRaises(InferenceServerException) as e:
+                    triton_client.infer(
+                        model_name=model_name, inputs=inputs, outputs=outputs
+                    )
+                err_str = str(e.exception)
+                correct_input_byte_size = (
+                    shape_tensor_data.size * np.dtype(np.int32).itemsize
+                )
+                self.assertIn(
+                    f"input byte size mismatch for input 'INPUT0' for model '{model_name}'. Expected {correct_input_byte_size}, got {input_byte_size}",
+                    err_str,
+                )
+            finally:
+                shm.destroy_shared_memory_region(input_shm_handle)
+                triton_client.unregister_system_shared_memory("INPUT0_SHM")
+
+        inference_helper(model_name="plan_nobatch_zero_1_float32_int32")
+        inference_helper(model_name="plan_zero_1_float32_int32", batch_size=8)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
index be7054895c..fc70abd969 100755
--- a/qa/L0_input_validation/test.sh
+++ b/qa/L0_input_validation/test.sh
@@ -123,6 +123,8 @@ dynamic_batching {
 EOL
 
 cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
+cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
+cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.
 
 SERVER_ARGS="--model-repository=`pwd`/models"
 run_server
diff --git a/qa/L0_trt_reformat_free/test.sh b/qa/L0_trt_reformat_free/test.sh
index ebdc83a5b8..2daf2f0648 100755
--- a/qa/L0_trt_reformat_free/test.sh
+++ b/qa/L0_trt_reformat_free/test.sh
@@ -75,7 +75,7 @@ if [ $? -ne 0 ]; then
     cat $CLIENT_LOG
     RET=1
 else
-    check_test_results $TEST_RESULT_FILE 4
+    check_test_results $TEST_RESULT_FILE 6
     if [ $? -ne 0 ]; then
         cat $CLIENT_LOG
         echo -e "\n***\n*** Test Result Verification Failed\n***"
diff --git a/qa/L0_trt_reformat_free/trt_reformat_free_test.py b/qa/L0_trt_reformat_free/trt_reformat_free_test.py
index 0c91b2b0f3..6c8c2afa6d 100755
--- a/qa/L0_trt_reformat_free/trt_reformat_free_test.py
+++ b/qa/L0_trt_reformat_free/trt_reformat_free_test.py
@@ -37,6 +37,7 @@
 import test_util as tu
 import tritonclient.http as tritonhttpclient
 import tritonclient.utils.shared_memory as shm
+from tritonclient.utils import InferenceServerException
 
 
 def div_up(a, b):
@@ -141,6 +142,41 @@ def test_nobatch_chw2_input(self):
             "OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
         )
 
+    def test_wrong_nobatch_chw2_input(self):
+        model_name = "plan_nobatch_CHW2_LINEAR_float16_float16_float16"
+        input_np = np.arange(26, dtype=np.float16).reshape((13, 2, 1))
+
+        # Use shared memory to bypass the shape check in client library, because
+        # for non-linear format tensor, the data buffer is padded and thus the
+        # data byte size may not match what is calculated from tensor shape
+        inputs = []
+        inputs.append(tritonhttpclient.InferInput("INPUT0", [13, 2, 1], "FP16"))
+        # Send the original size input instead of the reformatted size input.
+        self.add_reformat_free_data_as_shared_memory("input0", inputs[-1], input_np)
+
+        inputs.append(tritonhttpclient.InferInput("INPUT1", [13, 2, 1], "FP16"))
+        # Send the original size input instead of the reformatted size input.
+        self.add_reformat_free_data_as_shared_memory("input1", inputs[-1], input_np)
+
+        outputs = []
+        outputs.append(
+            tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
+        )
+        outputs.append(
+            tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
+        )
+
+        with self.assertRaises(InferenceServerException) as e:
+            results = self.triton_client.infer(
+                model_name=model_name, inputs=inputs, outputs=outputs
+            )
+
+        err_str = str(e.exception)
+        self.assertIn(
+            "input byte size mismatch for input 'INPUT0' for model 'plan_nobatch_CHW2_LINEAR_float16_float16_float16'. Expected 56, got 52",
+            err_str,
+        )
+
     def test_chw2_input(self):
         model_name = "plan_CHW2_LINEAR_float16_float16_float16"
         for bs in [1, 8]:
@@ -186,6 +222,50 @@ def test_chw2_input(self):
                 "OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
             )
 
+    def test_wrong_chw2_input(self):
+        model_name = "plan_CHW2_LINEAR_float16_float16_float16"
+        for bs in [1, 8]:
+            input_np = np.arange(26 * bs, dtype=np.float16).reshape((bs, 13, 2, 1))
+
+            # Use shared memory to bypass the shape check in client library,
+            # because for non-linear format tensor, the data buffer is padded
+            # and thus the data byte size may not match what is calculated from
+            # tensor shape
+            inputs = []
+            inputs.append(tritonhttpclient.InferInput("INPUT0", [bs, 13, 2, 1], "FP16"))
+            # Send the original size input instead of the reformatted size input.
+            self.add_reformat_free_data_as_shared_memory(
+                "input0" + str(bs), inputs[-1], input_np
+            )
+
+            inputs.append(tritonhttpclient.InferInput("INPUT1", [bs, 13, 2, 1], "FP16"))
+            # Send the original size input instead of the reformatted size input.
+            self.add_reformat_free_data_as_shared_memory(
+                "input1" + str(bs), inputs[-1], input_np
+            )
+
+            outputs = []
+            outputs.append(
+                tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
+            )
+            outputs.append(
+                tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
+            )
+
+            with self.assertRaises(InferenceServerException) as e:
+                results = self.triton_client.infer(
+                    model_name=model_name, inputs=inputs, outputs=outputs
+                )
+            err_str = str(e.exception)
+            # reformatted input size - (bs, 14, 2, 1) * size(float16)
+            expected_size = bs * 28 * 2
+            # original input size - (bs, 13, 2, 1) * size(float16)
+            received_size = bs * 26 * 2
+            self.assertIn(
+                f"input byte size mismatch for input 'INPUT0' for model 'plan_CHW2_LINEAR_float16_float16_float16'. Expected {expected_size}, got {received_size}",
+                err_str,
+            )
+
     def test_nobatch_chw32_input(self):
         model_name = "plan_nobatch_CHW32_LINEAR_float32_float32_float32"
         input_np = np.arange(26, dtype=np.float32).reshape((13, 2, 1))