diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index aff73bc18..559f60889 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -30,7 +30,7 @@ from tritonclient.grpc import service_pb2 from tritonclient.utils import * -from ._utils import raise_error +from ._utils import num_elements, raise_error class InferInput: @@ -54,6 +54,7 @@ def __init__(self, name, shape, datatype): self._input.ClearField("shape") self._input.shape.extend(shape) self._input.datatype = datatype + self._data_shape = None self._raw_content = None def name(self): @@ -86,6 +87,36 @@ def shape(self): """ return self._input.shape + def validate_data(self): + """Validate input has data and input shape matches input data. + + Returns + ------- + None + """ + # Input must set only one of the following fields: '_raw_content', + # 'shared_memory_region' in '_input.parameters' + cnt = 0 + cnt += self._raw_content is not None + cnt += "shared_memory_region" in self._input.parameters + if cnt != 1: + return + + # Skip due to trt reformat free tensor + if "shared_memory_region" in self._input.parameters: + return + + # Not using shared memory + expected_num_elements = num_elements(self._input.shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "input '{}' got unexpected elements count {}, expected {}".format( + self._input.name, data_num_elements, expected_num_elements + ) + ) + return + def set_shape(self, shape): """Set the shape of input. 
@@ -171,6 +202,7 @@ def set_data_from_numpy(self, input_tensor): self._raw_content = b"" else: self._raw_content = input_tensor.tobytes() + self._data_shape = input_tensor.shape return self def set_shared_memory(self, region_name, byte_size, offset=0): @@ -193,6 +225,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): """ self._input.ClearField("contents") self._raw_content = None + self._data_shape = None self._input.parameters["shared_memory_region"].string_param = region_name self._input.parameters["shared_memory_byte_size"].int64_param = byte_size diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index dae6d71f8..bbc13db4b 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -96,6 +96,7 @@ def _get_inference_request( if request_id != "": request.id = request_id for infer_input in inputs: + infer_input.validate_data() request.inputs.extend([infer_input._get_tensor()]) if infer_input._get_content() is not None: request.raw_input_contents.extend([infer_input._get_content()]) diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index 85beabd2f..f237edcfa 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -28,6 +28,7 @@ import numpy as np from tritonclient.utils import ( np_to_triton_dtype, + num_elements, raise_error, serialize_bf16_tensor, serialize_byte_tensor, @@ -55,6 +56,7 @@ def __init__(self, name, shape, datatype): self._datatype = datatype self._parameters = {} self._data = None + self._data_shape = None self._raw_data = None def name(self): @@ -87,6 +89,37 @@ def shape(self): """ return self._shape + def validate_data(self): + """Validate input has data and input shape matches input data. + + Returns + ------- + None + """ + # Input must set only one of the following fields: 'data', 'binary_data_size', + # 'shared_memory_region' in 'parameters' + cnt = 0 + cnt += self._data is not None + cnt += "binary_data_size" in self._parameters + cnt += "shared_memory_region" in self._parameters + if cnt != 1: + return + + # Skip due to trt reformat free tensor + if "shared_memory_region" in self._parameters: + return + + # Not using shared memory + expected_num_elements = num_elements(self._shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "input '{}' got unexpected elements count {}, expected {}".format( + self._name, data_num_elements, expected_num_elements + ) + ) + return + def set_shape(self, shape): """Set the shape of input. 
@@ -211,6 +244,7 @@ def set_data_from_numpy(self, input_tensor, binary_data=True): else: self._raw_data = input_tensor.tobytes() self._parameters["binary_data_size"] = len(self._raw_data) + self._data_shape = input_tensor.shape return self def set_shared_memory(self, region_name, byte_size, offset=0): @@ -232,6 +266,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): The updated input """ self._data = None + self._data_shape = None self._raw_data = None self._parameters.pop("binary_data_size", None) diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 0f4456c9a..d4ffc5b70 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -106,7 +106,11 @@ def _get_inference_request( if timeout is not None: parameters["timeout"] = timeout - infer_request["inputs"] = [this_input._get_tensor() for this_input in inputs] + infer_request["inputs"] = [] + for infer_input in inputs: + infer_input.validate_data() + infer_request["inputs"].append(infer_input._get_tensor()) + if outputs: infer_request["outputs"] = [ this_output._get_tensor() for this_output in outputs diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index 7f3079c66..304bbfec9 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -40,6 +40,27 @@ def raise_error(msg): raise InferenceServerException(msg=msg) from None +def num_elements(shape): + """ + Calculate the number of elements in an array given its shape. + + Parameters + ---------- + shape : list or tuple + Shape of the array. + + Returns + ------- + int + Number of elements in the array. + """ + + num_elements = 1 + for dim in shape: + num_elements *= dim + return num_elements + + def serialized_byte_size(tensor_value): """ Get the underlying number of bytes for a numpy ndarray.