diff --git a/src/infer_request.cc b/src/infer_request.cc
index 67860dd10..e31cd3e5a 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1211,7 +1211,12 @@ InferenceRequest::Normalize()
         // FIXME: Temporarily skips byte size checks for GPU tensors. See
         // DLIS-6820.
       } else {
-        const std::vector<int64_t>& input_dims = input.ShapeWithBatchDim();
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        const std::vector<int64_t>& input_dims =
+            input.IsShapeTensor() ? input.OriginalShape()
+                                  : input.ShapeWithBatchDim();
         int64_t expected_byte_size = INT_MAX;
         expected_byte_size =
             triton::common::GetByteSize(data_type, input_dims);
diff --git a/src/infer_request.h b/src/infer_request.h
index 0d7283682..6d231309d 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -122,14 +122,7 @@ class InferenceRequest {
     // into batch + shape.
     const std::vector<int64_t>& ShapeWithBatchDim() const
     {
-      if (tensor_type_ == TensorType::SHAPE_TENSOR) {
-        // Shape tensor with dynamic batching does not introduce a new
-        // dimension to the tensor but adds an additional value to the 1-D
-        // array.
-        return original_shape_;
-      } else {
        return shape_with_batch_dim_;
-      }
     }
     std::vector<int64_t>* MutableShapeWithBatchDim()
     {