diff --git a/src/infer_request.cc b/src/infer_request.cc
index 67860dd10..e31cd3e5a 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1211,7 +1211,12 @@ InferenceRequest::Normalize()
         // FIXME: Temporarily skips byte size checks for GPU tensors. See
         // DLIS-6820.
       } else {
-        const std::vector<int64_t>& input_dims = input.ShapeWithBatchDim();
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        const std::vector<int64_t>& input_dims =
+            input.IsShapeTensor() ? input.OriginalShape()
+                                  : input.ShapeWithBatchDim();
         int64_t expected_byte_size = INT_MAX;
         expected_byte_size =
             triton::common::GetByteSize(data_type, input_dims);
diff --git a/src/infer_request.h b/src/infer_request.h
index 0d7283682..6d231309d 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -122,14 +122,7 @@ class InferenceRequest {
     // into batch + shape.
     const std::vector<int64_t>& ShapeWithBatchDim() const
     {
-      if (tensor_type_ == TensorType::SHAPE_TENSOR) {
-        // Shape tensor with dynamic batching does not introduce a new
-        // dimension to the tensor but adds an additional value to the 1-D
-        // array.
-        return original_shape_;
-      } else {
        return shape_with_batch_dim_;
-      }
     }
     std::vector<int64_t>* MutableShapeWithBatchDim()
     {