
Commit

resolving merge conflict
KrishnanPrash committed Aug 18, 2024
2 parents dd10a84 + 9598a80 commit f7067f5
Showing 16 changed files with 628 additions and 62 deletions.
14 changes: 6 additions & 8 deletions CMakeLists.txt
@@ -221,15 +221,13 @@ if(NOT TRITON_CORE_HEADERS_ONLY)
# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos
set (LIB_DIR "lib")
# /etc/os-release does not exist on Windows
if(EXISTS "/etc/os-release")
file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
message(STATUS "Distro Name: ${DISTRO}")
if(DISTRO MATCHES "CentOS.*")
if(LINUX)
file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE")
if(${DISTRO_ID_LIKE} MATCHES "rhel|centos")
set (LIB_DIR "lib64")
endif()
endif()
endif(${DISTRO_ID_LIKE} MATCHES "rhel|centos")
endif(LINUX)
set(TRITON_CORE_HEADERS_ONLY OFF)

# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between Triton shared library components and
78 changes: 75 additions & 3 deletions include/triton/core/tritonserver.h
@@ -64,6 +64,7 @@ struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
struct TRITONSERVER_MetricArgs;

///
/// TRITONSERVER API Version
Expand Down Expand Up @@ -91,7 +92,7 @@ struct TRITONSERVER_MetricFamily;
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 33
#define TRITONSERVER_API_VERSION_MINOR 34

/// Get the TRITONBACKEND API version supported by the Triton shared
/// library. This value can be compared against the
@@ -2615,7 +2616,8 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerInferAsync(
///
typedef enum TRITONSERVER_metrickind_enum {
TRITONSERVER_METRIC_KIND_COUNTER,
TRITONSERVER_METRIC_KIND_GAUGE
TRITONSERVER_METRIC_KIND_GAUGE,
TRITONSERVER_METRIC_KIND_HISTOGRAM
} TRITONSERVER_MetricKind;

/// Create a new metric family object. The caller takes ownership of the
@@ -2644,6 +2646,44 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily* family);

/// Get the TRITONSERVER_MetricKind of the metric family.
///
/// \param family The metric family object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_GetMetricFamilyKind(
struct TRITONSERVER_MetricFamily* family, TRITONSERVER_MetricKind* kind);
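As a minimal sketch (editor's illustration, not part of this diff), a caller might query a family's kind before deciding which update call applies; `family` is assumed to come from an earlier TRITONSERVER_MetricFamilyNew call and error handling is elided:

// Sketch, assuming #include "triton/core/tritonserver.h" and an existing
// `family` handle created with TRITONSERVER_MetricFamilyNew.
TRITONSERVER_MetricKind kind;
TRITONSERVER_Error* err = TRITONSERVER_GetMetricFamilyKind(family, &kind);
if (err == nullptr && kind == TRITONSERVER_METRIC_KIND_HISTOGRAM) {
  // Metrics in this family support TRITONSERVER_MetricObserve.
}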

/// Create a new metric args object. The caller takes ownership of the
/// TRITONSERVER_MetricArgs object and must call TRITONSERVER_MetricArgsDelete
/// to release the object.
///
/// \param args Returns the new metric args object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsNew(
struct TRITONSERVER_MetricArgs** args);

/// Set metric args with histogram metric parameter.
///
/// \param args The metric args object to set.
/// \param buckets The array of bucket boundaries for the expected range of
/// observed values.
/// \param buckets_count The number of bucket boundaries.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricArgsSetHistogram(
struct TRITONSERVER_MetricArgs* args, const double* buckets,
const uint64_t buckets_count);

/// Delete a metric args object.
///
/// \param args The metric args object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsDelete(
struct TRITONSERVER_MetricArgs* args);
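Taken together, these three calls form the MetricArgs lifecycle. A hedged sketch (editor's illustration; the bucket boundaries are hypothetical values for a latency-style histogram, assumed to be cumulative upper bounds in increasing order, Prometheus-style):

// Sketch: create args, attach histogram bucket boundaries, and delete the
// args once a metric has been created from them.
TRITONSERVER_MetricArgs* args = nullptr;
TRITONSERVER_Error* err = TRITONSERVER_MetricArgsNew(&args);
const double buckets[] = {0.1, 0.5, 1.0, 2.5, 5.0};  // hypothetical bounds
if (err == nullptr) {
  err = TRITONSERVER_MetricArgsSetHistogram(
      args, buckets, sizeof(buckets) / sizeof(buckets[0]));
}
// ... pass `args` to TRITONSERVER_MetricNewWithArgs (sketched below) ...
TRITONSERVER_MetricArgsDelete(args);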

/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
@@ -2661,6 +2701,28 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNew(
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count);

/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
/// responsible for ownership of the labels passed in.
/// Each label can be deleted immediately after creating the metric with
/// TRITONSERVER_ParameterDelete if not re-using the labels.
/// Metric args can be deleted immediately after creating the metric with
/// TRITONSERVER_MetricArgsDelete if not re-using the metric args.
///
/// \param metric Returns the new metric object.
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// \param args Metric args that store additional arguments to construct
/// particular metric types, e.g. histogram.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNewWithArgs(
struct TRITONSERVER_Metric** metric,
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count,
const struct TRITONSERVER_MetricArgs* args);
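Continuing the sketch above, a histogram metric combines a HISTOGRAM-kind family with a MetricArgs object; the family name and description below are hypothetical, and passing no labels (nullptr with count 0) is an assumption modeled on TRITONSERVER_MetricNew:

// Sketch: create a histogram family, then a metric carrying the bucket
// args from the previous example.
TRITONSERVER_MetricFamily* family = nullptr;
TRITONSERVER_MetricFamilyNew(
    &family, TRITONSERVER_METRIC_KIND_HISTOGRAM,
    "example_request_latency_seconds" /* hypothetical name */,
    "Example request latency histogram" /* hypothetical description */);
TRITONSERVER_Metric* metric = nullptr;
TRITONSERVER_MetricNewWithArgs(
    &metric, family, nullptr /* labels */, 0 /* label_count */, args);
TRITONSERVER_MetricArgsDelete(args);  // allowed immediately if not re-used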

/// Delete a metric object.
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
@@ -2705,7 +2767,17 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricSet(
struct TRITONSERVER_Metric* metric, double value);

/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
/// Sample an observation and count it to the appropriate bucket of a metric.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_HISTOGRAM and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to update.
/// \param value The value to record as an observation for the metric.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricObserve(
struct TRITONSERVER_Metric* metric, double value);
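With the metric from the previous sketch, recording an observation is then a single call; the 0.42 sample value is arbitrary:

// Sketch: sample one observation (e.g. a latency in seconds) into the
// histogram. On a COUNTER or GAUGE metric this returns
// TRITONSERVER_ERROR_UNSUPPORTED per the docs above.
TRITONSERVER_Error* err = TRITONSERVER_MetricObserve(metric, 0.42);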

/// Get the TRITONSERVER_MetricKind of metric of its corresponding family.
///
/// \param metric The metric object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
12 changes: 6 additions & 6 deletions python/test/test_api.py
@@ -70,7 +70,7 @@
exit_on_error=True,
strict_model_config=False,
model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
exit_timeout=10,
exit_timeout=30,
)


@@ -345,11 +345,6 @@ def test_ready(self):
server = tritonserver.Server(self._server_options).start()
self.assertTrue(server.ready())

@pytest.mark.xfail(
tritonserver.__version__ <= "2.48.0",
reason="Known issue on stop: Exit timeout expired. Exiting immediately",
raises=tritonserver.InternalError,
)
def test_stop(self):
server = tritonserver.Server(self._server_options).start(wait_until_ready=True)

@@ -362,6 +357,11 @@ def test_stop(self):
{
"backend": "python",
"parameters": {"decoupled": {"string_value": "False"}},
# Keep instance count low for fast startup/cleanup.
# Alternatively, KIND_CPU could be used here, but gpus/count are kept explicit.
"instance_group": [
{"kind": "KIND_GPU", "gpus": [0], "count": 1}
],
}
)
},
15 changes: 13 additions & 2 deletions python/tritonserver/_c/tritonserver_pybind.cc
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -1439,7 +1439,18 @@ class PyServer : public PyWrapper<struct TRITONSERVER_Server> {
return reinterpret_cast<uintptr_t>(this->Ptr());
}

void Stop() const { ThrowIfError(TRITONSERVER_ServerStop(triton_object_)); }
void Stop() const
{
// ServerStop blocks for the duration of the server exit timeout, so be
// sure to release the GIL. This allows request release callbacks to be
// interleaved while the server waits for live requests/models to
// complete. Without releasing the GIL, this function could acquire the
// GIL first and block Triton requests from being released/freed, in turn
// blocking the server's shutdown in a circular manner: the server would
// keep waiting, believing a model is still alive.
py::gil_scoped_release release;
ThrowIfError(TRITONSERVER_ServerStop(triton_object_));
}
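For comparison (an alternative idiom, not what this diff does), pybind11 can express the same GIL release declaratively when the method is bound, via py::call_guard; the binding names below are hypothetical:

// Sketch: release the GIL for the whole duration of every stop() call by
// attaching a call guard at binding time instead of inside the C++ body.
py::class_<PyServer>(m, "Server")
    .def("stop", &PyServer::Stop, py::call_guard<py::gil_scoped_release>());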

void RegisterModelRepository(
const std::string& repository_path,
63 changes: 42 additions & 21 deletions src/infer_request.cc
@@ -1015,6 +1015,17 @@ InferenceRequest::Normalize()
for (auto& pr : original_inputs_) {
auto& input = pr.second;
*input.MutableShape() = input.OriginalShape();

const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
if (input_config->is_shape_tensor()) {
// For a shape tensor, mark that the input is a shape tensor.
input.SetIsShapeTensor();
} else if (input_config->is_non_linear_format_io()) {
// If a tensor uses a non-linear IO format, indicate that the input uses
// a non-linear IO format.
input.SetIsNonLinearFormatIo();
}
}
} else {
// Model does support Triton-style batching so each input tensor
@@ -1024,15 +1035,19 @@
batch_size_ = 0;
for (auto& pr : original_inputs_) {
auto& input = pr.second;
const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

// For a shape tensor, keep the tensor's shape as it is and mark
// that the input is a shape tensor.
const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
if (input_config->is_shape_tensor()) {
*input.MutableShape() = input.OriginalShape();
input.SetIsShapeTensor(true);
input.SetIsShapeTensor();
continue;
} else if (input_config->is_non_linear_format_io()) {
// If a tensor uses a non-linear IO format, indicate that the input uses
// a non-linear IO format.
input.SetIsNonLinearFormatIo();
}

if (input.OriginalShape().size() == 0) {
@@ -1182,28 +1197,26 @@ InferenceRequest::Normalize()
{
const auto& data_type = input.DType();

// FIXME: Skip byte size validation for TensorRT backend because it breaks
// shape-size assumption. See DLIS-6805 for proper fix for TRT backend
// reformat_free tensors.
bool skip_byte_size_check = false;
constexpr char trt_prefix[] = "tensorrt_";
const std::string& platform = model_raw_->Config().platform();
skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);

if (!skip_byte_size_check) {
// Non-linear IO format input byte size validation will be handled in the
// TensorRT backend.
if (!input.IsNonLinearFormatIo()) {
TRITONSERVER_MemoryType input_memory_type;
// Because Triton expects STRING type to be in special format
// (prepend 4 bytes to specify string length), so need to add all the
// first 4 bytes for each element to find expected byte size
if (data_type == inference::DataType::TYPE_STRING) {
RETURN_IF_ERROR(
ValidateBytesInputs(input_id, input, &input_memory_type));

// FIXME: Temporarily skips byte size checks for GPU tensors. See
// DLIS-6820.
skip_byte_size_check |=
(input_memory_type == TRITONSERVER_MEMORY_GPU);
} else {
const auto& input_dims = input.ShapeWithBatchDim();
// Shape tensor with dynamic batching does not introduce a new
// dimension to the tensor but adds an additional value to the 1-D
// array.
const std::vector<int64_t>& input_dims =
input.IsShapeTensor() ? input.OriginalShape()
: input.ShapeWithBatchDim();
int64_t expected_byte_size = INT_MAX;
expected_byte_size =
triton::common::GetByteSize(data_type, input_dims);
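A worked illustration (editor's note, with assumed values) of why the shape-tensor branch validates against the original shape:

// Sketch: a TYPE_INT32 shape tensor whose model config declares dims [3].
// With dynamic batching the batch size is carried as one extra element of
// the same 1-D array, so the request arrives with OriginalShape() == {4}:
//   GetByteSize(TYPE_INT32, {4}) -> 4 * 4 = 16 bytes   (what is expected)
// Using ShapeWithBatchDim() would instead imply a batched 2-D tensor and
// compute the wrong expected byte size.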
@@ -1506,7 +1519,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
// Input
//
InferenceRequest::Input::Input()
: is_shape_tensor_(false), data_(new MemoryReference),
: tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}
@@ -1515,16 +1528,17 @@ InferenceRequest::Input::Input(
const std::string& name, const inference::DataType datatype,
const int64_t* shape, const uint64_t dim_count)
: name_(name), datatype_(datatype),
original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
data_(new MemoryReference), has_host_policy_specific_data_(false)
original_shape_(shape, shape + dim_count),
tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}

InferenceRequest::Input::Input(
const std::string& name, const inference::DataType datatype,
const std::vector<int64_t>& shape)
: name_(name), datatype_(datatype), original_shape_(shape),
is_shape_tensor_(false), data_(new MemoryReference),
tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}
Expand All @@ -1540,9 +1554,16 @@ InferenceRequest::Input::SetMetadata(
}

Status
InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
InferenceRequest::Input::SetIsShapeTensor()
{
tensor_type_ = TensorType::SHAPE_TENSOR;
return Status::Success;
}

Status
InferenceRequest::Input::SetIsNonLinearFormatIo()
{
is_shape_tensor_ = is_shape_tensor;
tensor_type_ = TensorType::NON_LINEAR;
return Status::Success;
}

20 changes: 17 additions & 3 deletions src/infer_request.h
@@ -82,6 +82,8 @@ class InferenceRequest {
// Input tensor
class Input {
public:
enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

Input();
Input(
const std::string& name, const inference::DataType datatype,
@@ -134,10 +136,22 @@
}

// Whether or not the input is a tensorrt shape tensor
bool IsShapeTensor() const { return is_shape_tensor_; }
bool IsShapeTensor() const
{
return tensor_type_ == TensorType::SHAPE_TENSOR;
}

// Specifies whether the input uses a non-linear IO format
bool IsNonLinearFormatIo() const
{
return tensor_type_ == TensorType::NON_LINEAR;
}

// Set the input to be treated as a shape tensor.
Status SetIsShapeTensor(const bool is_shape_tensor);
Status SetIsShapeTensor();

// Mark that the input uses a non-linear IO format
Status SetIsNonLinearFormatIo();
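Because the setters now drive a single three-state enum rather than an independent boolean, the two special formats are mutually exclusive by construction; a small sketch of hypothetical calling code:

// Sketch: the last setter wins and the other predicate reports false.
InferenceRequest::Input input;
input.SetIsNonLinearFormatIo();
assert(input.IsNonLinearFormatIo());
assert(!input.IsShapeTensor());  // NON_LINEAR, not SHAPE_TENSOR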

// The data for this input.
const std::shared_ptr<Memory>& Data() const { return data_; }
@@ -240,7 +254,7 @@ class InferenceRequest {
std::vector<int64_t> original_shape_;
std::vector<int64_t> shape_;
std::vector<int64_t> shape_with_batch_dim_;
bool is_shape_tensor_;
TensorType tensor_type_;
std::shared_ptr<Memory> data_;

bool has_host_policy_specific_data_;
(Diffs for the remaining changed files are not shown.)
