Skip to content

Commit

Permalink
feat: Metrics API enhanced for Failure counters (#377)
Browse files Browse the repository at this point in the history
Enhances existing metrics API to incorporate failure and failure counts in inference requests
---------

Co-authored-by: Kevin Wang <kevinwa@ipp1-0780.ipp1u1.colossus.nvidia.com>
Co-authored-by: krishung5 <krish@nvidia.com>
  • Loading branch information
3 people committed Jul 12, 2024
1 parent 627eb5a commit f4d898b
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 50 deletions.
15 changes: 13 additions & 2 deletions src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "backend_config.h"
#include "backend_model.h"
#include "cuda_utils.h"
#include "infer_stats.h"
#include "metrics.h"
#include "model_config.pb.h"
#include "numa_utils.h"
Expand Down Expand Up @@ -558,7 +559,8 @@ TritonModelInstance::PrepareRequestsOrRespond(
// If any errors occurred, respond with error for each request.
if (!status.IsOk()) {
for (auto& r : requests) {
InferenceRequest::RespondIfError(r, status, true /* release_requests */);
InferenceRequest::RespondIfError(
r, status, true /* release_requests */, FailureReason::OTHER);
}
// Log a single error for batch of requests for better visibility
LOG_STATUS_ERROR(status, "Requests failed pre-execution checks");
Expand Down Expand Up @@ -685,7 +687,16 @@ TritonModelInstance::Execute(
for (TRITONBACKEND_Request* tr : triton_requests) {
std::unique_ptr<InferenceRequest> ur(
reinterpret_cast<InferenceRequest*>(tr));
InferenceRequest::RespondIfError(ur, status, true /* release_requests */);
// NOTE: If a backend both returns an error in
// TRITONBACKEND_ModelInstanceExecute and reports an error with
// TRITONBACKEND_ModelInstanceReportStatistics, this can result in double
// counting of the failure metric for the same request. However, it is
// currently not expected for this to be a common case, as the return
// value of TRITONBACKEND_ModelInstanceExecute is used to express
// ownership of the request rather than success of an inference request.
// See tritonbackend.h for more details on this.
InferenceRequest::RespondIfError(
ur, status, true /* release_requests */, FailureReason::BACKEND);
}

TRITONSERVER_ErrorDelete(err);
Expand Down
11 changes: 7 additions & 4 deletions src/dynamic_batch_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ IsStaleState(Payload::State payload_state)
void
FinishSkippedRequests(
std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& requests,
const Status& response_status)
const Status& response_status, FailureReason reason)
{
for (auto& queue : requests) {
for (auto& request : queue) {
InferenceRequest::RespondIfError(request, response_status, true);
InferenceRequest::RespondIfError(
request, response_status, true /* release_requests */, reason);
}
}
}
Expand All @@ -69,8 +70,10 @@ FinishRejectedCancelledRequests(
const static Status rejected_status =
Status(Status::Code::UNAVAILABLE, "Request timeout expired");
const static Status cancelled_status = Status(Status::Code::CANCELLED);
FinishSkippedRequests(std::move(rejected_requests), rejected_status);
FinishSkippedRequests(std::move(cancelled_requests), cancelled_status);
FinishSkippedRequests(
std::move(rejected_requests), rejected_status, FailureReason::REJECTED);
FinishSkippedRequests(
std::move(cancelled_requests), cancelled_status, FailureReason::CANCELED);
}

DynamicBatchScheduler::DynamicBatchScheduler(
Expand Down
33 changes: 19 additions & 14 deletions src/ensemble_scheduler/ensemble_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,23 +81,26 @@ class RequestTracker {
std::lock_guard<std::mutex> lk(mtx_);
inflight_request_counter_--;
if (inflight_request_counter_ == 0) {
if (request_ != nullptr) {
#ifdef TRITON_ENABLE_STATS
const auto& infer_stats = context_stats_aggregator_.ImmutableInferStats();
request_->ReportStatisticsWithDuration(
metric_reporter_, status_.IsOk(), compute_start_ns_,
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
if (status_.IsOk()) {
stats_aggregator_->UpdateInferBatchStatsWithDuration(
metric_reporter_, std::max(1U, request_->BatchSize()),
const auto& infer_stats =
context_stats_aggregator_.ImmutableInferStats();
request_->ReportStatisticsWithDuration(
metric_reporter_, status_.IsOk(), compute_start_ns_,
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
}
if (status_.IsOk()) {
stats_aggregator_->UpdateInferBatchStatsWithDuration(
metric_reporter_, std::max(1U, request_->BatchSize()),
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
}
#endif
InferenceRequest::Release(
std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
InferenceRequest::Release(
std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
}
}
return (inflight_request_counter_ == 0);
}
Expand Down Expand Up @@ -1136,7 +1139,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
"more "
"ensemble steps can be made");
InferenceRequest::RespondIfError(
request_tracker_->Request(), ensemble_status_);
request_tracker_->Request(), ensemble_status_,
false /* release_requests */, FailureReason::OTHER);
} else {
request_tracker_->Request()->ResponseFactory()->SendFlags(
TRITONSERVER_RESPONSE_COMPLETE_FINAL);
Expand All @@ -1149,7 +1153,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
ensemble_status_);
} else {
InferenceRequest::RespondIfError(
request_tracker_->Request(), ensemble_status_);
request_tracker_->Request(), ensemble_status_,
false /* release_requests */, FailureReason::OTHER);
}
}

Expand Down
64 changes: 43 additions & 21 deletions src/infer_request.cc
Original file line number Diff line number Diff line change
Expand Up @@ -421,10 +421,25 @@ InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
return status;
}

FailureReason
stringToFailureReason(const std::string& error_type)
{
if (error_type == "REJECTED") {
return FailureReason::REJECTED;
}
if (error_type == "CANCELED") {
return FailureReason::CANCELED;
}
if (error_type == "BACKEND") {
return FailureReason::BACKEND;
}
return FailureReason::OTHER;
}

void
InferenceRequest::RespondIfError(
std::unique_ptr<InferenceRequest>& request, const Status& status,
const bool release_request)
const bool release_request, FailureReason reason)
{
if (status.IsOk()) {
return;
Expand All @@ -442,7 +457,10 @@ InferenceRequest::RespondIfError(
InferenceResponse::SendWithStatus(
std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, status),
(request->LogRequest() + "failed to send error response").c_str());

#ifdef TRITON_ENABLE_STATS
request->ReportErrorStatistics(
request->model_raw_->MetricReporter().get(), reason);
#endif
// If releasing the request then invoke the release callback which
// gives ownership to the callback. So can't access 'request' after
// this point.
Expand All @@ -452,20 +470,6 @@ InferenceRequest::RespondIfError(
}
}

void
InferenceRequest::RespondIfError(
std::vector<std::unique_ptr<InferenceRequest>>& requests,
const Status& status, const bool release_requests)
{
if (status.IsOk()) {
return;
}

for (auto& request : requests) {
RespondIfError(request, status, release_requests);
}
}

Status
InferenceRequest::Release(
std::unique_ptr<InferenceRequest>&& request, const uint32_t release_flags)
Expand Down Expand Up @@ -1371,6 +1375,21 @@ InferenceRequest::ValidateBytesInputs(
}

#ifdef TRITON_ENABLE_STATS

void
InferenceRequest::ReportErrorStatistics(
MetricModelReporter* metric_reporter, FailureReason reason)
{
INFER_STATS_DECL_TIMESTAMP(request_end_ns);
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns, reason);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
reason);
}
}

void
InferenceRequest::ReportStatistics(
MetricModelReporter* metric_reporter, bool success,
Expand Down Expand Up @@ -1407,10 +1426,12 @@ InferenceRequest::ReportStatistics(
}
} else {
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns);
metric_reporter, request_start_ns_, request_end_ns,
FailureReason::BACKEND);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
FailureReason::BACKEND);
}
}
}
Expand Down Expand Up @@ -1443,10 +1464,12 @@ InferenceRequest::ReportStatisticsWithDuration(
}
} else {
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns);
metric_reporter, request_start_ns_, request_end_ns,
FailureReason::OTHER);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
FailureReason::OTHER);
}
}
}
Expand Down Expand Up @@ -1850,5 +1873,4 @@ operator!=(
{
return !(lhs == rhs);
}

}} // namespace triton::core
16 changes: 14 additions & 2 deletions src/infer_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,8 @@ class InferenceRequest {
// 'release_request' is true 'request' is returned as nullptr.
static void RespondIfError(
std::unique_ptr<InferenceRequest>& request, const Status& status,
const bool release_request = false);
const bool release_request = false,
FailureReason reason = FailureReason::OTHER);

// Send an error response to a set of 'requests'. If 'status' is
// Success then no responses are sent and the requests are not
Expand All @@ -603,7 +604,8 @@ class InferenceRequest {
// returned with all nullptrs.
static void RespondIfError(
std::vector<std::unique_ptr<InferenceRequest>>& requests,
const Status& status, const bool release_requests = false);
const Status& status, const bool release_requests = false,
FailureReason reason = FailureReason::OTHER);

// Release the request. Call the release callback and transfer
// ownership of the request to the callback. On return 'request' is
Expand Down Expand Up @@ -673,6 +675,16 @@ class InferenceRequest {
const uint64_t compute_start_ns, const uint64_t compute_input_end_ns,
const uint64_t compute_output_start_ns, const uint64_t compute_end_ns);

// Report the error statistics to stats collectors associated with the
// request.
// FIXME: A separate function may not be necessary here, but is being used
// cautiously in case of unforeseen issues such as possibly capturing a trace
// twice. This should be revisited and better tested to see if the
// ReportStatistics function can be used as-is for the newly captured failure
// cases.
void ReportErrorStatistics(
MetricModelReporter* metric_reporter, FailureReason reason);

// Report the statistics to stats collectors associated with the request.
// Duration and timestamps provide two granularities for stats collectors.
void ReportStatisticsWithDuration(
Expand Down
23 changes: 21 additions & 2 deletions src/infer_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,28 @@ namespace triton { namespace core {

#ifdef TRITON_ENABLE_STATS

// This function converts FailureReason enum values to std::string
std::string
failureReasonToString(FailureReason reason)
{
switch (reason) {
case FailureReason::REJECTED:
return "REJECTED";
case FailureReason::CANCELED:
return "CANCELED";
case FailureReason::BACKEND:
return "BACKEND";
case FailureReason::OTHER:
return "OTHER";
default:
return "OTHER";
}
}

void
InferenceStatsAggregator::UpdateFailure(
MetricModelReporter* metric_reporter, const uint64_t request_start_ns,
const uint64_t request_end_ns)
const uint64_t request_end_ns, FailureReason reason)
{
std::lock_guard<std::mutex> lock(mu_);

Expand All @@ -48,7 +66,8 @@ InferenceStatsAggregator::UpdateFailure(

#ifdef TRITON_ENABLE_METRICS
if (metric_reporter != nullptr) {
metric_reporter->IncrementCounter("inf_failure", 1);
std::string reason_str = failureReasonToString(reason);
metric_reporter->IncrementCounter("inf_failure_" + reason_str, 1);
}
#endif // TRITON_ENABLE_METRICS
}
Expand Down
5 changes: 4 additions & 1 deletion src/infer_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@

namespace triton { namespace core {

// Define the FailureReason enum within the triton::core namespace
enum class FailureReason { REJECTED, CANCELED, BACKEND, OTHER };

class MetricModelReporter;


Expand Down Expand Up @@ -136,7 +139,7 @@ class InferenceStatsAggregator {
// Add durations to Infer stats for a failed inference request.
void UpdateFailure(
MetricModelReporter* metric_reporter, const uint64_t request_start_ns,
const uint64_t request_end_ns);
const uint64_t request_end_ns, FailureReason reason);

// Add durations to infer stats for a successful inference request.
void UpdateSuccess(
Expand Down
18 changes: 17 additions & 1 deletion src/metric_model_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#ifdef TRITON_ENABLE_METRICS

#include "constants.h"
#include "infer_stats.h"
#include "triton/common/logging.h"

// Global config group has 'name' of empty string.
Expand Down Expand Up @@ -101,6 +102,13 @@ MetricReporterConfig::ParseQuantiles(std::string options)
//
// MetricModelReporter
//
const std::map<FailureReason, std::string>
MetricModelReporter::failure_reasons_map = {
{FailureReason::REJECTED, "REJECTED"},
{FailureReason::CANCELED, "CANCELED"},
{FailureReason::BACKEND, "BACKEND"},
{FailureReason::OTHER, "OTHER"}};

Status
MetricModelReporter::Create(
const ModelIdentifier& model_id, const int64_t model_version,
Expand Down Expand Up @@ -189,7 +197,6 @@ MetricModelReporter::InitializeCounters(
{
// Always setup these counters, regardless of config
counter_families_["inf_success"] = &Metrics::FamilyInferenceSuccess();
counter_families_["inf_failure"] = &Metrics::FamilyInferenceFailure();
counter_families_["inf_count"] = &Metrics::FamilyInferenceCount();
counter_families_["inf_exec_count"] =
&Metrics::FamilyInferenceExecutionCount();
Expand Down Expand Up @@ -227,6 +234,15 @@ MetricModelReporter::InitializeCounters(
counters_[name] = CreateMetric<prometheus::Counter>(*family_ptr, labels);
}
}

// Initialize failure metrics with reasons
for (const auto& reason_pair : failure_reasons_map) {
std::map<std::string, std::string> extended_labels = labels;
extended_labels["reason"] = reason_pair.second;
counters_["inf_failure_" + reason_pair.second] =
CreateMetric<prometheus::Counter>(
Metrics::FamilyInferenceFailure(), extended_labels);
}
}

void
Expand Down
2 changes: 2 additions & 0 deletions src/metric_model_reporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ class MetricModelReporter {
// Lookup summary metric by name, and observe the value if it exists.
void ObserveSummary(const std::string& name, double value);

static const std::map<FailureReason, std::string> failure_reasons_map;

private:
MetricModelReporter(
const ModelIdentifier& model_id, const int64_t model_version,
Expand Down
Loading

0 comments on commit f4d898b

Please sign in to comment.