diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index ad04d57f8..42154cdca 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1722,9 +1722,9 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading( /// /// \param batcher User-defined placeholder for backend to store and /// retrieve information about the batching strategy for this -/// model.RITONBACKEND_ISPEC return a TRITONSERVER_Error indicating success or -/// failure. \param model The backend model for which Triton is forming a batch. -/// \return a TRITONSERVER_Error indicating success or failure. +/// model. Returns a TRITONSERVER_Error indicating success +/// or failure. \param model The backend model for which Triton is forming a +/// batch. \return a TRITONSERVER_Error indicating success or failure. TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatcherInitialize( TRITONBACKEND_Batcher** batcher, TRITONBACKEND_Model* model); diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index b8e25df72..ef5a45d6a 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily; /// } /// #define TRITONSERVER_API_VERSION_MAJOR 1 -#define TRITONSERVER_API_VERSION_MINOR 32 +#define TRITONSERVER_API_VERSION_MINOR 33 /// Get the TRITONBACKEND API version supported by the Triton shared /// library. This value can be compared against the @@ -732,7 +732,8 @@ typedef enum tritonserver_traceactivity_enum { TRITONSERVER_TRACE_REQUEST_END = 6, TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7, TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8, - TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9 + TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9, + TRITONSERVER_TRACE_CUSTOM_ACTIVITY = 10 } TRITONSERVER_InferenceTraceActivity; /// Get the string representation of a trace activity. 
The returned @@ -838,6 +839,18 @@ TRITONSERVER_InferenceTraceTensorNew( TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp); +/// Report a trace activity. All the traces reported using this API will be +/// using TRITONSERVER_TRACE_CUSTOM_ACTIVITY type. +/// +/// \param trace The trace object. +/// \param timestamp The timestamp associated with the trace activity. +/// \param name The trace activity name. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONSERVER_DECLSPEC TRITONSERVER_Error* +TRITONSERVER_InferenceTraceReportActivity( + TRITONSERVER_InferenceTrace* trace, uint64_t timestamp, + const char* activity_name); + /// Delete a trace object. /// /// \param trace The trace object. @@ -921,7 +934,6 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceSetContext( struct TRITONSERVER_InferenceTrace* trace, const char* trace_context); - /// Get TRITONSERVER_InferenceTrace context. /// /// \param trace The trace. 
diff --git a/src/infer_trace.cc b/src/infer_trace.cc index cce46e262..4301e2c5b 100644 --- a/src/infer_trace.cc +++ b/src/infer_trace.cc @@ -26,6 +26,13 @@ #include "infer_trace.h" +#define TRITONJSON_STATUSTYPE triton::core::Status +#define TRITONJSON_STATUSRETURN(M) \ + return triton::core::Status(triton::core::Status::Code::INTERNAL, (M)) +#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success +#include "triton/common/logging.h" +#include "triton/common/triton_json.h" + namespace triton { namespace core { #ifdef TRITON_ENABLE_TRACING @@ -48,6 +55,26 @@ InferenceTrace::Release() release_fn_(reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), userp_); } +void +InferenceTrace::RecordActivityName( + uint64_t timestamp_ns, std::string activity_name) +{ + std::lock_guard<std::mutex> lock(mu_); + triton::common::TritonJson::Value context_json( + triton::common::TritonJson::ValueType::OBJECT); + if (!context_.empty()) { + Status status = context_json.Parse(context_); + if (!status.IsOk()) { + LOG_ERROR << "Error parsing trace context"; + } + } + std::string key = std::to_string(timestamp_ns); + context_json.SetStringObject(key.c_str(), activity_name); + triton::common::TritonJson::WriteBuffer buffer; + context_json.Write(&buffer); + context_ = buffer.Contents(); +} + std::shared_ptr<InferenceTraceProxy> InferenceTraceProxy::SpawnChildTrace() { diff --git a/src/infer_trace.h b/src/infer_trace.h index 4de6df788..4f16cf380 100644 --- a/src/infer_trace.h +++ b/src/infer_trace.h @@ -28,6 +28,7 @@ #include <atomic> #include <memory> #include <string> +#include <mutex> #include "constants.h" #include "status.h" @@ -69,12 +70,17 @@ class InferenceTrace { void SetModelVersion(int64_t v) { model_version_ = v; } void SetRequestId(const std::string& request_id) { request_id_ = request_id; } void SetContext(const std::string& context) { context_ = context; } + void RecordActivityName(uint64_t timestamp_ns, std::string activity_name); // Report trace activity. 
void Report( - const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns) + const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, + std::string activity_name = "") { if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) { + if (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY) { + RecordActivityName(timestamp_ns, activity_name); + } activity_fn_( reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity, timestamp_ns, userp_); @@ -82,13 +88,15 @@ class InferenceTrace { } // Report trace activity at the current time. - void ReportNow(const TRITONSERVER_InferenceTraceActivity activity) + void ReportNow( + const TRITONSERVER_InferenceTraceActivity activity, + std::string activity_name = "") { if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) { - Report( - activity, std::chrono::duration_cast<std::chrono::nanoseconds>( - std::chrono::steady_clock::now().time_since_epoch()) - .count()); + auto now = std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); + Report(activity, now, activity_name); } } @@ -128,6 +136,7 @@ class InferenceTrace { // across traces static std::atomic<uint64_t> next_id_; std::string context_; + std::mutex mu_; }; // @@ -152,6 +161,10 @@ class InferenceTraceProxy { void SetRequestId(const std::string& n) { trace_->SetRequestId(n); } void SetModelVersion(int64_t v) { trace_->SetModelVersion(v); } void SetContext(const std::string& context) { trace_->SetContext(context); } + void RecordActivityName(uint64_t timestamp_ns, std::string activity_name) + { + trace_->RecordActivityName(timestamp_ns, activity_name); + } void Report( const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns) diff --git a/src/tritonserver.cc b/src/tritonserver.cc index eae83ef2f..82642d5dc 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -950,6 +950,8 @@ TRITONSERVER_InferenceTraceActivityString( return "TENSOR_BACKEND_INPUT"; case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT: return "TENSOR_BACKEND_OUTPUT"; + case 
TRITONSERVER_TRACE_CUSTOM_ACTIVITY: + return "CUSTOM_ACTIVITY"; } return ""; @@ -1115,6 +1117,23 @@ TRITONSERVER_InferenceTraceSpawnChildTrace( #endif // TRITON_ENABLE_TRACING } +TRITONSERVER_DECLSPEC TRITONSERVER_Error* +TRITONSERVER_InferenceTraceReportActivity( + TRITONSERVER_InferenceTrace* trace, uint64_t timestamp, + const char* activity_name) +{ +#ifdef TRITON_ENABLE_TRACING + tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace); + if (trace != nullptr) { + ltrace->Report( + TRITONSERVER_TRACE_CUSTOM_ACTIVITY, timestamp, activity_name); + } + return nullptr; // Success +#else + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); +#endif // TRITON_ENABLE_TRACING +} TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceSetContext( diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc index b8449d1bc..cd1e03e15 100644 --- a/src/tritonserver_stub.cc +++ b/src/tritonserver_stub.cc @@ -1115,6 +1115,11 @@ TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup() { } +TRITONAPI_DECLSPEC void +TRITONSERVER_InferenceTraceReportActivity() +{ +} + TRITONAPI_DECLSPEC void TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading() {