Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add model_load_time metric #397

Merged
merged 17 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ constexpr char kInitialStateFolder[] = "initial_state";

// Metric names
// Key under which the pending inference request count gauge is looked up.
constexpr char kPendingRequestMetric[] = "inf_pending_request_count";
// Key under which the per-model load time gauge is looked up.
constexpr char kModelLoadTimeMetric[] = "model_load_time";

// Number of nanoseconds in one second.
constexpr uint64_t NANOS_PER_SECOND = 1000000000;
// Number of nanoseconds in one millisecond.
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
Expand Down
10 changes: 10 additions & 0 deletions src/metric_model_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ MetricModelReporter::InitializeGauges(
{
// Always setup these inference request metrics, regardless of config
gauge_families_[kPendingRequestMetric] = &Metrics::FamilyInferenceQueueSize();
gauge_families_[kModelLoadTimeMetric] = &Metrics::FamilyModelLoadTime();

for (auto& iter : gauge_families_) {
const auto& name = iter.first;
Expand Down Expand Up @@ -392,6 +393,15 @@ MetricModelReporter::IncrementGauge(const std::string& name, double value)
}
}

// Fetch the gauge for 'name' through GetGauge() and overwrite its current
// value with 'value'. If the lookup yields a null result, nothing is done.
void
MetricModelReporter::SetGauge(const std::string& name, double value)
{
  auto target = GetGauge(name);
  if (!target) {
    return;
  }
  target->Set(value);
}

void
MetricModelReporter::DecrementGauge(const std::string& name, double value)
{
Expand Down
2 changes: 2 additions & 0 deletions src/metric_model_reporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ class MetricModelReporter {
const MetricReporterConfig& Config();
// Lookup counter metric by name, and increment it by value if it exists.
void IncrementCounter(const std::string& name, double value);
// Lookup gauge metric by name, and overwrite it with value if it exists.
void SetGauge(const std::string& name, double value);
indrajit96 marked this conversation as resolved.
Show resolved Hide resolved
// Increase gauge by value.
void IncrementGauge(const std::string& name, double value);
// Decrease gauge by value.
Expand Down
5 changes: 5 additions & 0 deletions src/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ Metrics::Metrics()
"execution per-model.")
.Register(*registry_)),

model_load_time_family_(prometheus::BuildGauge()
.Name("nv_model_load_duration_secs")
.Help("Model load time in seconds")
.Register(*registry_)),

pinned_memory_pool_total_family_(
prometheus::BuildGauge()
.Name("nv_pinned_memory_pool_total_bytes")
Expand Down
7 changes: 7 additions & 0 deletions src/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,12 @@ class Metrics {
return GetSingleton()->inf_pending_request_count_family_;
}

// Accessor for the gauge family that holds the per-model load time,
// obtained from the Metrics singleton.
static prometheus::Family<prometheus::Gauge>& FamilyModelLoadTime()
{
  auto&& singleton = GetSingleton();
  return singleton->model_load_time_family_;
}

// Metric families of per-model response cache metrics
// NOTE: These are used in infer_stats for perf_analyzer
static prometheus::Family<prometheus::Counter>& FamilyCacheHitCount()
Expand Down Expand Up @@ -300,6 +306,7 @@ class Metrics {
prometheus::Family<prometheus::Counter>&
inf_compute_output_duration_us_family_;
prometheus::Family<prometheus::Gauge>& inf_pending_request_count_family_;
prometheus::Family<prometheus::Gauge>& model_load_time_family_;

prometheus::Family<prometheus::Gauge>& pinned_memory_pool_total_family_;
prometheus::Family<prometheus::Gauge>& pinned_memory_pool_used_family_;
Expand Down
40 changes: 38 additions & 2 deletions src/model_repository_manager/model_lifecycle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "backend_model.h"
#include "constants.h"
#include "filesystem/api.h"
#include "metrics.h"
#include "model.h"
#include "model_config_utils.h"
#include "repo_agent.h"
Expand Down Expand Up @@ -470,6 +471,12 @@ ModelLifeCycle::AsyncLoad(
ModelInfo* model_info = linfo.get();

LOG_INFO << "loading: " << model_id << ":" << version;
const uint64_t model_load_ns =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
.count();

model_info->load_start_ns_ = model_load_ns;
model_info->state_ = ModelReadyState::LOADING;
model_info->state_reason_.clear();
model_info->agent_model_list_ = agent_model_list;
Expand Down Expand Up @@ -799,10 +806,11 @@ ModelLifeCycle::OnLoadFinal(
// Mark current versions ready and track info in foreground
for (auto& loaded : load_tracker->load_set_) {
std::lock_guard<std::mutex> curr_info_lk(loaded.second->mtx_);

loaded.second->state_ = ModelReadyState::READY;
loaded.second->state_reason_.clear();

#ifdef TRITON_ENABLE_METRICS
CalculateAndReportLoadTime(loaded.second);
#endif // TRITON_ENABLE_METRICS
kthui marked this conversation as resolved.
Show resolved Hide resolved
auto bit = background_models_.find((uintptr_t)loaded.second);
// Check if the version model is loaded in background, if so,
// replace and unload the current serving version
Expand Down Expand Up @@ -847,4 +855,32 @@ ModelLifeCycle::OnLoadFinal(
}
}

// Work out how long a model took to load and hand the result to
// ReportModelLoadTime().
//
// The elapsed time is the difference between the current steady-clock reading
// and the 'load_start_ns_' value stored on 'loaded_model_info', converted from
// nanoseconds to fractional seconds.
//
// \param loaded_model_info The info of the model whose load time is reported.
// Its 'model_' is dereferenced to obtain the metric reporter.
void
ModelLifeCycle::CalculateAndReportLoadTime(ModelInfo* loaded_model_info)
{
  using std::chrono::duration_cast;
  using std::chrono::nanoseconds;
  using std::chrono::steady_clock;

  auto reporter = loaded_model_info->model_->MetricReporter();

  // Current steady-clock reading expressed in nanoseconds.
  const uint64_t end_ns =
      duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
          .count();
  uint64_t elapsed_ns = end_ns - loaded_model_info->load_start_ns_;

  // duration<double> uses seconds as its period, so this yields fractional
  // seconds.
  std::chrono::duration<double> elapsed_secs =
      duration_cast<std::chrono::duration<double>>(nanoseconds(elapsed_ns));
  ReportModelLoadTime(reporter, elapsed_secs);
}

// Publish a model's load time, in seconds, to its metric reporter by setting
// the gauge keyed by kModelLoadTimeMetric.
//
// The body is compiled only when TRITON_ENABLE_METRICS is defined; otherwise
// the function does nothing. A null 'reporter' is also ignored.
//
// \param reporter The metric reporter to receive the value.
// \param time_to_load The load duration, in seconds.
void
ModelLifeCycle::ReportModelLoadTime(
    std::shared_ptr<MetricModelReporter> reporter,
    const std::chrono::duration<double>& time_to_load)
{
#ifdef TRITON_ENABLE_METRICS
  if (!reporter) {
    return;
  }
  reporter->SetGauge(kModelLoadTimeMetric, time_to_load.count());
#endif  // TRITON_ENABLE_METRICS
}

}} // namespace triton::core
7 changes: 7 additions & 0 deletions src/model_repository_manager/model_lifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ class ModelLifeCycle {
std::mutex mtx_;

uint64_t last_update_ns_;
uint64_t load_start_ns_;

ModelReadyState state_;
std::string state_reason_;
Expand Down Expand Up @@ -313,6 +314,12 @@ class ModelLifeCycle {
ModelInfo* model_info, const bool is_update,
const std::function<void(Status)>& OnComplete,
std::shared_ptr<LoadTracker> load_tracker);
// Calculate the time taken to load the model and report it as a metric.
void CalculateAndReportLoadTime(ModelInfo* loaded_model_info);
indrajit96 marked this conversation as resolved.
Show resolved Hide resolved
// Report the per-model load time metric through the given reporter.
void ReportModelLoadTime(
std::shared_ptr<MetricModelReporter> reporter,
const std::chrono::duration<double>& time_to_load);
// Helper function for 'OnLoadComplete()' to finish final operations after
// loading **all** model versions.
void OnLoadFinal(
Expand Down
Loading