From e3b8df05251b36564e9b453ea3323545e58722b1 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 2 Aug 2024 18:20:49 -0700 Subject: [PATCH 1/7] Update build.py --- build.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.py b/build.py index 6ab8a58515..901714edfd 100755 --- a/build.py +++ b/build.py @@ -1802,6 +1802,10 @@ def backend_clone( os.path.join(build_dir, be, "src", "model.py"), backend_dir, ) + clone_script.cpdir( + os.path.join(build_dir, be, "src", "utils"), + backend_dir, + ) clone_script.comment() clone_script.comment(f"end '{be}' backend") From 6f601f453cb7b9934f4ad4370158b250aa48b80b Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 15 Aug 2024 01:41:57 -0700 Subject: [PATCH 2/7] Add docs --- docs/user_guide/metrics.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md index 0a7f3cf1a3..b8fc0d8ee0 100644 --- a/docs/user_guide/metrics.md +++ b/docs/user_guide/metrics.md @@ -378,3 +378,9 @@ Further documentation can be found in the `TRITONSERVER_MetricFamily*` and The TRT-LLM backend uses the custom metrics API to track and expose specific metrics about LLMs, KV Cache, and Inflight Batching to Triton: https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics + +### vLLM Backend Metrics + +The vLLM backend uses the custom metrics API to track and expose specific metrics about +LLMs to Triton: +https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics From ef797b3d1d993accb9dd88290047a431c9cbf847 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 13 Aug 2024 17:59:45 -0700 Subject: [PATCH 3/7] Test histogram metric --- qa/python_models/custom_metrics/model.py | 113 ++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py index 31f105a1dd..ddc6d83c25 100644 --- a/qa/python_models/custom_metrics/model.py +++ b/qa/python_models/custom_metrics/model.py @@ -1,4 +1,4 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -74,6 +74,96 @@ def _metric_api_helper(self, metric, kind): self.assertEqual(metric.value(), value) logger.log_info("Set metric to : {}".format(metric.value())) + # Test observe value + observe = 0.05 + # Counter and gauge do not support observe + with self.assertRaises(pb_utils.TritonModelException): + metric.observe(observe) + + def _histogram_api_helper(self, metric, name, labels): + def histogram_str_builder(name, type, labels, value, le=None): + if type == "count" or type == "sum": + return f"{name}_{type}{{{labels}}} {value}" + elif type == "bucket": + return f'{name}_bucket{{{labels},le="{le}"}} {value}' + else: + raise + + # Adding logger to test if custom metrics and logging work together + # as they use the same message queue. + logger = pb_utils.Logger + + # All values should be 0.0 before the test + metrics = self._get_metrics() + self.assertIn(histogram_str_builder(name, "count", labels, "0"), metrics) + self.assertIn(histogram_str_builder(name, "sum", labels, "0"), metrics) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="0.1"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="1"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="2.5"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="5"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="10"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "0", le="+Inf"), metrics + ) + + # Histogram does not support value + with self.assertRaises(pb_utils.TritonModelException): + metric.value() + + # Test increment value + increment = 2023.0 + # Histogram does not support increment + with self.assertRaises(pb_utils.TritonModelException): + metric.increment(increment) + + # Test set value + value = 999.9 + # Histogram does not support set + with self.assertRaises(pb_utils.TritonModelException): + metric.set(value) + + # Test observe value + data = [0.05, 1.5, 6.0] + for datum in data: + metric.observe(datum) + logger.log_info("Observe histogram metric with value : {}".format(datum)) + + metrics = self._get_metrics() + self.assertIn( + histogram_str_builder(name, "count", labels, str(len(data))), metrics + ) + self.assertIn( + histogram_str_builder(name, "sum", labels, str(sum(data))), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "1", le="0.1"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "1", le="1"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "2", le="2.5"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "2", le="5"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "3", le="10"), metrics + ) + self.assertIn( + histogram_str_builder(name, "bucket", labels, "3", le="+Inf"), metrics + ) + def _dup_metric_helper(self, labels={}): # Adding logger to test if custom metrics and logging work together # as they use the same message queue. @@ -136,6 +226,27 @@ def test_gauge_e2e(self): metrics = self._get_metrics() self.assertIn(pattern, metrics) + def test_histogram_e2e(self): + name = "test_histogram_e2e" + metric_family = pb_utils.MetricFamily( + name=name, + description="test metric histogram kind end to end", + kind=pb_utils.MetricFamily.HISTOGRAM, + ) + labels = {"example1": "counter_label1", "example2": "counter_label2"} + buckets = [0.1, 1.0, 2.5, 5.0, 10.0] + metric = metric_family.Metric(labels=labels, buckets=buckets) + labels_str = 'example1="counter_label1",example2="counter_label2"' + self._histogram_api_helper(metric, name, labels_str) + + metrics = self._get_metrics() + count_pattern = f"{name}_count{{{labels_str}}}" + sum_pattern = f"{name}_sum{{{labels_str}}}" + bucket_pattern = f"{name}_bucket{{{labels_str}" + self.assertEqual(metrics.count(count_pattern), 1) + self.assertEqual(metrics.count(sum_pattern), 1) + self.assertEqual(metrics.count(bucket_pattern), len(buckets) + 1) + def test_dup_metric_family_diff_kind(self): # Test that a duplicate metric family can't be added with a conflicting type/kind metric_family1 = pb_utils.MetricFamily( From e405af117acb0f5602994ba87fc7c2958cbe1699 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 16 Aug 2024 15:51:28 -0700 Subject: [PATCH 4/7] Update tests --- qa/python_models/custom_metrics/model.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py index ddc6d83c25..8b6a2715f9 100644 --- a/qa/python_models/custom_metrics/model.py +++ b/qa/python_models/custom_metrics/model.py @@ -218,11 +218,11 @@ def test_gauge_e2e(self): description="test metric gauge kind end to end", kind=pb_utils.MetricFamily.GAUGE, ) - labels = {"example1": "counter_label1", "example2": "counter_label2"} + labels = {"example1": "gauge_label1", "example2": "gauge_label2"} metric = metric_family.Metric(labels=labels) self._metric_api_helper(metric, "gauge") - pattern = 'test_gauge_e2e{example1="counter_label1",example2="counter_label2"}' + pattern = 'test_gauge_e2e{example1="gauge_label1",example2="gauge_label2"}' metrics = self._get_metrics() self.assertIn(pattern, metrics) @@ -233,10 +233,17 @@ def test_histogram_e2e(self): description="test metric histogram kind end to end", kind=pb_utils.MetricFamily.HISTOGRAM, ) - labels = {"example1": "counter_label1", "example2": "counter_label2"} + labels = {"example1": "histogram_label1", "example2": "histogram_label2"} + + # Test non-ascending order buckets + with self.assertRaises(pb_utils.TritonModelException): + metric = metric_family.Metric( + labels=labels, buckets=[2.5, 0.1, 1.0, 10.0, 5.0] + ) + buckets = [0.1, 1.0, 2.5, 5.0, 10.0] metric = metric_family.Metric(labels=labels, buckets=buckets) - labels_str = 'example1="counter_label1",example2="counter_label2"' + labels_str = 'example1="histogram_label1",example2="histogram_label2"' self._histogram_api_helper(metric, name, labels_str) metrics = self._get_metrics() From 8ff1ef1373b6fa8b1de706d24b907736f2d48c6a Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 16 Aug 2024 16:26:19 -0700 Subject: [PATCH 5/7] Add histogram args tests --- qa/python_models/custom_metrics/model.py | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py index 8b6a2715f9..08459a4423 100644 --- a/qa/python_models/custom_metrics/model.py +++ b/qa/python_models/custom_metrics/model.py @@ -233,16 +233,11 @@ def test_histogram_e2e(self): description="test metric histogram kind end to end", kind=pb_utils.MetricFamily.HISTOGRAM, ) - labels = {"example1": "histogram_label1", "example2": "histogram_label2"} - - # Test non-ascending order buckets - with self.assertRaises(pb_utils.TritonModelException): - metric = metric_family.Metric( - labels=labels, buckets=[2.5, 0.1, 1.0, 10.0, 5.0] - ) + labels = {"example1": "histogram_label1", "example2": "histogram_label2"} buckets = [0.1, 1.0, 2.5, 5.0, 10.0] metric = metric_family.Metric(labels=labels, buckets=buckets) + labels_str = 'example1="histogram_label1",example2="histogram_label2"' self._histogram_api_helper(metric, name, labels_str) @@ -254,6 +249,25 @@ def test_histogram_e2e(self): self.assertEqual(metrics.count(sum_pattern), 1) self.assertEqual(metrics.count(bucket_pattern), len(buckets) + 1) + def test_histogram_args(self): + name = "test_histogram_args" + metric_family = pb_utils.MetricFamily( + name=name, + description="test metric histogram args", + kind=pb_utils.MetricFamily.HISTOGRAM, + ) + + # Test non-ascending order buckets + with self.assertRaises(pb_utils.TritonModelException): + metric_family.Metric(labels={}, buckets=[2.5, 0.1, 1.0, 10.0, 5.0]) + + # Test duplicate value buckets + with self.assertRaises(pb_utils.TritonModelException): + metric_family.Metric(labels={}, buckets=[1, 1, 2, 5, 5]) + + # Test non-ascending order buckets + metric_family.Metric(labels={}, buckets=[]) + def test_dup_metric_family_diff_kind(self): # Test that a duplicate metric family can't be added with a conflicting type/kind metric_family1 = pb_utils.MetricFamily( From d8dc61290951f2580c13e403e36060a57438c56b Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 16 Aug 2024 16:28:23 -0700 Subject: [PATCH 6/7] Test buckets==None --- qa/python_models/custom_metrics/model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py index 08459a4423..d397bad0c6 100644 --- a/qa/python_models/custom_metrics/model.py +++ b/qa/python_models/custom_metrics/model.py @@ -257,6 +257,12 @@ def test_histogram_args(self): kind=pb_utils.MetricFamily.HISTOGRAM, ) + # Test none buckets + with self.assertRaises(pb_utils.TritonModelException): + metric_family.Metric(labels={}) + with self.assertRaises(pb_utils.TritonModelException): + metric_family.Metric(labels={}, buckets=None) + # Test non-ascending order buckets with self.assertRaises(pb_utils.TritonModelException): metric_family.Metric(labels={}, buckets=[2.5, 0.1, 1.0, 10.0, 5.0]) From ebec81ae658bc79c01545e73d5274efc7be666a2 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 16 Aug 2024 17:01:37 -0700 Subject: [PATCH 7/7] Fix comment --- qa/python_models/custom_metrics/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py index d397bad0c6..7c78b46894 100644 --- a/qa/python_models/custom_metrics/model.py +++ b/qa/python_models/custom_metrics/model.py @@ -257,7 +257,7 @@ def test_histogram_args(self): kind=pb_utils.MetricFamily.HISTOGRAM, ) - # Test none buckets + # Test "None" value buckets with self.assertRaises(pb_utils.TritonModelException): metric_family.Metric(labels={}) with self.assertRaises(pb_utils.TritonModelException): @@ -271,7 +271,7 @@ def test_histogram_args(self): with self.assertRaises(pb_utils.TritonModelException): metric_family.Metric(labels={}, buckets=[1, 1, 2, 5, 5]) - # Test non-ascending order buckets + # Test empty list bucket metric_family.Metric(labels={}, buckets=[]) def test_dup_metric_family_diff_kind(self):