From e3b8df05251b36564e9b453ea3323545e58722b1 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 2 Aug 2024 18:20:49 -0700
Subject: [PATCH 1/7] build.py: copy backend src/utils directory in backend_clone

---
 build.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build.py b/build.py
index 6ab8a58515..901714edfd 100755
--- a/build.py
+++ b/build.py
@@ -1802,6 +1802,10 @@ def backend_clone(
         os.path.join(build_dir, be, "src", "model.py"),
         backend_dir,
     )
+    clone_script.cpdir(
+        os.path.join(build_dir, be, "src", "utils"),
+        backend_dir,
+    )
 
     clone_script.comment()
     clone_script.comment(f"end '{be}' backend")

From 6f601f453cb7b9934f4ad4370158b250aa48b80b Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Thu, 15 Aug 2024 01:41:57 -0700
Subject: [PATCH 2/7] Add vLLM backend metrics section to metrics user guide

---
 docs/user_guide/metrics.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md
index 0a7f3cf1a3..b8fc0d8ee0 100644
--- a/docs/user_guide/metrics.md
+++ b/docs/user_guide/metrics.md
@@ -378,3 +378,9 @@ Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
 The TRT-LLM backend uses the custom metrics API to track and expose specific metrics about
 LLMs, KV Cache, and Inflight Batching to Triton:
 https://github.com/triton-inference-server/tensorrtllm_backend?tab=readme-ov-file#triton-metrics
+
+### vLLM Backend Metrics
+
+The vLLM backend uses the custom metrics API to track and expose specific metrics about
+LLMs to Triton:
+https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics

From ef797b3d1d993accb9dd88290047a431c9cbf847 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Tue, 13 Aug 2024 17:59:45 -0700
Subject: [PATCH 3/7] Test histogram metric

---
 qa/python_models/custom_metrics/model.py | 113 ++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 1 deletion(-)

diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py
index 31f105a1dd..ddc6d83c25 100644
--- a/qa/python_models/custom_metrics/model.py
+++ b/qa/python_models/custom_metrics/model.py
@@ -1,4 +1,4 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -74,6 +74,96 @@ def _metric_api_helper(self, metric, kind):
             self.assertEqual(metric.value(), value)
             logger.log_info("Set metric to : {}".format(metric.value()))
 
+        # Test observe value
+        observe = 0.05
+        # Counter and gauge do not support observe
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric.observe(observe)
+
+    def _histogram_api_helper(self, metric, name, labels):
+        """Exercise the histogram metric API and check the reported metrics.
+
+        Verifies that every sample starts at zero, that value(), increment()
+        and set() are rejected for histograms, and that observe() updates the
+        count, sum and cumulative bucket samples.
+
+        Args:
+            metric: Histogram metric created with the bucket boundaries
+                0.1, 1, 2.5, 5 and 10 that the assertions below expect.
+            name: Metric family name as it appears in the metrics text.
+            labels: Pre-rendered label string, e.g. 'k1="v1",k2="v2"'.
+        """
+
+        def histogram_str_builder(name, sample, labels, value, le=None):
+            # Build one expected line of the metrics text for the histogram.
+            if sample in ("count", "sum"):
+                return f"{name}_{sample}{{{labels}}} {value}"
+            if sample == "bucket":
+                return f'{name}_bucket{{{labels},le="{le}"}} {value}'
+            # Raise explicitly; a bare `raise` has no active exception here.
+            raise ValueError(f"Unsupported histogram sample type: {sample}")
+
+        def assert_buckets(metrics, expected):
+            # `expected` maps each "le" boundary to its expected sample value.
+            for le, count in expected.items():
+                self.assertIn(
+                    histogram_str_builder(name, "bucket", labels, count, le=le),
+                    metrics,
+                )
+
+        # Adding logger to test if custom metrics and logging work together
+        # as they use the same message queue.
+        logger = pb_utils.Logger
+
+        # All values should be 0 before the test
+        metrics = self._get_metrics()
+        self.assertIn(histogram_str_builder(name, "count", labels, "0"), metrics)
+        self.assertIn(histogram_str_builder(name, "sum", labels, "0"), metrics)
+        zero_buckets = dict.fromkeys(("0.1", "1", "2.5", "5", "10", "+Inf"), "0")
+        assert_buckets(metrics, zero_buckets)
+
+        # Histogram does not support value
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric.value()
+
+        # Test increment value
+        increment = 2023.0
+        # Histogram does not support increment
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric.increment(increment)
+
+        # Test set value
+        value = 999.9
+        # Histogram does not support set
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric.set(value)
+
+        # Test observe value
+        data = [0.05, 1.5, 6.0]
+        for datum in data:
+            metric.observe(datum)
+            logger.log_info("Observe histogram metric with value : {}".format(datum))
+
+        metrics = self._get_metrics()
+        self.assertIn(
+            histogram_str_builder(name, "count", labels, str(len(data))), metrics
+        )
+        self.assertIn(
+            histogram_str_builder(name, "sum", labels, str(sum(data))), metrics
+        )
+        # Bucket samples are cumulative: 0.05 <= 0.1, 1.5 <= 2.5, 6.0 <= 10.
+        assert_buckets(
+            metrics,
+            {
+                "0.1": "1",
+                "1": "1",
+                "2.5": "2",
+                "5": "2",
+                "10": "3",
+                "+Inf": "3",
+            },
+        )
+
     def _dup_metric_helper(self, labels={}):
         # Adding logger to test if custom metrics and logging work together
         # as they use the same message queue.
@@ -136,6 +226,27 @@ def test_gauge_e2e(self):
         metrics = self._get_metrics()
         self.assertIn(pattern, metrics)
 
+    def test_histogram_e2e(self):
+        name = "test_histogram_e2e"
+        metric_family = pb_utils.MetricFamily(
+            name=name,
+            description="test metric histogram kind end to end",
+            kind=pb_utils.MetricFamily.HISTOGRAM,
+        )
+        labels = {"example1": "counter_label1", "example2": "counter_label2"}
+        buckets = [0.1, 1.0, 2.5, 5.0, 10.0]
+        metric = metric_family.Metric(labels=labels, buckets=buckets)
+        labels_str = 'example1="counter_label1",example2="counter_label2"'
+        self._histogram_api_helper(metric, name, labels_str)
+
+        metrics = self._get_metrics()
+        count_pattern = f"{name}_count{{{labels_str}}}"
+        sum_pattern = f"{name}_sum{{{labels_str}}}"
+        bucket_pattern = f"{name}_bucket{{{labels_str}"
+        self.assertEqual(metrics.count(count_pattern), 1)
+        self.assertEqual(metrics.count(sum_pattern), 1)
+        self.assertEqual(metrics.count(bucket_pattern), len(buckets) + 1)
+
     def test_dup_metric_family_diff_kind(self):
         # Test that a duplicate metric family can't be added with a conflicting type/kind
         metric_family1 = pb_utils.MetricFamily(

From e405af117acb0f5602994ba87fc7c2958cbe1699 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 16 Aug 2024 15:51:28 -0700
Subject: [PATCH 4/7] Use kind-specific labels in e2e tests and test unsorted buckets

---
 qa/python_models/custom_metrics/model.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py
index ddc6d83c25..8b6a2715f9 100644
--- a/qa/python_models/custom_metrics/model.py
+++ b/qa/python_models/custom_metrics/model.py
@@ -218,11 +218,11 @@ def test_gauge_e2e(self):
             description="test metric gauge kind end to end",
             kind=pb_utils.MetricFamily.GAUGE,
         )
-        labels = {"example1": "counter_label1", "example2": "counter_label2"}
+        labels = {"example1": "gauge_label1", "example2": "gauge_label2"}
         metric = metric_family.Metric(labels=labels)
         self._metric_api_helper(metric, "gauge")
 
-        pattern = 'test_gauge_e2e{example1="counter_label1",example2="counter_label2"}'
+        pattern = 'test_gauge_e2e{example1="gauge_label1",example2="gauge_label2"}'
         metrics = self._get_metrics()
         self.assertIn(pattern, metrics)
 
@@ -233,10 +233,17 @@ def test_histogram_e2e(self):
             description="test metric histogram kind end to end",
             kind=pb_utils.MetricFamily.HISTOGRAM,
         )
-        labels = {"example1": "counter_label1", "example2": "counter_label2"}
+        labels = {"example1": "histogram_label1", "example2": "histogram_label2"}
+
+        # Test non-ascending order buckets
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric = metric_family.Metric(
+                labels=labels, buckets=[2.5, 0.1, 1.0, 10.0, 5.0]
+            )
+
         buckets = [0.1, 1.0, 2.5, 5.0, 10.0]
         metric = metric_family.Metric(labels=labels, buckets=buckets)
-        labels_str = 'example1="counter_label1",example2="counter_label2"'
+        labels_str = 'example1="histogram_label1",example2="histogram_label2"'
         self._histogram_api_helper(metric, name, labels_str)
 
         metrics = self._get_metrics()

From 8ff1ef1373b6fa8b1de706d24b907736f2d48c6a Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 16 Aug 2024 16:26:19 -0700
Subject: [PATCH 5/7] Add histogram args tests

---
 qa/python_models/custom_metrics/model.py | 28 ++++++++++++++++++------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py
index 8b6a2715f9..08459a4423 100644
--- a/qa/python_models/custom_metrics/model.py
+++ b/qa/python_models/custom_metrics/model.py
@@ -233,16 +233,11 @@ def test_histogram_e2e(self):
             description="test metric histogram kind end to end",
             kind=pb_utils.MetricFamily.HISTOGRAM,
         )
-        labels = {"example1": "histogram_label1", "example2": "histogram_label2"}
-
-        # Test non-ascending order buckets
-        with self.assertRaises(pb_utils.TritonModelException):
-            metric = metric_family.Metric(
-                labels=labels, buckets=[2.5, 0.1, 1.0, 10.0, 5.0]
-            )
 
+        labels = {"example1": "histogram_label1", "example2": "histogram_label2"}
         buckets = [0.1, 1.0, 2.5, 5.0, 10.0]
         metric = metric_family.Metric(labels=labels, buckets=buckets)
+
         labels_str = 'example1="histogram_label1",example2="histogram_label2"'
         self._histogram_api_helper(metric, name, labels_str)
 
@@ -254,6 +249,25 @@ def test_histogram_e2e(self):
         self.assertEqual(metrics.count(sum_pattern), 1)
         self.assertEqual(metrics.count(bucket_pattern), len(buckets) + 1)
 
+    def test_histogram_args(self):
+        name = "test_histogram_args"
+        metric_family = pb_utils.MetricFamily(
+            name=name,
+            description="test metric histogram args",
+            kind=pb_utils.MetricFamily.HISTOGRAM,
+        )
+
+        # Test non-ascending order buckets
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric_family.Metric(labels={}, buckets=[2.5, 0.1, 1.0, 10.0, 5.0])
+
+        # Test duplicate value buckets
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric_family.Metric(labels={}, buckets=[1, 1, 2, 5, 5])
+
+        # Test non-ascending order buckets
+        metric_family.Metric(labels={}, buckets=[])
+
     def test_dup_metric_family_diff_kind(self):
         # Test that a duplicate metric family can't be added with a conflicting type/kind
         metric_family1 = pb_utils.MetricFamily(

From d8dc61290951f2580c13e403e36060a57438c56b Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 16 Aug 2024 16:28:23 -0700
Subject: [PATCH 6/7] Test buckets==None

---
 qa/python_models/custom_metrics/model.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py
index 08459a4423..d397bad0c6 100644
--- a/qa/python_models/custom_metrics/model.py
+++ b/qa/python_models/custom_metrics/model.py
@@ -257,6 +257,12 @@ def test_histogram_args(self):
             kind=pb_utils.MetricFamily.HISTOGRAM,
         )
 
+        # Test none buckets
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric_family.Metric(labels={})
+        with self.assertRaises(pb_utils.TritonModelException):
+            metric_family.Metric(labels={}, buckets=None)
+
         # Test non-ascending order buckets
         with self.assertRaises(pb_utils.TritonModelException):
             metric_family.Metric(labels={}, buckets=[2.5, 0.1, 1.0, 10.0, 5.0])

From ebec81ae658bc79c01545e73d5274efc7be666a2 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 16 Aug 2024 17:01:37 -0700
Subject: [PATCH 7/7] Fix comment

---
 qa/python_models/custom_metrics/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qa/python_models/custom_metrics/model.py b/qa/python_models/custom_metrics/model.py
index d397bad0c6..7c78b46894 100644
--- a/qa/python_models/custom_metrics/model.py
+++ b/qa/python_models/custom_metrics/model.py
@@ -257,7 +257,7 @@ def test_histogram_args(self):
             kind=pb_utils.MetricFamily.HISTOGRAM,
         )
 
-        # Test none buckets
+        # Test "None" value buckets
         with self.assertRaises(pb_utils.TritonModelException):
             metric_family.Metric(labels={})
         with self.assertRaises(pb_utils.TritonModelException):
@@ -271,7 +271,7 @@ def test_histogram_args(self):
         with self.assertRaises(pb_utils.TritonModelException):
             metric_family.Metric(labels={}, buckets=[1, 1, 2, 5, 5])
 
-        # Test non-ascending order buckets
+        # Test empty list bucket
         metric_family.Metric(labels={}, buckets=[])
 
     def test_dup_metric_family_diff_kind(self):