[Bugfix] Guard for negative counter metrics to prevent crash (#10430)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
vllm-project · Nov 19, 2024 · 272e31c
1 parent 74f8c2c
commit 272e31c
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 1 deletion.
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
@@ -1716,7 +1716,7 @@ def _get_stats(self,
             # not counted (to avoid double counting)
             actual_num_batched_tokens = scheduler_outputs.num_batched_tokens  # type: ignore
 
-            num_generation_tokens_from_prefill_groups = 0.
+            num_generation_tokens_from_prefill_groups = 0
             # NOTE: if scheduler_outputs.num_prefill_groups > 0 and
             # the len of scheduler_outputs.scheduled_seq_groups is !=
             # scheduler_outputs.num_prefill_groups, this means that

diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
@@ -512,6 +512,11 @@ def _log_gauge(self, gauge, data: Union[int, float]) -> None:
 
     def _log_counter(self, counter, data: Union[int, float]) -> None:
         # Convenience function for logging to counter.
+        # Prevent ValueError from negative increment
+        if data < 0:
+            logger.warning("Skipping negative increment of %g to %s", data,
+                           counter)
+            return
         counter.labels(**self.labels).inc(data)
 
     def _log_counter_labels(self, counter, data: CollectionsCounter,