remove leftover fp8 code

HabanaAI · Oct 4, 2024 · 84dc6c5 · 84dc6c5
1 parent d90bbce
commit 84dc6c5
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 4 deletions.
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -299,12 +299,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument(
             '--kv-cache-dtype',
             type=str,
-            choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3', 'fp8_inc'],
+            choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3'],
             default=EngineArgs.kv_cache_dtype,
             help='Data type for kv cache storage. If "auto", will use model '
             'data type. CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. '
-            'ROCm (AMD GPU) supports fp8 (=fp8_e4m3). '
-            'Intel Gaudi (HPU) supports fp8 (using fp8_inc).')
+            'ROCm (AMD GPU) supports fp8 (=fp8_e4m3)')
         parser.add_argument(
             '--quantization-param-path',
             type=nullable_str,

diff --git a/vllm/utils.py b/vllm/utils.py
@@ -132,7 +132,6 @@
     "fp8": torch.uint8,
     "fp8_e4m3": torch.uint8,
     "fp8_e5m2": torch.uint8,
-    "fp8_inc": torch.float8_e4m3fn,
 }
 
 TORCH_DTYPE_TO_NUMPY_DTYPE = {