Skip to content

Commit

Permalink
remove leftover fp8 code
Browse files (browse the repository at this point in the history)
  • Loading branch information
kzawora-intel committed Oct 4, 2024
1 parent d90bbce commit 84dc6c5
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 4 deletions.
5 changes: 2 additions & 3 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
 parser.add_argument(
 '--kv-cache-dtype',
 type=str,
-choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3', 'fp8_inc'],
+choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3'],
 default=EngineArgs.kv_cache_dtype,
 help='Data type for kv cache storage. If "auto", will use model '
 'data type. CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. '
-'ROCm (AMD GPU) supports fp8 (=fp8_e4m3). '
-'Intel Gaudi (HPU) supports fp8 (using fp8_inc).')
+'ROCm (AMD GPU) supports fp8 (=fp8_e4m3)')
 parser.add_argument(
 '--quantization-param-path',
 type=nullable_str,
Expand Down
1 change: 0 additions & 1 deletion vllm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@
 "fp8": torch.uint8,
 "fp8_e4m3": torch.uint8,
 "fp8_e5m2": torch.uint8,
-"fp8_inc": torch.float8_e4m3fn,
 }

 TORCH_DTYPE_TO_NUMPY_DTYPE = {
Expand Down

0 comments on commit 84dc6c5

Please sign in to comment.