Commit e92abd6

Update fp8.py
Yantom1 authored Sep 24, 2024
1 parent 26d8321 commit e92abd6
Showing 1 changed file with 1 addition and 1 deletion.
vllm/model_executor/layers/quantization/fp8.py (2 changes: 1 addition & 1 deletion)

@@ -126,7 +126,7 @@ def __init__(self, quant_config: Fp8Config):
         # For GPUs that lack FP8 hardware support, we can leverage the
         # Marlin kernel for fast weight-only FP8 quantization
         self.use_marlin = (not current_platform.has_device_capability(89)
-                           or envs.VLLM_TEST_FORCE_FP8_MARLIN)
+                           or envs.VLLM_TEST_FORCE_FP8_MARLIN)
         # Disable marlin for rocm
         if is_hip():
             self.use_marlin = False
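For context, the hunk above gates vLLM's fallback to the Marlin weight-only FP8 kernel. Below is a minimal runnable sketch of that gating logic; should_use_marlin, the integer capability encoding, and the raw os.environ read are hypothetical stand-ins for vLLM's current_platform.has_device_capability(89), envs.VLLM_TEST_FORCE_FP8_MARLIN, and is_hip(), not the actual implementation.

    import os

    def should_use_marlin(device_capability: int, is_rocm: bool) -> bool:
        # Compute capability encoded as major * 10 + minor; 89 is SM 8.9
        # (Ada), which has native FP8 hardware support.
        has_native_fp8 = device_capability >= 89
        # Hypothetical stand-in for envs.VLLM_TEST_FORCE_FP8_MARLIN: a
        # test-only override that forces the Marlin FP8 path.
        force_marlin = os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0") == "1"
        use_marlin = (not has_native_fp8) or force_marlin
        # Marlin is disabled on ROCm, matching the is_hip() branch above.
        if is_rocm:
            use_marlin = False
        return use_marlin

    # An A100 (SM 8.0) lacks FP8 hardware, so Marlin is selected; an H100
    # (SM 9.0) has native FP8 and skips Marlin unless the override is set.
    assert should_use_marlin(80, is_rocm=False)
    assert not should_use_marlin(90, is_rocm=False)

Judging by its name, the VLLM_TEST_FORCE_FP8_MARLIN override appears intended to let tests exercise the Marlin path even on FP8-capable GPUs.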
