Commit e92abd6

Update fp8.py
Yantom1 authored Sep 24, 2024
1 parent 26d8321 commit e92abd6
Showing 1 changed file with 1 addition and 1 deletion.
vllm/model_executor/layers/quantization/fp8.py (2 changes: 1 addition & 1 deletion)

@@ -126,7 +126,7 @@ def __init__(self, quant_config: Fp8Config):
         # For GPUs that lack FP8 hardware support, we can leverage the
         # Marlin kernel for fast weight-only FP8 quantization
         self.use_marlin = (not current_platform.has_device_capability(89)
-                           or envs.VLLM_TEST_FORCE_FP8_MARLIN)
+                           or envs.VLLM_TEST_FORCE_FP8_MARLIN)
         # Disable marlin for rocm
         if is_hip():
             self.use_marlin = False
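For context, the hunk above gates vLLM's fallback to the Marlin weight-only FP8 kernel. Below is a minimal runnable sketch of that gating logic; should_use_marlin, the integer capability encoding, and the raw os.environ read are hypothetical stand-ins for vLLM's current_platform.has_device_capability(89), envs.VLLM_TEST_FORCE_FP8_MARLIN, and is_hip(), not the actual implementation.

    import os

    def should_use_marlin(device_capability: int, is_rocm: bool) -> bool:
        # Compute capability encoded as major * 10 + minor; 89 is SM 8.9
        # (Ada), which has native FP8 hardware support.
        has_native_fp8 = device_capability >= 89
        # Hypothetical stand-in for envs.VLLM_TEST_FORCE_FP8_MARLIN: a
        # test-only override that forces the Marlin FP8 path.
        force_marlin = os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0") == "1"
        use_marlin = (not has_native_fp8) or force_marlin
        # Marlin is disabled on ROCm, matching the is_hip() branch above.
        if is_rocm:
            use_marlin = False
        return use_marlin

    # An A100 (SM 8.0) lacks FP8 hardware, so Marlin is selected; an H100
    # (SM 9.0) has native FP8 and skips Marlin unless the override is set.
    assert should_use_marlin(80, is_rocm=False)
    assert not should_use_marlin(90, is_rocm=False)

Judging by its name, the VLLM_TEST_FORCE_FP8_MARLIN override appears intended to let tests exercise the Marlin path even on FP8-capable GPUs.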
