diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 71aedeb01f3bf..88915942220ca 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -120,7 +120,7 @@ class Fp8LinearMethod(LinearMethodBase): def __init__(self, quant_config: Fp8Config): self.quant_config = quant_config - if current_platform.is_cuda_alike() + if current_platform.is_cuda_alike(): self.cutlass_fp8_supported = cutlass_fp8_supported() # For GPUs that lack FP8 hardware support, we can leverage the