format

HabanaAI · Sep 3, 2024 · ce2afde
1 parent a344b4f
commit ce2afde
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 0 deletions.
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
@@ -23,6 +23,7 @@
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.utils import print_warning_once
+
 if current_platform.is_hpu():
     from vllm.hpu.ops import scaled_fp8_quant
     ops.scaled_fp8_quant = scaled_fp8_quant

diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -6,8 +6,10 @@
 from vllm import _custom_ops as ops
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
+
 if current_platform.is_hpu():
     import habana_frameworks.torch.utils.experimental as htexp
+
     from vllm.hpu.ops import scaled_fp8_quant
     ops.scaled_fp8_quant = scaled_fp8_quant
 

diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
@@ -54,6 +54,7 @@
 
 from .interfaces import SupportsLoRA
 from .utils import PPMissingLayer, is_pp_missing_parameter, make_layers
+
 if current_platform.is_hpu():
     import habana_frameworks.torch.core as htcore