Skip to content

Commit

Permalink
Don't default to Marlin for AMD
Browse files Browse the repository at this point in the history
  • Loading branch information
Naomiusearch committed Oct 14, 2024
1 parent 5b5efb0 commit fb49379
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,4 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA" OR APHRODITE_GPU_LANG STREQUAL "HIP")

message(STATUS "Enabling moe extension.")
add_dependencies(default _moe_C)
endif()
endif()
5 changes: 5 additions & 0 deletions aphrodite/quantization/gptq_marlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
marlin_repeat_scales_on_all_ranks, marlin_sort_g_idx, replace_tensor,
verify_marlin_supported, verify_marlin_supports_shape)
from aphrodite.scalar_type import scalar_types
from aphrodite.common.utils import is_hip


class GPTQMarlinConfig(QuantizationConfig):
Expand Down Expand Up @@ -93,6 +94,9 @@ def override_quantization_method(cls, hf_quant_cfg,
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
or user_quant == "gptq_marlin")

if is_hip():
return None

if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime."
" Using {} kernel.".format(cls.get_name(), cls.get_name()))
Expand All @@ -105,6 +109,7 @@ def override_quantization_method(cls, hf_quant_cfg,
" so forcing gptq. Use quantization=gptq_marlin for"
" faster inference")
return None


def get_quant_method(self, layer: torch.nn.Module,
prefix: str) -> Optional["GPTQMarlinLinearMethod"]:
Expand Down

0 comments on commit fb49379

Please sign in to comment.