Skip to content

Commit

Permalink
Don't default to Marlin for AMD
Browse files Browse the repository at this point in the history
  • Loading branch information
Naomiusearch committed Oct 14, 2024
1 parent 5b5efb0 commit fb49379
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,4 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA" OR APHRODITE_GPU_LANG STREQUAL "HIP")

message(STATUS "Enabling moe extension.")
add_dependencies(default _moe_C)
endif()
endif()
5 changes: 5 additions & 0 deletions aphrodite/quantization/gptq_marlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
marlin_repeat_scales_on_all_ranks, marlin_sort_g_idx, replace_tensor,
verify_marlin_supported, verify_marlin_supports_shape)
from aphrodite.scalar_type import scalar_types
from aphrodite.common.utils import is_hip


class GPTQMarlinConfig(QuantizationConfig):
Expand Down Expand Up @@ -93,6 +94,9 @@ def override_quantization_method(cls, hf_quant_cfg,
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
or user_quant == "gptq_marlin")

if is_hip():
return None

if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime."
" Using {} kernel.".format(cls.get_name(), cls.get_name()))
Expand All @@ -105,6 +109,7 @@ def override_quantization_method(cls, hf_quant_cfg,
" so forcing gptq. Use quantization=gptq_marlin for"
" faster inference")
return None


def get_quant_method(self, layer: torch.nn.Module,
prefix: str) -> Optional["GPTQMarlinLinearMethod"]:
Expand Down

0 comments on commit fb49379

Please sign in to comment.