From 5b0eabe0e8ff3ac32b84e54b32c1cde757239e5e Mon Sep 17 00:00:00 2001 From: AlpinDale <52078762+AlpinDale@users.noreply.github.com> Date: Sat, 2 Nov 2024 12:08:30 -0700 Subject: [PATCH] fix: compilation of gptq_marlin_gemm object (#800) * fix: compilation of gptq_marlin_gemm object * add devlop to codespell ignore --- kernels/torch_bindings.cpp | 7 +------ pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/kernels/torch_bindings.cpp b/kernels/torch_bindings.cpp index 800af4952..cf63893ff 100644 --- a/kernels/torch_bindings.cpp +++ b/kernels/torch_bindings.cpp @@ -147,12 +147,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { ops.impl("gptq_marlin_24_gemm", torch::kCUDA, &gptq_marlin_24_gemm); // gptq_marlin Optimized Quantized GEMM for GPTQ. - ops.def( - "gptq_marlin_gemm(Tensor a, Tensor b_q_weight, Tensor b_scales, " - "Tensor b_zeros, Tensor g_idx, Tensor perm, Tensor workspace, " - "int b_q_type, " - "SymInt size_m, SymInt size_n, SymInt size_k, bool is_k_full, " - "bool has_zp, bool use_fp32_reduce, bool is_zp_float) -> Tensor"); + ops.def("gptq_marlin_gemm", &gptq_marlin_gemm); ops.impl("gptq_marlin_gemm", torch::kCUDA, &gptq_marlin_gemm); // gptq_marlin repack from GPTQ. diff --git a/pyproject.toml b/pyproject.toml index 88317ea81..6271ead07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ ignore = [ ] [tool.codespell] -ignore-words-list = "dout, te, indicies, ist, subtile, wit, whit, beseige" +ignore-words-list = "dout, te, indicies, ist, subtile, wit, whit, beseige, devlop" skip = "./tests/,./aphrodite/endpoints/kobold/klite.embd,./kernels/,./tests/benchmarks/sonnet.txt,./docs/" [tool.isort]