From 5b0eabe0e8ff3ac32b84e54b32c1cde757239e5e Mon Sep 17 00:00:00 2001
From: AlpinDale <52078762+AlpinDale@users.noreply.github.com>
Date: Sat, 2 Nov 2024 12:08:30 -0700
Subject: [PATCH] fix: compilation of gptq_marlin_gemm object (#800)

* fix: compilation of gptq_marlin_gemm object

* add "devlop" to the codespell ignore-words-list
---
 kernels/torch_bindings.cpp | 7 +------
 pyproject.toml             | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernels/torch_bindings.cpp b/kernels/torch_bindings.cpp
index 800af4952..cf63893ff 100644
--- a/kernels/torch_bindings.cpp
+++ b/kernels/torch_bindings.cpp
@@ -147,12 +147,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   ops.impl("gptq_marlin_24_gemm", torch::kCUDA, &gptq_marlin_24_gemm);
 
   // gptq_marlin Optimized Quantized GEMM for GPTQ.
-  ops.def(
-      "gptq_marlin_gemm(Tensor a, Tensor b_q_weight, Tensor b_scales, "
-      "Tensor b_zeros, Tensor g_idx, Tensor perm, Tensor workspace, "
-      "int b_q_type, "
-      "SymInt size_m, SymInt size_n, SymInt size_k, bool is_k_full, "
-      "bool has_zp, bool use_fp32_reduce, bool is_zp_float) -> Tensor");
+  ops.def("gptq_marlin_gemm", &gptq_marlin_gemm);
   ops.impl("gptq_marlin_gemm", torch::kCUDA, &gptq_marlin_gemm);
 
   // gptq_marlin repack from GPTQ.
diff --git a/pyproject.toml b/pyproject.toml
index 88317ea81..6271ead07 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ ignore = [
 ]
 
 [tool.codespell]
-ignore-words-list = "dout, te, indicies, ist, subtile, wit, whit, beseige"
+ignore-words-list = "dout, te, indicies, ist, subtile, wit, whit, beseige, devlop"
 skip = "./tests/,./aphrodite/endpoints/kobold/klite.embd,./kernels/,./tests/benchmarks/sonnet.txt,./docs/"
 
 [tool.isort]