[Misc] Skip loading extra bias for Qwen2-VL GPTQ-Int8 (vllm-project#8442)
jeejeelee authored Sep 13, 2024
1 parent cab69a1 commit 06311e2
Showing 1 changed file with 6 additions and 0 deletions.
vllm/model_executor/models/qwen2_vl.py: 6 additions & 0 deletions
@@ -1055,6 +1055,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 if weight_name not in name:
                     continue
                 name = name.replace(weight_name, param_name)
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(param, loaded_weight, shard_id)
@@ -1078,6 +1081,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                     loaded_weight = loaded_weight.transpose(0, 1)
                     loaded_weight = loaded_weight.reshape(-1)
                 try:
+                    # Skip loading extra bias for GPTQ models.
+                    if name.endswith(".bias") and name not in params_dict:
+                        continue
                     param = params_dict[name]
                 except KeyError:
                     print(params_dict.keys())
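For context: a GPTQ-Int8 export of Qwen2-VL can still ship bias tensors for layers whose quantized parameters in vLLM register no ".bias" entry, so the unguarded params_dict lookup fails with a KeyError. The added check simply skips those tensors. Below is a minimal sketch of the pattern, using hypothetical params_dict / checkpoint_weights stand-ins rather than vLLM's actual loader:

import torch

# Hypothetical stand-ins (not vLLM's real structures): a GPTQ-quantized layer
# registers packed weights and scales, but no ".bias" parameter.
params_dict = {
    "visual.blocks.0.attn.qkv.qweight": torch.zeros(4, 8, dtype=torch.int32),
    "visual.blocks.0.attn.qkv.scales": torch.zeros(1, 8),
}

# A GPTQ-Int8 checkpoint may still contain an extra bias tensor for that layer.
checkpoint_weights = [
    ("visual.blocks.0.attn.qkv.qweight", torch.zeros(4, 8, dtype=torch.int32)),
    ("visual.blocks.0.attn.qkv.bias", torch.zeros(8)),
]

for name, loaded_weight in checkpoint_weights:
    # The guard added by this commit: skip bias tensors the model never
    # defines instead of failing on the params_dict lookup.
    if name.endswith(".bias") and name not in params_dict:
        continue
    param = params_dict[name]
    param.copy_(loaded_weight)  # stands in for vLLM's weight_loader call
    print(f"loaded {name} into parameter of shape {tuple(param.shape)}")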
