[WIP] ENH Add support for Qwen2

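Add a "qwen2" entry to six default module mappings in src/peft/utils/constants.py (including the LoRA, IA3 target, IA3 feed-forward, AdaLoRA and VeRA target-module mappings), and add the tiny Qwen2 checkpoint "peft-internal-testing/tiny-dummy-qwen2" to the model list in tests/test_decoder_models.py.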
huggingface · Jul 4, 2024 · 6fc2447 · 6fc2447
1 parent 09358aa
commit 6fc2447
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 0 deletions.
diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py
@@ -65,6 +65,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  "mistral": ["input_layernorm", "post_attention_layernorm", "norm"],
  "phi": ["input_layernorm", "final_layernorm"],
  "gemma": ["input_layernorm", "post_attention_layernorm", "norm"],
+ "qwen2": ["post_attention_layernorm"],
 }
 
 TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
@@ -99,6 +100,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  "stablelm": ["q_proj", "v_proj"],
  "phi": ["q_proj", "v_proj", "fc1", "fc2"],
  "gemma": ["q_proj", "v_proj"],
+ "qwen2": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = {
@@ -124,6 +126,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  "falcon": ["query_key_value", "dense_4h_to_h"],
  "phi": ["q_proj", "v_proj", "fc2"],
  "gemma": ["q_proj", "v_proj", "down_proj"],
+ "qwen2": ["q_proj", "v_proj", "down_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING = {
@@ -149,6 +152,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  "falcon": ["dense_4h_to_h"],
  "phi": ["fc2"],
  "gemma": ["down_proj"],
+ "qwen2": ["down_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = {
@@ -170,6 +174,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  "gpt_bigcode": ["c_attn"],
  "deberta": ["in_proj"],
  # "layoutlm": ["query", "value"],
+ "qwen2": ["q_proj", "v_proj"],
 }
 
 TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = {
@@ -205,6 +210,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
  # "phi": ["q_proj", "v_proj", "fc1", "fc2"], # tested, does not work because of different shapes
  "phi": ["q_proj", "v_proj"],
  # "gemma": ["q_proj", "v_proj"], # tested, does not work because of different shapes
+ "qwen2": ["q_proj", "v_proj"],
 }
 
 WEIGHTS_NAME = "adapter_model.bin"

diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
@@ -33,6 +33,7 @@
  "hf-internal-testing/tiny-random-GPTJForCausalLM",
  "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM",
  "trl-internal-testing/tiny-random-LlamaForCausalLM",
+ "peft-internal-testing/tiny-dummy-qwen2",
 ]
 
 FULL_GRID = {