diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py index 98df496275..20418b1450 100644 --- a/src/peft/utils/constants.py +++ b/src/peft/utils/constants.py @@ -65,6 +65,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): "mistral": ["input_layernorm", "post_attention_layernorm", "norm"], "phi": ["input_layernorm", "final_layernorm"], "gemma": ["input_layernorm", "post_attention_layernorm", "norm"], + "qwen2": ["post_attention_layernorm"], } TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = { @@ -99,6 +100,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): "stablelm": ["q_proj", "v_proj"], "phi": ["q_proj", "v_proj", "fc1", "fc2"], "gemma": ["q_proj", "v_proj"], + "qwen2": ["q_proj", "v_proj"], } TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = { @@ -124,6 +126,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): "falcon": ["query_key_value", "dense_4h_to_h"], "phi": ["q_proj", "v_proj", "fc2"], "gemma": ["q_proj", "v_proj", "down_proj"], + "qwen2": ["q_proj", "v_proj", "down_proj"], } TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING = { @@ -149,6 +152,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): "falcon": ["dense_4h_to_h"], "phi": ["fc2"], "gemma": ["down_proj"], + "qwen2": ["down_proj"], } TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = { @@ -170,6 +174,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): "gpt_bigcode": ["c_attn"], "deberta": ["in_proj"], # "layoutlm": ["query", "value"], + "qwen2": ["q_proj", "v_proj"], } TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = { @@ -205,6 +210,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values): # "phi": ["q_proj", "v_proj", "fc1", "fc2"], # tested, does not work because of different shapes "phi": ["q_proj", "v_proj"], # "gemma": ["q_proj", "v_proj"], # tested, does not work because of different shapes + "qwen2": ["q_proj", "v_proj"], } WEIGHTS_NAME = "adapter_model.bin" diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py index d0fa5a6ed8..bd54c7de67 100644 --- a/tests/test_decoder_models.py +++ b/tests/test_decoder_models.py @@ -33,6 +33,7 @@ "hf-internal-testing/tiny-random-GPTJForCausalLM", "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM", "trl-internal-testing/tiny-random-LlamaForCausalLM", + "peft-internal-testing/tiny-dummy-qwen2", ] FULL_GRID = {