From c9b19bb8f33c84ae26609df1b5fb3720ca4089e6 Mon Sep 17 00:00:00 2001
From: Benjamin Bossan
Date: Tue, 25 Jun 2024 13:33:28 +0200
Subject: [PATCH] FIX Init AdaLoRA to be identity transform (#1884)

Resolves #1836

There was an accidental change in a previous PR that initialized lora_E
as normal, when it should be zeros.
---
 src/peft/tuners/adalora/layer.py        |  2 +-
 tests/test_decoder_models.py            |  1 +
 tests/test_encoder_decoder_models.py    |  1 +
 tests/test_feature_extraction_models.py |  1 +
 tests/test_initialization.py            | 31 ++++++++++++++++++++++++-
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora/layer.py b/src/peft/tuners/adalora/layer.py
index e26534911c..a3a1334d18 100644
--- a/src/peft/tuners/adalora/layer.py
+++ b/src/peft/tuners/adalora/layer.py
@@ -78,7 +78,7 @@ def update_layer(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weig
 
     def reset_lora_parameters(self, adapter_name):
         if adapter_name in self.lora_A.keys():
-            nn.init.normal_(self.lora_E[adapter_name], mean=0.0, std=0.02)
+            nn.init.zeros_(self.lora_E[adapter_name])
             nn.init.normal_(self.lora_A[adapter_name], mean=0.0, std=0.02)
             nn.init.normal_(self.lora_B[adapter_name], mean=0.0, std=0.02)
 
diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
index 800c00e50b..d0fa5a6ed8 100644
--- a/tests/test_decoder_models.py
+++ b/tests/test_decoder_models.py
@@ -169,6 +169,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_DECODER_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "boft_kwargs": {"init_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py
index 2cbecf1fd0..f4fe0b7934 100644
--- a/tests/test_encoder_decoder_models.py
+++ b/tests/test_encoder_decoder_models.py
@@ -91,6 +91,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_ENCODER_DECODER_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
                 "task_type": "SEQ_2_SEQ_LM",
diff --git a/tests/test_feature_extraction_models.py b/tests/test_feature_extraction_models.py
index 58badcad17..850f4eb2b1 100644
--- a/tests/test_feature_extraction_models.py
+++ b/tests/test_feature_extraction_models.py
@@ -107,6 +107,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_FEATURE_EXTRACTION_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "boft_kwargs": {"init_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
diff --git a/tests/test_initialization.py b/tests/test_initialization.py
index 5958bb3f62..76f80725d2 100644
--- a/tests/test_initialization.py
+++ b/tests/test_initialization.py
@@ -25,7 +25,7 @@ class TestLoraInitialization:
-    """Test class to check the initialization of adapters."""
+    """Test class to check the initialization of LoRA adapters."""
 
     torch_device = infer_device()
 
@@ -520,6 +520,8 @@ def test_lora_use_dora_with_megatron_core_raises(self):
 
 
 class TestAdaLoraInitialization:
+    torch_device = infer_device()
+
     def test_adalora_target_modules_set(self):
         config = AdaLoraConfig(target_modules=["linear", "embed", "conv2d"])
         assert config.target_modules == {"linear", "embed", "conv2d"}
@@ -532,6 +534,33 @@ def test_adalora_loftq_config_raises(self):
         with pytest.raises(ValueError, match="ADALORA does not support LOFTQ"):
             AdaLoraConfig(loftq_config={"loftq": "config"})
 
+    def get_model(self):
+        class MyModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                # choose a large weight so that averages are close to expected values
+                self.linear = nn.Linear(1000, 1000)
+
+            def forward(self, x):
+                return self.linear(x)
+
+        return MyModule().eval().to(self.torch_device)
+
+    @pytest.fixture
+    def data(self):
+        return torch.rand(10, 1000).to(self.torch_device)
+
+    def test_adalora_default_init_identity(self, data):
+        # default is True
+        torch.manual_seed(0)
+
+        model = self.get_model()
+        output_before = model(data)
+        config = AdaLoraConfig(target_modules=["linear"])
+        model = get_peft_model(model, config)
+        output_after = model(data)
+        assert torch.allclose(output_before, output_after)
+
 
 class TestPromptTuningInitialization:
     torch_device = infer_device()
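
Note: a minimal sketch (not PEFT's actual implementation) of why zero-initializing
lora_E restores the identity transform. AdaLoRA parametrizes the weight update in
SVD style, roughly delta_W = lora_B @ (lora_E * lora_A), with lora_E playing the
role of the singular values (scaling and rank bookkeeping omitted). With lora_E all
zeros the update vanishes, so the adapted layer reproduces the base layer's output,
which is what the new test_adalora_default_init_identity asserts. Shapes and
variable names below are illustrative assumptions, not the library's exact internals.

import torch
import torch.nn as nn

torch.manual_seed(0)
in_features, out_features, r = 1000, 1000, 8

base = nn.Linear(in_features, out_features)

# SVD-style AdaLoRA factors (illustrative shapes; scaling/ranknum omitted)
lora_A = torch.empty(r, in_features)
lora_B = torch.empty(out_features, r)
lora_E = torch.zeros(r, 1)  # the fix: zeros instead of nn.init.normal_
nn.init.normal_(lora_A, mean=0.0, std=0.02)
nn.init.normal_(lora_B, mean=0.0, std=0.02)

x = torch.rand(10, in_features)
delta_w = lora_B @ (lora_E * lora_A)  # all zeros because lora_E is zero

output_before = base(x)
output_after = base(x) + x @ delta_w.T
assert torch.allclose(output_before, output_after)  # identity transform at init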