From c9b19bb8f33c84ae26609df1b5fb3720ca4089e6 Mon Sep 17 00:00:00 2001
From: Benjamin Bossan
Date: Tue, 25 Jun 2024 13:33:28 +0200
Subject: [PATCH] FIX Init AdaLoRA to be identity transform (#1884)

Resolves #1836

There was an accidental change in a previous PR that initialized lora_E
as normal, when it should be zeros.
---
 src/peft/tuners/adalora/layer.py        |  2 +-
 tests/test_decoder_models.py            |  1 +
 tests/test_encoder_decoder_models.py    |  1 +
 tests/test_feature_extraction_models.py |  1 +
 tests/test_initialization.py            | 31 ++++++++++++++++++++++++-
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora/layer.py b/src/peft/tuners/adalora/layer.py
index e26534911c..a3a1334d18 100644
--- a/src/peft/tuners/adalora/layer.py
+++ b/src/peft/tuners/adalora/layer.py
@@ -78,7 +78,7 @@ def update_layer(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weig
 
     def reset_lora_parameters(self, adapter_name):
         if adapter_name in self.lora_A.keys():
-            nn.init.normal_(self.lora_E[adapter_name], mean=0.0, std=0.02)
+            nn.init.zeros_(self.lora_E[adapter_name])
             nn.init.normal_(self.lora_A[adapter_name], mean=0.0, std=0.02)
             nn.init.normal_(self.lora_B[adapter_name], mean=0.0, std=0.02)
 
diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
index 800c00e50b..d0fa5a6ed8 100644
--- a/tests/test_decoder_models.py
+++ b/tests/test_decoder_models.py
@@ -169,6 +169,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_DECODER_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "boft_kwargs": {"init_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py
index 2cbecf1fd0..f4fe0b7934 100644
--- a/tests/test_encoder_decoder_models.py
+++ b/tests/test_encoder_decoder_models.py
@@ -91,6 +91,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_ENCODER_DECODER_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
                 "task_type": "SEQ_2_SEQ_LM",
diff --git a/tests/test_feature_extraction_models.py b/tests/test_feature_extraction_models.py
index 58badcad17..850f4eb2b1 100644
--- a/tests/test_feature_extraction_models.py
+++ b/tests/test_feature_extraction_models.py
@@ -107,6 +107,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
             {
                 "model_ids": PEFT_FEATURE_EXTRACTION_MODELS_TO_TEST,
                 "lora_kwargs": {"init_lora_weights": [False]},
+                "adalora_kwargs": {"init_lora_weights": [False]},
                 "ia3_kwargs": {"init_ia3_weights": [False]},
                 "boft_kwargs": {"init_weights": [False]},
                 "vera_kwargs": {"init_weights": [False]},
diff --git a/tests/test_initialization.py b/tests/test_initialization.py
index 5958bb3f62..76f80725d2 100644
--- a/tests/test_initialization.py
+++ b/tests/test_initialization.py
@@ -25,7 +25,7 @@ class TestLoraInitialization:
-    """Test class to check the initialization of adapters."""
+    """Test class to check the initialization of LoRA adapters."""
 
     torch_device = infer_device()
 
@@ -520,6 +520,8 @@ def test_lora_use_dora_with_megatron_core_raises(self):
 
 
 class TestAdaLoraInitialization:
+    torch_device = infer_device()
+
     def test_adalora_target_modules_set(self):
         config = AdaLoraConfig(target_modules=["linear", "embed", "conv2d"])
         assert config.target_modules == {"linear", "embed", "conv2d"}
@@ -532,6 +534,33 @@ def test_adalora_loftq_config_raises(self):
         with pytest.raises(ValueError, match="ADALORA does not support LOFTQ"):
             AdaLoraConfig(loftq_config={"loftq": "config"})
 
+    def get_model(self):
+        class MyModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                # choose a large weight so that averages are close to expected values
+                self.linear = nn.Linear(1000, 1000)
+
+            def forward(self, x):
+                return self.linear(x)
+
+        return MyModule().eval().to(self.torch_device)
+
+    @pytest.fixture
+    def data(self):
+        return torch.rand(10, 1000).to(self.torch_device)
+
+    def test_adalora_default_init_identity(self, data):
+        # default is True
+        torch.manual_seed(0)
+
+        model = self.get_model()
+        output_before = model(data)
+        config = AdaLoraConfig(target_modules=["linear"])
+        model = get_peft_model(model, config)
+        output_after = model(data)
+        assert torch.allclose(output_before, output_after)
+
 
 class TestPromptTuningInitialization:
     torch_device = infer_device()
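
Note: a minimal sketch (not PEFT's actual implementation) of why zero-initializing
lora_E restores the identity transform. AdaLoRA parametrizes the weight update in
SVD style, roughly delta_W = lora_B @ (lora_E * lora_A), with lora_E playing the
role of the singular values (scaling and rank bookkeeping omitted). With lora_E all
zeros the update vanishes, so the adapted layer reproduces the base layer's output,
which is what the new test_adalora_default_init_identity asserts. Shapes and
variable names below are illustrative assumptions, not the library's exact internals.

import torch
import torch.nn as nn

torch.manual_seed(0)
in_features, out_features, r = 1000, 1000, 8

base = nn.Linear(in_features, out_features)

# SVD-style AdaLoRA factors (illustrative shapes; scaling/ranknum omitted)
lora_A = torch.empty(r, in_features)
lora_B = torch.empty(out_features, r)
lora_E = torch.zeros(r, 1)  # the fix: zeros instead of nn.init.normal_
nn.init.normal_(lora_A, mean=0.0, std=0.02)
nn.init.normal_(lora_B, mean=0.0, std=0.02)

x = torch.rand(10, in_features)
delta_w = lora_B @ (lora_E * lora_A)  # all zeros because lora_E is zero

output_before = base(x)
output_after = base(x) + x @ delta_w.T
assert torch.allclose(output_before, output_after)  # identity transform at init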