Skip to content

Commit

Permalink
FIX Init AdaLoRA to be identity transform (#1884)
Browse files Browse the repository at this point in the history
Resolves #1836

A previous PR accidentally changed the initialization of lora_E to a normal
distribution, when it should be initialized to zeros so that the AdaLoRA
adapter starts out as an identity transform.
  • Loading branch information
BenjaminBossan committed Jun 25, 2024
1 parent ef23712 commit c9b19bb
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/peft/tuners/adalora/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def update_layer(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weig

def reset_lora_parameters(self, adapter_name):
if adapter_name in self.lora_A.keys():
nn.init.normal_(self.lora_E[adapter_name], mean=0.0, std=0.02)
nn.init.zeros_(self.lora_E[adapter_name])
nn.init.normal_(self.lora_A[adapter_name], mean=0.0, std=0.02)
nn.init.normal_(self.lora_B[adapter_name], mean=0.0, std=0.02)

Expand Down
1 change: 1 addition & 0 deletions tests/test_decoder_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
{
"model_ids": PEFT_DECODER_MODELS_TO_TEST,
"lora_kwargs": {"init_lora_weights": [False]},
"adalora_kwargs": {"init_lora_weights": [False]},
"ia3_kwargs": {"init_ia3_weights": [False]},
"boft_kwargs": {"init_weights": [False]},
"vera_kwargs": {"init_weights": [False]},
Expand Down
1 change: 1 addition & 0 deletions tests/test_encoder_decoder_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
{
"model_ids": PEFT_ENCODER_DECODER_MODELS_TO_TEST,
"lora_kwargs": {"init_lora_weights": [False]},
"adalora_kwargs": {"init_lora_weights": [False]},
"ia3_kwargs": {"init_ia3_weights": [False]},
"vera_kwargs": {"init_weights": [False]},
"task_type": "SEQ_2_SEQ_LM",
Expand Down
1 change: 1 addition & 0 deletions tests/test_feature_extraction_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
{
"model_ids": PEFT_FEATURE_EXTRACTION_MODELS_TO_TEST,
"lora_kwargs": {"init_lora_weights": [False]},
"adalora_kwargs": {"init_lora_weights": [False]},
"ia3_kwargs": {"init_ia3_weights": [False]},
"boft_kwargs": {"init_weights": [False]},
"vera_kwargs": {"init_weights": [False]},
Expand Down
31 changes: 30 additions & 1 deletion tests/test_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class TestLoraInitialization:
"""Test class to check the initialization of adapters."""
"""Test class to check the initialization of LoRA adapters."""

torch_device = infer_device()

Expand Down Expand Up @@ -520,6 +520,8 @@ def test_lora_use_dora_with_megatron_core_raises(self):


class TestAdaLoraInitialization:
torch_device = infer_device()

def test_adalora_target_modules_set(self):
config = AdaLoraConfig(target_modules=["linear", "embed", "conv2d"])
assert config.target_modules == {"linear", "embed", "conv2d"}
Expand All @@ -532,6 +534,33 @@ def test_adalora_loftq_config_raises(self):
with pytest.raises(ValueError, match="ADALORA does not support LOFTQ"):
AdaLoraConfig(loftq_config={"loftq": "config"})

def get_model(self):
class MyModule(nn.Module):
def __init__(self):
super().__init__()
# choose a large weight so that averages are close to expected values
self.linear = nn.Linear(1000, 1000)

def forward(self, x):
return self.linear(x)

return MyModule().eval().to(self.torch_device)

@pytest.fixture
def data(self):
return torch.rand(10, 1000).to(self.torch_device)

def test_adalora_default_init_identity(self, data):
# default is True
torch.manual_seed(0)

model = self.get_model()
output_before = model(data)
config = AdaLoraConfig(target_modules=["linear"])
model = get_peft_model(model, config)
output_after = model(data)
assert torch.allclose(output_before, output_after)


class TestPromptTuningInitialization:
torch_device = infer_device()
Expand Down

0 comments on commit c9b19bb

Please sign in to comment.