From 9aeb69c02bac1842d4094c68bebca86d1846ea82 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sun, 18 Feb 2024 17:31:37 -0500 Subject: [PATCH 001/182] Initial commit of integration --- src/peft/mapping.py | 4 + src/peft/peft_model.py | 36 ++- src/peft/xlora/__init__.py | 0 src/peft/xlora/classifier.py | 320 +++++++++++++++++++++++++++ src/peft/xlora/config.py | 82 +++++++ src/peft/xlora/insertion.py | 418 +++++++++++++++++++++++++++++++++++ src/peft/xlora/model.py | 278 +++++++++++++++++++++++ 7 files changed, 1137 insertions(+), 1 deletion(-) create mode 100644 src/peft/xlora/__init__.py create mode 100644 src/peft/xlora/classifier.py create mode 100644 src/peft/xlora/config.py create mode 100644 src/peft/xlora/insertion.py create mode 100644 src/peft/xlora/model.py diff --git a/src/peft/mapping.py b/src/peft/mapping.py index b62ddf94aa..5e2d585c7c 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -49,8 +49,10 @@ PrefixTuningConfig, PromptEncoderConfig, PromptTuningConfig, + xLoRAConfig, ) from .utils import _prepare_prompt_learning_config +from .xlora import xLoRAModel if TYPE_CHECKING: @@ -64,6 +66,7 @@ "TOKEN_CLS": PeftModelForTokenClassification, "QUESTION_ANS": PeftModelForQuestionAnswering, "FEATURE_EXTRACTION": PeftModelForFeatureExtraction, + "XLORA": xLoRAModel, } PEFT_TYPE_TO_CONFIG_MAPPING: dict[str, PeftConfig] = { @@ -79,6 +82,7 @@ "MULTITASK_PROMPT_TUNING": MultitaskPromptTuningConfig, "OFT": OFTConfig, "POLY": PolyConfig, + "XLORA": xLoRAConfig, } PEFT_TYPE_TO_TUNER_MAPPING = { diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 512d51b0ce..2be9a781de 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -36,6 +36,8 @@ from transformers.modeling_outputs import QuestionAnsweringModelOutput, SequenceClassifierOutput, TokenClassifierOutput from transformers.utils import PushToHubMixin +from peft.xlora.classifier import xLoRAClassifier + from . import __version__ from .config import PeftConfig from .tuners import ( @@ -70,6 +72,10 @@ set_peft_model_state_dict, shift_tokens_right, ) +from .xlora import _get_file_path_dir as xlora_get_file_path_dir +from .xlora import _load_classifier_weights as xlora_load_classifier_weights +from .xlora.config import xLoRAConfig +from .xlora.model import xLoRAModel PEFT_TYPE_TO_MODEL_MAPPING = { @@ -385,7 +391,35 @@ def from_pretrained( model = cls(model, config, adapter_name) else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) + + if isinstance(model, xLoRAModel): + assert isinstance(model, PreTrainedModel) + assert isinstance(config, xLoRAConfig) + + device = infer_device() # As inn PeftModel.load_adapter, torch_device = infer_device( + config.device = torch.device(device) + + # If we are passed adapters in the kwargs, it is already in the config. 
+ # If no adapters are passed, config.adapters is None + if config.adapters is None or config.use_trainable_adapters: + adapters_real: dict[str, str] = { + name: xlora_get_file_path_dir( + model_id, + name, + "adapters", + ) + for name in config.adapters + } + else: + assert isinstance(config.adapters, dict) + adapters_real = config.adapters + config.adapters = adapters_real + + classifier: xLoRAClassifier = model.internal_xlora_classifier # type: ignore + classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore + else: + model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) + return model def _setup_prompt_encoder(self, adapter_name: str): diff --git a/src/peft/xlora/__init__.py b/src/peft/xlora/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/peft/xlora/classifier.py b/src/peft/xlora/classifier.py new file mode 100644 index 0000000000..2fcd6f64c5 --- /dev/null +++ b/src/peft/xlora/classifier.py @@ -0,0 +1,320 @@ +import builtins +import json +import typing +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Union + +import numpy +import torch +import torch.nn as nn +from transformers.modeling_outputs import ( # type: ignore + ModelOutput, +) + +from peft.peft_model import PeftModel + +from .config import xLoRAConfig + + +Number = Union[builtins.int, builtins.float, builtins.bool] + + +class TemperatureScaledSoftmax(nn.Module): + def __init__(self, temperature=1.0): + super().__init__() + self.temperature = temperature + self.softmax = nn.Softmax(dim=-1) + + def forward(self, logits): + # Scale logits by the temperature + scaled_logits = logits / self.temperature + # Apply softmax to the scaled logits + return self.softmax(scaled_logits) + + +@dataclass +class InhibitorFlagPayload: + batch_size: int + seq_len: int + override_scaling_pass_value: Number + + +class xLoRAClassifier(nn.Module): + """ + A classifier to select LoRA layers for xLoRA. 
+ """ + + def __init__( + self, + model: PeftModel, + config: xLoRAConfig, + n_classes: int, + n_layers: int, + ): + super().__init__() + + # To avoid registering this with nn.Module + self.__dict__["model"] = model + self.n_classes = n_classes + self.n_layers = n_layers + self.config = config + self.log_scalings: List[torch.Tensor] = [] + self.softmax = TemperatureScaledSoftmax(temperature=self.config.softmax_temperature) + self.override_scaling_pass_value: Number = config.scaling_pass_value + + self.n_predictions_lifetime = 0 + self.scalings_logging = False + + dtype = next(model.parameters()).dtype + bias_flag = config.use_bias + + self.inner: nn.ModuleList = nn.ModuleList([]) + if self.config.xlora_depth == 1: + if config.layerwise_scalings: # bias=False if we have just one layer + self.last = ( + nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + ) + else: + self.last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + elif self.config.xlora_depth == 2: + self.inner.append( + nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) + ) + + if config.enable_relu_and_dropout: + self.inner.append(nn.ReLU()) + self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + + if config.layerwise_scalings: + self.last = ( + nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + ) + else: + self.last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + else: + assert self.config.xlora_depth > 0 + self.inner.append( + nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) + ) + + if config.enable_relu_and_dropout: + self.inner.append(nn.ReLU()) + self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + + for _ in range(config.xlora_depth - 2): + self.inner.append( + nn.Linear(config.xlora_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) + ) + + if config.enable_relu_and_dropout: + self.inner.append(nn.ReLU()) + self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + + if config.layerwise_scalings: + self.last = ( + nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + ) + else: + self.last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + *args, + **kwargs, + ) -> torch.Tensor: + """ + Using the hidden states of the model, predict `n_classes` LoRA alpha values. Sets the scalings. 
+ """ + if input_ids is not None: + batch_size = input_ids.shape[0] + else: + batch_size = typing.cast(torch.FloatTensor, inputs_embeds).shape[0] + + if input_ids is not None: + seq_len = input_ids.shape[1] + else: + seq_len = typing.cast(torch.FloatTensor, inputs_embeds).shape[1] + + # For type checking + model: PeftModel = self.model # type: ignore + with torch.no_grad(): + with model.disable_adapter(): + # TODO(EricLBuehler): Pending removal following analysis + """ + for module in model.base_model.modules(): + if isinstance(module.forward.__self__, xLoRALayer): + inst = module.forward.__self__ + inst.disabled = True # Disable it + """ + + kwargs["output_hidden_states"] = True + kwargs["return_dict"] = True + + result: ModelOutput = model.forward( + *args, + input_ids=input_ids, + inputs_embeds=inputs_embeds, + _xlora_classifier_inhibitor_flag=InhibitorFlagPayload( + batch_size=batch_size, + seq_len=seq_len, + override_scaling_pass_value=self.override_scaling_pass_value, + ), + **kwargs, + ) + + # TODO(EricLBuehler): Pending removal following analysis + """ + # Enable the xLoRALayers + for module in model.base_model.modules(): + if isinstance(module.forward.__self__, xLoRALayer): + inst = module.forward.__self__ + inst.disabled = False # Disable it + """ + + hidden_states = result.hidden_states # type: ignore + + assert hidden_states is not None + hidden_state = hidden_states[-1] # Get the last hidden state + + ### Calculate the sequence lengths + + # TODO(all): Pending removal following analysis + """ + # hidden_state=[batch_size, seq_len, hidden_size] + if self.config.stop_token_id is None: # Calculate via attention mask + if input_ids is not None: + assert attention_mask is not None, ( + "Stop token id was not provided, so sequence length calculation via attention mask was attempted" + + "but the attention mask was not given" + ) + sequence_lengths: Union[int, torch.Tensor] = torch.eq(attention_mask, 0).int().argmax(-1) - 1 + sequence_lengths = sequence_lengths % input_ids.shape[-1] + sequence_lengths = sequence_lengths.to(hidden_state.device) # type: ignore + else: + sequence_lengths = -1 + else: # Calculate via stop token id + if input_ids is not None: + # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility + sequence_lengths = torch.eq(input_ids, self.config.stop_token_id).int().argmax(-1) - 1 + sequence_lengths = sequence_lengths % input_ids.shape[-1] + sequence_lengths = sequence_lengths.to(hidden_state.device) # type: ignore + else: + sequence_lengths = -1 + + # AFTER THIS: hidden_state=[batch_size, hidden_size] + if self.config.use_mean_pool: + assert isinstance(sequence_lengths, torch.Tensor) + max_length = hidden_state.shape[1] + mask = torch.arange(max_length).expand(len(sequence_lengths), max_length).to( + hidden_state.device + ) < sequence_lengths.unsqueeze(1) + + # Mask the hidden_states + masked_hidden_state = hidden_state * mask.unsqueeze(-1) + + # Sum across the sequence length and divide by actual sequence length + summed = torch.sum(masked_hidden_state, dim=1) + hidden_state = summed / sequence_lengths.unsqueeze(1) + else: + # Get it for the last token + hidden_state = hidden_state[torch.arange(batch_size, device=hidden_state.device), sequence_lengths] + """ + + ### Classifier run + # hidden_state=[batch_size, seq_len, hidden_size] + for layer in self.inner: + hidden_state = layer.forward(hidden_state) + + logits = self.last.forward(hidden_state) + + ### Repeat to make layerwise scalings if the classifier layer does not + if not 
self.config.layerwise_scalings: + logits = logits.unsqueeze(2) + logits = logits.expand(-1, -1, self.n_layers, -1) + + ### Classifier run + + scalings = logits.reshape(batch_size, seq_len, self.n_layers, self.n_classes) + # scalings = [batch_size, seq_len, n_layers, n_classes] + + if self.config.enable_softmax: + scalings = self.softmax(scalings) + + if self.n_predictions_lifetime > 0: + print(f"Scaling predictions: {scalings}") + self.n_predictions_lifetime -= 1 + + if self.scalings_logging: + self.log_scalings.append(scalings) + + return scalings + + def get_nb_trainable_parameters(self): + # https://github.com/huggingface/peft/blob/main/src/peft/mixed_model.py#L156 + r""" + Returns the number of trainable parameters and number of all parameters in the model. + """ + trainable_params = 0 + all_param = 0 + for _, param in self.named_parameters(): + num_params = param.numel() + # if using DS Zero 3 and the weights are initialized empty + if num_params == 0 and hasattr(param, "ds_numel"): + num_params = param.ds_numel # type: ignore + + # Due to the design of 4bit linear layers from bitsandbytes + # one needs to multiply the number of parameters by 2 to get + # the correct number of parameters + if param.__class__.__name__ == "Params4bit": + num_params = num_params * 2 + + all_param += num_params + if param.requires_grad: + trainable_params += num_params + + return trainable_params, all_param + + @staticmethod + def _save_scalings(file: str, scalings: List[torch.Tensor]): + result = torch.cat(scalings, dim=0) + npy = result.numpy() + numpy.save(file, npy) + + def flush_log_scalings(self, path: str): + if not self.scalings_logging: + raise Exception("Scalings logging is disabled!") + + if len(self.log_scalings) == 0: + raise ValueError("No log scalings to flush.") + + seqlens_map: Dict[int, Tuple[List[int], List[torch.Tensor]]] = {} + for i, scaling in enumerate(self.log_scalings): + seq_len = scaling.shape[0] + if seq_len not in seqlens_map: + seqlens_map[seq_len] = ([i], [scaling]) + else: + seqlens_map[seq_len][0].append(i) + seqlens_map[seq_len][1].append(scaling) + + if len(seqlens_map) == 1: + self._save_scalings(path, [scaling.unsqueeze(0) for scaling in self.log_scalings]) + else: + indices_map: Dict[str, List[int]] = {} + for seq_len, (indices, scalings_list) in seqlens_map.items(): + indices_map[f"{path}-{seq_len}.npy"] = indices + + self._save_scalings(f"{path}-{seq_len}", [scaling.unsqueeze(0) for scaling in scalings_list]) + + with open(f"{path}-mapping.json", "w") as f: + f.write(json.dumps(indices_map)) + + self.log_scalings = [] + + def set_override_scaling_pass_value(self, value: Union[Number, None]): + if value is None: + self.override_scaling_pass_value = 1 / self.n_classes + else: + self.override_scaling_pass_value = value + self.config.scaling_pass_value = self.override_scaling_pass_value diff --git a/src/peft/xlora/config.py b/src/peft/xlora/config.py new file mode 100644 index 0000000000..7752dc2dd4 --- /dev/null +++ b/src/peft/xlora/config.py @@ -0,0 +1,82 @@ +import warnings +from dataclasses import dataclass +from typing import Dict, Optional + +import torch + + +@dataclass +class xLoRAConfig: + r""" + This is the configuration class to store the configuration of a [`xLoRAClassifier`]. + When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys + matter during loading. + + Args: + hidden_size (`int`): + Hidden size of the base model. 
device (`torch.device`):
+            Device for the X-LoRA classifier.
+        adapters (`dict`):
+            Mapping of adapter names to the LoRA adapter id, as per `PeftModel.load_adapter`. *They will be automatically loaded*, to be used as LoRA experts.
+        enable_softmax (`bool`, *optional*, defaults to `True`):
+            Enable softmax application for the X-LoRA classifier.
+        enable_softmax_topk (`bool`, *optional*, defaults to `False`):
+            Enable softmax application for the top-k LoRA adapters. Mutually exclusive with `enable_softmax` and must only be set if `top_k_lora` is set.
+        softmax_temperature (`float`, *optional*, defaults to 1.0):
+            Softmax temperature; lower values yield sharper predictions.
+        layerwise_scalings (`bool`, *optional*, defaults to `False`):
+            Generate scalings for each layer.
+        top_k_lora (`int`, *optional*, defaults to None):
+            Sparsely select the top_k LoRA experts instead of the default dense method.
+        xlora_depth (`int`, *optional*, defaults to 1):
+            Depth of the X-LoRA classifier.
+        xlora_size (`int`, *optional*, defaults to 2048):
+            Hidden size of the X-LoRA classifier, irrelevant if `xlora_depth=1`.
+        enable_relu_and_dropout (`bool`, *optional*, defaults to `False`):
+            Enable ReLU activation and Dropout application of the X-LoRA classifier.
+        use_bias (`bool`, *optional*, defaults to `True`):
+            Enable bias in the X-LoRA classifier.
+        xlora_dropout_p (`float`, *optional*, defaults to 0.2):
+            Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or `enable_relu_and_dropout=False`.
+        stop_token_id (`int`, *optional*):
+            The id of the stop token for the input. If this is None, the sequence length is calculated using the attention mask.
+        use_trainable_adapters (`bool`, *optional*, defaults to False):
+            Make the adapters trainable.
+        scaling_pass_value (`float`, *optional*, defaults to 0):
+            Value to which the scalings are set during the scaling (classifier) pass.
+        global_scaling_weight (`float`, *optional*, defaults to 1):
+            Weight to multiply the output of each LoRA adapter by.
+    """
+
+    model_type = "xlora"
+
+    hidden_size: int
+    device: torch.device
+    adapters: Dict[str, str]
+    enable_softmax: bool = True
+    enable_softmax_topk: bool = False
+    layerwise_scalings: bool = False
+    xlora_depth: int = 1
+    xlora_size: int = 2048
+    enable_relu_and_dropout: bool = False
+    use_bias: bool = True
+    xlora_dropout_p: float = 0.2
+    stop_token_id: Optional[int] = None
+    use_trainable_adapters: bool = False
+    softmax_temperature: float = 1.0
+    top_k_lora: Optional[int] = None
+    scaling_pass_value: float = 0.0
+    global_scaling_weight: float = 1.0
+
+    def __post_init__(self):
+        if self.enable_softmax_topk and self.top_k_lora is None:
+            warnings.warn("`enable_softmax_topk` is enabled but `top_k_lora` is not set")
+
+        if self.enable_softmax_topk and self.enable_softmax:
+            warnings.warn(
+                "`enable_softmax_topk` and `enable_softmax` are both enabled. This will result in worse performance."
+ ) + + if self.top_k_lora is not None and self.top_k_lora < 1: + warnings.warn("`top_k_lora` value must be at least 1.") diff --git a/src/peft/xlora/insertion.py b/src/peft/xlora/insertion.py new file mode 100644 index 0000000000..ba183c9370 --- /dev/null +++ b/src/peft/xlora/insertion.py @@ -0,0 +1,418 @@ +import json +import os +from typing import Any, Callable, List, Optional, Tuple, Union + +import torch +from safetensors.torch import save_model # type: ignore +from torch import Tensor + +from peft.peft_model import PeftModel +from peft.tuners import lora +from peft.tuners.tuners_utils import BaseTuner # type: ignore + +from .classifier import Number, xLoRAClassifier +from .config import xLoRAConfig + + +class xLoRALayer: + """ + A xLoRALayer wraps any LoraLayer and performs the xLoRA operation on the LoRA adaptors specified. + Its primary API is the forward method, which uses the scalings to execute the + xLoRA algorithm. + """ + + __slots__ = {"model", "target_forward", "target", "layer_number", "disabled", "config"} + + def __init__( + self, + model: PeftModel, + target: lora.LoraLayer, + target_forward: Callable[..., Any], + layer_number: int, + config: xLoRAConfig, + ) -> None: + self.model = model + self.target_forward = target_forward + self.target = target + self.layer_number = layer_number + self.disabled = False # TODO(EricLBuehler): Pending removal following analysis + self.config = config + + @staticmethod + def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: + # scalings_layer = [batch_size, seq_len, n_classes] + scalings = scalings_layer[:, :, adapter].unsqueeze(-1) + # scalings_layer = [batch_size, seq_len, 1] + return x * scalings + + def get_maybe_topk_scalings(self) -> torch.Tensor: + # xlora_scalings = [batch_size, seq_len, n_classes] + xlora_scalings: Tensor = self.model.internal_xlora_scalings[:, :, self.layer_number, :] # type: ignore + + if self.config.top_k_lora is not None: + _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=1) + + # Mask the topk to True, the rest to False + mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) + mask.scatter_(1, topk_indices, True) + + xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) + + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + if classifier.config.enable_softmax_topk: + nonzero_mask = xlora_scalings != 0 + softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) + xlora_scalings[nonzero_mask] = softmax_res_nonzero + + return xlora_scalings + + +class xLoRALinearLayer(xLoRALayer): + def __init__( + self, + model: PeftModel, + target: lora.Linear, + target_forward: Callable[..., Any], + layer_number: int, + config: xLoRAConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the xLoRALayer class). 
+ """ + + previous_dtype = x.dtype + xlora_scalings = self.get_maybe_topk_scalings() + + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_A.keys(): + continue + lora_A = self.target.lora_A[active_adapter] + lora_B = self.target.lora_B[active_adapter] + dropout = self.target.lora_dropout[active_adapter] + scaling = self.target.scaling[active_adapter] + x = x.to(lora_A.weight.dtype) # type: ignore + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + + result = result.to(previous_dtype) + return result + + +class xLoRAEmbeddingLayer(xLoRALayer): + def __init__( + self, + model: PeftModel, + target: lora.Embedding, + target_forward: Callable[..., Any], + layer_number: int, + config: xLoRAConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the xLoRALayer class). + """ + + xlora_scalings = self.get_maybe_topk_scalings() + + # TODO: no dtype conversion here, unlike in Linear, is that correct? + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_embedding_A: + continue + embedding_A = self.target.lora_embedding_A[active_adapter].T + embedding_B = self.target.lora_embedding_B[active_adapter].T + scaling = self.target.scaling[active_adapter] + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + after_A = self.target._embed(x_mod, embedding_A) # type: ignore + result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight + + return result + + +class xLoRAConv2dLayer(xLoRALayer): + def __init__( + self, + model: PeftModel, + target: lora.Conv2d, + target_forward: Callable[..., Any], + layer_number: int, + config: xLoRAConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the xLoRALayer class). 
+ """ + + previous_dtype = x.dtype + xlora_scalings = self.get_maybe_topk_scalings() + + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_A.keys(): + continue + lora_A = self.target.lora_A[active_adapter] + lora_B = self.target.lora_B[active_adapter] + dropout = self.target.lora_dropout[active_adapter] + scaling = self.target.scaling[active_adapter] + x = x.to(lora_A.weight.dtype) # type: ignore + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + + result = result.to(previous_dtype) + return result + + +class BaseTunerWrapper: + def __init__(self, base_model: BaseTuner, classifier: xLoRAClassifier): + self.model = base_model.model + self.classifier = classifier + + def forward(self, *args, **kwargs): + return self.model(*args, **kwargs) # Important to *call* the model + + +class PeftModelWrapper: + def __init__( + self, + base_model: PeftModel, + base_model_save: Callable[..., None], + config: xLoRAConfig, + base_model_get_nb_trainable_parameters: Callable[..., Tuple[int, int]], + base_model_generate: Callable[..., Any], + ): + self.model = base_model + self.base_model_save = base_model_save + self.config = config + self.base_model_get_nb_trainable_parameters = base_model_get_nb_trainable_parameters + self.base_model_generate = base_model_generate + + def generate(self, *args, **kwargs): + res = self.base_model_generate(*args, **kwargs) # type: ignore + # TODO(EricLBuehler): Evaluate effectiveness and performance degradation + self.model.base_model.eval() + if not self.config.use_trainable_adapters: + for name, param in self.model.base_model.named_parameters(): + if "lora_" in name: + param.requires_grad = False + return res + + def set_topk_lora(self, value: Optional[int]): + """ + Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.config.top_k_lora = value + + def get_topk_lora(self) -> Optional[int]: + """ + Get the current top_k LoRA experts value. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + return classifier.config.top_k_lora + + def set_global_scaling_weight(self, weight: float): + """ + Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.config.global_scaling_weight = weight + + def get_global_scaling_weight(self) -> float: + """ + Get the global LoRA weight. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + return classifier.config.global_scaling_weight + + def get_latest_scalings(self) -> Optional[Tensor]: + """ + Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape (batch_size, seq_len, n_layers, n_classes). 
+ """ + return self.model.internal_xlora_scalings + + def get_scalings_log(self) -> List[Tensor]: + """ + Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. + The tensors are of shape (batch_size, seq_len, n_layers, n_classes). The seq_len dim may vary with input dimension. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + return classifier.log_scalings.copy() + + def set_scaling_pass_value(self, value: Union[Number, None]): + """ + Manually set the scalings to a specific value during the scaling pass, forever. Call this function with None to enable the default + scalings. + + This is reflected in the config. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.set_override_scaling_pass_value(value) + + def print_scalings_predictions(self, n_predictions_lifetime: int): + """ + Print the scaling states for the next n classifier predictions (i.e. forward, generate passes) + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.n_predictions_lifetime = n_predictions_lifetime + + def enable_scalings_logging(self): + """ + Enable scalings logging. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.scalings_logging = True + + def disable_scalings_logging(self): + """ + Disable scalings logging, clearing the log. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.scalings_logging = False + classifier.log_scalings = [] + + def flush_log_scalings(self, path: str): + """ + Write the scalings log (a tensor of shape (num_logged, batch_size, seq_len, n_layers, n_classes)) to the specified path. + If the tensor cannot be constructed, multiple files are written containing tensors of shape + (num_logged, batch_size, seq_len, n_layers, n_classes) such that each file contains one sequence length. Additionally a JSON + file is outputted containing the mapping from each sequence log file to the index of the contained tensor so that one may reconstruct + the log order. + + The file specified should not contain an extension. + """ + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier.flush_log_scalings(path) + + def get_nb_trainable_parameters(self) -> Tuple[int, int]: + """ + Returns the number of trainable parameters and number of all parameters in the model. + """ + model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() + + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() + + trainable_params, all_param = ( + (model_trainable_params + xlora_trainable_params), + (model_all_param + xlora_all_param), + ) + + return trainable_params, all_param + + def print_trainable_parameters(self): + """ + Prints the number of trainable parameters in the model, including of the xLoRA classifier. + """ + trainable_params, all_param = self.get_nb_trainable_parameters() + + print( + f"trainable params: {trainable_params:,d} || " + f"all params: {all_param:,d} || " + f"trainable%: {100 * trainable_params / all_param:.4f}" + ) + + def set_use_trainable_adapters(self, use_trainable_adapters: bool): + """ + Set the adapters to trainable or not trainable. + + This is reflected in the config. 
+ """ + for name, param in self.model.base_model.named_parameters(): + if "lora_" in name: + param.requires_grad = use_trainable_adapters + + self.config.use_trainable_adapters = use_trainable_adapters + + def get_use_trainable_adapters(self) -> bool: + """ + Get the trainable or not trainable state of the adapters. + """ + return self.config.use_trainable_adapters + + def save_pretrained( + self, + save_directory: str, + safe_serialization: bool = True, + selected_adapters: Optional[List[str]] = None, + save_embedding_layers: Union[str, bool] = "auto", + is_main_process: bool = True, + **kwargs: Any, + ) -> None: + r""" + This function saves the classifier weights to a directory. It is the counerpart to `from_pretrained`. + + Args: + save_directory (`str`): + Directory where the adapter model and configuration files will be saved (will be created if it does not + exist). + safe_serialization (`bool`, *optional*): + Whether to save the adapter files in safetensors format, defaults to `True`. + is_main_process (`bool`, *optional*): + Whether the process calling this is the main process or not. Will default to `True`. Will not save the + checkpoint if not on the main process, which is important for multi device setups (e.g. DDP). + """ + if os.path.isfile(save_directory): + raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file") + + if is_main_process: + os.makedirs(save_directory, exist_ok=True) + + classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + + conf = classifier.config.__dict__.copy() + del conf["device"] + + if is_main_process: + os.makedirs(os.path.join(save_directory, "adapters"), exist_ok=True) + self.base_model_save( + save_directory=os.path.join(save_directory, "adapters"), + safe_serialization=safe_serialization, + is_main_process=is_main_process, + selected_adapters=selected_adapters, + save_embedding_layers=save_embedding_layers, + **kwargs, + ) + + with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: + json.dump(conf, f) + + if safe_serialization: + # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 + if is_main_process and safe_serialization: + save_model(classifier, os.path.join(save_directory, "xlora_classifier.safetensors")) + elif is_main_process: + state_dict = classifier.state_dict() + torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) diff --git a/src/peft/xlora/model.py b/src/peft/xlora/model.py new file mode 100644 index 0000000000..0cf6f30384 --- /dev/null +++ b/src/peft/xlora/model.py @@ -0,0 +1,278 @@ +import os +from typing import Optional, Union + +import torch +import torch.nn as nn +from huggingface_hub import file_exists, hf_hub_download # type: ignore +from huggingface_hub.utils import EntryNotFoundError # type: ignore +from safetensors.torch import load_file as safe_load_file +from transformers import PreTrainedModel # type: ignore + +from ..peft_model import PeftConfig, PeftModel +from ..tuners import lora +from ..utils.other import ( + infer_device, +) +from .classifier import InhibitorFlagPayload, xLoRAClassifier +from .config import xLoRAConfig +from .insertion import BaseTunerWrapper, PeftModelWrapper, xLoRAConv2dLayer, xLoRAEmbeddingLayer, xLoRALinearLayer + + +def convert_layers_to_xlora( + base: PeftModel, + config: xLoRAConfig, +) -> int: + """ + Returns the number of swapped layers. 
+ """ + assert isinstance(base.base_model, lora.LoraModel) + total_swapped = 0 + + scaling_keys = None + for module in base.modules(): + if isinstance(module, lora.LoraLayer): + if not scaling_keys: + scaling_keys = list(module.scaling.keys()) # NOTE(EricLBuehler): Python 3.7: dicts are ordered! + + if isinstance(module, lora.Linear): + assert scaling_keys is not None + new_layer: Union[xLoRALinearLayer, xLoRAEmbeddingLayer, xLoRAConv2dLayer] = xLoRALinearLayer( + model=base, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 + elif isinstance(module, lora.Embedding): + assert scaling_keys is not None + new_layer = xLoRAEmbeddingLayer( + model=base, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 + elif isinstance(module, lora.Conv2d): + assert scaling_keys is not None + new_layer = xLoRAConv2dLayer( + model=base, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 + + return total_swapped + + +class xLoRAModel(PeftModel, PeftModelWrapper): + def __init__(self, model: nn.Module, peft_config: PeftConfig) -> None: + assert isinstance(model, PreTrainedModel) + assert isinstance(peft_config, xLoRAConfig) + + if hasattr(model.config, "use_cache"): + assert not model.config.use_cache, "`use_cache` must be False" + + use_trainable_adapters = peft_config.use_trainable_adapters + adapters_items = iter(peft_config.adapters.items()) + first_item = next(adapters_items) + model_peft = PeftModel.from_pretrained( + model, first_item[1], first_item[0], is_trainable=use_trainable_adapters + ) + + for adapter_name, model_id in adapters_items: + model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) + + model_peft.base_model.set_adapter(list(peft_config.adapters.keys())) + + def hook(module, *args, **kwargs) -> None: + args_real = args[0] + kwargs_real: dict = args[1] + kwargs_real.update(kwargs) + + xlora_classifier: xLoRAClassifier = model_peft.internal_xlora_classifier # type: ignore + + if "_xlora_classifier_inhibitor_flag" in kwargs_real: + payload: InhibitorFlagPayload = kwargs_real["_xlora_classifier_inhibitor_flag"] + + del kwargs_real["_xlora_classifier_inhibitor_flag"] + + model_peft.internal_xlora_scalings = torch.full( # type: ignore + (payload.batch_size, payload.seq_len, xlora_classifier.n_layers, xlora_classifier.n_classes), + payload.override_scaling_pass_value, # requires_grad=True + ) # TODO(EricLBuehler): is the requires_grad=True necessary? 
+ + return + + xlora_scalings = xlora_classifier.forward( + *args_real, + **kwargs_real, + ) + # Set the scalings + model_peft.internal_xlora_scalings = xlora_scalings + + model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) + + model_peft.base_model.eval() + if not use_trainable_adapters: + total_frozen = 0 + for name, param in model_peft.base_model.named_parameters(): + if "lora_" in name: + param.requires_grad = False + total_frozen += 1 + + assert isinstance(model_peft.base_model, lora.LoraModel) + + total_swapped = convert_layers_to_xlora( + model_peft, + peft_config, + ) + + n_classes = len(peft_config.adapters) + xlora_classifier = xLoRAClassifier(model_peft, peft_config, n_classes, total_swapped) + + # Setup the internal state + base_model_wrapper = BaseTunerWrapper(model_peft.base_model, xlora_classifier) + model_peft.base_model.forward = base_model_wrapper.forward # type: ignore[method-assign] + + peft_model_wrapper = PeftModelWrapper( + model_peft, + model_peft.save_pretrained, + peft_config, + model_peft.get_nb_trainable_parameters, + model_peft.generate, + ) + model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore[method-assign] + model_peft.generate = peft_model_wrapper.generate # type: ignore + + assert not hasattr(model_peft, "set_use_trainable_adapters") + model_peft.set_use_trainable_adapters = peft_model_wrapper.set_use_trainable_adapters # type: ignore + + assert not hasattr(model_peft, "print_scalings_predictions") + model_peft.print_scalings_predictions = peft_model_wrapper.print_scalings_predictions # type: ignore + + assert not hasattr(model_peft, "enable_scalings_logging") + model_peft.enable_scalings_logging = peft_model_wrapper.enable_scalings_logging # type: ignore + + assert not hasattr(model_peft, "disable_scalings_logging") + model_peft.disable_scalings_logging = peft_model_wrapper.disable_scalings_logging # type: ignore + + assert not hasattr(model_peft, "flush_log_scalings") + model_peft.flush_log_scalings = peft_model_wrapper.flush_log_scalings # type: ignore + + assert not hasattr(model_peft, "get_scalings_log") + model_peft.get_scalings_log = peft_model_wrapper.get_scalings_log # type: ignore + + assert not hasattr(model_peft, "set_scaling_pass_value") + model_peft.set_scaling_pass_value = peft_model_wrapper.set_scaling_pass_value # type: ignore + + assert not hasattr(model_peft, "set_global_scaling_weight") + model_peft.set_global_scaling_weight = peft_model_wrapper.set_global_scaling_weight # type: ignore + + assert not hasattr(model_peft, "get_global_scaling_weight") + model_peft.get_global_scaling_weight = peft_model_wrapper.get_global_scaling_weight # type: ignore + + assert not hasattr(model_peft, "set_topk_lora") + model_peft.set_topk_lora = peft_model_wrapper.set_topk_lora # type: ignore + + assert not hasattr(model_peft, "get_topk_lora") + model_peft.get_topk_lora = peft_model_wrapper.get_topk_lora # type: ignore + + model_peft.get_nb_trainable_parameters = peft_model_wrapper.get_nb_trainable_parameters # type: ignore + + model_peft.print_trainable_parameters = peft_model_wrapper.print_trainable_parameters # type: ignore + + # Setup the model internal state + assert not hasattr(model_peft, "internal_xlora_classifier") + model_peft.internal_xlora_classifier = xlora_classifier + + assert not hasattr(model_peft, "internal_xlora_scalings") + model_peft.internal_xlora_scalings = None # type: ignore + + +def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: 
+ r""" + A helper method to load the classifier weights from the HuggingFace Hub or locally. Copied from load_peft_weights + + Args: + model_id (`str`): + The local path to the adapter weights or the name of the adapter to load from the HuggingFace Hub. + device (`str`): + The device to load the weights onto. + hf_hub_download_kwargs (`dict`): + Additional arguments to pass to the `hf_hub_download` method when loading from the HuggingFace Hub. + """ + path = ( + os.path.join(model_id, hf_hub_download_kwargs["subfolder"]) + if hf_hub_download_kwargs.get("subfolder", None) is not None + else model_id + ) + + SAFETENSORS_WEIGHTS_NAME = "xlora_classifier.safetensors" + WEIGHTS_NAME = "xlora_classifier.pt" + + if device is None: + device = infer_device() + + if os.path.exists(os.path.join(path, SAFETENSORS_WEIGHTS_NAME)): + filename = os.path.join(path, SAFETENSORS_WEIGHTS_NAME) + use_safetensors = True + elif os.path.exists(os.path.join(path, WEIGHTS_NAME)): + filename = os.path.join(path, WEIGHTS_NAME) + use_safetensors = False + else: + token = hf_hub_download_kwargs.get("token", None) + if token is None: + token = hf_hub_download_kwargs.get("use_auth_token", None) + + hub_filename = ( + os.path.join(hf_hub_download_kwargs["subfolder"], SAFETENSORS_WEIGHTS_NAME) + if hf_hub_download_kwargs.get("subfolder", None) is not None + else SAFETENSORS_WEIGHTS_NAME + ) + has_remote_safetensors_file = file_exists( + repo_id=model_id, + filename=hub_filename, + revision=hf_hub_download_kwargs.get("revision", None), + repo_type=hf_hub_download_kwargs.get("repo_type", None), + token=token, + ) + use_safetensors = has_remote_safetensors_file + + if has_remote_safetensors_file: + # Priority 1: load safetensors weights + filename = hf_hub_download( + model_id, + SAFETENSORS_WEIGHTS_NAME, + **hf_hub_download_kwargs, + ) + else: + try: + filename = hf_hub_download(model_id, WEIGHTS_NAME, **hf_hub_download_kwargs) + except EntryNotFoundError: + raise ValueError( + f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. " + f"Please check that the file {WEIGHTS_NAME} or {SAFETENSORS_WEIGHTS_NAME} is present at {model_id}." 
+ ) + + if use_safetensors: + if hasattr(torch.backends, "mps") and (device == torch.device("mps")): + adapters_weights = safe_load_file(filename, device="cpu") + else: + adapters_weights = safe_load_file(filename, device=device) + else: + adapters_weights = torch.load(filename, map_location=torch.device(device)) + + return adapters_weights + + +def _get_file_path_dir(load_directory: Union[str, os.PathLike], name: str, dir: str) -> str: + if os.path.exists(os.path.join(load_directory, dir, name)): + return os.path.join(load_directory, dir, name) + return hf_hub_download(load_directory, filename=name, subfolder=dir) From b9d3878f7414c95756ab84b6ec7b2f2a90fbee55 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 07:45:32 -0500 Subject: [PATCH 002/182] Pass a reference back to the peftmodel when creating --- src/peft/mapping.py | 2 -- src/peft/peft_model.py | 9 +++++---- src/peft/tuners/__init__.py | 1 + src/peft/tuners/adalora/model.py | 2 +- src/peft/tuners/adaption_prompt/model.py | 2 +- src/peft/tuners/ia3/model.py | 2 +- src/peft/tuners/lora/model.py | 2 +- src/peft/tuners/lycoris_utils.py | 2 +- src/peft/tuners/poly/model.py | 2 +- src/peft/{ => tuners}/xlora/__init__.py | 0 src/peft/{ => tuners}/xlora/classifier.py | 0 src/peft/{ => tuners}/xlora/config.py | 4 +++- src/peft/{ => tuners}/xlora/insertion.py | 0 src/peft/{ => tuners}/xlora/model.py | 15 ++++++--------- src/peft/utils/peft_types.py | 2 ++ 15 files changed, 23 insertions(+), 22 deletions(-) rename src/peft/{ => tuners}/xlora/__init__.py (100%) rename src/peft/{ => tuners}/xlora/classifier.py (100%) rename src/peft/{ => tuners}/xlora/config.py (98%) rename src/peft/{ => tuners}/xlora/insertion.py (100%) rename src/peft/{ => tuners}/xlora/model.py (97%) diff --git a/src/peft/mapping.py b/src/peft/mapping.py index 5e2d585c7c..a0ea1abe06 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -52,7 +52,6 @@ xLoRAConfig, ) from .utils import _prepare_prompt_learning_config -from .xlora import xLoRAModel if TYPE_CHECKING: @@ -66,7 +65,6 @@ "TOKEN_CLS": PeftModelForTokenClassification, "QUESTION_ANS": PeftModelForQuestionAnswering, "FEATURE_EXTRACTION": PeftModelForFeatureExtraction, - "XLORA": xLoRAModel, } PEFT_TYPE_TO_CONFIG_MAPPING: dict[str, PeftConfig] = { diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 2be9a781de..c7ee3c3968 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -55,6 +55,10 @@ PromptEncoder, ) from .tuners.tuners_utils import BaseTunerLayer +from .tuners.xlora import _get_file_path_dir as xlora_get_file_path_dir +from .tuners.xlora import _load_classifier_weights as xlora_load_classifier_weights +from .tuners.xlora.config import xLoRAConfig +from .tuners.xlora.model import xLoRAModel from .utils import ( SAFETENSORS_WEIGHTS_NAME, TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, @@ -72,10 +76,6 @@ set_peft_model_state_dict, shift_tokens_right, ) -from .xlora import _get_file_path_dir as xlora_get_file_path_dir -from .xlora import _load_classifier_weights as xlora_load_classifier_weights -from .xlora.config import xLoRAConfig -from .xlora.model import xLoRAModel PEFT_TYPE_TO_MODEL_MAPPING = { @@ -90,6 +90,7 @@ PeftType.IA3: IA3Model, PeftType.OFT: OFTModel, PeftType.POLY: PolyModel, + PeftType.XLORA: xLoRAModel, } diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py index b47baa6681..b8d334f2d4 100644 --- a/src/peft/tuners/__init__.py +++ b/src/peft/tuners/__init__.py @@ -30,3 +30,4 @@ from .oft import OFTConfig, OFTModel from 
.mixed import MixedModel from .poly import PolyConfig, PolyModel +from .xlora import xLoRAConfig, xLoRAModel diff --git a/src/peft/tuners/adalora/model.py b/src/peft/tuners/adalora/model.py index bf334b39ce..81610f5997 100644 --- a/src/peft/tuners/adalora/model.py +++ b/src/peft/tuners/adalora/model.py @@ -61,7 +61,7 @@ class AdaLoraModel(LoraModel): # Note: don't redefine prefix here, it should be inherited from LoraModel - def __init__(self, model, config, adapter_name): + def __init__(self, model, config, adapter_name, _model_peft): super().__init__(model, config, adapter_name) traininable_mode_counter = 0 diff --git a/src/peft/tuners/adaption_prompt/model.py b/src/peft/tuners/adaption_prompt/model.py index 08aea27f8e..66ad96f2f2 100644 --- a/src/peft/tuners/adaption_prompt/model.py +++ b/src/peft/tuners/adaption_prompt/model.py @@ -40,7 +40,7 @@ class AdaptionPromptModel(nn.Module): - Disabling the adapter would also result in the modules being removed from the model. """ - def __init__(self, model, configs: Dict, adapter_name: str): + def __init__(self, model, configs: Dict, adapter_name: str, _model_peft): super().__init__() self.model = model # Store adapter configs by name. diff --git a/src/peft/tuners/ia3/model.py b/src/peft/tuners/ia3/model.py index 61969fe698..62037ae420 100644 --- a/src/peft/tuners/ia3/model.py +++ b/src/peft/tuners/ia3/model.py @@ -72,7 +72,7 @@ class IA3Model(BaseTuner): prefix: str = "ia3_" - def __init__(self, model, config, adapter_name): + def __init__(self, model, config, adapter_name, _model_peft): super().__init__(model, config, adapter_name) @staticmethod diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 3f381efffd..1bf728013b 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -132,7 +132,7 @@ class LoraModel(BaseTuner): prefix: str = "lora_" - def __init__(self, model, config, adapter_name) -> None: + def __init__(self, model, config, adapter_name, _model_peft) -> None: super().__init__(model, config, adapter_name) def _check_new_adapter_config(self, config: LoraConfig) -> None: diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 39c750ae8e..552acdaef9 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -197,7 +197,7 @@ class LycorisTuner(BaseTuner): prefix: str layers_mapping: dict[type[torch.nn.Module], type[LycorisLayer]] - def __init__(self, model, config, adapter_name): + def __init__(self, model, config, adapter_name, _model_peft): super().__init__(model, config, adapter_name) def __getattr__(self, name: str): diff --git a/src/peft/tuners/poly/model.py b/src/peft/tuners/poly/model.py index 943a287955..f29b2c673e 100644 --- a/src/peft/tuners/poly/model.py +++ b/src/peft/tuners/poly/model.py @@ -33,7 +33,7 @@ class PolyModel(BaseTuner): prefix: str = "poly_" - def __init__(self, model, config, adapter_name) -> None: + def __init__(self, model, config, adapter_name, _model_peft) -> None: super().__init__(model, config, adapter_name) @staticmethod diff --git a/src/peft/xlora/__init__.py b/src/peft/tuners/xlora/__init__.py similarity index 100% rename from src/peft/xlora/__init__.py rename to src/peft/tuners/xlora/__init__.py diff --git a/src/peft/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py similarity index 100% rename from src/peft/xlora/classifier.py rename to src/peft/tuners/xlora/classifier.py diff --git a/src/peft/xlora/config.py b/src/peft/tuners/xlora/config.py similarity index 98% rename from 
src/peft/xlora/config.py rename to src/peft/tuners/xlora/config.py index 7752dc2dd4..06b972e299 100644 --- a/src/peft/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -4,9 +4,11 @@ import torch +from peft.config import PeftConfig + @dataclass -class xLoRAConfig: +class xLoRAConfig(PeftConfig): r""" This is the configuration class to store the configuration of a [`xLoRAClassifier`]. When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys diff --git a/src/peft/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py similarity index 100% rename from src/peft/xlora/insertion.py rename to src/peft/tuners/xlora/insertion.py diff --git a/src/peft/xlora/model.py b/src/peft/tuners/xlora/model.py similarity index 97% rename from src/peft/xlora/model.py rename to src/peft/tuners/xlora/model.py index 0cf6f30384..6c91b1ea36 100644 --- a/src/peft/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -8,11 +8,11 @@ from safetensors.torch import load_file as safe_load_file from transformers import PreTrainedModel # type: ignore -from ..peft_model import PeftConfig, PeftModel -from ..tuners import lora -from ..utils.other import ( +from ...peft_model import PeftConfig, PeftModel +from ...utils.other import ( infer_device, ) +from . import lora from .classifier import InhibitorFlagPayload, xLoRAClassifier from .config import xLoRAConfig from .insertion import BaseTunerWrapper, PeftModelWrapper, xLoRAConv2dLayer, xLoRAEmbeddingLayer, xLoRALinearLayer @@ -72,7 +72,9 @@ def convert_layers_to_xlora( class xLoRAModel(PeftModel, PeftModelWrapper): - def __init__(self, model: nn.Module, peft_config: PeftConfig) -> None: + def __init__(self, model: nn.Module, peft_config: PeftConfig, model_peft: PeftModel) -> None: + # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel. 
+ assert isinstance(model, PreTrainedModel) assert isinstance(peft_config, xLoRAConfig) @@ -81,11 +83,6 @@ def __init__(self, model: nn.Module, peft_config: PeftConfig) -> None: use_trainable_adapters = peft_config.use_trainable_adapters adapters_items = iter(peft_config.adapters.items()) - first_item = next(adapters_items) - model_peft = PeftModel.from_pretrained( - model, first_item[1], first_item[0], is_trainable=use_trainable_adapters - ) - for adapter_name, model_id in adapters_items: model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) diff --git a/src/peft/utils/peft_types.py b/src/peft/utils/peft_types.py index d4a84435dc..db33ef42ff 100644 --- a/src/peft/utils/peft_types.py +++ b/src/peft/utils/peft_types.py @@ -35,6 +35,7 @@ class PeftType(str, enum.Enum): - LOHA - LOKR - OFT + - XLORA """ PROMPT_TUNING = "PROMPT_TUNING" @@ -49,6 +50,7 @@ class PeftType(str, enum.Enum): LOKR = "LOKR" OFT = "OFT" POLY = "POLY" + XLORA = "XLORA" class TaskType(str, enum.Enum): From bbd3ad4bb5579c8f87375a07494860d7771a4221 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 07:50:11 -0500 Subject: [PATCH 003/182] Check base model in from_pretrained --- src/peft/peft_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index c7ee3c3968..5926df391d 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -393,7 +393,7 @@ def from_pretrained( else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - if isinstance(model, xLoRAModel): + if isinstance(model.base_model, xLoRAModel): assert isinstance(model, PreTrainedModel) assert isinstance(config, xLoRAConfig) From ea53917c96542151d2ef672550c540ffc3ac1845 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 07:50:43 -0500 Subject: [PATCH 004/182] Fix assert, attr --- src/peft/peft_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 5926df391d..2763916333 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -394,7 +394,6 @@ def from_pretrained( model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) if isinstance(model.base_model, xLoRAModel): - assert isinstance(model, PreTrainedModel) assert isinstance(config, xLoRAConfig) device = infer_device() # As inn PeftModel.load_adapter, torch_device = infer_device( @@ -416,7 +415,7 @@ def from_pretrained( adapters_real = config.adapters config.adapters = adapters_real - classifier: xLoRAClassifier = model.internal_xlora_classifier # type: ignore + classifier: xLoRAClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) From 79c4df960a5968d00e81d26839f8affa391b0869 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 07:55:16 -0500 Subject: [PATCH 005/182] Fix inheritance from peft model --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 6c91b1ea36..3fa8245c97 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -71,7 +71,7 @@ def convert_layers_to_xlora( return total_swapped -class xLoRAModel(PeftModel, PeftModelWrapper): +class xLoRAModel(PeftModelWrapper): 
def __init__(self, model: nn.Module, peft_config: PeftConfig, model_peft: PeftModel) -> None: # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel. From 2ee88afac527d337431164aa9b3476c7c530fcac Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 07:58:39 -0500 Subject: [PATCH 006/182] Update inheritance again --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 3fa8245c97..5eb2edcdcb 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -71,7 +71,7 @@ def convert_layers_to_xlora( return total_swapped -class xLoRAModel(PeftModelWrapper): +class xLoRAModel: def __init__(self, model: nn.Module, peft_config: PeftConfig, model_peft: PeftModel) -> None: # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel. From 19063dc06be91b8c5f28924cf959f22cc7d5a974 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 08:20:02 -0500 Subject: [PATCH 007/182] Update inheritance again and properly instantiate --- src/peft/peft_model.py | 2 +- src/peft/tuners/xlora/model.py | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 2763916333..3f39b1f899 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -135,7 +135,7 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name else: self._peft_config = None cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] - self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) + self.base_model = cls(model, {adapter_name: peft_config}, adapter_name, self) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 5eb2edcdcb..7af82be473 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -6,7 +6,9 @@ from huggingface_hub import file_exists, hf_hub_download # type: ignore from huggingface_hub.utils import EntryNotFoundError # type: ignore from safetensors.torch import load_file as safe_load_file -from transformers import PreTrainedModel # type: ignore +from transformers import PreTrainedModel + +from peft.tuners.lora.model import LoraModel from ...peft_model import PeftConfig, PeftModel from ...utils.other import ( @@ -71,13 +73,15 @@ def convert_layers_to_xlora( return total_swapped -class xLoRAModel: - def __init__(self, model: nn.Module, peft_config: PeftConfig, model_peft: PeftModel) -> None: - # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel. 
- +class xLoRAModel(LoraModel): + def __init__(self, model: nn.Module, peft_config: PeftConfig, adapter_name: str, model_peft: PeftModel) -> None: assert isinstance(model, PreTrainedModel) assert isinstance(peft_config, xLoRAConfig) + super().__init__(model, peft_config, adapter_name, model_peft) + + # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel + if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" @@ -86,7 +90,7 @@ def __init__(self, model: nn.Module, peft_config: PeftConfig, model_peft: PeftMo for adapter_name, model_id in adapters_items: model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) - model_peft.base_model.set_adapter(list(peft_config.adapters.keys())) + self.set_adapter(list(peft_config.adapters.keys())) def hook(module, *args, **kwargs) -> None: args_real = args[0] @@ -116,15 +120,15 @@ def hook(module, *args, **kwargs) -> None: model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) - model_peft.base_model.eval() + self.eval() if not use_trainable_adapters: total_frozen = 0 - for name, param in model_peft.base_model.named_parameters(): + for name, param in self.named_parameters(): if "lora_" in name: param.requires_grad = False total_frozen += 1 - assert isinstance(model_peft.base_model, lora.LoraModel) + assert isinstance(self, LoraModel) total_swapped = convert_layers_to_xlora( model_peft, @@ -135,8 +139,8 @@ def hook(module, *args, **kwargs) -> None: xlora_classifier = xLoRAClassifier(model_peft, peft_config, n_classes, total_swapped) # Setup the internal state - base_model_wrapper = BaseTunerWrapper(model_peft.base_model, xlora_classifier) - model_peft.base_model.forward = base_model_wrapper.forward # type: ignore[method-assign] + base_model_wrapper = BaseTunerWrapper(self, xlora_classifier) + self.forward = base_model_wrapper.forward # type: ignore[method-assign] peft_model_wrapper = PeftModelWrapper( model_peft, From d364e5b6ecff14130c1ee9fad3dcaa0ff8c89937 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 20 Feb 2024 08:20:12 -0500 Subject: [PATCH 008/182] Update comment --- src/peft/tuners/xlora/model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7af82be473..fb7759c68a 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -80,8 +80,6 @@ def __init__(self, model: nn.Module, peft_config: PeftConfig, adapter_name: str, super().__init__(model, peft_config, adapter_name, model_peft) - # TODO(EricLBuehler): model_peft.base_model needs to be a LoraModel - if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" From c9ab31069146f92fcbe37453f90fe9d2affc2e9b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 05:27:05 -0500 Subject: [PATCH 009/182] Export config, model --- src/peft/tuners/xlora/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/peft/tuners/xlora/__init__.py b/src/peft/tuners/xlora/__init__.py index e69de29bb2..987fe98542 100644 --- a/src/peft/tuners/xlora/__init__.py +++ b/src/peft/tuners/xlora/__init__.py @@ -0,0 +1,5 @@ +from .config import xLoRAConfig +from .model import xLoRAModel + + +__all__ = ["xLoRAConfig", "xLoRAModel"] From 9ed665eb47fc340d39fe7d88c06e405b7a9d88da Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 05:42:55 -0500 Subject: [PATCH 010/182] Remove use of default attr --- src/peft/tuners/xlora/config.py | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 06b972e299..52c488e4ee 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -51,8 +51,6 @@ class xLoRAConfig(PeftConfig): Weight to multiply output of each LoRA adapter by. """ - model_type = "xlora" - hidden_size: int device: torch.device adapters: Dict[str, str] From ae3200626cfb8a7f5a735aba14728dd70c46a3dc Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 05:51:33 -0500 Subject: [PATCH 011/182] Remove use of default attr --- src/peft/tuners/xlora/config.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 52c488e4ee..da8011890f 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -51,9 +51,9 @@ class xLoRAConfig(PeftConfig): Weight to multiply output of each LoRA adapter by. """ - hidden_size: int - device: torch.device - adapters: Dict[str, str] + hidden_size: int = None # type: ignore + device: torch.device = None # type: ignore + adapters: Dict[str, str] = None # type: ignore enable_softmax: bool = True enable_softmax_topk: bool = False layerwise_scalings: bool = False @@ -70,6 +70,10 @@ class xLoRAConfig(PeftConfig): global_scaling_weight: float = 1.0 def __post_init__(self): + assert self.hidden_size is not None + assert self.device is not None + assert self.adapters is not None + if self.enable_softmax_topk and self.top_k_lora is None: warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set") From 9bfe8a6808b7ea61133e9693ee150534a8d34f2f Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 05:53:26 -0500 Subject: [PATCH 012/182] Update imports --- src/peft/peft_model.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 3f39b1f899..aab74f660c 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -36,8 +36,6 @@ from transformers.modeling_outputs import QuestionAnsweringModelOutput, SequenceClassifierOutput, TokenClassifierOutput from transformers.utils import PushToHubMixin -from peft.xlora.classifier import xLoRAClassifier - from . 
import __version__ from .config import PeftConfig from .tuners import ( @@ -53,12 +51,13 @@ PrefixEncoder, PromptEmbedding, PromptEncoder, + xLoRAConfig, + xLoRAModel, ) from .tuners.tuners_utils import BaseTunerLayer from .tuners.xlora import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora import _load_classifier_weights as xlora_load_classifier_weights -from .tuners.xlora.config import xLoRAConfig -from .tuners.xlora.model import xLoRAModel +from .tuners.xlora.classifier import xLoRAClassifier from .utils import ( SAFETENSORS_WEIGHTS_NAME, TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, From 048958fb64e941aeb1df60f5a42119c7614c3584 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 05:54:39 -0500 Subject: [PATCH 013/182] Update imports --- src/peft/mixed_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/mixed_model.py b/src/peft/mixed_model.py index 92b9f74ecd..6c0b2ecf6b 100644 --- a/src/peft/mixed_model.py +++ b/src/peft/mixed_model.py @@ -23,7 +23,7 @@ from torch import nn from transformers.utils import PushToHubMixin -from peft.tuners.mixed import COMPATIBLE_TUNER_TYPES +from .tuners.mixed import COMPATIBLE_TUNER_TYPES from .config import PeftConfig from .peft_model import PeftModel From 024ce153c093e17729629c03d1d3c616e67009aa Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 06:00:58 -0500 Subject: [PATCH 014/182] Update imports --- src/peft/tuners/xlora/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index fb7759c68a..5103773eec 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -10,11 +10,11 @@ from peft.tuners.lora.model import LoraModel -from ...peft_model import PeftConfig, PeftModel -from ...utils.other import ( +from peft.peft_model import PeftConfig, PeftModel +from peft.utils.other import ( infer_device, ) -from . import lora +from .. 
import lora from .classifier import InhibitorFlagPayload, xLoRAClassifier from .config import xLoRAConfig from .insertion import BaseTunerWrapper, PeftModelWrapper, xLoRAConv2dLayer, xLoRAEmbeddingLayer, xLoRALinearLayer From 3d08fc99fd654deb40f703b2c22df452b640452b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 06:08:05 -0500 Subject: [PATCH 015/182] Work on circular import --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 5103773eec..22290945ba 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -74,7 +74,7 @@ def convert_layers_to_xlora( class xLoRAModel(LoraModel): - def __init__(self, model: nn.Module, peft_config: PeftConfig, adapter_name: str, model_peft: PeftModel) -> None: + def __init__(self, model: nn.Module, peft_config: xLoRAConfig, adapter_name: str, model_peft: nn.Module) -> None: assert isinstance(model, PreTrainedModel) assert isinstance(peft_config, xLoRAConfig) From 7e5332b250beb2ba51b9251ead4c02e85556b774 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 06:10:23 -0500 Subject: [PATCH 016/182] Work on circular import again --- src/peft/__init__.py | 2 ++ src/peft/tuners/xlora/model.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/peft/__init__.py b/src/peft/__init__.py index 55419e22bc..fbaa4f6c23 100644 --- a/src/peft/__init__.py +++ b/src/peft/__init__.py @@ -73,6 +73,8 @@ OFTModel, PolyConfig, PolyModel, + xLoRAConfig, + xLoRAModel ) from .utils import ( TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 22290945ba..9284362f68 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -10,7 +10,6 @@ from peft.tuners.lora.model import LoraModel -from peft.peft_model import PeftConfig, PeftModel from peft.utils.other import ( infer_device, ) @@ -21,7 +20,7 @@ def convert_layers_to_xlora( - base: PeftModel, + base: nn.Module, # PeftModel config: xLoRAConfig, ) -> int: """ @@ -75,6 +74,7 @@ def convert_layers_to_xlora( class xLoRAModel(LoraModel): def __init__(self, model: nn.Module, peft_config: xLoRAConfig, adapter_name: str, model_peft: nn.Module) -> None: + # model_peft: PeftModel assert isinstance(model, PreTrainedModel) assert isinstance(peft_config, xLoRAConfig) From d5193f4b698f24ebe16c97dc3c19ee5975dc5c06 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 06:57:49 -0500 Subject: [PATCH 017/182] Remove another circular import --- src/peft/__init__.py | 2 +- src/peft/mixed_model.py | 3 +-- src/peft/tuners/xlora/classifier.py | 6 ++---- src/peft/tuners/xlora/model.py | 4 ++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/peft/__init__.py b/src/peft/__init__.py index fbaa4f6c23..5868fe753a 100644 --- a/src/peft/__init__.py +++ b/src/peft/__init__.py @@ -74,7 +74,7 @@ PolyConfig, PolyModel, xLoRAConfig, - xLoRAModel + xLoRAModel, ) from .utils import ( TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, diff --git a/src/peft/mixed_model.py b/src/peft/mixed_model.py index 6c0b2ecf6b..7201917ef9 100644 --- a/src/peft/mixed_model.py +++ b/src/peft/mixed_model.py @@ -23,8 +23,6 @@ from torch import nn from transformers.utils import PushToHubMixin -from .tuners.mixed import COMPATIBLE_TUNER_TYPES - from .config import PeftConfig from .peft_model import PeftModel from .tuners import ( @@ -36,6 +34,7 @@ 
MixedModel, OFTModel, ) +from .tuners.mixed import COMPATIBLE_TUNER_TYPES from .utils import PeftType, _set_adapter, _set_trainable diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 2fcd6f64c5..96f30d0ebf 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -11,8 +11,6 @@ ModelOutput, ) -from peft.peft_model import PeftModel - from .config import xLoRAConfig @@ -46,7 +44,7 @@ class xLoRAClassifier(nn.Module): def __init__( self, - model: PeftModel, + model: nn.Module, # PeftModel config: xLoRAConfig, n_classes: int, n_layers: int, @@ -138,7 +136,7 @@ def forward( seq_len = typing.cast(torch.FloatTensor, inputs_embeds).shape[1] # For type checking - model: PeftModel = self.model # type: ignore + model: nn.Module = self.model # type: ignore with torch.no_grad(): with model.disable_adapter(): # TODO(EricLBuehler): Pending removal following analysis diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9284362f68..054e2123a7 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -9,10 +9,10 @@ from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel - from peft.utils.other import ( infer_device, ) + from .. import lora from .classifier import InhibitorFlagPayload, xLoRAClassifier from .config import xLoRAConfig @@ -20,7 +20,7 @@ def convert_layers_to_xlora( - base: nn.Module, # PeftModel + base: nn.Module, # PeftModel config: xLoRAConfig, ) -> int: """ From 5d7ff643b22abf95cd3b7598926922e9334ab3e7 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 06:59:46 -0500 Subject: [PATCH 018/182] Remove another circular import --- src/peft/tuners/xlora/insertion.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index ba183c9370..1492f5a122 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -5,8 +5,8 @@ import torch from safetensors.torch import save_model # type: ignore from torch import Tensor +from torch import nn -from peft.peft_model import PeftModel from peft.tuners import lora from peft.tuners.tuners_utils import BaseTuner # type: ignore @@ -25,7 +25,7 @@ class xLoRALayer: def __init__( self, - model: PeftModel, + model: nn.Module, # PeftModel target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, @@ -70,7 +70,7 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: class xLoRALinearLayer(xLoRALayer): def __init__( self, - model: PeftModel, + model: nn.Module, # PeftModel target: lora.Linear, target_forward: Callable[..., Any], layer_number: int, @@ -114,7 +114,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: class xLoRAEmbeddingLayer(xLoRALayer): def __init__( self, - model: PeftModel, + model: nn.Module, # PeftModel target: lora.Embedding, target_forward: Callable[..., Any], layer_number: int, @@ -155,7 +155,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: class xLoRAConv2dLayer(xLoRALayer): def __init__( self, - model: PeftModel, + model: nn.Module, # PeftModel target: lora.Conv2d, target_forward: Callable[..., Any], layer_number: int, @@ -207,7 +207,7 @@ def forward(self, *args, **kwargs): class PeftModelWrapper: def __init__( self, - base_model: PeftModel, + base_model: nn.Module, # PeftModel base_model_save: Callable[..., None], config: xLoRAConfig, base_model_get_nb_trainable_parameters: 
Callable[..., Tuple[int, int]], From b41089d8c480b781b621a5abfba36ea02a99ce5a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 12:48:39 -0500 Subject: [PATCH 019/182] Slightly refactor --- src/peft/tuners/xlora/insertion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 1492f5a122..e95cf390f3 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -4,8 +4,7 @@ import torch from safetensors.torch import save_model # type: ignore -from torch import Tensor -from torch import nn +from torch import Tensor, nn from peft.tuners import lora from peft.tuners.tuners_utils import BaseTuner # type: ignore From bfd794fbccac7c04087bc09543c468cbe5ebea6c Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 12:54:51 -0500 Subject: [PATCH 020/182] Update with EricLBuehler/xlora#20 --- src/peft/tuners/xlora/classifier.py | 61 ----------------------------- src/peft/tuners/xlora/insertion.py | 13 +++--- src/peft/tuners/xlora/model.py | 4 +- 3 files changed, 8 insertions(+), 70 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 96f30d0ebf..e4374db9c0 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -139,14 +139,6 @@ def forward( model: nn.Module = self.model # type: ignore with torch.no_grad(): with model.disable_adapter(): - # TODO(EricLBuehler): Pending removal following analysis - """ - for module in model.base_model.modules(): - if isinstance(module.forward.__self__, xLoRALayer): - inst = module.forward.__self__ - inst.disabled = True # Disable it - """ - kwargs["output_hidden_states"] = True kwargs["return_dict"] = True @@ -162,64 +154,11 @@ def forward( **kwargs, ) - # TODO(EricLBuehler): Pending removal following analysis - """ - # Enable the xLoRALayers - for module in model.base_model.modules(): - if isinstance(module.forward.__self__, xLoRALayer): - inst = module.forward.__self__ - inst.disabled = False # Disable it - """ - hidden_states = result.hidden_states # type: ignore assert hidden_states is not None hidden_state = hidden_states[-1] # Get the last hidden state - ### Calculate the sequence lengths - - # TODO(all): Pending removal following analysis - """ - # hidden_state=[batch_size, seq_len, hidden_size] - if self.config.stop_token_id is None: # Calculate via attention mask - if input_ids is not None: - assert attention_mask is not None, ( - "Stop token id was not provided, so sequence length calculation via attention mask was attempted" - + "but the attention mask was not given" - ) - sequence_lengths: Union[int, torch.Tensor] = torch.eq(attention_mask, 0).int().argmax(-1) - 1 - sequence_lengths = sequence_lengths % input_ids.shape[-1] - sequence_lengths = sequence_lengths.to(hidden_state.device) # type: ignore - else: - sequence_lengths = -1 - else: # Calculate via stop token id - if input_ids is not None: - # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility - sequence_lengths = torch.eq(input_ids, self.config.stop_token_id).int().argmax(-1) - 1 - sequence_lengths = sequence_lengths % input_ids.shape[-1] - sequence_lengths = sequence_lengths.to(hidden_state.device) # type: ignore - else: - sequence_lengths = -1 - - # AFTER THIS: hidden_state=[batch_size, hidden_size] - if self.config.use_mean_pool: - assert isinstance(sequence_lengths, torch.Tensor) - max_length = hidden_state.shape[1] - mask = 
torch.arange(max_length).expand(len(sequence_lengths), max_length).to( - hidden_state.device - ) < sequence_lengths.unsqueeze(1) - - # Mask the hidden_states - masked_hidden_state = hidden_state * mask.unsqueeze(-1) - - # Sum across the sequence length and divide by actual sequence length - summed = torch.sum(masked_hidden_state, dim=1) - hidden_state = summed / sequence_lengths.unsqueeze(1) - else: - # Get it for the last token - hidden_state = hidden_state[torch.arange(batch_size, device=hidden_state.device), sequence_lengths] - """ - ### Classifier run # hidden_state=[batch_size, seq_len, hidden_size] for layer in self.inner: diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index e95cf390f3..c5772bd553 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -20,11 +20,11 @@ class xLoRALayer: xLoRA algorithm. """ - __slots__ = {"model", "target_forward", "target", "layer_number", "disabled", "config"} + __slots__ = {"model", "target_forward", "target", "layer_number", "config"} def __init__( self, - model: nn.Module, # PeftModel + model: nn.Module, # PeftModel target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, @@ -34,7 +34,6 @@ def __init__( self.target_forward = target_forward self.target = target self.layer_number = layer_number - self.disabled = False # TODO(EricLBuehler): Pending removal following analysis self.config = config @staticmethod @@ -69,7 +68,7 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: class xLoRALinearLayer(xLoRALayer): def __init__( self, - model: nn.Module, # PeftModel + model: nn.Module, # PeftModel target: lora.Linear, target_forward: Callable[..., Any], layer_number: int, @@ -113,7 +112,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: class xLoRAEmbeddingLayer(xLoRALayer): def __init__( self, - model: nn.Module, # PeftModel + model: nn.Module, # PeftModel target: lora.Embedding, target_forward: Callable[..., Any], layer_number: int, @@ -154,7 +153,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: class xLoRAConv2dLayer(xLoRALayer): def __init__( self, - model: nn.Module, # PeftModel + model: nn.Module, # PeftModel target: lora.Conv2d, target_forward: Callable[..., Any], layer_number: int, @@ -206,7 +205,7 @@ def forward(self, *args, **kwargs): class PeftModelWrapper: def __init__( self, - base_model: nn.Module, # PeftModel + base_model: nn.Module, # PeftModel base_model_save: Callable[..., None], config: xLoRAConfig, base_model_get_nb_trainable_parameters: Callable[..., Tuple[int, int]], diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 054e2123a7..f015bb3812 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -104,8 +104,8 @@ def hook(module, *args, **kwargs) -> None: model_peft.internal_xlora_scalings = torch.full( # type: ignore (payload.batch_size, payload.seq_len, xlora_classifier.n_layers, xlora_classifier.n_classes), - payload.override_scaling_pass_value, # requires_grad=True - ) # TODO(EricLBuehler): is the requires_grad=True necessary? 
+ payload.override_scaling_pass_value, + ) return From 726f8a8284ee2b51caddddc7f59bfed29dcd5bb0 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 12:56:50 -0500 Subject: [PATCH 021/182] Make signature an exact copy --- src/peft/tuners/xlora/insertion.py | 2 +- src/peft/tuners/xlora/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index c5772bd553..0da9d093d7 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -364,7 +364,7 @@ def save_pretrained( self, save_directory: str, safe_serialization: bool = True, - selected_adapters: Optional[List[str]] = None, + selected_adapters: Optional[list[str]] = None, save_embedding_layers: Union[str, bool] = "auto", is_main_process: bool = True, **kwargs: Any, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index f015bb3812..689d6aed1a 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -147,7 +147,7 @@ def hook(module, *args, **kwargs) -> None: model_peft.get_nb_trainable_parameters, model_peft.generate, ) - model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore[method-assign] + model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore model_peft.generate = peft_model_wrapper.generate # type: ignore assert not hasattr(model_peft, "set_use_trainable_adapters") From 8a1c68caecb82341ceed70929fefcd64d72f8d86 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 15:37:03 -0500 Subject: [PATCH 022/182] Refactor util fns --- src/peft/peft_model.py | 2 + src/peft/tuners/xlora/model.py | 91 +-------------------------------- src/peft/tuners/xlora/util.py | 93 ++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 90 deletions(-) create mode 100644 src/peft/tuners/xlora/util.py diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index aab74f660c..b96f5640bc 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -58,6 +58,8 @@ from .tuners.xlora import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora import _load_classifier_weights as xlora_load_classifier_weights from .tuners.xlora.classifier import xLoRAClassifier +from .tuners.xlora.util import _get_file_path_dir as xlora_get_file_path_dir +from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights from .utils import ( SAFETENSORS_WEIGHTS_NAME, TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 689d6aed1a..d47df06ed9 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -1,17 +1,10 @@ -import os -from typing import Optional, Union +from typing import Union import torch import torch.nn as nn -from huggingface_hub import file_exists, hf_hub_download # type: ignore -from huggingface_hub.utils import EntryNotFoundError # type: ignore -from safetensors.torch import load_file as safe_load_file from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel -from peft.utils.other import ( - infer_device, -) from .. 
import lora from .classifier import InhibitorFlagPayload, xLoRAClassifier @@ -193,85 +186,3 @@ def hook(module, *args, **kwargs) -> None: assert not hasattr(model_peft, "internal_xlora_scalings") model_peft.internal_xlora_scalings = None # type: ignore - - -def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: - r""" - A helper method to load the classifier weights from the HuggingFace Hub or locally. Copied from load_peft_weights - - Args: - model_id (`str`): - The local path to the adapter weights or the name of the adapter to load from the HuggingFace Hub. - device (`str`): - The device to load the weights onto. - hf_hub_download_kwargs (`dict`): - Additional arguments to pass to the `hf_hub_download` method when loading from the HuggingFace Hub. - """ - path = ( - os.path.join(model_id, hf_hub_download_kwargs["subfolder"]) - if hf_hub_download_kwargs.get("subfolder", None) is not None - else model_id - ) - - SAFETENSORS_WEIGHTS_NAME = "xlora_classifier.safetensors" - WEIGHTS_NAME = "xlora_classifier.pt" - - if device is None: - device = infer_device() - - if os.path.exists(os.path.join(path, SAFETENSORS_WEIGHTS_NAME)): - filename = os.path.join(path, SAFETENSORS_WEIGHTS_NAME) - use_safetensors = True - elif os.path.exists(os.path.join(path, WEIGHTS_NAME)): - filename = os.path.join(path, WEIGHTS_NAME) - use_safetensors = False - else: - token = hf_hub_download_kwargs.get("token", None) - if token is None: - token = hf_hub_download_kwargs.get("use_auth_token", None) - - hub_filename = ( - os.path.join(hf_hub_download_kwargs["subfolder"], SAFETENSORS_WEIGHTS_NAME) - if hf_hub_download_kwargs.get("subfolder", None) is not None - else SAFETENSORS_WEIGHTS_NAME - ) - has_remote_safetensors_file = file_exists( - repo_id=model_id, - filename=hub_filename, - revision=hf_hub_download_kwargs.get("revision", None), - repo_type=hf_hub_download_kwargs.get("repo_type", None), - token=token, - ) - use_safetensors = has_remote_safetensors_file - - if has_remote_safetensors_file: - # Priority 1: load safetensors weights - filename = hf_hub_download( - model_id, - SAFETENSORS_WEIGHTS_NAME, - **hf_hub_download_kwargs, - ) - else: - try: - filename = hf_hub_download(model_id, WEIGHTS_NAME, **hf_hub_download_kwargs) - except EntryNotFoundError: - raise ValueError( - f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. " - f"Please check that the file {WEIGHTS_NAME} or {SAFETENSORS_WEIGHTS_NAME} is present at {model_id}." 
- ) - - if use_safetensors: - if hasattr(torch.backends, "mps") and (device == torch.device("mps")): - adapters_weights = safe_load_file(filename, device="cpu") - else: - adapters_weights = safe_load_file(filename, device=device) - else: - adapters_weights = torch.load(filename, map_location=torch.device(device)) - - return adapters_weights - - -def _get_file_path_dir(load_directory: Union[str, os.PathLike], name: str, dir: str) -> str: - if os.path.exists(os.path.join(load_directory, dir, name)): - return os.path.join(load_directory, dir, name) - return hf_hub_download(load_directory, filename=name, subfolder=dir) diff --git a/src/peft/tuners/xlora/util.py b/src/peft/tuners/xlora/util.py new file mode 100644 index 0000000000..3d34906d45 --- /dev/null +++ b/src/peft/tuners/xlora/util.py @@ -0,0 +1,93 @@ +import os +from typing import Optional, Union + +import torch +from huggingface_hub import file_exists, hf_hub_download # type: ignore +from huggingface_hub.utils import EntryNotFoundError # type: ignore +from safetensors.torch import load_file as safe_load_file + +from peft.utils.other import ( + infer_device, +) + + +def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: + r""" + A helper method to load the classifier weights from the HuggingFace Hub or locally. Copied from load_peft_weights + + Args: + model_id (`str`): + The local path to the adapter weights or the name of the adapter to load from the HuggingFace Hub. + device (`str`): + The device to load the weights onto. + hf_hub_download_kwargs (`dict`): + Additional arguments to pass to the `hf_hub_download` method when loading from the HuggingFace Hub. + """ + path = ( + os.path.join(model_id, hf_hub_download_kwargs["subfolder"]) + if hf_hub_download_kwargs.get("subfolder", None) is not None + else model_id + ) + + SAFETENSORS_WEIGHTS_NAME = "xlora_classifier.safetensors" + WEIGHTS_NAME = "xlora_classifier.pt" + + if device is None: + device = infer_device() + + if os.path.exists(os.path.join(path, SAFETENSORS_WEIGHTS_NAME)): + filename = os.path.join(path, SAFETENSORS_WEIGHTS_NAME) + use_safetensors = True + elif os.path.exists(os.path.join(path, WEIGHTS_NAME)): + filename = os.path.join(path, WEIGHTS_NAME) + use_safetensors = False + else: + token = hf_hub_download_kwargs.get("token", None) + if token is None: + token = hf_hub_download_kwargs.get("use_auth_token", None) + + hub_filename = ( + os.path.join(hf_hub_download_kwargs["subfolder"], SAFETENSORS_WEIGHTS_NAME) + if hf_hub_download_kwargs.get("subfolder", None) is not None + else SAFETENSORS_WEIGHTS_NAME + ) + has_remote_safetensors_file = file_exists( + repo_id=model_id, + filename=hub_filename, + revision=hf_hub_download_kwargs.get("revision", None), + repo_type=hf_hub_download_kwargs.get("repo_type", None), + token=token, + ) + use_safetensors = has_remote_safetensors_file + + if has_remote_safetensors_file: + # Priority 1: load safetensors weights + filename = hf_hub_download( + model_id, + SAFETENSORS_WEIGHTS_NAME, + **hf_hub_download_kwargs, + ) + else: + try: + filename = hf_hub_download(model_id, WEIGHTS_NAME, **hf_hub_download_kwargs) + except EntryNotFoundError: + raise ValueError( + f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. " + f"Please check that the file {WEIGHTS_NAME} or {SAFETENSORS_WEIGHTS_NAME} is present at {model_id}." 
+ ) + + if use_safetensors: + if hasattr(torch.backends, "mps") and (device == torch.device("mps")): + adapters_weights = safe_load_file(filename, device="cpu") + else: + adapters_weights = safe_load_file(filename, device=device) + else: + adapters_weights = torch.load(filename, map_location=torch.device(device)) + + return adapters_weights + + +def _get_file_path_dir(load_directory: Union[str, os.PathLike], name: str, dir: str) -> str: + if os.path.exists(os.path.join(load_directory, dir, name)): + return os.path.join(load_directory, dir, name) + return hf_hub_download(load_directory, filename=name, subfolder=dir) From 1a346159215af30fff37de98e019ceb0dc553d14 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 16:10:04 -0500 Subject: [PATCH 023/182] Update the typing structure --- src/peft/tuners/xlora/model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index d47df06ed9..c39cb31b3d 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -66,9 +66,13 @@ def convert_layers_to_xlora( class xLoRAModel(LoraModel): - def __init__(self, model: nn.Module, peft_config: xLoRAConfig, adapter_name: str, model_peft: nn.Module) -> None: + def __init__( + self, model: nn.Module, config: dict[str, xLoRAConfig], adapter_name: str, model_peft: nn.Module + ) -> None: # model_peft: PeftModel assert isinstance(model, PreTrainedModel) + assert len(config) == 1 + peft_config = config[adapter_name] assert isinstance(peft_config, xLoRAConfig) super().__init__(model, peft_config, adapter_name, model_peft) From 1f0e03a1eda8bb41cb1fbe892257c74d4501c942 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 16:11:33 -0500 Subject: [PATCH 024/182] Pass super() the dict --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index c39cb31b3d..b0962d64a0 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -75,7 +75,7 @@ def __init__( peft_config = config[adapter_name] assert isinstance(peft_config, xLoRAConfig) - super().__init__(model, peft_config, adapter_name, model_peft) + super().__init__(model, config, adapter_name, model_peft) if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" From c326560c59d2f63b511d3b149c4ea0a1092e6222 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 16:20:49 -0500 Subject: [PATCH 025/182] Add capability to disable default injection --- src/peft/tuners/lora/model.py | 4 ++-- src/peft/tuners/tuners_utils.py | 13 ++++++++++--- src/peft/tuners/xlora/model.py | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 1bf728013b..4520b5d13e 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -132,8 +132,8 @@ class LoraModel(BaseTuner): prefix: str = "lora_" - def __init__(self, model, config, adapter_name, _model_peft) -> None: - super().__init__(model, config, adapter_name) + def __init__(self, model, config, adapter_name, _model_peft, _disable_inject: bool = False) -> None: + super().__init__(model, config, adapter_name, _disable_inject) def _check_new_adapter_config(self, config: LoraConfig) -> None: """ diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 5958357b92..e5fce05287 100644 --- 
a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -140,7 +140,13 @@ class BaseTuner(nn.Module, ABC): double-check that the `config.target_modules` where specified correctly. """ - def __init__(self, model, peft_config: Union[PeftConfig, dict[str, PeftConfig]], adapter_name: str) -> None: + def __init__( + self, + model, + peft_config: Union[PeftConfig, dict[str, PeftConfig]], + adapter_name: str, + _disable_inject: bool = False, + ) -> None: super().__init__() self.model = model @@ -161,8 +167,9 @@ def __init__(self, model, peft_config: Union[PeftConfig, dict[str, PeftConfig]], # user is adding a dict of PeftConfigs self.peft_config.update(peft_config) - self.active_adapter = adapter_name - self.inject_adapter(self.model, adapter_name) + if not _disable_inject: + self.active_adapter = adapter_name + self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. self.model.peft_config = self.peft_config diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index b0962d64a0..f0b3f0560d 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -75,7 +75,7 @@ def __init__( peft_config = config[adapter_name] assert isinstance(peft_config, xLoRAConfig) - super().__init__(model, config, adapter_name, model_peft) + super().__init__(model, config, adapter_name, model_peft, _disable_inject=True) if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" From c9dab76f86b7213c692225149c03a6e4ec325e8e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 16:36:37 -0500 Subject: [PATCH 026/182] Set base model for loading adapter --- src/peft/tuners/xlora/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index f0b3f0560d..d665ee4cf0 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -82,6 +82,10 @@ def __init__( use_trainable_adapters = peft_config.use_trainable_adapters adapters_items = iter(peft_config.adapters.items()) + + # Because we call load_adapter, which requires base_model to be defined + model_peft.base_model = self + for adapter_name, model_id in adapters_items: model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) From 980b2c88cf24027764e33129b0a9b6d65afa9c58 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 16:44:15 -0500 Subject: [PATCH 027/182] Set peft type --- src/peft/tuners/xlora/model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index d665ee4cf0..e76fc370b6 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -5,6 +5,7 @@ from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel +from peft.utils.peft_types import PeftType from .. 
import lora from .classifier import InhibitorFlagPayload, xLoRAClassifier @@ -85,11 +86,14 @@ def __init__( # Because we call load_adapter, which requires base_model to be defined model_peft.base_model = self - + # For load_adapter to think we are a LoraModel + model_peft.peft_type = PeftType.LORA + for adapter_name, model_id in adapters_items: model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) self.set_adapter(list(peft_config.adapters.keys())) + model_peft.peft_type = PeftType.XLORA def hook(module, *args, **kwargs) -> None: args_real = args[0] From c3f4ed9f0265f8d87f5a5be89ec1135010595b0a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 17:43:25 -0500 Subject: [PATCH 028/182] Default adapter name --- src/peft/tuners/tuners_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index e5fce05287..0df5597e9c 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -170,6 +170,8 @@ def __init__( if not _disable_inject: self.active_adapter = adapter_name self.inject_adapter(self.model, adapter_name) + else: + self.active_adapter = "" # Copy the peft_config in the injected model. self.model.peft_config = self.peft_config From effaca05684811b2b10ccfb7155fe571f9a7531d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 17:51:48 -0500 Subject: [PATCH 029/182] Ensure all nonzero length --- src/peft/peft_model.py | 2 +- src/peft/tuners/tuners_utils.py | 8 ++++---- src/peft/tuners/xlora/model.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index b96f5640bc..b9c1467571 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -162,7 +162,7 @@ def active_adapters(self) -> list[str]: adapters = self.active_adapter if isinstance(adapters, str): adapters = [adapters] - return adapters + return list(filter(lambda x: len(x) > 0, adapters)) @peft_config.setter def peft_config(self, value: dict[str, PeftConfig]): diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 0df5597e9c..7448834b03 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -179,9 +179,9 @@ def __init__( @property def active_adapters(self) -> list[str]: if isinstance(self.active_adapter, str): - return [self.active_adapter] + return list(filter(lambda x: len(x) > 0, [self.active_adapter])) # is already a list of str - return self.active_adapter + return list(filter(lambda x: len(x) > 0, self.active_adapter)) def forward(self, *args: Any, **kwargs: Any): return self.model.forward(*args, **kwargs) @@ -493,9 +493,9 @@ def active_adapter(self) -> str: @property def active_adapters(self): if isinstance(self.active_adapter, str): - return [self.active_adapter] + return list(filter(lambda x: len(x) > 0, [self.active_adapter])) # is already a list of str - return self.active_adapter + return list(filter(lambda x: len(x) > 0, self.active_adapter)) def enable_adapters(self, enabled: bool) -> None: """Toggle the enabling and disabling of adapters diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index e76fc370b6..4a976fab25 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -88,7 +88,7 @@ def __init__( model_peft.base_model = self # For load_adapter to think we are a LoraModel model_peft.peft_type = PeftType.LORA - + for adapter_name, model_id in adapters_items: 
model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) From 031fabb1f60cb85a7ab1c4e2720ed5ea7b948832 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 21 Feb 2024 18:58:30 -0500 Subject: [PATCH 030/182] Fix get nb trainable params --- src/peft/tuners/xlora/insertion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 0da9d093d7..4c5a313f41 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -321,10 +321,11 @@ def get_nb_trainable_parameters(self) -> Tuple[int, int]: model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() + # Ignoring xlora_trainable_params as it is already included in model_trainable_params + _xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() trainable_params, all_param = ( - (model_trainable_params + xlora_trainable_params), + model_trainable_params, (model_all_param + xlora_all_param), ) From 9a7943e32c0f46318d3b7fad25522cbcd5509389 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 05:35:38 -0500 Subject: [PATCH 031/182] Add some docs to tuner class --- src/peft/tuners/xlora/model.py | 76 ++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4a976fab25..612251b91c 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -67,13 +67,83 @@ def convert_layers_to_xlora( class xLoRAModel(LoraModel): + """ + Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. + + The method is described in detail in https://arxiv.org/abs/2402.07148. + + Args: + model ([`torch.nn.Module`]): The model to be adapted. + config ([`xLoRAConfig`]): The configuration of the Lora model. + adapter_name (`str`): The name of the adapter, does not affect the LoRA adapter names. + + Returns: + `torch.nn.Module`: The X-LoRA model. + + Example: + + ```py + >>> from transformers import AutoModelForSeq2SeqLM, AutoConfig + >>> from peft import LoraModel, LoraConfig + + >>> model_config = AutoConfig.from_pretrained("t5-base") + >>> config = xLoRAConfig( + ... task_type="SEQ_2_SEQ_LM", + ... hidden_size=model_config.hidden_size, + ... xlora_depth=3, + ... adapters={ + ... "adapter_1": "./path/to/the/checkpoint/", + ... "adapter_2": "./path/to/the/checkpoint/", + ... "adapter_n": "./path/to/the/checkpoint/", + ... }, + ... ) + + >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base") + >>> xlora_model = xLoRAModel(model, config, "xlora") + ``` + + ```py + >>> from transformers import AutoModelForCausalLM, AutoConfig + >>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_int8_training + + >>> model_config = AutoConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1") + >>> config = xLoRAConfig( + ... task_type="CAUSAL_LM", + ... hidden_size=model_config.hidden_size, + ... xlora_depth=4, + ... adapters={ + ... "adapter_1": "./path/to/the/checkpoint/", + ... "adapter_2": "./path/to/the/checkpoint/", + ... "adapter_n": "./path/to/the/checkpoint/", + ... }, + ... ) + + >>> model = AutoModelForCausalLM.from_pretrained( + ... "mistralai/Mistral-7B-Instruct-v0.1", + ... trust_remote_code=True, + ... 
use_flash_attention_2=False, + ... device_map="cuda:0", + ... torch_dtype=torch.bfloat16, + ... ) + >>> model = prepare_model_for_int8_training(model) + >>> xlora_model = get_peft_model(model, config) + ``` + """ + def __init__( - self, model: nn.Module, config: dict[str, xLoRAConfig], adapter_name: str, model_peft: nn.Module + self, + model: nn.Module, + config: Union[dict[str, xLoRAConfig], xLoRAConfig], + adapter_name: str, + model_peft: nn.Module, ) -> None: # model_peft: PeftModel assert isinstance(model, PreTrainedModel) - assert len(config) == 1 - peft_config = config[adapter_name] + if isinstance(config, dict): + assert len(config) == 1 + peft_config = config[adapter_name] + else: + peft_config = config assert isinstance(peft_config, xLoRAConfig) super().__init__(model, config, adapter_name, model_peft, _disable_inject=True) From deeb169af658cae192b4c2bd3d214658558b945c Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 05:36:35 -0500 Subject: [PATCH 032/182] Update post init of xlora config --- src/peft/tuners/xlora/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index da8011890f..c30bfbafe3 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -5,6 +5,7 @@ import torch from peft.config import PeftConfig +from peft.utils.peft_types import PeftType @dataclass @@ -73,6 +74,7 @@ def __post_init__(self): assert self.hidden_size is not None assert self.device is not None assert self.adapters is not None + self.peft_type = PeftType.XLORA if self.enable_softmax_topk and self.top_k_lora is None: warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set") From 1e73f3d2453b99b804343292d9d5aefee0243657 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 05:40:46 -0500 Subject: [PATCH 033/182] Mention method swapping --- src/peft/tuners/xlora/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 612251b91c..27dea758a5 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -72,10 +72,13 @@ class xLoRAModel(LoraModel): The method is described in detail in https://arxiv.org/abs/2402.07148. + The X-LoRA model modifies the methods of `model_peft` to inject its own API. + Args: model ([`torch.nn.Module`]): The model to be adapted. config ([`xLoRAConfig`]): The configuration of the Lora model. adapter_name (`str`): The name of the adapter, does not affect the LoRA adapter names. + model_peft (`PeftModel`): Base peft model. Returns: `torch.nn.Module`: The X-LoRA model. From 73901e895182ca6eacd2ce49ca8bfa1f6dccc15a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 05:46:02 -0500 Subject: [PATCH 034/182] Remove incorrect example --- src/peft/tuners/xlora/model.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 27dea758a5..bbcff9f001 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -84,27 +84,6 @@ class xLoRAModel(LoraModel): `torch.nn.Module`: The X-LoRA model. Example: - - ```py - >>> from transformers import AutoModelForSeq2SeqLM, AutoConfig - >>> from peft import LoraModel, LoraConfig - - >>> model_config = AutoConfig.from_pretrained("t5-base") - >>> config = xLoRAConfig( - ... task_type="SEQ_2_SEQ_LM", - ... hidden_size=model_config.hidden_size, - ... xlora_depth=3, - ... adapters={ - ... 
"adapter_1": "./path/to/the/checkpoint/", - ... "adapter_2": "./path/to/the/checkpoint/", - ... "adapter_n": "./path/to/the/checkpoint/", - ... }, - ... ) - - >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base") - >>> xlora_model = xLoRAModel(model, config, "xlora") - ``` - ```py >>> from transformers import AutoModelForCausalLM, AutoConfig >>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_int8_training From 279036d5649e9c0ad179b4665babd26d6aa4b87e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 05:48:02 -0500 Subject: [PATCH 035/182] Update to use default args for compat --- src/peft/tuners/adalora/model.py | 2 +- src/peft/tuners/adaption_prompt/model.py | 2 +- src/peft/tuners/lora/model.py | 2 +- src/peft/tuners/lycoris_utils.py | 2 +- src/peft/tuners/poly/model.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/peft/tuners/adalora/model.py b/src/peft/tuners/adalora/model.py index 81610f5997..be935acc7e 100644 --- a/src/peft/tuners/adalora/model.py +++ b/src/peft/tuners/adalora/model.py @@ -61,7 +61,7 @@ class AdaLoraModel(LoraModel): # Note: don't redefine prefix here, it should be inherited from LoraModel - def __init__(self, model, config, adapter_name, _model_peft): + def __init__(self, model, config, adapter_name, _model_peft=None): super().__init__(model, config, adapter_name) traininable_mode_counter = 0 diff --git a/src/peft/tuners/adaption_prompt/model.py b/src/peft/tuners/adaption_prompt/model.py index 66ad96f2f2..c036cad8d1 100644 --- a/src/peft/tuners/adaption_prompt/model.py +++ b/src/peft/tuners/adaption_prompt/model.py @@ -40,7 +40,7 @@ class AdaptionPromptModel(nn.Module): - Disabling the adapter would also result in the modules being removed from the model. """ - def __init__(self, model, configs: Dict, adapter_name: str, _model_peft): + def __init__(self, model, configs: Dict, adapter_name: str, _model_peft=None): super().__init__() self.model = model # Store adapter configs by name. 
diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 4520b5d13e..16d6b369dd 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -132,7 +132,7 @@ class LoraModel(BaseTuner): prefix: str = "lora_" - def __init__(self, model, config, adapter_name, _model_peft, _disable_inject: bool = False) -> None: + def __init__(self, model, config, adapter_name, _model_peft=None, _disable_inject: bool = False) -> None: super().__init__(model, config, adapter_name, _disable_inject) def _check_new_adapter_config(self, config: LoraConfig) -> None: diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 552acdaef9..62576505f0 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -197,7 +197,7 @@ class LycorisTuner(BaseTuner): prefix: str layers_mapping: dict[type[torch.nn.Module], type[LycorisLayer]] - def __init__(self, model, config, adapter_name, _model_peft): + def __init__(self, model, config, adapter_name, _model_peft=None): super().__init__(model, config, adapter_name) def __getattr__(self, name: str): diff --git a/src/peft/tuners/poly/model.py b/src/peft/tuners/poly/model.py index f29b2c673e..93b8b19856 100644 --- a/src/peft/tuners/poly/model.py +++ b/src/peft/tuners/poly/model.py @@ -33,7 +33,7 @@ class PolyModel(BaseTuner): prefix: str = "poly_" - def __init__(self, model, config, adapter_name, _model_peft) -> None: + def __init__(self, model, config, adapter_name, _model_peft=None) -> None: super().__init__(model, config, adapter_name) @staticmethod From 644a56281a8d9ff021b8edb4f3514fa05e6eab6e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 07:43:19 -0500 Subject: [PATCH 036/182] Move API for nice visibility --- src/peft/peft_model.py | 2 +- src/peft/tuners/xlora/insertion.py | 153 ++---------------------- src/peft/tuners/xlora/model.py | 179 ++++++++++++++++++++++------- 3 files changed, 147 insertions(+), 187 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index b9c1467571..a301aaca26 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -397,7 +397,7 @@ def from_pretrained( if isinstance(model.base_model, xLoRAModel): assert isinstance(config, xLoRAConfig) - device = infer_device() # As inn PeftModel.load_adapter, torch_device = infer_device( + device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( config.device = torch.device(device) # If we are passed adapters in the kwargs, it is already in the config. 
diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 4c5a313f41..ae3bf0655b 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -1,15 +1,16 @@ import json import os -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, Optional, Tuple, Union import torch from safetensors.torch import save_model # type: ignore from torch import Tensor, nn from peft.tuners import lora -from peft.tuners.tuners_utils import BaseTuner # type: ignore +from peft.tuners.tuners_utils import BaseTuner +from peft.tuners.xlora.model import xLoRAModel # type: ignore -from .classifier import Number, xLoRAClassifier +from .classifier import xLoRAClassifier from .config import xLoRAConfig @@ -24,7 +25,7 @@ class xLoRALayer: def __init__( self, - model: nn.Module, # PeftModel + model: xLoRAModel, target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, @@ -205,13 +206,13 @@ def forward(self, *args, **kwargs): class PeftModelWrapper: def __init__( self, - base_model: nn.Module, # PeftModel + peft_model: nn.Module, # PeftModel base_model_save: Callable[..., None], config: xLoRAConfig, base_model_get_nb_trainable_parameters: Callable[..., Tuple[int, int]], base_model_generate: Callable[..., Any], ): - self.model = base_model + self.peft_model = peft_model self.base_model_save = base_model_save self.config = config self.base_model_get_nb_trainable_parameters = base_model_get_nb_trainable_parameters @@ -220,147 +221,13 @@ def __init__( def generate(self, *args, **kwargs): res = self.base_model_generate(*args, **kwargs) # type: ignore # TODO(EricLBuehler): Evaluate effectiveness and performance degradation - self.model.base_model.eval() + self.peft_model.base_model.eval() if not self.config.use_trainable_adapters: - for name, param in self.model.base_model.named_parameters(): + for name, param in self.peft_model.base_model.named_parameters(): if "lora_" in name: param.requires_grad = False return res - def set_topk_lora(self, value: Optional[int]): - """ - Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.config.top_k_lora = value - - def get_topk_lora(self) -> Optional[int]: - """ - Get the current top_k LoRA experts value. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - return classifier.config.top_k_lora - - def set_global_scaling_weight(self, weight: float): - """ - Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.config.global_scaling_weight = weight - - def get_global_scaling_weight(self) -> float: - """ - Get the global LoRA weight. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - return classifier.config.global_scaling_weight - - def get_latest_scalings(self) -> Optional[Tensor]: - """ - Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape (batch_size, seq_len, n_layers, n_classes). 
- """ - return self.model.internal_xlora_scalings - - def get_scalings_log(self) -> List[Tensor]: - """ - Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. - The tensors are of shape (batch_size, seq_len, n_layers, n_classes). The seq_len dim may vary with input dimension. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - return classifier.log_scalings.copy() - - def set_scaling_pass_value(self, value: Union[Number, None]): - """ - Manually set the scalings to a specific value during the scaling pass, forever. Call this function with None to enable the default - scalings. - - This is reflected in the config. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.set_override_scaling_pass_value(value) - - def print_scalings_predictions(self, n_predictions_lifetime: int): - """ - Print the scaling states for the next n classifier predictions (i.e. forward, generate passes) - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.n_predictions_lifetime = n_predictions_lifetime - - def enable_scalings_logging(self): - """ - Enable scalings logging. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.scalings_logging = True - - def disable_scalings_logging(self): - """ - Disable scalings logging, clearing the log. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.scalings_logging = False - classifier.log_scalings = [] - - def flush_log_scalings(self, path: str): - """ - Write the scalings log (a tensor of shape (num_logged, batch_size, seq_len, n_layers, n_classes)) to the specified path. - If the tensor cannot be constructed, multiple files are written containing tensors of shape - (num_logged, batch_size, seq_len, n_layers, n_classes) such that each file contains one sequence length. Additionally a JSON - file is outputted containing the mapping from each sequence log file to the index of the contained tensor so that one may reconstruct - the log order. - - The file specified should not contain an extension. - """ - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - classifier.flush_log_scalings(path) - - def get_nb_trainable_parameters(self) -> Tuple[int, int]: - """ - Returns the number of trainable parameters and number of all parameters in the model. - """ - model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() - - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore - # Ignoring xlora_trainable_params as it is already included in model_trainable_params - _xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() - - trainable_params, all_param = ( - model_trainable_params, - (model_all_param + xlora_all_param), - ) - - return trainable_params, all_param - - def print_trainable_parameters(self): - """ - Prints the number of trainable parameters in the model, including of the xLoRA classifier. 
- """ - trainable_params, all_param = self.get_nb_trainable_parameters() - - print( - f"trainable params: {trainable_params:,d} || " - f"all params: {all_param:,d} || " - f"trainable%: {100 * trainable_params / all_param:.4f}" - ) - - def set_use_trainable_adapters(self, use_trainable_adapters: bool): - """ - Set the adapters to trainable or not trainable. - - This is reflected in the config. - """ - for name, param in self.model.base_model.named_parameters(): - if "lora_" in name: - param.requires_grad = use_trainable_adapters - - self.config.use_trainable_adapters = use_trainable_adapters - - def get_use_trainable_adapters(self) -> bool: - """ - Get the trainable or not trainable state of the adapters. - """ - return self.config.use_trainable_adapters - def save_pretrained( self, save_directory: str, @@ -389,7 +256,7 @@ def save_pretrained( if is_main_process: os.makedirs(save_directory, exist_ok=True) - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier: xLoRAClassifier = self.peft_model.base_model.internal_xlora_classifier # type: ignore conf = classifier.config.__dict__.copy() del conf["device"] diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index bbcff9f001..64ec608db7 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import List, Optional, Tuple, Union import torch import torch.nn as nn @@ -8,7 +8,7 @@ from peft.utils.peft_types import PeftType from .. import lora -from .classifier import InhibitorFlagPayload, xLoRAClassifier +from .classifier import InhibitorFlagPayload, Number, xLoRAClassifier from .config import xLoRAConfig from .insertion import BaseTunerWrapper, PeftModelWrapper, xLoRAConv2dLayer, xLoRAEmbeddingLayer, xLoRALinearLayer @@ -72,8 +72,6 @@ class xLoRAModel(LoraModel): The method is described in detail in https://arxiv.org/abs/2402.07148. - The X-LoRA model modifies the methods of `model_peft` to inject its own API. - Args: model ([`torch.nn.Module`]): The model to be adapted. config ([`xLoRAConfig`]): The configuration of the Lora model. 
@@ -152,14 +150,14 @@ def hook(module, *args, **kwargs) -> None: kwargs_real: dict = args[1] kwargs_real.update(kwargs) - xlora_classifier: xLoRAClassifier = model_peft.internal_xlora_classifier # type: ignore + xlora_classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore if "_xlora_classifier_inhibitor_flag" in kwargs_real: payload: InhibitorFlagPayload = kwargs_real["_xlora_classifier_inhibitor_flag"] del kwargs_real["_xlora_classifier_inhibitor_flag"] - model_peft.internal_xlora_scalings = torch.full( # type: ignore + self.internal_xlora_scalings = torch.full( # type: ignore (payload.batch_size, payload.seq_len, xlora_classifier.n_layers, xlora_classifier.n_classes), payload.override_scaling_pass_value, ) @@ -171,7 +169,7 @@ def hook(module, *args, **kwargs) -> None: **kwargs_real, ) # Set the scalings - model_peft.internal_xlora_scalings = xlora_scalings + self.internal_xlora_scalings = xlora_scalings model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) @@ -207,46 +205,141 @@ def hook(module, *args, **kwargs) -> None: model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore model_peft.generate = peft_model_wrapper.generate # type: ignore - assert not hasattr(model_peft, "set_use_trainable_adapters") - model_peft.set_use_trainable_adapters = peft_model_wrapper.set_use_trainable_adapters # type: ignore - - assert not hasattr(model_peft, "print_scalings_predictions") - model_peft.print_scalings_predictions = peft_model_wrapper.print_scalings_predictions # type: ignore - - assert not hasattr(model_peft, "enable_scalings_logging") - model_peft.enable_scalings_logging = peft_model_wrapper.enable_scalings_logging # type: ignore - - assert not hasattr(model_peft, "disable_scalings_logging") - model_peft.disable_scalings_logging = peft_model_wrapper.disable_scalings_logging # type: ignore - - assert not hasattr(model_peft, "flush_log_scalings") - model_peft.flush_log_scalings = peft_model_wrapper.flush_log_scalings # type: ignore - - assert not hasattr(model_peft, "get_scalings_log") - model_peft.get_scalings_log = peft_model_wrapper.get_scalings_log # type: ignore - - assert not hasattr(model_peft, "set_scaling_pass_value") - model_peft.set_scaling_pass_value = peft_model_wrapper.set_scaling_pass_value # type: ignore - - assert not hasattr(model_peft, "set_global_scaling_weight") - model_peft.set_global_scaling_weight = peft_model_wrapper.set_global_scaling_weight # type: ignore + # Setup the model internal state + self.internal_xlora_classifier = xlora_classifier + self.internal_xlora_scalings = None # type: ignore + self.xlora_config = peft_config + + def set_topk_lora(self, value: Optional[int]): + """ + Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.config.top_k_lora = value + + def get_topk_lora(self) -> Optional[int]: + """ + Get the current top_k LoRA experts value. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + return classifier.config.top_k_lora + + def set_global_scaling_weight(self, weight: float): + """ + Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. 
+ """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.config.global_scaling_weight = weight + + def get_global_scaling_weight(self) -> float: + """ + Get the global LoRA weight. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + return classifier.config.global_scaling_weight + + def get_latest_scalings(self) -> Optional[torch.Tensor]: + """ + Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape (batch_size, seq_len, n_layers, n_classes). + """ + return self.internal_xlora_scalings + + def get_scalings_log(self) -> List[torch.Tensor]: + """ + Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. + The tensors are of shape (batch_size, seq_len, n_layers, n_classes). The seq_len dim may vary with input dimension. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + return classifier.log_scalings.copy() + + def set_scaling_pass_value(self, value: Union[Number, None]): + """ + Manually set the scalings to a specific value during the scaling pass, forever. Call this function with None to enable the default + scalings. + + This is reflected in the config. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.set_override_scaling_pass_value(value) + + def print_scalings_predictions(self, n_predictions_lifetime: int): + """ + Print the scaling states for the next n classifier predictions (i.e. forward, generate passes) + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.n_predictions_lifetime = n_predictions_lifetime + + def enable_scalings_logging(self): + """ + Enable scalings logging. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.scalings_logging = True + + def disable_scalings_logging(self): + """ + Disable scalings logging, clearing the log. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.scalings_logging = False + classifier.log_scalings = [] + + def flush_log_scalings(self, path: str): + """ + Write the scalings log (a tensor of shape (num_logged, batch_size, seq_len, n_layers, n_classes)) to the specified path. + If the tensor cannot be constructed, multiple files are written containing tensors of shape + (num_logged, batch_size, seq_len, n_layers, n_classes) such that each file contains one sequence length. Additionally a JSON + file is outputted containing the mapping from each sequence log file to the index of the contained tensor so that one may reconstruct + the log order. + + The file specified should not contain an extension. + """ + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier.flush_log_scalings(path) + + def get_nb_trainable_parameters(self) -> Tuple[int, int]: + """ + Returns the number of trainable parameters and number of all parameters in the model. 
+ """ + model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() + + classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + # Ignoring xlora_trainable_params as it is already included in model_trainable_params + _xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() + + trainable_params, all_param = ( + model_trainable_params, + (model_all_param + xlora_all_param), + ) - assert not hasattr(model_peft, "get_global_scaling_weight") - model_peft.get_global_scaling_weight = peft_model_wrapper.get_global_scaling_weight # type: ignore + return trainable_params, all_param - assert not hasattr(model_peft, "set_topk_lora") - model_peft.set_topk_lora = peft_model_wrapper.set_topk_lora # type: ignore + def print_trainable_parameters(self): + """ + Prints the number of trainable parameters in the model, including of the xLoRA classifier. + """ + trainable_params, all_param = self.get_nb_trainable_parameters() - assert not hasattr(model_peft, "get_topk_lora") - model_peft.get_topk_lora = peft_model_wrapper.get_topk_lora # type: ignore + print( + f"trainable params: {trainable_params:,d} || " + f"all params: {all_param:,d} || " + f"trainable%: {100 * trainable_params / all_param:.4f}" + ) - model_peft.get_nb_trainable_parameters = peft_model_wrapper.get_nb_trainable_parameters # type: ignore + def set_use_trainable_adapters(self, use_trainable_adapters: bool): + """ + Set the adapters to trainable or not trainable. - model_peft.print_trainable_parameters = peft_model_wrapper.print_trainable_parameters # type: ignore + This is reflected in the config. + """ + for name, param in self.named_parameters(): + if "lora_" in name: + param.requires_grad = use_trainable_adapters - # Setup the model internal state - assert not hasattr(model_peft, "internal_xlora_classifier") - model_peft.internal_xlora_classifier = xlora_classifier + self.xlora_config.use_trainable_adapters = use_trainable_adapters - assert not hasattr(model_peft, "internal_xlora_scalings") - model_peft.internal_xlora_scalings = None # type: ignore + def get_use_trainable_adapters(self) -> bool: + """ + Get the trainable or not trainable state of the adapters. + """ + return self.xlora_config.use_trainable_adapters From 6e3dc3d3ea33ce70439bd8642dec7a8db7178d07 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 11:48:31 -0500 Subject: [PATCH 037/182] Remove use of __slots__ --- src/peft/tuners/xlora/insertion.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index ae3bf0655b..3a975b91a8 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -21,8 +21,6 @@ class xLoRALayer: xLoRA algorithm. 
""" - __slots__ = {"model", "target_forward", "target", "layer_number", "config"} - def __init__( self, model: xLoRAModel, From 012ed6fdc10064e6be51c4f361792b778d1c7af1 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 12:26:58 -0500 Subject: [PATCH 038/182] Remove passing of classifier --- src/peft/tuners/xlora/insertion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 3a975b91a8..1408dce27c 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -193,9 +193,8 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: class BaseTunerWrapper: - def __init__(self, base_model: BaseTuner, classifier: xLoRAClassifier): + def __init__(self, base_model: BaseTuner): self.model = base_model.model - self.classifier = classifier def forward(self, *args, **kwargs): return self.model(*args, **kwargs) # Important to *call* the model From 510247d9a755907021ddf0e05fa2f3660c91b32b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 12:29:16 -0500 Subject: [PATCH 039/182] Remove passing of classifier --- src/peft/tuners/xlora/insertion.py | 8 -------- src/peft/tuners/xlora/model.py | 7 +++---- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 1408dce27c..c5f71757d8 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -192,14 +192,6 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return result -class BaseTunerWrapper: - def __init__(self, base_model: BaseTuner): - self.model = base_model.model - - def forward(self, *args, **kwargs): - return self.model(*args, **kwargs) # Important to *call* the model - - class PeftModelWrapper: def __init__( self, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 64ec608db7..2cf32600e8 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -191,10 +191,6 @@ def hook(module, *args, **kwargs) -> None: n_classes = len(peft_config.adapters) xlora_classifier = xLoRAClassifier(model_peft, peft_config, n_classes, total_swapped) - # Setup the internal state - base_model_wrapper = BaseTunerWrapper(self, xlora_classifier) - self.forward = base_model_wrapper.forward # type: ignore[method-assign] - peft_model_wrapper = PeftModelWrapper( model_peft, model_peft.save_pretrained, @@ -210,6 +206,9 @@ def hook(module, *args, **kwargs) -> None: self.internal_xlora_scalings = None # type: ignore self.xlora_config = peft_config + def forward(self, *args, **kwargs): + return self.model(*args, **kwargs) # Important to *call* the model + def set_topk_lora(self, value: Optional[int]): """ Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. 
From f3e3d1f36778a87aa203b584c86d364974dc58b8 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 12:33:02 -0500 Subject: [PATCH 040/182] Remove many asserts, converting to exceptions --- src/peft/peft_model.py | 4 ++-- src/peft/tuners/xlora/model.py | 15 ++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index a301aaca26..ab96d0c205 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -395,7 +395,8 @@ def from_pretrained( model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) if isinstance(model.base_model, xLoRAModel): - assert isinstance(config, xLoRAConfig) + if not isinstance(config, xLoRAConfig): + raise TypeError(f"Expected 'xLoRAConfig', got '{type(config)}' instead.") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( config.device = torch.device(device) @@ -412,7 +413,6 @@ def from_pretrained( for name in config.adapters } else: - assert isinstance(config.adapters, dict) adapters_real = config.adapters config.adapters = adapters_real diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 2cf32600e8..82d89dbd49 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -20,7 +20,6 @@ def convert_layers_to_xlora( """ Returns the number of swapped layers. """ - assert isinstance(base.base_model, lora.LoraModel) total_swapped = 0 scaling_keys = None @@ -30,7 +29,6 @@ def convert_layers_to_xlora( scaling_keys = list(module.scaling.keys()) # NOTE(EricLBuehler): Python 3.7: dicts are ordered! if isinstance(module, lora.Linear): - assert scaling_keys is not None new_layer: Union[xLoRALinearLayer, xLoRAEmbeddingLayer, xLoRAConv2dLayer] = xLoRALinearLayer( model=base, target=module, @@ -41,7 +39,6 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): - assert scaling_keys is not None new_layer = xLoRAEmbeddingLayer( model=base, target=module, @@ -52,7 +49,6 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Conv2d): - assert scaling_keys is not None new_layer = xLoRAConv2dLayer( model=base, target=module, @@ -118,13 +114,16 @@ def __init__( model_peft: nn.Module, ) -> None: # model_peft: PeftModel - assert isinstance(model, PreTrainedModel) + if not isinstance(model, PreTrainedModel): + raise TypeError(f"Expected model type to be 'PreTrainedModel', got '{type(model)}' instead.") if isinstance(config, dict): - assert len(config) == 1 + if len(config) != 1: + raise TypeError(f"Expected one config.") peft_config = config[adapter_name] else: peft_config = config - assert isinstance(peft_config, xLoRAConfig) + if not isinstance(peft_config, xLoRAConfig): + raise TypeError(f"Expected config type to be 'xLoRAConfig', got '{type(model)}' instead.") super().__init__(model, config, adapter_name, model_peft, _disable_inject=True) @@ -181,8 +180,6 @@ def hook(module, *args, **kwargs) -> None: param.requires_grad = False total_frozen += 1 - assert isinstance(self, LoraModel) - total_swapped = convert_layers_to_xlora( model_peft, peft_config, From a24f51be058d9b66ff2e1d41ce917dff29b3e4fa Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 12:35:39 -0500 Subject: [PATCH 041/182] Update naming convention --- src/peft/__init__.py | 4 +-- src/peft/mapping.py | 4 +-- 
src/peft/peft_model.py | 17 ++++----- src/peft/tuners/__init__.py | 2 +- src/peft/tuners/xlora/__init__.py | 6 ++-- src/peft/tuners/xlora/classifier.py | 8 ++--- src/peft/tuners/xlora/config.py | 4 +-- src/peft/tuners/xlora/insertion.py | 40 ++++++++++----------- src/peft/tuners/xlora/model.py | 54 ++++++++++++++--------------- 9 files changed, 70 insertions(+), 69 deletions(-) diff --git a/src/peft/__init__.py b/src/peft/__init__.py index 5868fe753a..0af151475a 100644 --- a/src/peft/__init__.py +++ b/src/peft/__init__.py @@ -73,8 +73,8 @@ OFTModel, PolyConfig, PolyModel, - xLoRAConfig, - xLoRAModel, + XLoraConfig, + XLoraModel, ) from .utils import ( TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, diff --git a/src/peft/mapping.py b/src/peft/mapping.py index a0ea1abe06..2a4f324fc1 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -49,7 +49,7 @@ PrefixTuningConfig, PromptEncoderConfig, PromptTuningConfig, - xLoRAConfig, + XLoraConfig, ) from .utils import _prepare_prompt_learning_config @@ -80,7 +80,7 @@ "MULTITASK_PROMPT_TUNING": MultitaskPromptTuningConfig, "OFT": OFTConfig, "POLY": PolyConfig, - "XLORA": xLoRAConfig, + "XLORA": XLoraConfig, } PEFT_TYPE_TO_TUNER_MAPPING = { diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index ab96d0c205..b2734f0273 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -51,13 +51,14 @@ PrefixEncoder, PromptEmbedding, PromptEncoder, - xLoRAConfig, - xLoRAModel, + XLoraConfig, + XLoraModel, ) + from .tuners.tuners_utils import BaseTunerLayer from .tuners.xlora import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora import _load_classifier_weights as xlora_load_classifier_weights -from .tuners.xlora.classifier import xLoRAClassifier +from .tuners.xlora.classifier import XLoraClassifier from .tuners.xlora.util import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights from .utils import ( @@ -91,7 +92,7 @@ PeftType.IA3: IA3Model, PeftType.OFT: OFTModel, PeftType.POLY: PolyModel, - PeftType.XLORA: xLoRAModel, + PeftType.XLORA: XLoraModel, } @@ -394,9 +395,9 @@ def from_pretrained( else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - if isinstance(model.base_model, xLoRAModel): - if not isinstance(config, xLoRAConfig): - raise TypeError(f"Expected 'xLoRAConfig', got '{type(config)}' instead.") + if isinstance(model.base_model, XLoraModel): + if not isinstance(config, XLoraConfig): + raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( config.device = torch.device(device) @@ -416,7 +417,7 @@ def from_pretrained( adapters_real = config.adapters config.adapters = adapters_real - classifier: xLoRAClassifier = model.base_model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py index b8d334f2d4..f410c6615e 100644 --- a/src/peft/tuners/__init__.py +++ b/src/peft/tuners/__init__.py @@ -30,4 +30,4 @@ from .oft import OFTConfig, OFTModel from .mixed import MixedModel from .poly import PolyConfig, PolyModel -from .xlora import xLoRAConfig, xLoRAModel +from 
.xlora import XLoraConfig, XLoraModel diff --git a/src/peft/tuners/xlora/__init__.py b/src/peft/tuners/xlora/__init__.py index 987fe98542..0eed83ccae 100644 --- a/src/peft/tuners/xlora/__init__.py +++ b/src/peft/tuners/xlora/__init__.py @@ -1,5 +1,5 @@ -from .config import xLoRAConfig -from .model import xLoRAModel +from .config import XLoraConfig +from .model import XLoraModel -__all__ = ["xLoRAConfig", "xLoRAModel"] +__all__ = ["XLoraConfig", "XLoraModel"] diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index e4374db9c0..7cdaa0a6aa 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -11,7 +11,7 @@ ModelOutput, ) -from .config import xLoRAConfig +from .config import XLoraConfig Number = Union[builtins.int, builtins.float, builtins.bool] @@ -37,15 +37,15 @@ class InhibitorFlagPayload: override_scaling_pass_value: Number -class xLoRAClassifier(nn.Module): +class XLoraClassifier(nn.Module): """ - A classifier to select LoRA layers for xLoRA. + A classifier to select LoRA layers for XLora. """ def __init__( self, model: nn.Module, # PeftModel - config: xLoRAConfig, + config: XLoraConfig, n_classes: int, n_layers: int, ): diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index c30bfbafe3..3b8f8804ae 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -9,9 +9,9 @@ @dataclass -class xLoRAConfig(PeftConfig): +class XLoraConfig(PeftConfig): r""" - This is the configuration class to store the configuration of a [`xLoRAClassifier`]. + This is the configuration class to store the configuration of a [`XLoraClassifier`]. When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys matter during loading. diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index c5f71757d8..cdee075110 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -8,26 +8,26 @@ from peft.tuners import lora from peft.tuners.tuners_utils import BaseTuner -from peft.tuners.xlora.model import xLoRAModel # type: ignore +from peft.tuners.xlora.model import XLoraModel # type: ignore -from .classifier import xLoRAClassifier -from .config import xLoRAConfig +from .classifier import XLoraClassifier +from .config import XLoraConfig -class xLoRALayer: +class XLoraLayer: """ - A xLoRALayer wraps any LoraLayer and performs the xLoRA operation on the LoRA adaptors specified. + A XLoraLayer wraps any LoraLayer and performs the XLora operation on the LoRA adaptors specified. Its primary API is the forward method, which uses the scalings to execute the - xLoRA algorithm. + XLora algorithm. 
""" def __init__( self, - model: xLoRAModel, + model: XLoraModel, target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, - config: xLoRAConfig, + config: XLoraConfig, ) -> None: self.model = model self.target_forward = target_forward @@ -55,7 +55,7 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) - classifier: xLoRAClassifier = self.model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore if classifier.config.enable_softmax_topk: nonzero_mask = xlora_scalings != 0 softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) @@ -64,21 +64,21 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: return xlora_scalings -class xLoRALinearLayer(xLoRALayer): +class XLoraLinearLayer(XLoraLayer): def __init__( self, model: nn.Module, # PeftModel target: lora.Linear, target_forward: Callable[..., Any], layer_number: int, - config: xLoRAConfig, + config: XLoraConfig, ) -> None: super().__init__(model, target, target_forward, layer_number, config) def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the xLoRALayer class). + To use it, a bound method must be created (bound to an instance of the XLoraLayer class). """ previous_dtype = x.dtype @@ -108,21 +108,21 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return result -class xLoRAEmbeddingLayer(xLoRALayer): +class XLoraEmbeddingLayer(XLoraLayer): def __init__( self, model: nn.Module, # PeftModel target: lora.Embedding, target_forward: Callable[..., Any], layer_number: int, - config: xLoRAConfig, + config: XLoraConfig, ) -> None: super().__init__(model, target, target_forward, layer_number, config) def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the xLoRALayer class). + To use it, a bound method must be created (bound to an instance of the XLoraLayer class). """ xlora_scalings = self.get_maybe_topk_scalings() @@ -149,21 +149,21 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return result -class xLoRAConv2dLayer(xLoRALayer): +class XLoraConv2dLayer(XLoraLayer): def __init__( self, model: nn.Module, # PeftModel target: lora.Conv2d, target_forward: Callable[..., Any], layer_number: int, - config: xLoRAConfig, + config: XLoraConfig, ) -> None: super().__init__(model, target, target_forward, layer_number, config) def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the xLoRALayer class). + To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
""" previous_dtype = x.dtype @@ -197,7 +197,7 @@ def __init__( self, peft_model: nn.Module, # PeftModel base_model_save: Callable[..., None], - config: xLoRAConfig, + config: XLoraConfig, base_model_get_nb_trainable_parameters: Callable[..., Tuple[int, int]], base_model_generate: Callable[..., Any], ): @@ -245,7 +245,7 @@ def save_pretrained( if is_main_process: os.makedirs(save_directory, exist_ok=True) - classifier: xLoRAClassifier = self.peft_model.base_model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.peft_model.base_model.internal_xlora_classifier # type: ignore conf = classifier.config.__dict__.copy() del conf["device"] diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 82d89dbd49..532e0a1137 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -8,14 +8,14 @@ from peft.utils.peft_types import PeftType from .. import lora -from .classifier import InhibitorFlagPayload, Number, xLoRAClassifier -from .config import xLoRAConfig -from .insertion import BaseTunerWrapper, PeftModelWrapper, xLoRAConv2dLayer, xLoRAEmbeddingLayer, xLoRALinearLayer +from .classifier import InhibitorFlagPayload, Number, XLoraClassifier +from .config import XLoraConfig +from .insertion import BaseTunerWrapper, PeftModelWrapper, XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer def convert_layers_to_xlora( base: nn.Module, # PeftModel - config: xLoRAConfig, + config: XLoraConfig, ) -> int: """ Returns the number of swapped layers. @@ -29,7 +29,7 @@ def convert_layers_to_xlora( scaling_keys = list(module.scaling.keys()) # NOTE(EricLBuehler): Python 3.7: dicts are ordered! if isinstance(module, lora.Linear): - new_layer: Union[xLoRALinearLayer, xLoRAEmbeddingLayer, xLoRAConv2dLayer] = xLoRALinearLayer( + new_layer: Union[XLoraLinearLayer, XLoraEmbeddingLayer, XLoraConv2dLayer] = XLoraLinearLayer( model=base, target=module, target_forward=module.forward, @@ -39,7 +39,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): - new_layer = xLoRAEmbeddingLayer( + new_layer = XLoraEmbeddingLayer( model=base, target=module, target_forward=module.forward, @@ -49,7 +49,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Conv2d): - new_layer = xLoRAConv2dLayer( + new_layer = XLoraConv2dLayer( model=base, target=module, target_forward=module.forward, @@ -62,7 +62,7 @@ def convert_layers_to_xlora( return total_swapped -class xLoRAModel(LoraModel): +class XLoraModel(LoraModel): """ Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. @@ -70,7 +70,7 @@ class xLoRAModel(LoraModel): Args: model ([`torch.nn.Module`]): The model to be adapted. - config ([`xLoRAConfig`]): The configuration of the Lora model. + config ([`XLoraConfig`]): The configuration of the Lora model. adapter_name (`str`): The name of the adapter, does not affect the LoRA adapter names. model_peft (`PeftModel`): Base peft model. @@ -83,7 +83,7 @@ class xLoRAModel(LoraModel): >>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_int8_training >>> model_config = AutoConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1") - >>> config = xLoRAConfig( + >>> config = XLoraConfig( ... task_type="CAUSAL_LM", ... hidden_size=model_config.hidden_size, ... 
xlora_depth=4, @@ -109,7 +109,7 @@ class xLoRAModel(LoraModel): def __init__( self, model: nn.Module, - config: Union[dict[str, xLoRAConfig], xLoRAConfig], + config: Union[dict[str, XLoraConfig], XLoraConfig], adapter_name: str, model_peft: nn.Module, ) -> None: @@ -122,8 +122,8 @@ def __init__( peft_config = config[adapter_name] else: peft_config = config - if not isinstance(peft_config, xLoRAConfig): - raise TypeError(f"Expected config type to be 'xLoRAConfig', got '{type(model)}' instead.") + if not isinstance(peft_config, XLoraConfig): + raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") super().__init__(model, config, adapter_name, model_peft, _disable_inject=True) @@ -149,7 +149,7 @@ def hook(module, *args, **kwargs) -> None: kwargs_real: dict = args[1] kwargs_real.update(kwargs) - xlora_classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + xlora_classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore if "_xlora_classifier_inhibitor_flag" in kwargs_real: payload: InhibitorFlagPayload = kwargs_real["_xlora_classifier_inhibitor_flag"] @@ -186,7 +186,7 @@ def hook(module, *args, **kwargs) -> None: ) n_classes = len(peft_config.adapters) - xlora_classifier = xLoRAClassifier(model_peft, peft_config, n_classes, total_swapped) + xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped) peft_model_wrapper = PeftModelWrapper( model_peft, @@ -210,28 +210,28 @@ def set_topk_lora(self, value: Optional[int]): """ Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.top_k_lora = value def get_topk_lora(self) -> Optional[int]: """ Get the current top_k LoRA experts value. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier.config.top_k_lora def set_global_scaling_weight(self, weight: float): """ Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.global_scaling_weight = weight def get_global_scaling_weight(self) -> float: """ Get the global LoRA weight. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier.config.global_scaling_weight def get_latest_scalings(self) -> Optional[torch.Tensor]: @@ -245,7 +245,7 @@ def get_scalings_log(self) -> List[torch.Tensor]: Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. The tensors are of shape (batch_size, seq_len, n_layers, n_classes). The seq_len dim may vary with input dimension. 
""" - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier.log_scalings.copy() def set_scaling_pass_value(self, value: Union[Number, None]): @@ -255,28 +255,28 @@ def set_scaling_pass_value(self, value: Union[Number, None]): This is reflected in the config. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.set_override_scaling_pass_value(value) def print_scalings_predictions(self, n_predictions_lifetime: int): """ Print the scaling states for the next n classifier predictions (i.e. forward, generate passes) """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.n_predictions_lifetime = n_predictions_lifetime def enable_scalings_logging(self): """ Enable scalings logging. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.scalings_logging = True def disable_scalings_logging(self): """ Disable scalings logging, clearing the log. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.scalings_logging = False classifier.log_scalings = [] @@ -290,7 +290,7 @@ def flush_log_scalings(self, path: str): The file specified should not contain an extension. """ - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.flush_log_scalings(path) def get_nb_trainable_parameters(self) -> Tuple[int, int]: @@ -299,7 +299,7 @@ def get_nb_trainable_parameters(self) -> Tuple[int, int]: """ model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() - classifier: xLoRAClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore # Ignoring xlora_trainable_params as it is already included in model_trainable_params _xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() @@ -312,7 +312,7 @@ def get_nb_trainable_parameters(self) -> Tuple[int, int]: def print_trainable_parameters(self): """ - Prints the number of trainable parameters in the model, including of the xLoRA classifier. + Prints the number of trainable parameters in the model, including of the XLora classifier. 
""" trainable_params, all_param = self.get_nb_trainable_parameters() From 358ea194a6b00b7d6c4fd704c677e100defa1a97 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 13:05:44 -0500 Subject: [PATCH 042/182] Refactor Lora layers toreduce code repetion --- src/peft/peft_model.py | 14 +-- src/peft/tuners/lora/layer.py | 82 +++++++++++++++++- src/peft/tuners/xlora/insertion.py | 133 +---------------------------- src/peft/tuners/xlora/model.py | 31 +------ 4 files changed, 94 insertions(+), 166 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index b2734f0273..c1844167e7 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -51,8 +51,8 @@ PrefixEncoder, PromptEmbedding, PromptEncoder, - XLoraConfig, - XLoraModel, + xLoRAConfig, + xLoRAModel, ) from .tuners.tuners_utils import BaseTunerLayer @@ -92,7 +92,7 @@ PeftType.IA3: IA3Model, PeftType.OFT: OFTModel, PeftType.POLY: PolyModel, - PeftType.XLORA: XLoraModel, + PeftType.XLORA: xLoRAModel, } @@ -395,9 +395,9 @@ def from_pretrained( else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - if isinstance(model.base_model, XLoraModel): - if not isinstance(config, XLoraConfig): - raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") + if isinstance(model.base_model, xLoRAModel): + if not isinstance(config, xLoRAConfig): + raise TypeError(f"Expected 'xLoRAConfig', got '{type(config)}' instead.") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( config.device = torch.device(device) @@ -417,7 +417,7 @@ def from_pretrained( adapters_real = config.adapters config.adapters = adapters_real - classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore + classifier: xLoRAClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index 829b7bdf80..a190c48e06 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -481,9 +481,22 @@ def get_delta_weight(self, adapter) -> torch.Tensor: return output_tensor +<<<<<<< HEAD def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) +======= + def forward( + self, + x: torch.Tensor, + *args: Any, + _xlora_layer: Optional[XLoraLayer] = None, + _xlora_scalings: Optional[torch.Tensor] = None, + _xlora_scaling_weight: Optional[Number] = None, + **kwargs: Any, + ) -> torch.Tensor: + previous_dtype = x.dtype +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if self.disable_adapters: if self.merged: @@ -495,8 +508,12 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) +<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: +======= + for adapter_n, active_adapter in enumerate(self.active_adapters): +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_A.keys(): continue lora_A = self.lora_A[active_adapter] @@ -504,6 +521,17 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: dropout = 
self.lora_dropout[active_adapter] scaling = self.scaling[active_adapter] x = x.to(lora_A.weight.dtype) +<<<<<<< HEAD +======= + if _xlora_layer is not None: + x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) + else: + x_inp = x + res = lora_B(lora_A(dropout(x_inp))) * scaling + if _xlora_layer is not None: + res = res * _xlora_scaling_weight + result += res +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if not self.use_dora[active_adapter]: result = result + lora_B(lora_A(dropout(x))) * scaling @@ -711,7 +739,15 @@ def _embed(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: sparse=base_layer.sparse, ) - def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: + def forward( + self, + x: torch.Tensor, + *args: Any, + _xlora_layer: Optional[XLoraLayer] = None, + _xlora_scalings: Optional[torch.Tensor] = None, + _xlora_scaling_weight: Optional[Number] = None, + **kwargs: Any, + ) -> torch.Tensor: # TODO: no dtype conversion here, unlike in Linear, is that correct? self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) @@ -726,16 +762,32 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) +<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: +======= + for adapter_n, active_adapter in enumerate(self.active_adapters): +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_embedding_A: continue embedding_A = self.lora_embedding_A[active_adapter].T embedding_B = self.lora_embedding_B[active_adapter].T scaling = self.scaling[active_adapter] +<<<<<<< HEAD after_A = self._embed(x, embedding_A) result = result + (after_A @ embedding_B) * scaling result = result.to(torch_result_dtype) +======= + if _xlora_layer is not None: + x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) + else: + x_inp = x + after_A = self._embed(x_inp, embedding_A) + res = (after_A @ embedding_B) * scaling + if _xlora_layer is not None: + res = res * _xlora_scaling_weight + result += res +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) return result @@ -945,6 +997,7 @@ def get_delta_weight(self, adapter) -> torch.Tensor: return output_tensor +<<<<<<< HEAD def _get_weight_norm(self, weight, lora_weight, scaling) -> torch.Tensor: # calculate L2 norm of weight matrix, channel-wise weight = weight + scaling * lora_weight @@ -988,6 +1041,18 @@ def _apply_dora(self, x, lora_A, lora_B, scaling, active_adapter): def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) +======= + def forward( + self, + x: torch.Tensor, + *args, + _xlora_layer: Optional[XLoraLayer] = None, + _xlora_scalings: Optional[torch.Tensor] = None, + _xlora_scaling_weight: Optional[Number] = None, + **kwargs, + ) -> torch.Tensor: + previous_dtype = x.dtype +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if self.disable_adapters: if self.merged: @@ -999,9 +1064,13 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) +<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: +======= + for adapter_n, active_adapter in 
enumerate(self.active_adapters): +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_A.keys(): continue lora_A = self.lora_A[active_adapter] @@ -1009,6 +1078,17 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: dropout = self.lora_dropout[active_adapter] scaling = self.scaling[active_adapter] x = x.to(lora_A.weight.dtype) +<<<<<<< HEAD +======= + if _xlora_layer is not None: + x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) + else: + x_inp = x + res = lora_B(lora_A(dropout(x_inp))) * scaling + if _xlora_layer is not None: + res = res * _xlora_scaling_weight + result += res +>>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if not self.use_dora[active_adapter]: result = result + lora_B(lora_A(dropout(x))) * scaling diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index cdee075110..3cf193d4c6 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -7,8 +7,6 @@ from torch import Tensor, nn from peft.tuners import lora -from peft.tuners.tuners_utils import BaseTuner -from peft.tuners.xlora.model import XLoraModel # type: ignore from .classifier import XLoraClassifier from .config import XLoraConfig @@ -23,7 +21,7 @@ class XLoraLayer: def __init__( self, - model: XLoraModel, + model: nn.Module, # PeftModel target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, @@ -55,7 +53,7 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) - classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.model.base_model.internal_xlora_classifier # type: ignore if classifier.config.enable_softmax_topk: nonzero_mask = xlora_scalings != 0 softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) @@ -63,133 +61,8 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: return xlora_scalings - -class XLoraLinearLayer(XLoraLayer): - def __init__( - self, - model: nn.Module, # PeftModel - target: lora.Linear, - target_forward: Callable[..., Any], - layer_number: int, - config: XLoraConfig, - ) -> None: - super().__init__(model, target, target_forward, layer_number, config) - - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - """ - This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
- """ - - previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() - - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) - - for adapter_n, active_adapter in enumerate(self.target.active_adapters): - if active_adapter not in self.target.lora_A.keys(): - continue - lora_A = self.target.lora_A[active_adapter] - lora_B = self.target.lora_B[active_adapter] - dropout = self.target.lora_dropout[active_adapter] - scaling = self.target.scaling[active_adapter] - x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight - - result = result.to(previous_dtype) - return result - - -class XLoraEmbeddingLayer(XLoraLayer): - def __init__( - self, - model: nn.Module, # PeftModel - target: lora.Embedding, - target_forward: Callable[..., Any], - layer_number: int, - config: XLoraConfig, - ) -> None: - super().__init__(model, target, target_forward, layer_number, config) - - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - """ - This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoraLayer class). - """ - - xlora_scalings = self.get_maybe_topk_scalings() - - # TODO: no dtype conversion here, unlike in Linear, is that correct? - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) - for adapter_n, active_adapter in enumerate(self.target.active_adapters): - if active_adapter not in self.target.lora_embedding_A: - continue - embedding_A = self.target.lora_embedding_A[active_adapter].T - embedding_B = self.target.lora_embedding_B[active_adapter].T - scaling = self.target.scaling[active_adapter] - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - after_A = self.target._embed(x_mod, embedding_A) # type: ignore - result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight - - return result - - -class XLoraConv2dLayer(XLoraLayer): - def __init__( - self, - model: nn.Module, # PeftModel - target: lora.Conv2d, - target_forward: Callable[..., Any], - layer_number: int, - config: XLoraConfig, - ) -> None: - super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - """ - This method is designed to be a drop-in-replacement for the peft LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
- """ - - previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() - - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) - for adapter_n, active_adapter in enumerate(self.target.active_adapters): - if active_adapter not in self.target.lora_A.keys(): - continue - lora_A = self.target.lora_A[active_adapter] - lora_B = self.target.lora_B[active_adapter] - dropout = self.target.lora_dropout[active_adapter] - scaling = self.target.scaling[active_adapter] - x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight - - result = result.to(previous_dtype) - return result + return self.target.forward(x, *args, **kwargs) class PeftModelWrapper: diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 532e0a1137..87079863b5 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -10,7 +10,7 @@ from .. import lora from .classifier import InhibitorFlagPayload, Number, XLoraClassifier from .config import XLoraConfig -from .insertion import BaseTunerWrapper, PeftModelWrapper, XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer +from .insertion import PeftModelWrapper, XLoraLayer def convert_layers_to_xlora( @@ -22,34 +22,9 @@ def convert_layers_to_xlora( """ total_swapped = 0 - scaling_keys = None for module in base.modules(): if isinstance(module, lora.LoraLayer): - if not scaling_keys: - scaling_keys = list(module.scaling.keys()) # NOTE(EricLBuehler): Python 3.7: dicts are ordered! 
- - if isinstance(module, lora.Linear): - new_layer: Union[XLoraLinearLayer, XLoraEmbeddingLayer, XLoraConv2dLayer] = XLoraLinearLayer( - model=base, - target=module, - target_forward=module.forward, - layer_number=total_swapped, - config=config, - ) - module.forward = new_layer.forward # type: ignore[method-assign] - total_swapped += 1 - elif isinstance(module, lora.Embedding): - new_layer = XLoraEmbeddingLayer( - model=base, - target=module, - target_forward=module.forward, - layer_number=total_swapped, - config=config, - ) - module.forward = new_layer.forward # type: ignore[method-assign] - total_swapped += 1 - elif isinstance(module, lora.Conv2d): - new_layer = XLoraConv2dLayer( + new_layer = XLoraLayer( model=base, target=module, target_forward=module.forward, @@ -118,7 +93,7 @@ def __init__( raise TypeError(f"Expected model type to be 'PreTrainedModel', got '{type(model)}' instead.") if isinstance(config, dict): if len(config) != 1: - raise TypeError(f"Expected one config.") + raise TypeError("Expected one config.") peft_config = config[adapter_name] else: peft_config = config From e6126facc55b2ba046c458b800c8954eb35f9b8e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 13:12:43 -0500 Subject: [PATCH 043/182] Remove _disable_inject hack --- src/peft/peft_model.py | 18 ++++++++---------- src/peft/tuners/lora/model.py | 4 ++-- src/peft/tuners/tuners_utils.py | 16 ++++++---------- src/peft/tuners/xlora/model.py | 4 +++- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index c1844167e7..3852238032 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -51,13 +51,11 @@ PrefixEncoder, PromptEmbedding, PromptEncoder, - xLoRAConfig, - xLoRAModel, + XLoraConfig, + XLoraModel, ) from .tuners.tuners_utils import BaseTunerLayer -from .tuners.xlora import _get_file_path_dir as xlora_get_file_path_dir -from .tuners.xlora import _load_classifier_weights as xlora_load_classifier_weights from .tuners.xlora.classifier import XLoraClassifier from .tuners.xlora.util import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights @@ -92,7 +90,7 @@ PeftType.IA3: IA3Model, PeftType.OFT: OFTModel, PeftType.POLY: PolyModel, - PeftType.XLORA: xLoRAModel, + PeftType.XLORA: XLoraModel, } @@ -163,7 +161,7 @@ def active_adapters(self) -> list[str]: adapters = self.active_adapter if isinstance(adapters, str): adapters = [adapters] - return list(filter(lambda x: len(x) > 0, adapters)) + return adapters @peft_config.setter def peft_config(self, value: dict[str, PeftConfig]): @@ -395,9 +393,9 @@ def from_pretrained( else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - if isinstance(model.base_model, xLoRAModel): - if not isinstance(config, xLoRAConfig): - raise TypeError(f"Expected 'xLoRAConfig', got '{type(config)}' instead.") + if isinstance(model.base_model, XLoraModel): + if not isinstance(config, XLoraConfig): + raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( config.device = torch.device(device) @@ -417,7 +415,7 @@ def from_pretrained( adapters_real = config.adapters config.adapters = adapters_real - classifier: xLoRAClassifier = model.base_model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore 
classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 16d6b369dd..9f7759fa5e 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -132,8 +132,8 @@ class LoraModel(BaseTuner): prefix: str = "lora_" - def __init__(self, model, config, adapter_name, _model_peft=None, _disable_inject: bool = False) -> None: - super().__init__(model, config, adapter_name, _disable_inject) + def __init__(self, model, config, adapter_name, _model_peft=None) -> None: + super().__init__(model, config, adapter_name) def _check_new_adapter_config(self, config: LoraConfig) -> None: """ diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 7448834b03..fe4b1dfb14 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -145,7 +145,6 @@ def __init__( model, peft_config: Union[PeftConfig, dict[str, PeftConfig]], adapter_name: str, - _disable_inject: bool = False, ) -> None: super().__init__() @@ -167,11 +166,8 @@ def __init__( # user is adding a dict of PeftConfigs self.peft_config.update(peft_config) - if not _disable_inject: - self.active_adapter = adapter_name - self.inject_adapter(self.model, adapter_name) - else: - self.active_adapter = "" + self.active_adapter = adapter_name + self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. self.model.peft_config = self.peft_config @@ -179,9 +175,9 @@ def __init__( @property def active_adapters(self) -> list[str]: if isinstance(self.active_adapter, str): - return list(filter(lambda x: len(x) > 0, [self.active_adapter])) + return [self.active_adapter] # is already a list of str - return list(filter(lambda x: len(x) > 0, self.active_adapter)) + return self.active_adapter def forward(self, *args: Any, **kwargs: Any): return self.model.forward(*args, **kwargs) @@ -493,9 +489,9 @@ def active_adapter(self) -> str: @property def active_adapters(self): if isinstance(self.active_adapter, str): - return list(filter(lambda x: len(x) > 0, [self.active_adapter])) + return [self.active_adapter] # is already a list of str - return list(filter(lambda x: len(x) > 0, self.active_adapter)) + return self.active_adapter def enable_adapters(self, enabled: bool) -> None: """Toggle the enabling and disabling of adapters diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 87079863b5..8335f255e3 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -100,7 +100,7 @@ def __init__( if not isinstance(peft_config, XLoraConfig): raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") - super().__init__(model, config, adapter_name, model_peft, _disable_inject=True) + super().__init__(model, config, adapter_name, model_peft) if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" @@ -116,6 +116,8 @@ def __init__( for adapter_name, model_id in adapters_items: model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) + self.delete_adapter(adapter_name) + self.set_adapter(list(peft_config.adapters.keys())) model_peft.peft_type = PeftType.XLORA From de765311fb5ddee474dcb44908a7508b05d249c8 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 13:33:33 -0500 Subject: [PATCH 044/182] Remove passing peftmodel to 
super --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 8335f255e3..045c1c4e56 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -100,7 +100,7 @@ def __init__( if not isinstance(peft_config, XLoraConfig): raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") - super().__init__(model, config, adapter_name, model_peft) + super().__init__(model, config, adapter_name) if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" From 1dace5683d63a40cc41c678cdf464203dd0fbb99 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 22 Feb 2024 13:47:43 -0500 Subject: [PATCH 045/182] Use post init fn to improve separation of concerns --- src/peft/peft_model.py | 4 +++- src/peft/tuners/adalora/model.py | 2 +- src/peft/tuners/ia3/model.py | 2 +- src/peft/tuners/lora/layer.py | 6 ++++++ src/peft/tuners/lycoris_utils.py | 2 +- src/peft/tuners/poly/model.py | 2 +- src/peft/tuners/xlora/model.py | 19 ++++++++----------- 7 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 3852238032..470e0a8529 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -135,7 +135,9 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name else: self._peft_config = None cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] - self.base_model = cls(model, {adapter_name: peft_config}, adapter_name, self) + self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) + if isinstance(self.base_model, XLoraModel): + self.base_model.__xlora_post_init__(model, peft_config, adapter_name) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): diff --git a/src/peft/tuners/adalora/model.py b/src/peft/tuners/adalora/model.py index be935acc7e..bf334b39ce 100644 --- a/src/peft/tuners/adalora/model.py +++ b/src/peft/tuners/adalora/model.py @@ -61,7 +61,7 @@ class AdaLoraModel(LoraModel): # Note: don't redefine prefix here, it should be inherited from LoraModel - def __init__(self, model, config, adapter_name, _model_peft=None): + def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) traininable_mode_counter = 0 diff --git a/src/peft/tuners/ia3/model.py b/src/peft/tuners/ia3/model.py index 62037ae420..61969fe698 100644 --- a/src/peft/tuners/ia3/model.py +++ b/src/peft/tuners/ia3/model.py @@ -72,7 +72,7 @@ class IA3Model(BaseTuner): prefix: str = "ia3_" - def __init__(self, model, config, adapter_name, _model_peft): + def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) @staticmethod diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index a190c48e06..05c6698888 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -495,9 +495,12 @@ def forward( _xlora_scaling_weight: Optional[Number] = None, **kwargs: Any, ) -> torch.Tensor: +<<<<<<< HEAD previous_dtype = x.dtype >>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) +======= +>>>>>>> c5cdfc3 (Use post init fn to improve separation of concerns) if self.disable_adapters: if self.merged: self.unmerge() @@ -1051,9 +1054,12 @@ def forward( _xlora_scaling_weight: Optional[Number] = None, **kwargs, ) -> torch.Tensor: +<<<<<<< HEAD previous_dtype = 
x.dtype >>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) +======= +>>>>>>> c5cdfc3 (Use post init fn to improve separation of concerns) if self.disable_adapters: if self.merged: self.unmerge() diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 62576505f0..39c750ae8e 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -197,7 +197,7 @@ class LycorisTuner(BaseTuner): prefix: str layers_mapping: dict[type[torch.nn.Module], type[LycorisLayer]] - def __init__(self, model, config, adapter_name, _model_peft=None): + def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) def __getattr__(self, name: str): diff --git a/src/peft/tuners/poly/model.py b/src/peft/tuners/poly/model.py index 93b8b19856..943a287955 100644 --- a/src/peft/tuners/poly/model.py +++ b/src/peft/tuners/poly/model.py @@ -33,7 +33,7 @@ class PolyModel(BaseTuner): prefix: str = "poly_" - def __init__(self, model, config, adapter_name, _model_peft=None) -> None: + def __init__(self, model, config, adapter_name) -> None: super().__init__(model, config, adapter_name) @staticmethod diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 045c1c4e56..a712556f88 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -47,7 +47,6 @@ class XLoraModel(LoraModel): model ([`torch.nn.Module`]): The model to be adapted. config ([`XLoraConfig`]): The configuration of the Lora model. adapter_name (`str`): The name of the adapter, does not affect the LoRA adapter names. - model_peft (`PeftModel`): Base peft model. Returns: `torch.nn.Module`: The X-LoRA model. @@ -86,30 +85,28 @@ def __init__( model: nn.Module, config: Union[dict[str, XLoraConfig], XLoraConfig], adapter_name: str, + ) -> None: + super().__init__(model, config, adapter_name) + + def __xlora_post_init__( + self, + model: nn.Module, + peft_config: XLoraConfig, + adapter_name: str, model_peft: nn.Module, ) -> None: # model_peft: PeftModel if not isinstance(model, PreTrainedModel): raise TypeError(f"Expected model type to be 'PreTrainedModel', got '{type(model)}' instead.") - if isinstance(config, dict): - if len(config) != 1: - raise TypeError("Expected one config.") - peft_config = config[adapter_name] - else: - peft_config = config if not isinstance(peft_config, XLoraConfig): raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") - super().__init__(model, config, adapter_name) - if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" use_trainable_adapters = peft_config.use_trainable_adapters adapters_items = iter(peft_config.adapters.items()) - # Because we call load_adapter, which requires base_model to be defined - model_peft.base_model = self # For load_adapter to think we are a LoraModel model_peft.peft_type = PeftType.LORA From e11e4334abbd879dcbcb317a34df6556320f5123 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 13:41:38 -0500 Subject: [PATCH 046/182] Update naming and fix call --- src/peft/peft_model.py | 2 +- src/peft/tuners/xlora/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 470e0a8529..fdad81cc2d 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -137,7 +137,7 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] 
self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) if isinstance(self.base_model, XLoraModel): - self.base_model.__xlora_post_init__(model, peft_config, adapter_name) + self.base_model._xlora_post_init(model, peft_config, adapter_name, self) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index a712556f88..963e0db655 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -88,7 +88,7 @@ def __init__( ) -> None: super().__init__(model, config, adapter_name) - def __xlora_post_init__( + def _xlora_post_init( self, model: nn.Module, peft_config: XLoraConfig, From 63debdb84c80e4ca4e98e0e2497f72121526a6fc Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 13:46:25 -0500 Subject: [PATCH 047/182] Remove 'n_predictions_lifetime' --- src/peft/tuners/xlora/classifier.py | 5 ----- src/peft/tuners/xlora/model.py | 7 ------- 2 files changed, 12 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 7cdaa0a6aa..621a188ca9 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -60,7 +60,6 @@ def __init__( self.softmax = TemperatureScaledSoftmax(temperature=self.config.softmax_temperature) self.override_scaling_pass_value: Number = config.scaling_pass_value - self.n_predictions_lifetime = 0 self.scalings_logging = False dtype = next(model.parameters()).dtype @@ -179,10 +178,6 @@ def forward( if self.config.enable_softmax: scalings = self.softmax(scalings) - if self.n_predictions_lifetime > 0: - print(f"Scaling predictions: {scalings}") - self.n_predictions_lifetime -= 1 - if self.scalings_logging: self.log_scalings.append(scalings) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 963e0db655..7d354de3aa 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -232,13 +232,6 @@ def set_scaling_pass_value(self, value: Union[Number, None]): classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.set_override_scaling_pass_value(value) - def print_scalings_predictions(self, n_predictions_lifetime: int): - """ - Print the scaling states for the next n classifier predictions (i.e. forward, generate passes) - """ - classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - classifier.n_predictions_lifetime = n_predictions_lifetime - def enable_scalings_logging(self): """ Enable scalings logging. 
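A minimal usage sketch of the scalings-logging surface these commits settle on (enable_scalings_logging / flush_log_scalings here, with disable and clear split apart a few commits later). It assumes the X-LoRA wiring from this series and attribute forwarding from the PeftModel wrapper; the base model id, checkpoint path, and output prefix below are placeholders, not values taken from the patches:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel  # assumes the X-LoRA integration from this series

# Placeholders: base model id and checkpoint directory are illustrative only.
base = AutoModelForCausalLM.from_pretrained("my-org/my-base-model", use_cache=False)
tok = AutoTokenizer.from_pretrained("my-org/my-base-model")
model = PeftModel.from_pretrained(base, "path/to/xlora-checkpoint")

model.enable_scalings_logging()            # start recording classifier scalings
with torch.no_grad():
    _ = model(**tok("Hello there", return_tensors="pt"))
model.flush_log_scalings("scalings_run0")  # writes the log plus a scalings_run0-mapping.json
model.disable_scalings_logging()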
From 226ab92a162171cc303d421be737ee3a7ebd20e4 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 14:18:25 -0500 Subject: [PATCH 048/182] Move generate to XLoraModel, simplifying PeftModelWrapper --- src/peft/tuners/xlora/insertion.py | 18 +----------------- src/peft/tuners/xlora/model.py | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 3cf193d4c6..7e8964948a 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Callable, Optional, Tuple, Union +from typing import Any, Callable, Optional, Union import torch from safetensors.torch import save_model # type: ignore @@ -70,25 +70,9 @@ def __init__( self, peft_model: nn.Module, # PeftModel base_model_save: Callable[..., None], - config: XLoraConfig, - base_model_get_nb_trainable_parameters: Callable[..., Tuple[int, int]], - base_model_generate: Callable[..., Any], ): self.peft_model = peft_model self.base_model_save = base_model_save - self.config = config - self.base_model_get_nb_trainable_parameters = base_model_get_nb_trainable_parameters - self.base_model_generate = base_model_generate - - def generate(self, *args, **kwargs): - res = self.base_model_generate(*args, **kwargs) # type: ignore - # TODO(EricLBuehler): Evaluate effectiveness and performance degradation - self.peft_model.base_model.eval() - if not self.config.use_trainable_adapters: - for name, param in self.peft_model.base_model.named_parameters(): - if "lora_" in name: - param.requires_grad = False - return res def save_pretrained( self, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7d354de3aa..89e600a45b 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -101,6 +101,8 @@ def _xlora_post_init( if not isinstance(peft_config, XLoraConfig): raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") + self.xlora_config = peft_config + if hasattr(model.config, "use_cache"): assert not model.config.use_cache, "`use_cache` must be False" @@ -146,13 +148,7 @@ def hook(module, *args, **kwargs) -> None: model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) - self.eval() - if not use_trainable_adapters: - total_frozen = 0 - for name, param in self.named_parameters(): - if "lora_" in name: - param.requires_grad = False - total_frozen += 1 + self._freeze_all_adapters() total_swapped = convert_layers_to_xlora( model_peft, @@ -165,17 +161,25 @@ def hook(module, *args, **kwargs) -> None: peft_model_wrapper = PeftModelWrapper( model_peft, model_peft.save_pretrained, - peft_config, - model_peft.get_nb_trainable_parameters, - model_peft.generate, ) model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore - model_peft.generate = peft_model_wrapper.generate # type: ignore # Setup the model internal state self.internal_xlora_classifier = xlora_classifier self.internal_xlora_scalings = None # type: ignore - self.xlora_config = peft_config + + def _freeze_all_adapters(self): + self.eval() + if not self.xlora_config.use_trainable_adapters: + for name, param in self.named_parameters(): + if "lora_" in name: + param.requires_grad = False + + def generate(self, *args, **kwargs): + res = super().generate(*args, **kwargs) # type: ignore + # TODO(EricLBuehler): Evaluate effectiveness and performance degradation + 
self._freeze_all_adapters() + return res def forward(self, *args, **kwargs): return self.model(*args, **kwargs) # Important to *call* the model From feba4f6d3bbd29d01d2969fa265b308afc9dad8d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 14:44:01 -0500 Subject: [PATCH 049/182] Simplify save pretrained and from pretrained --- src/peft/peft_model.py | 13 ------------- src/peft/tuners/xlora/insertion.py | 4 ++++ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index fdad81cc2d..f6922d9df0 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -57,7 +57,6 @@ from .tuners.tuners_utils import BaseTunerLayer from .tuners.xlora.classifier import XLoraClassifier -from .tuners.xlora.util import _get_file_path_dir as xlora_get_file_path_dir from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights from .utils import ( SAFETENSORS_WEIGHTS_NAME, @@ -404,18 +403,6 @@ def from_pretrained( # If we are passed adapters in the kwargs, it is already in the config. # If no adapters are passed, config.adapters is None - if config.adapters is None or config.use_trainable_adapters: - adapters_real: dict[str, str] = { - name: xlora_get_file_path_dir( - model_id, - name, - "adapters", - ) - for name in config.adapters - } - else: - adapters_real = config.adapters - config.adapters = adapters_real classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 7e8964948a..4a476ff901 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -118,6 +118,10 @@ def save_pretrained( **kwargs, ) + conf["adapters"] = { + name: os.path.join(save_directory, name) if name != "default" else save_directory + for name in conf["adapters"] + } with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) From fd0ab4b5b6241610175e5246bd0fbc06c102792d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 14:44:33 -0500 Subject: [PATCH 050/182] Do not save to nested dir --- src/peft/tuners/xlora/insertion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 4a476ff901..a0e02324db 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -107,10 +107,8 @@ def save_pretrained( conf = classifier.config.__dict__.copy() del conf["device"] - if is_main_process: - os.makedirs(os.path.join(save_directory, "adapters"), exist_ok=True) self.base_model_save( - save_directory=os.path.join(save_directory, "adapters"), + save_directory=save_directory, safe_serialization=safe_serialization, is_main_process=is_main_process, selected_adapters=selected_adapters, From c44cd5acf3e85689dec744313dcaa8e6e0b19268 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 14:54:51 -0500 Subject: [PATCH 051/182] Use new _save_pretrained_hook to reduce code duplication --- src/peft/peft_model.py | 5 +++ src/peft/tuners/xlora/insertion.py | 72 +----------------------------- src/peft/tuners/xlora/model.py | 38 ++++++++++++---- 3 files changed, 36 insertions(+), 79 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index f6922d9df0..3760ea1999 100644 --- a/src/peft/peft_model.py +++ 
b/src/peft/peft_model.py @@ -222,6 +222,7 @@ def save_pretrained( os.makedirs(save_directory, exist_ok=True) self.create_or_update_model_card(save_directory) + adapters = {} for adapter_name in selected_adapters: peft_config = self.peft_config[adapter_name] # save only the trainable weights @@ -233,6 +234,7 @@ def save_pretrained( ) output_dir = os.path.join(save_directory, adapter_name) if adapter_name != "default" else save_directory os.makedirs(output_dir, exist_ok=True) + adapters[adapter_name] = output_dir if is_main_process and safe_serialization: # Section copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_utils.py#L2111-L2134 @@ -293,6 +295,9 @@ def save_pretrained( peft_config.save_pretrained(output_dir, auto_mapping_dict=auto_mapping_dict) peft_config.inference_mode = inference_mode + if hasattr(self.base_model, "_save_pretrained_hook"): + self.base_model._save_pretrained_hook(save_directory, adapters, safe_serialization, is_main_process) + @classmethod def from_pretrained( cls, diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index a0e02324db..f8811dfb6f 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -1,9 +1,6 @@ -import json -import os -from typing import Any, Callable, Optional, Union +from typing import Any, Callable import torch -from safetensors.torch import save_model # type: ignore from torch import Tensor, nn from peft.tuners import lora @@ -63,70 +60,3 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return self.target.forward(x, *args, **kwargs) - - -class PeftModelWrapper: - def __init__( - self, - peft_model: nn.Module, # PeftModel - base_model_save: Callable[..., None], - ): - self.peft_model = peft_model - self.base_model_save = base_model_save - - def save_pretrained( - self, - save_directory: str, - safe_serialization: bool = True, - selected_adapters: Optional[list[str]] = None, - save_embedding_layers: Union[str, bool] = "auto", - is_main_process: bool = True, - **kwargs: Any, - ) -> None: - r""" - This function saves the classifier weights to a directory. It is the counerpart to `from_pretrained`. - - Args: - save_directory (`str`): - Directory where the adapter model and configuration files will be saved (will be created if it does not - exist). - safe_serialization (`bool`, *optional*): - Whether to save the adapter files in safetensors format, defaults to `True`. - is_main_process (`bool`, *optional*): - Whether the process calling this is the main process or not. Will default to `True`. Will not save the - checkpoint if not on the main process, which is important for multi device setups (e.g. DDP). 
- """ - if os.path.isfile(save_directory): - raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file") - - if is_main_process: - os.makedirs(save_directory, exist_ok=True) - - classifier: XLoraClassifier = self.peft_model.base_model.internal_xlora_classifier # type: ignore - - conf = classifier.config.__dict__.copy() - del conf["device"] - - self.base_model_save( - save_directory=save_directory, - safe_serialization=safe_serialization, - is_main_process=is_main_process, - selected_adapters=selected_adapters, - save_embedding_layers=save_embedding_layers, - **kwargs, - ) - - conf["adapters"] = { - name: os.path.join(save_directory, name) if name != "default" else save_directory - for name in conf["adapters"] - } - with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: - json.dump(conf, f) - - if safe_serialization: - # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 - if is_main_process and safe_serialization: - save_model(classifier, os.path.join(save_directory, "xlora_classifier.safetensors")) - elif is_main_process: - state_dict = classifier.state_dict() - torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 89e600a45b..931e5235dd 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -1,7 +1,10 @@ -from typing import List, Optional, Tuple, Union +import json +import os +from typing import Any, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn +from safetensors.torch import save_model # type: ignore from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel @@ -10,7 +13,7 @@ from .. import lora from .classifier import InhibitorFlagPayload, Number, XLoraClassifier from .config import XLoraConfig -from .insertion import PeftModelWrapper, XLoraLayer +from .insertion import XLoraLayer def convert_layers_to_xlora( @@ -158,12 +161,6 @@ def hook(module, *args, **kwargs) -> None: n_classes = len(peft_config.adapters) xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped) - peft_model_wrapper = PeftModelWrapper( - model_peft, - model_peft.save_pretrained, - ) - model_peft.save_pretrained = peft_model_wrapper.save_pretrained # type: ignore - # Setup the model internal state self.internal_xlora_classifier = xlora_classifier self.internal_xlora_scalings = None # type: ignore @@ -181,6 +178,31 @@ def generate(self, *args, **kwargs): self._freeze_all_adapters() return res + def _save_pretrained_hook( + self, + save_directory: str, + adapters: Dict[str, str], + safe_serialization: bool = True, + is_main_process: bool = True, + **kwargs: Any, + ) -> None: + classifier: XLoraClassifier = self.internal_xlora_classifier + + conf = self.xlora_config.__dict__.copy() + del conf["device"] + + conf["adapters"] = adapters + with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: + json.dump(conf, f) + + if safe_serialization: + # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 + if is_main_process and safe_serialization: + save_model(classifier, os.path.join(save_directory, "xlora_classifier.safetensors")) + elif is_main_process: + state_dict = classifier.state_dict() + torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) + def forward(self, *args, **kwargs): return self.model(*args, **kwargs) # Important to *call* the model From 
1f368370bd451a0668172016d24dbed6857af6b5 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 14:59:13 -0500 Subject: [PATCH 052/182] Add explaining comment --- src/peft/tuners/xlora/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 931e5235dd..0250d32521 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -173,6 +173,7 @@ def _freeze_all_adapters(self): param.requires_grad = False def generate(self, *args, **kwargs): + # Rely on LoraModel.__getattr__ res = super().generate(*args, **kwargs) # type: ignore # TODO(EricLBuehler): Evaluate effectiveness and performance degradation self._freeze_all_adapters() From d2d3b6a103ec52e13c80f4c01f2b59e61d8e3c08 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 15:38:47 -0500 Subject: [PATCH 053/182] Call .clear on log scalings --- src/peft/tuners/xlora/classifier.py | 2 +- src/peft/tuners/xlora/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 621a188ca9..428906236c 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -242,7 +242,7 @@ def flush_log_scalings(self, path: str): with open(f"{path}-mapping.json", "w") as f: f.write(json.dumps(indices_map)) - self.log_scalings = [] + self.log_scalings.clear() def set_override_scaling_pass_value(self, value: Union[Number, None]): if value is None: diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 0250d32521..8764d88471 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -272,7 +272,7 @@ def disable_scalings_logging(self): """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.scalings_logging = False - classifier.log_scalings = [] + classifier.log_scalings.clear() def flush_log_scalings(self, path: str): """ From 9a7032b88cf9301f254d312272f83d5353357c42 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 15:45:05 -0500 Subject: [PATCH 054/182] Seperate method use cases --- src/peft/tuners/xlora/model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 8764d88471..9c07007e13 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -268,10 +268,16 @@ def enable_scalings_logging(self): def disable_scalings_logging(self): """ - Disable scalings logging, clearing the log. + Disable scalings logging, without clearing the log. """ - classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore classifier.scalings_logging = False + + def clear_scalings_log(self): + """ + Clear the scalings log. 
+ """ + classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore classifier.log_scalings.clear() def flush_log_scalings(self, path: str): From f4a53142a2c5bd7e33bce0a0da5d529adc4f3f6d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 16:14:09 -0500 Subject: [PATCH 055/182] Prefix with _ --- src/peft/tuners/xlora/classifier.py | 4 ++-- src/peft/tuners/xlora/model.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 428906236c..4f407332e4 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -214,7 +214,7 @@ def _save_scalings(file: str, scalings: List[torch.Tensor]): npy = result.numpy() numpy.save(file, npy) - def flush_log_scalings(self, path: str): + def _flush_log_scalings(self, path: str): if not self.scalings_logging: raise Exception("Scalings logging is disabled!") @@ -244,7 +244,7 @@ def flush_log_scalings(self, path: str): self.log_scalings.clear() - def set_override_scaling_pass_value(self, value: Union[Number, None]): + def _set_override_scaling_pass_value(self, value: Union[Number, None]): if value is None: self.override_scaling_pass_value = 1 / self.n_classes else: diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9c07007e13..e60409d5f3 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -257,7 +257,7 @@ def set_scaling_pass_value(self, value: Union[Number, None]): This is reflected in the config. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - classifier.set_override_scaling_pass_value(value) + classifier._set_override_scaling_pass_value(value) def enable_scalings_logging(self): """ @@ -291,7 +291,7 @@ def flush_log_scalings(self, path: str): The file specified should not contain an extension. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - classifier.flush_log_scalings(path) + classifier._flush_log_scalings(path) def get_nb_trainable_parameters(self) -> Tuple[int, int]: """ From cf4fa9db833abd65a4b250be368e1468cb98bac2 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 16:26:19 -0500 Subject: [PATCH 056/182] Remove get, print trainable params as they are redundant --- src/peft/tuners/xlora/model.py | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index e60409d5f3..05f6f433ed 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Union import torch import torch.nn as nn @@ -293,35 +293,6 @@ def flush_log_scalings(self, path: str): classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier._flush_log_scalings(path) - def get_nb_trainable_parameters(self) -> Tuple[int, int]: - """ - Returns the number of trainable parameters and number of all parameters in the model. 
- """ - model_trainable_params, model_all_param = self.base_model_get_nb_trainable_parameters() - - classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - # Ignoring xlora_trainable_params as it is already included in model_trainable_params - _xlora_trainable_params, xlora_all_param = classifier.get_nb_trainable_parameters() - - trainable_params, all_param = ( - model_trainable_params, - (model_all_param + xlora_all_param), - ) - - return trainable_params, all_param - - def print_trainable_parameters(self): - """ - Prints the number of trainable parameters in the model, including of the XLora classifier. - """ - trainable_params, all_param = self.get_nb_trainable_parameters() - - print( - f"trainable params: {trainable_params:,d} || " - f"all params: {all_param:,d} || " - f"trainable%: {100 * trainable_params / all_param:.4f}" - ) - def set_use_trainable_adapters(self, use_trainable_adapters: bool): """ Set the adapters to trainable or not trainable. From 862b880d8afeccf11e0b5911515e5ab2644137e5 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 23 Feb 2024 18:46:32 -0500 Subject: [PATCH 057/182] Fix inclusion of old kwarg --- src/peft/tuners/adaption_prompt/model.py | 2 +- src/peft/tuners/lora/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/adaption_prompt/model.py b/src/peft/tuners/adaption_prompt/model.py index c036cad8d1..08aea27f8e 100644 --- a/src/peft/tuners/adaption_prompt/model.py +++ b/src/peft/tuners/adaption_prompt/model.py @@ -40,7 +40,7 @@ class AdaptionPromptModel(nn.Module): - Disabling the adapter would also result in the modules being removed from the model. """ - def __init__(self, model, configs: Dict, adapter_name: str, _model_peft=None): + def __init__(self, model, configs: Dict, adapter_name: str): super().__init__() self.model = model # Store adapter configs by name. 
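As a small illustration of the three-argument constructor this commit restores (the same `_model_peft` removal is applied to `LoraModel` just below), a tuner can be built directly from a base model, a config mapping, and an adapter name. The model id and target modules here are placeholders, not taken from the patch:

from transformers import AutoModelForCausalLM
from peft import LoraConfig
from peft.tuners.lora import LoraModel

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
config = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"])

# Three arguments again, with no trailing `_model_peft` keyword:
lora_model = LoraModel(base, {"default": config}, "default")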
diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 9f7759fa5e..3f381efffd 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -132,7 +132,7 @@ class LoraModel(BaseTuner): prefix: str = "lora_" - def __init__(self, model, config, adapter_name, _model_peft=None) -> None: + def __init__(self, model, config, adapter_name) -> None: super().__init__(model, config, adapter_name) def _check_new_adapter_config(self, config: LoraConfig) -> None: From 9c5e3be1931f5bc28f19a126bb207ed40a359ff5 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 08:09:29 -0500 Subject: [PATCH 058/182] Remove circular imports --- src/peft/tuners/lora/layer.py | 88 +---------------------------------- 1 file changed, 1 insertion(+), 87 deletions(-) diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index 05c6698888..829b7bdf80 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -481,26 +481,10 @@ def get_delta_weight(self, adapter) -> torch.Tensor: return output_tensor -<<<<<<< HEAD def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) -======= - def forward( - self, - x: torch.Tensor, - *args: Any, - _xlora_layer: Optional[XLoraLayer] = None, - _xlora_scalings: Optional[torch.Tensor] = None, - _xlora_scaling_weight: Optional[Number] = None, - **kwargs: Any, - ) -> torch.Tensor: -<<<<<<< HEAD - previous_dtype = x.dtype ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) -======= ->>>>>>> c5cdfc3 (Use post init fn to improve separation of concerns) if self.disable_adapters: if self.merged: self.unmerge() @@ -511,12 +495,8 @@ def forward( result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) -<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: -======= - for adapter_n, active_adapter in enumerate(self.active_adapters): ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_A.keys(): continue lora_A = self.lora_A[active_adapter] @@ -524,17 +504,6 @@ def forward( dropout = self.lora_dropout[active_adapter] scaling = self.scaling[active_adapter] x = x.to(lora_A.weight.dtype) -<<<<<<< HEAD -======= - if _xlora_layer is not None: - x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) - else: - x_inp = x - res = lora_B(lora_A(dropout(x_inp))) * scaling - if _xlora_layer is not None: - res = res * _xlora_scaling_weight - result += res ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if not self.use_dora[active_adapter]: result = result + lora_B(lora_A(dropout(x))) * scaling @@ -742,15 +711,7 @@ def _embed(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: sparse=base_layer.sparse, ) - def forward( - self, - x: torch.Tensor, - *args: Any, - _xlora_layer: Optional[XLoraLayer] = None, - _xlora_scalings: Optional[torch.Tensor] = None, - _xlora_scaling_weight: Optional[Number] = None, - **kwargs: Any, - ) -> torch.Tensor: + def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: # TODO: no dtype conversion here, unlike in Linear, is that correct? 
self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) @@ -765,32 +726,16 @@ def forward( result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) -<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: -======= - for adapter_n, active_adapter in enumerate(self.active_adapters): ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_embedding_A: continue embedding_A = self.lora_embedding_A[active_adapter].T embedding_B = self.lora_embedding_B[active_adapter].T scaling = self.scaling[active_adapter] -<<<<<<< HEAD after_A = self._embed(x, embedding_A) result = result + (after_A @ embedding_B) * scaling result = result.to(torch_result_dtype) -======= - if _xlora_layer is not None: - x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) - else: - x_inp = x - after_A = self._embed(x_inp, embedding_A) - res = (after_A @ embedding_B) * scaling - if _xlora_layer is not None: - res = res * _xlora_scaling_weight - result += res ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) return result @@ -1000,7 +945,6 @@ def get_delta_weight(self, adapter) -> torch.Tensor: return output_tensor -<<<<<<< HEAD def _get_weight_norm(self, weight, lora_weight, scaling) -> torch.Tensor: # calculate L2 norm of weight matrix, channel-wise weight = weight + scaling * lora_weight @@ -1044,22 +988,7 @@ def _apply_dora(self, x, lora_A, lora_B, scaling, active_adapter): def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: self._check_forward_args(x, *args, **kwargs) adapter_names = kwargs.pop("adapter_names", None) -======= - def forward( - self, - x: torch.Tensor, - *args, - _xlora_layer: Optional[XLoraLayer] = None, - _xlora_scalings: Optional[torch.Tensor] = None, - _xlora_scaling_weight: Optional[Number] = None, - **kwargs, - ) -> torch.Tensor: -<<<<<<< HEAD - previous_dtype = x.dtype ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) -======= ->>>>>>> c5cdfc3 (Use post init fn to improve separation of concerns) if self.disable_adapters: if self.merged: self.unmerge() @@ -1070,13 +999,9 @@ def forward( result = self.base_layer(x, *args, **kwargs) else: result = self.base_layer(x, *args, **kwargs) -<<<<<<< HEAD torch_result_dtype = result.dtype for active_adapter in self.active_adapters: -======= - for adapter_n, active_adapter in enumerate(self.active_adapters): ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if active_adapter not in self.lora_A.keys(): continue lora_A = self.lora_A[active_adapter] @@ -1084,17 +1009,6 @@ def forward( dropout = self.lora_dropout[active_adapter] scaling = self.scaling[active_adapter] x = x.to(lora_A.weight.dtype) -<<<<<<< HEAD -======= - if _xlora_layer is not None: - x_inp = _xlora_layer.apply_scalings_to_x(x, _xlora_scalings, adapter_n) - else: - x_inp = x - res = lora_B(lora_A(dropout(x_inp))) * scaling - if _xlora_layer is not None: - res = res * _xlora_scaling_weight - result += res ->>>>>>> 177f1b4 (Refactor Lora layers toreduce code repetion) if not self.use_dora[active_adapter]: result = result + lora_B(lora_A(dropout(x))) * scaling From 34694d5140b9a025d562ab99128fd3e46a43aa17 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 08:15:29 -0500 Subject: [PATCH 059/182] Override method --- src/peft/tuners/xlora/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py 
index 05f6f433ed..9eb821cd55 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -172,6 +172,10 @@ def _freeze_all_adapters(self): if "lora_" in name: param.requires_grad = False + @staticmethod + def _prepare_adapter_config(peft_config, model_config): + return peft_config + def generate(self, *args, **kwargs): # Rely on LoraModel.__getattr__ res = super().generate(*args, **kwargs) # type: ignore From 18bd1b8c8905f3ff2fc4311571e9e42329b91eb6 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 08:20:06 -0500 Subject: [PATCH 060/182] Set target modules to None --- src/peft/tuners/xlora/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 3b8f8804ae..48c6e0e737 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -75,6 +75,7 @@ def __post_init__(self): assert self.device is not None assert self.adapters is not None self.peft_type = PeftType.XLORA + self.target_modules = None if self.enable_softmax_topk and self.top_k_lora is None: warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set") From 306bb349a24cbe69647d406c726c0af1bfef8236 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 08:27:13 -0500 Subject: [PATCH 061/182] Do not set target modules to None --- src/peft/tuners/xlora/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 48c6e0e737..3b8f8804ae 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -75,7 +75,6 @@ def __post_init__(self): assert self.device is not None assert self.adapters is not None self.peft_type = PeftType.XLORA - self.target_modules = None if self.enable_softmax_topk and self.top_k_lora is None: warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set") From b8121ac9db168e10e0e872bf42e8abec7ca9ae1a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:16:54 -0500 Subject: [PATCH 062/182] Try to avoid checking target_modules --- src/peft/tuners/tuners_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index fe4b1dfb14..99b3932286 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -29,6 +29,7 @@ from transformers import PreTrainedModel from transformers.pytorch_utils import Conv1D +from peft.tuners.xlora.config import XLoraConfig from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND from ..config import PeftConfig @@ -167,7 +168,10 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - self.inject_adapter(self.model, adapter_name) + if isinstance(peft_config[adapter_name], XLoraConfig): + self.inject_adapter(self.model, adapter_name, has_target_modules=False) + else: + self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. 
self.model.peft_config = self.peft_config @@ -319,7 +323,8 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): key_list = [key for key, _ in model.named_modules()] # update peft_config.target_modules if required - peft_config = _maybe_include_all_linear_layers(peft_config, model) + if has_target_modules: + peft_config = _maybe_include_all_linear_layers(peft_config, model) for key in key_list: # Check for modules_to_save in case From 44cdc0798c9cdfab0fd0d2ea85b9d57bc278b0fb Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:20:45 -0500 Subject: [PATCH 063/182] Remove circular import --- src/peft/tuners/tuners_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 99b3932286..44e2ae338a 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -29,8 +29,8 @@ from transformers import PreTrainedModel from transformers.pytorch_utils import Conv1D -from peft.tuners.xlora.config import XLoraConfig from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND +from peft.utils.peft_types import PeftType from ..config import PeftConfig from ..utils import ModulesToSaveWrapper, _get_submodules @@ -168,7 +168,7 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - if isinstance(peft_config[adapter_name], XLoraConfig): + if peft_config[adapter_name].peft_type == PeftType.XLORA: self.inject_adapter(self.model, adapter_name, has_target_modules=False) else: self.inject_adapter(self.model, adapter_name) From ba905a64be61ebf323efc59509170ca75dc56dd5 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:25:05 -0500 Subject: [PATCH 064/182] Override another method --- src/peft/tuners/xlora/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9eb821cd55..9dd9a9ae57 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -176,6 +176,9 @@ def _freeze_all_adapters(self): def _prepare_adapter_config(peft_config, model_config): return peft_config + def check_target_module_exists(config, key: str) -> bool: + return False + def generate(self, *args, **kwargs): # Rely on LoraModel.__getattr__ res = super().generate(*args, **kwargs) # type: ignore From d5446f64db6eb90ff3d43aa51ddf8e14ccb11894 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:26:05 -0500 Subject: [PATCH 065/182] Override _check_target_module_exists --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9dd9a9ae57..6f956eee53 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -176,7 +176,7 @@ def _freeze_all_adapters(self): def _prepare_adapter_config(peft_config, model_config): return peft_config - def check_target_module_exists(config, key: str) -> bool: + def _check_target_module_exists(config, key: str) -> bool: return False def generate(self, *args, **kwargs): From 2e7f51a3e0afce022ace4f3fa9b1a53b1dc5e475 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:28:00 -0500 Subject: [PATCH 066/182] Make method instance --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 6f956eee53..6919552531 100644 --- a/src/peft/tuners/xlora/model.py +++ 
b/src/peft/tuners/xlora/model.py @@ -176,7 +176,7 @@ def _freeze_all_adapters(self): def _prepare_adapter_config(peft_config, model_config): return peft_config - def _check_target_module_exists(config, key: str) -> bool: + def _check_target_module_exists(self, config, key: str) -> bool: return False def generate(self, *args, **kwargs): From 7c354d9aeb2de55659b9b887bfcdfe2c0b67b711 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:30:30 -0500 Subject: [PATCH 067/182] Make a nicer check for having target_modules --- src/peft/tuners/tuners_utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 44e2ae338a..cee94e49d4 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -30,7 +30,6 @@ from transformers.pytorch_utils import Conv1D from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND -from peft.utils.peft_types import PeftType from ..config import PeftConfig from ..utils import ModulesToSaveWrapper, _get_submodules @@ -168,10 +167,7 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - if peft_config[adapter_name].peft_type == PeftType.XLORA: - self.inject_adapter(self.model, adapter_name, has_target_modules=False) - else: - self.inject_adapter(self.model, adapter_name) + self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. self.model.peft_config = self.peft_config @@ -283,6 +279,7 @@ def _check_new_adapter_config(self, config: PeftConfig) -> None: """ pass +<<<<<<< HEAD def _check_merge_allowed(self): """Helper method to check whether the adapter can be merged. @@ -290,6 +287,8 @@ def _check_merge_allowed(self): """ pass +======= +>>>>>>> cddc00f (Make a nicer check for having target_modules) def inject_adapter(self, model: nn.Module, adapter_name: str): r""" Creates adapter layers and replaces the target modules with the adapter layers. This method is called under the @@ -323,7 +322,7 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): key_list = [key for key, _ in model.named_modules()] # update peft_config.target_modules if required - if has_target_modules: + if hasattr(peft_config, "target_modules"): peft_config = _maybe_include_all_linear_layers(peft_config, model) for key in key_list: @@ -351,7 +350,7 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): parent, target, target_name = _get_submodules(model, key) self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key) - if not is_target_modules_in_base_model: + if not is_target_modules_in_base_model and hasattr(peft_config, "target_modules"): raise ValueError( f"Target modules {peft_config.target_modules} not found in the base model. " f"Please check the target modules and try again." 
From 76b26b5154f9b1ac285fa8f1b309f95617347579 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:35:16 -0500 Subject: [PATCH 068/182] Avoid unnecessary injection --- src/peft/tuners/tuners_utils.py | 8 ++++---- src/peft/tuners/xlora/model.py | 7 ------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index cee94e49d4..41d454db89 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -167,7 +167,8 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - self.inject_adapter(self.model, adapter_name) + if hasattr(peft_config, "target_modules"): + self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. self.model.peft_config = self.peft_config @@ -322,8 +323,7 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): key_list = [key for key, _ in model.named_modules()] # update peft_config.target_modules if required - if hasattr(peft_config, "target_modules"): - peft_config = _maybe_include_all_linear_layers(peft_config, model) + peft_config = _maybe_include_all_linear_layers(peft_config, model) for key in key_list: # Check for modules_to_save in case @@ -350,7 +350,7 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): parent, target, target_name = _get_submodules(model, key) self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key) - if not is_target_modules_in_base_model and hasattr(peft_config, "target_modules"): + if not is_target_modules_in_base_model: raise ValueError( f"Target modules {peft_config.target_modules} not found in the base model. " f"Please check the target modules and try again." 
diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 6919552531..05f6f433ed 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -172,13 +172,6 @@ def _freeze_all_adapters(self): if "lora_" in name: param.requires_grad = False - @staticmethod - def _prepare_adapter_config(peft_config, model_config): - return peft_config - - def _check_target_module_exists(self, config, key: str) -> bool: - return False - def generate(self, *args, **kwargs): # Rely on LoraModel.__getattr__ res = super().generate(*args, **kwargs) # type: ignore From 5828ab4b01c4de342c161be0bf5dcff4d80f046b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:47:14 -0500 Subject: [PATCH 069/182] Remove xlora conf from mark only adapters as trainable --- src/peft/tuners/xlora/model.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 05f6f433ed..14633a9513 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -178,6 +178,17 @@ def generate(self, *args, **kwargs): # TODO(EricLBuehler): Evaluate effectiveness and performance degradation self._freeze_all_adapters() return res + + def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: + active_adapters = [] + copy = self.active_adapters.copy() + for name in self.active_adapters: + if not isinstance(self.peft_config[name], XLoraConfig): + active_adapters.append(name) + + super()._mark_only_adapters_as_trainable(model) + + self.active_adapters = copy def _save_pretrained_hook( self, From 9b56c2ab705536b0da1b6627a12eb6d809d8d680 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:53:46 -0500 Subject: [PATCH 070/182] Account for property --- src/peft/tuners/xlora/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 14633a9513..dc79698f8e 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -178,17 +178,17 @@ def generate(self, *args, **kwargs): # TODO(EricLBuehler): Evaluate effectiveness and performance degradation self._freeze_all_adapters() return res - + def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: active_adapters = [] copy = self.active_adapters.copy() for name in self.active_adapters: if not isinstance(self.peft_config[name], XLoraConfig): active_adapters.append(name) - + self.active_adapter = active_adapters super()._mark_only_adapters_as_trainable(model) - self.active_adapters = copy + self.active_adapter = copy def _save_pretrained_hook( self, From 0ea1fa268714f4611b81ab5c49850abde8b1bd4b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 10:57:54 -0500 Subject: [PATCH 071/182] Call correct method --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index dc79698f8e..7f7200f8b8 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -174,7 +174,7 @@ def _freeze_all_adapters(self): def generate(self, *args, **kwargs): # Rely on LoraModel.__getattr__ - res = super().generate(*args, **kwargs) # type: ignore + res = self.model.generate(*args, **kwargs) # type: ignore # TODO(EricLBuehler): Evaluate effectiveness and performance degradation self._freeze_all_adapters() return res From 8f67a6be98b5aa7f39734b04b2ca6232acc0af14 Mon Sep 17 00:00:00 2001 From: 
EricLBuehler Date: Wed, 28 Feb 2024 11:05:27 -0500 Subject: [PATCH 072/182] Fix for scoping --- src/peft/tuners/xlora/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7f7200f8b8..4393989898 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -115,8 +115,8 @@ def _xlora_post_init( # For load_adapter to think we are a LoraModel model_peft.peft_type = PeftType.LORA - for adapter_name, model_id in adapters_items: - model_peft.load_adapter(model_id, adapter_name, is_trainable=use_trainable_adapters) + for name, model_id in adapters_items: + model_peft.load_adapter(model_id, name, is_trainable=use_trainable_adapters) self.delete_adapter(adapter_name) From a4823fe148ca046043b29ed9a6eec0dabb4a2667 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 11:10:30 -0500 Subject: [PATCH 073/182] Set active adapter to not 'default' --- src/peft/tuners/xlora/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4393989898..dfb99913d2 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -121,6 +121,7 @@ def _xlora_post_init( self.delete_adapter(adapter_name) self.set_adapter(list(peft_config.adapters.keys())) + model_peft.active_adapter = name model_peft.peft_type = PeftType.XLORA def hook(module, *args, **kwargs) -> None: From 1bafcd8c91bfef6d68d20d5f96228bffff4f83ab Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 28 Feb 2024 11:17:28 -0500 Subject: [PATCH 074/182] Fix recursion err --- src/peft/tuners/xlora/insertion.py | 6 +----- src/peft/tuners/xlora/model.py | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index f8811dfb6f..41ad3f8ae1 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -3,8 +3,6 @@ import torch from torch import Tensor, nn -from peft.tuners import lora - from .classifier import XLoraClassifier from .config import XLoraConfig @@ -19,14 +17,12 @@ class XLoraLayer: def __init__( self, model: nn.Module, # PeftModel - target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, config: XLoraConfig, ) -> None: self.model = model self.target_forward = target_forward - self.target = target self.layer_number = layer_number self.config = config @@ -59,4 +55,4 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: return xlora_scalings def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - return self.target.forward(x, *args, **kwargs) + return self.target_forward(x, *args, **kwargs) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index dfb99913d2..4d878da0fa 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -29,7 +29,6 @@ def convert_layers_to_xlora( if isinstance(module, lora.LoraLayer): new_layer = XLoraLayer( model=base, - target=module, target_forward=module.forward, layer_number=total_swapped, config=config, From b5f5563e6e0ee3d05b45ae80d32216a9666046f7 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 29 Feb 2024 11:21:59 -0500 Subject: [PATCH 075/182] Index into config to check --- src/peft/tuners/tuners_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 41d454db89..82cda79a5b 100644 --- 
a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -167,7 +167,7 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - if hasattr(peft_config, "target_modules"): + if hasattr(peft_config[adapter_name], "target_modules"): self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. From e6b45c38f07feaf54e074adb7012bcf7dc50f4f1 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 1 Mar 2024 19:55:11 -0500 Subject: [PATCH 076/182] Get and pass scalings --- src/peft/tuners/xlora/insertion.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py index 41ad3f8ae1..5838d10c1c 100644 --- a/src/peft/tuners/xlora/insertion.py +++ b/src/peft/tuners/xlora/insertion.py @@ -55,4 +55,12 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: return xlora_scalings def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - return self.target_forward(x, *args, **kwargs) + scalings = self.get_maybe_topk_scalings() + return self.target_forward( + x, + *args, + _xlora_layer=self, + _xlora_scalings=scalings, + _xlora_scaling_weight=self.config.global_scaling_weight, + **kwargs, + ) From b8ba6690b54c1abb2b525ccadd10504f687dd35a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 4 Mar 2024 21:26:45 -0500 Subject: [PATCH 077/182] Remove deprecated attr --- src/peft/tuners/xlora/config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 3b8f8804ae..c101dc3ed9 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -42,8 +42,6 @@ class XLoraConfig(PeftConfig): Enable bias in X-LoRA classifier. xlora_dropout_p (`float`, *optional*, defaults to 0.2): Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or `enable_relu_and_dropout=True`. - stop_token_id (`int`, *optional*): - The id of the stop token for the input. If this is None, the sequence length is calculated using the attention mask. use_trainable_adapters (`bool`, *optional*, defaults to False): Make the adapters trainable. 
scaling_pass_value (`float`, *optional*, defaults to 0): @@ -63,7 +61,6 @@ class XLoraConfig(PeftConfig): enable_relu_and_dropout: bool = False use_bias: bool = True xlora_dropout_p: float = 0.2 - stop_token_id: Optional[int] = None use_trainable_adapters: bool = False softmax_temperature: float = 1.0 top_k_lora: Optional[int] = None From 31b121cb8be4c612c68a9e72c2b711f0d384d3cf Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 6 Mar 2024 06:36:12 -0500 Subject: [PATCH 078/182] Remove the xloralayer for a hook, refactoring changes --- src/peft/peft_model.py | 4 +- src/peft/tuners/tuners_utils.py | 3 +- src/peft/tuners/xlora/classifier.py | 7 +-- src/peft/tuners/xlora/config.py | 13 +++--- src/peft/tuners/xlora/insertion.py | 66 ----------------------------- src/peft/tuners/xlora/model.py | 46 ++++++++++++++++---- 6 files changed, 54 insertions(+), 85 deletions(-) delete mode 100644 src/peft/tuners/xlora/insertion.py diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 3760ea1999..d29912603e 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -136,7 +136,7 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) if isinstance(self.base_model, XLoraModel): - self.base_model._xlora_post_init(model, peft_config, adapter_name, self) + self.base_model.post_init_lora(model, peft_config, adapter_name, self) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): @@ -403,7 +403,7 @@ def from_pretrained( if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") - device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device( + device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device() config.device = torch.device(device) # If we are passed adapters in the kwargs, it is already in the config. diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 82cda79a5b..b9d59f8a1d 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -30,6 +30,7 @@ from transformers.pytorch_utils import Conv1D from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND +from peft.utils.peft_types import PeftType from ..config import PeftConfig from ..utils import ModulesToSaveWrapper, _get_submodules @@ -167,7 +168,7 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - if hasattr(peft_config[adapter_name], "target_modules"): + if peft_config[adapter_name] != PeftType.XLORA: self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. 
diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 4f407332e4..076b19c61a 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -51,8 +51,7 @@ def __init__( ): super().__init__() - # To avoid registering this with nn.Module - self.__dict__["model"] = model + self.model = model self.n_classes = n_classes self.n_layers = n_layers self.config = config @@ -89,7 +88,9 @@ def __init__( else: self.last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) else: - assert self.config.xlora_depth > 0 + if self.config.xlora_depth <= 0 : + raise ValueError("X-LoRA depth must be strictly positive.") + self.inner.append( nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) ) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index c101dc3ed9..a4ad17b40f 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -12,8 +12,8 @@ class XLoraConfig(PeftConfig): r""" This is the configuration class to store the configuration of a [`XLoraClassifier`]. - When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys - matter during loading. + When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the + saved adapters. As such, only the keys matter during loading. Args: hidden_size (`int`): @@ -21,11 +21,13 @@ class XLoraConfig(PeftConfig): device (`torch.device`): Device for the X-LoRA classifier. adapters (`dict`): - Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. *They will be automatically loaded*, to use as LoRA experts. + Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. + *They will be automatically loaded*, to use as LoRA experts. enable_softmax (`bool`, *optional*, defaults to `True`): Enable softmax application for the X-LoRA classifier. enable_softmax_topk (`bool`, *optional*, defaults to `False`): - Enable softmax application for the top-k LoRA adapters. Mutually exclusive to `enable_softmax` and must only be set if `top_k_lora` is. + Enable softmax application for the top-k LoRA adapters. Mutually exclusive to + `enable_softmax` and must only be set if `top_k_lora` is. softmax_temperature (`float`, *optional*, defaults to 1.0): Softmax temperature, lower yields sharper predictions layerwise_scalings (`bool`, *optional*, defaults to `False`): @@ -41,7 +43,8 @@ class XLoraConfig(PeftConfig): use_bias (`bool`, *optional*, defaults to `True`): Enable bias in X-LoRA classifier. xlora_dropout_p (`float`, *optional*, defaults to 0.2): - Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or `enable_relu_and_dropout=True`. + Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or + `enable_relu_and_dropout=True`. use_trainable_adapters (`bool`, *optional*, defaults to False): Make the adapters trainable. 
scaling_pass_value (`float`, *optional*, defaults to 0): diff --git a/src/peft/tuners/xlora/insertion.py b/src/peft/tuners/xlora/insertion.py deleted file mode 100644 index 5838d10c1c..0000000000 --- a/src/peft/tuners/xlora/insertion.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import Any, Callable - -import torch -from torch import Tensor, nn - -from .classifier import XLoraClassifier -from .config import XLoraConfig - - -class XLoraLayer: - """ - A XLoraLayer wraps any LoraLayer and performs the XLora operation on the LoRA adaptors specified. - Its primary API is the forward method, which uses the scalings to execute the - XLora algorithm. - """ - - def __init__( - self, - model: nn.Module, # PeftModel - target_forward: Callable[..., Any], - layer_number: int, - config: XLoraConfig, - ) -> None: - self.model = model - self.target_forward = target_forward - self.layer_number = layer_number - self.config = config - - @staticmethod - def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: - # scalings_layer = [batch_size, seq_len, n_classes] - scalings = scalings_layer[:, :, adapter].unsqueeze(-1) - # scalings_layer = [batch_size, seq_len, 1] - return x * scalings - - def get_maybe_topk_scalings(self) -> torch.Tensor: - # xlora_scalings = [batch_size, seq_len, n_classes] - xlora_scalings: Tensor = self.model.internal_xlora_scalings[:, :, self.layer_number, :] # type: ignore - - if self.config.top_k_lora is not None: - _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=1) - - # Mask the topk to True, the rest to False - mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) - mask.scatter_(1, topk_indices, True) - - xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) - - classifier: XLoraClassifier = self.model.base_model.internal_xlora_classifier # type: ignore - if classifier.config.enable_softmax_topk: - nonzero_mask = xlora_scalings != 0 - softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) - xlora_scalings[nonzero_mask] = softmax_res_nonzero - - return xlora_scalings - - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: - scalings = self.get_maybe_topk_scalings() - return self.target_forward( - x, - *args, - _xlora_layer=self, - _xlora_scalings=scalings, - _xlora_scaling_weight=self.config.global_scaling_weight, - **kwargs, - ) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4d878da0fa..03b8fbf108 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -13,11 +13,17 @@ from .. 
import lora from .classifier import InhibitorFlagPayload, Number, XLoraClassifier from .config import XLoraConfig -from .insertion import XLoraLayer +@staticmethod +def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: + # scalings_layer = [batch_size, seq_len, n_classes] + scalings = scalings_layer[:, :, adapter].unsqueeze(-1) + # scalings_layer = [batch_size, seq_len, 1] + return x * scalings def convert_layers_to_xlora( base: nn.Module, # PeftModel + xloramodel: nn.Module, # XLoraModel config: XLoraConfig, ) -> int: """ @@ -27,13 +33,36 @@ def convert_layers_to_xlora( for module in base.modules(): if isinstance(module, lora.LoraLayer): - new_layer = XLoraLayer( - model=base, - target_forward=module.forward, - layer_number=total_swapped, - config=config, - ) module.forward = new_layer.forward # type: ignore[method-assign] + + def hook(module, *args, **kwargs) -> None: + kwargs_real: dict = args[1] + kwargs_real.update(kwargs) + + # xlora_scalings = [batch_size, seq_len, n_classes] + xlora_scalings: Tensor = xloramodel.internal_xlora_scalings[:, :, total_swapped, :] # type: ignore + + if config.top_k_lora is not None: + _, topk_indices = torch.topk(xlora_scalings, k=config.top_k_lora, dim=1) + + # Mask the topk to True, the rest to False + mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) + mask.scatter_(1, topk_indices, True) + + xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) + + if config.enable_softmax_topk: + nonzero_mask = xlora_scalings != 0 + softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) + xlora_scalings[nonzero_mask] = softmax_res_nonzero + + kwargs_real["_xlora_apply"] = apply_scalings_to_x + kwargs_real["_xlora_scalings"] = xlora_scalings + kwargs_real["_xlora_scaling_weight"] = config.global_scaling_weight + + return (args[0], kwargs_real) + + module.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) total_swapped += 1 return total_swapped @@ -90,7 +119,7 @@ def __init__( ) -> None: super().__init__(model, config, adapter_name) - def _xlora_post_init( + def post_init_lora( self, model: nn.Module, peft_config: XLoraConfig, @@ -155,6 +184,7 @@ def hook(module, *args, **kwargs) -> None: total_swapped = convert_layers_to_xlora( model_peft, + self, peft_config, ) From df6867d5fd4de5201c7be4500a42fc76dd6cf5bf Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 6 Mar 2024 08:08:06 -0500 Subject: [PATCH 079/182] Add copywright notice --- src/peft/tuners/xlora/__init__.py | 14 ++++++++++++++ src/peft/tuners/xlora/classifier.py | 14 ++++++++++++++ src/peft/tuners/xlora/config.py | 14 ++++++++++++++ src/peft/tuners/xlora/model.py | 14 ++++++++++++++ src/peft/tuners/xlora/util.py | 14 ++++++++++++++ 5 files changed, 70 insertions(+) diff --git a/src/peft/tuners/xlora/__init__.py b/src/peft/tuners/xlora/__init__.py index 0eed83ccae..df41e1e611 100644 --- a/src/peft/tuners/xlora/__init__.py +++ b/src/peft/tuners/xlora/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + from .config import XLoraConfig from .model import XLoraModel diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 076b19c61a..beacad5a5c 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -1,3 +1,17 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import builtins import json import typing diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index a4ad17b40f..6db3e156be 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -1,3 +1,17 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from dataclasses import dataclass from typing import Dict, Optional diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 03b8fbf108..81b73ca987 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -1,3 +1,17 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import Any, Dict, List, Optional, Union diff --git a/src/peft/tuners/xlora/util.py b/src/peft/tuners/xlora/util.py index 3d34906d45..05c94827b3 100644 --- a/src/peft/tuners/xlora/util.py +++ b/src/peft/tuners/xlora/util.py @@ -1,3 +1,17 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import os from typing import Optional, Union From 9025310613e1a4e6b94e7dd847a6f2839cef5ed6 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 6 Mar 2024 08:10:39 -0500 Subject: [PATCH 080/182] Remove some typing things --- src/peft/tuners/xlora/classifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index beacad5a5c..48e29b062f 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -69,7 +69,7 @@ def __init__( self.n_classes = n_classes self.n_layers = n_layers self.config = config - self.log_scalings: List[torch.Tensor] = [] + self.log_scalings = [] self.softmax = TemperatureScaledSoftmax(temperature=self.config.softmax_temperature) self.override_scaling_pass_value: Number = config.scaling_pass_value @@ -78,7 +78,7 @@ def __init__( dtype = next(model.parameters()).dtype bias_flag = config.use_bias - self.inner: nn.ModuleList = nn.ModuleList([]) + self.inner = nn.ModuleList([]) if self.config.xlora_depth == 1: if config.layerwise_scalings: # bias=False if we have just one layer self.last = ( @@ -150,7 +150,7 @@ def forward( seq_len = typing.cast(torch.FloatTensor, inputs_embeds).shape[1] # For type checking - model: nn.Module = self.model # type: ignore + model = self.model with torch.no_grad(): with model.disable_adapter(): kwargs["output_hidden_states"] = True From 506e96e99478fa5107a0ceee2ee5c386f3130a3e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 6 Mar 2024 08:16:53 -0500 Subject: [PATCH 081/182] Remove some typing things --- src/peft/tuners/xlora/classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 48e29b062f..63971ccfc8 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -142,12 +142,12 @@ def forward( if input_ids is not None: batch_size = input_ids.shape[0] else: - batch_size = typing.cast(torch.FloatTensor, inputs_embeds).shape[0] + batch_size = inputs_embeds.shape[0] if input_ids is not None: seq_len = input_ids.shape[1] else: - seq_len = typing.cast(torch.FloatTensor, inputs_embeds).shape[1] + seq_len = inputs_embeds.shape[1] # For type checking model = self.model From 07ff71fa7e10e798e536119ebea071fa281ae840 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 6 Mar 2024 08:20:53 -0500 Subject: [PATCH 082/182] Add note to docstring --- src/peft/tuners/xlora/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 81b73ca987..2be4e3dd3f 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -84,7 +84,8 @@ def hook(module, *args, **kwargs) -> None: class XLoraModel(LoraModel): """ - Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. + Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. Currently, + this X-LoRA implementation only works with models with a transformer architecture. The method is described in detail in https://arxiv.org/abs/2402.07148. 
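At this point in the series the X-LoRA model, classifier, and config are all in place, so a minimal usage sketch may help orient the reader; it mirrors the test added later in this series (tests/test_xlora.py). The base model id, adapter checkpoint paths, and generation settings below are illustrative assumptions, not values fixed by the patches.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    from peft import PeftType, TaskType, XLoraConfig, get_peft_model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_id = "facebook/opt-125m"  # illustrative base model

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.config.use_cache = False

    # Placeholder paths: each entry must point at a LoRA adapter previously saved
    # against the same base model; the keys name the X-LoRA "experts".
    adapters = {"expert_1": "./checkpoint-1", "expert_2": "./checkpoint-2"}

    peft_config = XLoraConfig(
        task_type=TaskType.CAUSAL_LM,
        peft_type=PeftType.XLORA,
        hidden_size=model.config.hidden_size,
        xlora_depth=8,
        device=device,  # still required by the config at this point in the series
        adapters=adapters,
    )
    model = get_peft_model(model, peft_config).to(device)

    inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(device), max_new_tokens=32)
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0])

At inference the classifier produces per-token, per-layer scalings over the named adapters, which the patched LoRA forward paths apply to mix the experts, as described in the docstring above.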
From a11178792d96431958f133b593767c6587267f0c Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 9 Mar 2024 14:12:09 -0500 Subject: [PATCH 083/182] Add to config --- src/peft/tuners/xlora/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 6db3e156be..4997ea695d 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -32,6 +32,8 @@ class XLoraConfig(PeftConfig): Args: hidden_size (`int`): Hidden size of the base model. + base_model_id (`str`): + Base model HF id. device (`torch.device`): Device for the X-LoRA classifier. adapters (`dict`): @@ -68,6 +70,7 @@ class XLoraConfig(PeftConfig): """ hidden_size: int = None # type: ignore + base_model_id: str device: torch.device = None # type: ignore adapters: Dict[str, str] = None # type: ignore enable_softmax: bool = True From 15b7bec85efb3cb238f55b4ece77d09d5d0744cf Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 9 Mar 2024 14:16:11 -0500 Subject: [PATCH 084/182] Update for new saving --- src/peft/peft_model.py | 4 +--- src/peft/tuners/xlora/model.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index d29912603e..810bab641d 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -222,7 +222,6 @@ def save_pretrained( os.makedirs(save_directory, exist_ok=True) self.create_or_update_model_card(save_directory) - adapters = {} for adapter_name in selected_adapters: peft_config = self.peft_config[adapter_name] # save only the trainable weights @@ -234,7 +233,6 @@ def save_pretrained( ) output_dir = os.path.join(save_directory, adapter_name) if adapter_name != "default" else save_directory os.makedirs(output_dir, exist_ok=True) - adapters[adapter_name] = output_dir if is_main_process and safe_serialization: # Section copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_utils.py#L2111-L2134 @@ -296,7 +294,7 @@ def save_pretrained( peft_config.inference_mode = inference_mode if hasattr(self.base_model, "_save_pretrained_hook"): - self.base_model._save_pretrained_hook(save_directory, adapters, safe_serialization, is_main_process) + self.base_model._save_pretrained_hook(save_directory, safe_serialization, is_main_process) @classmethod def from_pretrained( diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 2be4e3dd3f..c72483374b 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -238,7 +238,6 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: def _save_pretrained_hook( self, save_directory: str, - adapters: Dict[str, str], safe_serialization: bool = True, is_main_process: bool = True, **kwargs: Any, @@ -248,7 +247,7 @@ def _save_pretrained_hook( conf = self.xlora_config.__dict__.copy() del conf["device"] - conf["adapters"] = adapters + conf["adapters"] = list(conf["adapters"].keys()) with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) From 0ef9181bed9df31d648bc6fa31818bfb594bbd45 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 18 Mar 2024 19:20:07 -0400 Subject: [PATCH 085/182] Fix topk impl --- src/peft/tuners/xlora/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index c72483374b..2eed46ae2c 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -57,11 +57,11 
@@ def hook(module, *args, **kwargs) -> None: xlora_scalings: Tensor = xloramodel.internal_xlora_scalings[:, :, total_swapped, :] # type: ignore if config.top_k_lora is not None: - _, topk_indices = torch.topk(xlora_scalings, k=config.top_k_lora, dim=1) + _, topk_indices = torch.topk(xlora_scalings, k=config.top_k_lora, dim=-1) # Mask the topk to True, the rest to False mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) - mask.scatter_(1, topk_indices, True) + mask.scatter_(-1, topk_indices, True) xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) From 3804d6a18e2009ce249446c30423eeb857563772 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 29 Mar 2024 13:32:40 -0400 Subject: [PATCH 086/182] Update based on comments --- src/peft/tuners/xlora/classifier.py | 42 ++++++++++++++--------------- src/peft/tuners/xlora/model.py | 2 -- src/peft/tuners/xlora/util.py | 3 ++- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 63971ccfc8..c07ce759b1 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -78,56 +78,57 @@ def __init__( dtype = next(model.parameters()).dtype bias_flag = config.use_bias - self.inner = nn.ModuleList([]) + layers = [] if self.config.xlora_depth == 1: if config.layerwise_scalings: # bias=False if we have just one layer - self.last = ( + last = ( nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) ) else: - self.last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(config.device).to(dtype) elif self.config.xlora_depth == 2: - self.inner.append( + layers.append( nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) ) if config.enable_relu_and_dropout: - self.inner.append(nn.ReLU()) - self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + layers.append(nn.ReLU()) + layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - self.last = ( + last = ( nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) ) else: - self.last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) else: if self.config.xlora_depth <= 0 : raise ValueError("X-LoRA depth must be strictly positive.") - self.inner.append( + layers.append( nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) ) if config.enable_relu_and_dropout: - self.inner.append(nn.ReLU()) - self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + layers.append(nn.ReLU()) + layers.append(nn.Dropout(p=config.xlora_dropout_p)) for _ in range(config.xlora_depth - 2): - self.inner.append( + layers.append( nn.Linear(config.xlora_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) ) if config.enable_relu_and_dropout: - self.inner.append(nn.ReLU()) - self.inner.append(nn.Dropout(p=config.xlora_dropout_p)) + layers.append(nn.ReLU()) + layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - self.last = ( + last = ( nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) ) else: - self.last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = 
nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + self.layers = nn.Sequential(*layers, last) def forward( self, @@ -175,12 +176,11 @@ def forward( ### Classifier run # hidden_state=[batch_size, seq_len, hidden_size] - for layer in self.inner: - hidden_state = layer.forward(hidden_state) + logits = self.layers.forward(hidden_state) - logits = self.last.forward(hidden_state) - - ### Repeat to make layerwise scalings if the classifier layer does not + ### Repeat to make layerwise scalings + ### If layerwise_scalings=False, then the classifier only outputs logits which are not layer-wise. + ### So, we expand them to the correct shape. if not self.config.layerwise_scalings: logits = logits.unsqueeze(2) logits = logits.expand(-1, -1, self.n_layers, -1) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 2eed46ae2c..1ffe5907f7 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -47,8 +47,6 @@ def convert_layers_to_xlora( for module in base.modules(): if isinstance(module, lora.LoraLayer): - module.forward = new_layer.forward # type: ignore[method-assign] - def hook(module, *args, **kwargs) -> None: kwargs_real: dict = args[1] kwargs_real.update(kwargs) diff --git a/src/peft/tuners/xlora/util.py b/src/peft/tuners/xlora/util.py index 05c94827b3..6b7c58402f 100644 --- a/src/peft/tuners/xlora/util.py +++ b/src/peft/tuners/xlora/util.py @@ -27,7 +27,8 @@ def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: r""" - A helper method to load the classifier weights from the HuggingFace Hub or locally. Copied from load_peft_weights + A helper method to load the classifier weights from the HuggingFace Hub or locally. + This is essentially `load_peft_weights`, but with the safetensors names changed. Args: model_id (`str`): From ea3ea8f2180e21fc838ee249b2002790664bd579 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 29 Mar 2024 13:45:44 -0400 Subject: [PATCH 087/182] Add the xlora layer structure back in --- src/peft/tuners/xlora/model.py | 61 +++++++++++++++++----------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 1ffe5907f7..d4725d50eb 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -27,6 +27,7 @@ from .. 
import lora from .classifier import InhibitorFlagPayload, Number, XLoraClassifier from .config import XLoraConfig +from .layer import XLoRALinearLayer, XLoRAEmbeddingLayer, XLoRAConv2dLayer @staticmethod def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: @@ -46,37 +47,37 @@ def convert_layers_to_xlora( total_swapped = 0 for module in base.modules(): - if isinstance(module, lora.LoraLayer): - def hook(module, *args, **kwargs) -> None: - kwargs_real: dict = args[1] - kwargs_real.update(kwargs) - - # xlora_scalings = [batch_size, seq_len, n_classes] - xlora_scalings: Tensor = xloramodel.internal_xlora_scalings[:, :, total_swapped, :] # type: ignore - - if config.top_k_lora is not None: - _, topk_indices = torch.topk(xlora_scalings, k=config.top_k_lora, dim=-1) - - # Mask the topk to True, the rest to False - mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) - mask.scatter_(-1, topk_indices, True) - - xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) - - if config.enable_softmax_topk: - nonzero_mask = xlora_scalings != 0 - softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) - xlora_scalings[nonzero_mask] = softmax_res_nonzero - - kwargs_real["_xlora_apply"] = apply_scalings_to_x - kwargs_real["_xlora_scalings"] = xlora_scalings - kwargs_real["_xlora_scaling_weight"] = config.global_scaling_weight - - return (args[0], kwargs_real) - - module.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) + if isinstance(module, lora.Linear): + new_layer = XLoRALinearLayer( + model=xloramodel, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - + elif isinstance(module, lora.Embedding): + new_layer = XLoRAEmbeddingLayer( + model=xloramodel, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 + elif isinstance(module, lora.Conv2d): + new_layer = XLoRAConv2dLayer( + model=xloramodel, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 + return total_swapped From 1ce8391b02157e2aedd91156c25037bba4624cb3 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 29 Mar 2024 13:47:49 -0400 Subject: [PATCH 088/182] Add the xlora layer structure back in --- src/peft/tuners/xlora/layer.py | 185 +++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/peft/tuners/xlora/layer.py diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py new file mode 100644 index 0000000000..688759835e --- /dev/null +++ b/src/peft/tuners/xlora/layer.py @@ -0,0 +1,185 @@ +from typing import Any, Callable + +import torch +import torch.nn as nn +from peft.tuners import lora +from torch import Tensor + +from xlora.xlora_config import XLoraConfig + + +class XLoRALayer: + """ + A XLoRALayer wraps any LoraLayer and performs the XLoRA operation on the LoRA adaptors specified. + Its primary API is the forward method, which uses the scalings to execute the + XLoRA algorithm. 
+ """ + + def __init__( + self, + model: nn.Module, # XLoraModel + target: lora.LoraLayer, + target_forward: Callable[..., Any], + layer_number: int, + config: XLoraConfig, + ) -> None: + self.model = model + self.target_forward = target_forward + self.target = target + self.layer_number = layer_number + self.config = config + + @staticmethod + def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: + # scalings_layer = [batch_size, seq_len, n_classes] + scalings = scalings_layer[:, :, adapter].unsqueeze(-1) + # scalings_layer = [batch_size, seq_len, 1] + return x * scalings + + def get_maybe_topk_scalings(self) -> torch.Tensor: + # xlora_scalings = [batch_size, seq_len, n_classes] + xlora_scalings: Tensor = self.model.internal_xlora_scalings[:, :, self.layer_number, :] # type: ignore + + if self.config.top_k_lora is not None: + _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=-1) + + # Mask the topk to True, the rest to False + mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) + mask.scatter_(-1, topk_indices, True) + + xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) + + if self.config.enable_softmax_topk: + nonzero_mask = xlora_scalings != 0 + softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) + xlora_scalings[nonzero_mask] = softmax_res_nonzero + + return xlora_scalings + + +class XLoRALinearLayer(XLoRALayer): + def __init__( + self, + model: nn.Module, + target: lora.Linear, + target_forward: Callable[..., Any], + layer_number: int, + config: XLoraConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the XLoRALayer class). + """ + + previous_dtype = x.dtype + xlora_scalings = self.get_maybe_topk_scalings() + + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_A.keys(): + continue + lora_A = self.target.lora_A[active_adapter] + lora_B = self.target.lora_B[active_adapter] + dropout = self.target.lora_dropout[active_adapter] + scaling = self.target.scaling[active_adapter] + x = x.to(lora_A.weight.dtype) # type: ignore + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + + result = result.to(previous_dtype) + return result + + +class XLoRAEmbeddingLayer(XLoRALayer): + def __init__( + self, + model: nn.Module, + target: lora.Embedding, + target_forward: Callable[..., Any], + layer_number: int, + config: XLoraConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the XLoRALayer class). 
+ """ + + xlora_scalings = self.get_maybe_topk_scalings() + + # TODO: no dtype conversion here, unlike in Linear, is that correct? + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_embedding_A: + continue + embedding_A = self.target.lora_embedding_A[active_adapter].T + embedding_B = self.target.lora_embedding_B[active_adapter].T + scaling = self.target.scaling[active_adapter] + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + after_A = self.target._embed(x_mod, embedding_A) # type: ignore + result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight + + return result + + +class XLoRAConv2dLayer(XLoRALayer): + def __init__( + self, + model: nn.Module, + target: lora.Conv2d, + target_forward: Callable[..., Any], + layer_number: int, + config: XLoraConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. + To use it, a bound method must be created (bound to an instance of the XLoRALayer class). + """ + + previous_dtype = x.dtype + xlora_scalings = self.get_maybe_topk_scalings() + + if self.target.disable_adapters: + if self.target.merged: + self.target.unmerge() + result = self.target.base_layer(x, *args, **kwargs) + elif self.target.merged: + result = self.target.base_layer(x, *args, **kwargs) + else: + result = self.target.base_layer(x, *args, **kwargs) + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + if active_adapter not in self.target.lora_A.keys(): + continue + lora_A = self.target.lora_A[active_adapter] + lora_B = self.target.lora_B[active_adapter] + dropout = self.target.lora_dropout[active_adapter] + scaling = self.target.scaling[active_adapter] + x = x.to(lora_A.weight.dtype) # type: ignore + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + + result = result.to(previous_dtype) + return result From b48d4afc8d0bab3c10d0a68df81e6108dfe1844c Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:16:25 -0400 Subject: [PATCH 089/182] Make some style changes --- src/peft/tuners/tuners_utils.py | 3 --- src/peft/tuners/xlora/config.py | 15 +++++++++++---- src/peft/tuners/xlora/layer.py | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index b9d59f8a1d..613eb2af2e 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -281,7 +281,6 @@ def _check_new_adapter_config(self, config: PeftConfig) -> None: """ pass -<<<<<<< HEAD def _check_merge_allowed(self): """Helper method to check whether the adapter can be merged. @@ -289,8 +288,6 @@ def _check_merge_allowed(self): """ pass -======= ->>>>>>> cddc00f (Make a nicer check for having target_modules) def inject_adapter(self, model: nn.Module, adapter_name: str): r""" Creates adapter layers and replaces the target modules with the adapter layers. 
This method is called under the diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 4997ea695d..1a13cf9f0e 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -16,6 +16,7 @@ from dataclasses import dataclass from typing import Dict, Optional +from sklearn import base import torch from peft.config import PeftConfig @@ -70,7 +71,7 @@ class XLoraConfig(PeftConfig): """ hidden_size: int = None # type: ignore - base_model_id: str + base_model_id: str = None device: torch.device = None # type: ignore adapters: Dict[str, str] = None # type: ignore enable_softmax: bool = True @@ -88,11 +89,17 @@ class XLoraConfig(PeftConfig): global_scaling_weight: float = 1.0 def __post_init__(self): - assert self.hidden_size is not None - assert self.device is not None - assert self.adapters is not None self.peft_type = PeftType.XLORA + if self.base_model_id == None: + raise TypeError("Expected value for base model ID.") + if self.hidden_size == None: + raise TypeError("Expected value for hidden size.") + if self.device == None: + raise TypeError("Expected value for device.") + if self.adapters == None: + raise TypeError("Expected value for adapters.") + if self.enable_softmax_topk and self.top_k_lora is None: warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set") diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 688759835e..12d7e5787f 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -5,7 +5,7 @@ from peft.tuners import lora from torch import Tensor -from xlora.xlora_config import XLoraConfig +from .config import XLoraConfig class XLoRALayer: From 4d48c5960e4c98ddce01ba887627dbb15228cd8d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:17:14 -0400 Subject: [PATCH 090/182] Fix some bugs --- src/peft/tuners/tuners_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 613eb2af2e..4b30a2751f 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -663,6 +663,8 @@ def _maybe_include_all_linear_layers(peft_config: PeftConfig, model: nn.Module) Helper function to update `target_modules` to all linear/Conv1D layers if provided as 'all-linear'. Adapted from the QLoRA repository: https://github.com/artidoro/qlora/blob/main/qlora.py """ + if not hasattr(peft_config, "target_modules"): + return peft_config # if `target_modules` is a string, convert to lower case and check if it matches "all-linear" if not ( From 93069bb5ac64f4b35b9aea65231e7d1e58125a4c Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:18:43 -0400 Subject: [PATCH 091/182] Remove base model id as unnecessary --- src/peft/tuners/xlora/config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 1a13cf9f0e..78b7d10a57 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -33,8 +33,6 @@ class XLoraConfig(PeftConfig): Args: hidden_size (`int`): Hidden size of the base model. - base_model_id (`str`): - Base model HF id. device (`torch.device`): Device for the X-LoRA classifier. 
adapters (`dict`): @@ -71,7 +69,6 @@ class XLoraConfig(PeftConfig): """ hidden_size: int = None # type: ignore - base_model_id: str = None device: torch.device = None # type: ignore adapters: Dict[str, str] = None # type: ignore enable_softmax: bool = True @@ -91,8 +88,6 @@ class XLoraConfig(PeftConfig): def __post_init__(self): self.peft_type = PeftType.XLORA - if self.base_model_id == None: - raise TypeError("Expected value for base model ID.") if self.hidden_size == None: raise TypeError("Expected value for hidden size.") if self.device == None: From 3ee935011c01069063d78f1db3a70fb1c5d1b176 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:21:33 -0400 Subject: [PATCH 092/182] Format with ruff --- src/peft/peft_model.py | 1 - src/peft/tuners/xlora/classifier.py | 25 +++++++------------------ src/peft/tuners/xlora/config.py | 11 +++++------ src/peft/tuners/xlora/layer.py | 7 ++++--- src/peft/tuners/xlora/model.py | 10 ++++++---- 5 files changed, 22 insertions(+), 32 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 810bab641d..d45cb268f2 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -54,7 +54,6 @@ XLoraConfig, XLoraModel, ) - from .tuners.tuners_utils import BaseTunerLayer from .tuners.xlora.classifier import XLoraClassifier from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index c07ce759b1..916f965e95 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -14,7 +14,6 @@ import builtins import json -import typing from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Union @@ -81,33 +80,25 @@ def __init__( layers = [] if self.config.xlora_depth == 1: if config.layerwise_scalings: # bias=False if we have just one layer - last = ( - nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) - ) + last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) else: last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(config.device).to(dtype) elif self.config.xlora_depth == 2: - layers.append( - nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) - ) + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype)) if config.enable_relu_and_dropout: layers.append(nn.ReLU()) layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = ( - nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) - ) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) else: last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) else: - if self.config.xlora_depth <= 0 : + if self.config.xlora_depth <= 0: raise ValueError("X-LoRA depth must be strictly positive.") - - layers.append( - nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) - ) + + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype)) if config.enable_relu_and_dropout: layers.append(nn.ReLU()) @@ -123,9 +114,7 @@ def __init__( layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = ( - nn.Linear(config.xlora_size, n_classes * n_layers, 
bias=bias_flag).to(config.device).to(dtype) - ) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) else: last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) self.layers = nn.Sequential(*layers, last) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 78b7d10a57..94c3789cdc 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -16,7 +16,6 @@ from dataclasses import dataclass from typing import Dict, Optional -from sklearn import base import torch from peft.config import PeftConfig @@ -27,7 +26,7 @@ class XLoraConfig(PeftConfig): r""" This is the configuration class to store the configuration of a [`XLoraClassifier`]. - When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the + When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys matter during loading. Args: @@ -36,7 +35,7 @@ class XLoraConfig(PeftConfig): device (`torch.device`): Device for the X-LoRA classifier. adapters (`dict`): - Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. + Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. *They will be automatically loaded*, to use as LoRA experts. enable_softmax (`bool`, *optional*, defaults to `True`): Enable softmax application for the X-LoRA classifier. @@ -88,11 +87,11 @@ class XLoraConfig(PeftConfig): def __post_init__(self): self.peft_type = PeftType.XLORA - if self.hidden_size == None: + if self.hidden_size is None: raise TypeError("Expected value for hidden size.") - if self.device == None: + if self.device is None: raise TypeError("Expected value for device.") - if self.adapters == None: + if self.adapters is None: raise TypeError("Expected value for adapters.") if self.enable_softmax_topk and self.top_k_lora is None: diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 12d7e5787f..3a4a8d533a 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -2,9 +2,10 @@ import torch import torch.nn as nn -from peft.tuners import lora from torch import Tensor +from peft.tuners import lora + from .config import XLoraConfig @@ -14,10 +15,10 @@ class XLoRALayer: Its primary API is the forward method, which uses the scalings to execute the XLoRA algorithm. """ - + def __init__( self, - model: nn.Module, # XLoraModel + model: nn.Module, # XLoraModel target: lora.LoraLayer, target_forward: Callable[..., Any], layer_number: int, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index d4725d50eb..b6c588fb57 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -14,7 +14,7 @@ import json import os -from typing import Any, Dict, List, Optional, Union +from typing import Any, List, Optional, Union import torch import torch.nn as nn @@ -27,7 +27,8 @@ from .. 
import lora from .classifier import InhibitorFlagPayload, Number, XLoraClassifier from .config import XLoraConfig -from .layer import XLoRALinearLayer, XLoRAEmbeddingLayer, XLoRAConv2dLayer +from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALinearLayer + @staticmethod def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: @@ -36,9 +37,10 @@ def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: # scalings_layer = [batch_size, seq_len, 1] return x * scalings + def convert_layers_to_xlora( base: nn.Module, # PeftModel - xloramodel: nn.Module, # XLoraModel + xloramodel: nn.Module, # XLoraModel config: XLoraConfig, ) -> int: """ @@ -77,7 +79,7 @@ def convert_layers_to_xlora( ) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - + return total_swapped From 7c1bcfa27ffec1c9fa812c520611695c6e5116bf Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:21:48 -0400 Subject: [PATCH 093/182] Add xlora test --- tests/test_xlora.py | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/test_xlora.py diff --git a/tests/test_xlora.py b/tests/test_xlora.py new file mode 100644 index 0000000000..87f0034124 --- /dev/null +++ b/tests/test_xlora.py @@ -0,0 +1,57 @@ +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model + + +class TestXlora: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def test_functional(self, tmp_path): + model_id = "facebook/opt-125m" + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") + + for i in range(1, 9): + torch.manual_seed(i) + lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) + model = AutoModelForCausalLM.from_pretrained(model_id) + peft_model = get_peft_model(model, lora_config) + peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") + print(f"finished {i} of 8") + + model = AutoModelForCausalLM.from_pretrained(model_id) + model.config.use_cache = False + adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + device=self.device, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to("cuda") + + model.enable_scalings_logging() + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) From e627525e0b25bf5750b01b8d780cafcf81689441 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:34:32 -0400 Subject: [PATCH 094/182] Handle lack of target modules in lora method --- src/peft/tuners/lora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 3f381efffd..c9f5021330 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -407,7 +407,7 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: + if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: raise ValueError("Please specify `target_modules` in `peft_config`") peft_config.target_modules = set( From aa189c616b80a1cebbce6423268e3a4f3e5e79b6 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:35:41 -0400 Subject: [PATCH 095/182] Handle lack of layer_replication in lora method --- src/peft/tuners/lora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index c9f5021330..bfdeef3445 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -164,7 +164,7 @@ def _prepare_model(self, peft_config: LoraConfig, model: nn.Module): model (`nn.Module`): The model that is going to be adapted. 
""" - if peft_config.layer_replication: + if hasattr(peft_config, "layer_replication") and peft_config.layer_replication: replicate_layers(model, peft_config.layer_replication) def _create_and_replace( From 2df1d5fb3eb5a517cbe5f61293c8a72636feae34 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:37:56 -0400 Subject: [PATCH 096/182] Handle lack of target_modules in check target modules exists --- src/peft/tuners/lora/model.py | 2 ++ src/peft/tuners/tuners_utils.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index bfdeef3445..3280d4494a 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -164,6 +164,7 @@ def _prepare_model(self, peft_config: LoraConfig, model: nn.Module): model (`nn.Module`): The model that is going to be adapted. """ + # Handle X-LoRA case if hasattr(peft_config, "layer_replication") and peft_config.layer_replication: replicate_layers(model, peft_config.layer_replication) @@ -407,6 +408,7 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): + # Handle X-LoRA case if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: raise ValueError("Please specify `target_modules` in `peft_config`") diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 4b30a2751f..b450abb980 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -604,6 +604,9 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: `bool` | `re.Match[str]` | `None`: True of match object if key matches any target modules from config, False or None if no match found """ + # Handle X-LoRA case + if not hasattr(config, "target_modules"): + return False if isinstance(config.target_modules, str): target_module_found = re.fullmatch(config.target_modules, key) elif key in config.target_modules: From b14c3351631aa3f52e785d3ac52ed9a546fb86d7 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:41:01 -0400 Subject: [PATCH 097/182] Handle lack of target_modules in inject_adapter --- src/peft/tuners/tuners_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index b450abb980..dd8b3df28b 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -348,7 +348,8 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): parent, target, target_name = _get_submodules(model, key) self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key) - if not is_target_modules_in_base_model: + # Handle X-LoRA case. + if not is_target_modules_in_base_model and hasattr(peft_config, "target_modules"): raise ValueError( f"Target modules {peft_config.target_modules} not found in the base model. " f"Please check the target modules and try again." 
From 8bec326e664a141a1643351a8d552e426ecc9dd6 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:53:37 -0400 Subject: [PATCH 098/182] Fix the recursion err --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index b6c588fb57..0fbedaa6fc 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -208,7 +208,7 @@ def hook(module, *args, **kwargs) -> None: xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped) # Setup the model internal state - self.internal_xlora_classifier = xlora_classifier + self.__dict__["internal_xlora_classifier"] = xlora_classifier self.internal_xlora_scalings = None # type: ignore def _freeze_all_adapters(self): From c79b37c8e719fc124ca41217089fdd09640b45c0 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 05:58:50 -0400 Subject: [PATCH 099/182] Formatting --- src/peft/tuners/lora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 3280d4494a..c956620e43 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -408,7 +408,7 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - # Handle X-LoRA case + # Handle X-LoRA case if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: raise ValueError("Please specify `target_modules` in `peft_config`") From e453747814e22f7b33d46bfe514a6d8971cd0335 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 11:33:11 -0400 Subject: [PATCH 100/182] Do not include device in config --- src/peft/tuners/xlora/classifier.py | 21 ++++++++++----------- src/peft/tuners/xlora/config.py | 7 ------- src/peft/tuners/xlora/model.py | 13 ++++++++----- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 916f965e95..c5d6de8e2d 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -61,6 +61,7 @@ def __init__( config: XLoraConfig, n_classes: int, n_layers: int, + device: torch.device, ): super().__init__() @@ -80,43 +81,41 @@ def __init__( layers = [] if self.config.xlora_depth == 1: if config.layerwise_scalings: # bias=False if we have just one layer - last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) else: - last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(device).to(dtype) elif self.config.xlora_depth == 2: - layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype)) + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) if config.enable_relu_and_dropout: layers.append(nn.ReLU()) layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) else: - last = 
nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(device).to(dtype) else: if self.config.xlora_depth <= 0: raise ValueError("X-LoRA depth must be strictly positive.") - layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype)) + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) if config.enable_relu_and_dropout: layers.append(nn.ReLU()) layers.append(nn.Dropout(p=config.xlora_dropout_p)) for _ in range(config.xlora_depth - 2): - layers.append( - nn.Linear(config.xlora_size, config.xlora_size, bias=bias_flag).to(config.device).to(dtype) - ) + layers.append(nn.Linear(config.xlora_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) if config.enable_relu_and_dropout: layers.append(nn.ReLU()) layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) else: - last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(config.device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(device).to(dtype) self.layers = nn.Sequential(*layers, last) def forward( diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 94c3789cdc..73acd15a28 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -16,8 +16,6 @@ from dataclasses import dataclass from typing import Dict, Optional -import torch - from peft.config import PeftConfig from peft.utils.peft_types import PeftType @@ -32,8 +30,6 @@ class XLoraConfig(PeftConfig): Args: hidden_size (`int`): Hidden size of the base model. - device (`torch.device`): - Device for the X-LoRA classifier. adapters (`dict`): Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. *They will be automatically loaded*, to use as LoRA experts. @@ -68,7 +64,6 @@ class XLoraConfig(PeftConfig): """ hidden_size: int = None # type: ignore - device: torch.device = None # type: ignore adapters: Dict[str, str] = None # type: ignore enable_softmax: bool = True enable_softmax_topk: bool = False @@ -89,8 +84,6 @@ def __post_init__(self): if self.hidden_size is None: raise TypeError("Expected value for hidden size.") - if self.device is None: - raise TypeError("Expected value for device.") if self.adapters is None: raise TypeError("Expected value for adapters.") diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 0fbedaa6fc..9ee2bdc76b 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -42,14 +42,16 @@ def convert_layers_to_xlora( base: nn.Module, # PeftModel xloramodel: nn.Module, # XLoraModel config: XLoraConfig, -) -> int: +) -> (int, torch.device | None): """ Returns the number of swapped layers. 
""" total_swapped = 0 + device = None for module in base.modules(): if isinstance(module, lora.Linear): + device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoRALinearLayer( model=xloramodel, target=module, @@ -60,6 +62,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): + device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoRAEmbeddingLayer( model=xloramodel, target=module, @@ -70,6 +73,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Conv2d): + device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoRAConv2dLayer( model=xloramodel, target=module, @@ -80,7 +84,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - return total_swapped + return (total_swapped, device) class XLoraModel(LoraModel): @@ -198,14 +202,14 @@ def hook(module, *args, **kwargs) -> None: self._freeze_all_adapters() - total_swapped = convert_layers_to_xlora( + total_swapped, device = convert_layers_to_xlora( model_peft, self, peft_config, ) n_classes = len(peft_config.adapters) - xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped) + xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped, device) # Setup the model internal state self.__dict__["internal_xlora_classifier"] = xlora_classifier @@ -246,7 +250,6 @@ def _save_pretrained_hook( classifier: XLoraClassifier = self.internal_xlora_classifier conf = self.xlora_config.__dict__.copy() - del conf["device"] conf["adapters"] = list(conf["adapters"].keys()) with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: From 10161b3a72a5ec50a862a6fd849fc7b628ab8c8b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 11:45:46 -0400 Subject: [PATCH 101/182] Cleaner method to eliminate adapters --- src/peft/tuners/xlora/config.py | 3 ++- src/peft/tuners/xlora/model.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 73acd15a28..2258dd510d 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -32,7 +32,8 @@ class XLoraConfig(PeftConfig): Hidden size of the base model. adapters (`dict`): Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. - *They will be automatically loaded*, to use as LoRA experts. + *They will be automatically loaded*, to use as LoRA experts. When using from_pretrained, pass the new adapters dict + as a keyword argument. enable_softmax (`bool`, *optional*, defaults to `True`): Enable softmax application for the X-LoRA classifier. 
enable_softmax_topk (`bool`, *optional*, defaults to `False`): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9ee2bdc76b..4d21c3ab95 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -251,7 +251,8 @@ def _save_pretrained_hook( conf = self.xlora_config.__dict__.copy() - conf["adapters"] = list(conf["adapters"].keys()) + # So that the adapters are unloadable and the user is forced to set them for from_pretrained + conf["adapters"] = None with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) From f22316823cecfe1b5eae857bf69b0a2f8c2be605 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 11:47:22 -0400 Subject: [PATCH 102/182] Cleaner method to eliminate adapters --- src/peft/peft_model.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index d45cb268f2..0393b4a4a1 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -399,12 +399,11 @@ def from_pretrained( if isinstance(model.base_model, XLoraModel): if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") + if "adapters" not in kwargs: + raise ValueError(f"Expected adapters to be in kwargs") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device() - config.device = torch.device(device) - - # If we are passed adapters in the kwargs, it is already in the config. - # If no adapters are passed, config.adapters is None + config.adapters = kwargs["adapters"] classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore From 31078a6cbf93366664eb7cd4e1c487304034c35a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 13:59:57 -0400 Subject: [PATCH 103/182] Remove unnecessary methods --- src/peft/tuners/xlora/model.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4d21c3ab95..f80f171f74 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -274,13 +274,6 @@ def set_topk_lora(self, value: Optional[int]): classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.top_k_lora = value - def get_topk_lora(self) -> Optional[int]: - """ - Get the current top_k LoRA experts value. - """ - classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - return classifier.config.top_k_lora - def set_global_scaling_weight(self, weight: float): """ Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. @@ -309,16 +302,6 @@ def get_scalings_log(self) -> List[torch.Tensor]: classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier.log_scalings.copy() - def set_scaling_pass_value(self, value: Union[Number, None]): - """ - Manually set the scalings to a specific value during the scaling pass, forever. Call this function with None to enable the default - scalings. - - This is reflected in the config. - """ - classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - classifier._set_override_scaling_pass_value(value) - def enable_scalings_logging(self): """ Enable scalings logging. 
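Because `_save_pretrained_hook` now writes `adapters = None` (see the change above), a reloaded X-LoRA checkpoint only works if the caller supplies the adapters again. A minimal reload sketch under that assumption; the checkpoint and adapter paths are placeholders, and a later commit in this series adds automatic discovery of adapter subfolders for Hub checkpoints:

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
    base.config.use_cache = False

    # Keys must match the adapter names the X-LoRA classifier was trained with;
    # values point at the saved LoRA adapter folders.
    adapters = {"0": "/path/to/checkpoint-1", "1": "/path/to/checkpoint-2"}

    model = PeftModel.from_pretrained(
        base,
        "/path/to/saved_xlora_model",
        adapters=adapters,
    )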
From d52f6cea7ca66c5623844f2db08e2e5c9b1d5dc2 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 14:08:03 -0400 Subject: [PATCH 104/182] Error on multiple xlora adapters --- src/peft/tuners/xlora/model.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index f80f171f74..53d00d86e0 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -147,11 +147,6 @@ def post_init_lora( model_peft: nn.Module, ) -> None: # model_peft: PeftModel - if not isinstance(model, PreTrainedModel): - raise TypeError(f"Expected model type to be 'PreTrainedModel', got '{type(model)}' instead.") - if not isinstance(peft_config, XLoraConfig): - raise TypeError(f"Expected config type to be 'XLoraConfig', got '{type(model)}' instead.") - self.xlora_config = peft_config if hasattr(model.config, "use_cache"): @@ -264,6 +259,13 @@ def _save_pretrained_hook( state_dict = classifier.state_dict() torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) + def inject_adapter(self, model: nn.Module, adapter_name: str): + # XLora only supports a single adapter. It wouldn't make sense to have multiple XLora adapters, + # but multiple Lora adapters are essentially a requirement. + if len(set(self.peft_config.keys()) | {adapter_name}) > 1: + raise ValueError("Trying to add a second XLora adapter, but {self.__class__.__name__} only supports a single adapter.") + return super().inject_adapter(model, adapter_name=adapter_name) + def forward(self, *args, **kwargs): return self.model(*args, **kwargs) # Important to *call* the model From 7e3c0d04f904063b8a6d2a7b5bda21529e234c82 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 5 Apr 2024 14:09:40 -0400 Subject: [PATCH 105/182] Error on use of dora --- src/peft/tuners/xlora/layer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 3a4a8d533a..a86c60d590 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -88,6 +88,9 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: result = self.target.base_layer(x, *args, **kwargs) for adapter_n, active_adapter in enumerate(self.target.active_adapters): + # TODO: implement X-LoRA with Lora+Dora layers + if self.target.use_dora[active_adapter]: + raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") if active_adapter not in self.target.lora_A.keys(): continue lora_A = self.target.lora_A[active_adapter] @@ -131,6 +134,9 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: else: result = self.target.base_layer(x, *args, **kwargs) for adapter_n, active_adapter in enumerate(self.target.active_adapters): + # TODO: implement X-LoRA with Lora+Dora layers + if self.target.use_dora[active_adapter]: + raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") if active_adapter not in self.target.lora_embedding_A: continue embedding_A = self.target.lora_embedding_A[active_adapter].T @@ -172,6 +178,9 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: else: result = self.target.base_layer(x, *args, **kwargs) for adapter_n, active_adapter in enumerate(self.target.active_adapters): + # TODO: implement X-LoRA with Lora+Dora layers + if self.target.use_dora[active_adapter]: + raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") if active_adapter not in self.target.lora_A.keys(): continue 
lora_A = self.target.lora_A[active_adapter] From 1edeb96718179e82992ebfe5fdf1dde0900a924f Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 9 Apr 2024 14:15:03 -0400 Subject: [PATCH 106/182] Raise errors and other misc fixes --- src/peft/tuners/xlora/config.py | 2 +- src/peft/tuners/xlora/model.py | 10 ++++++---- tests/test_xlora.py | 1 - 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 2258dd510d..c51bd4dafa 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -23,7 +23,7 @@ @dataclass class XLoraConfig(PeftConfig): r""" - This is the configuration class to store the configuration of a [`XLoraClassifier`]. + This is the configuration class to store the configuration of a `XLoraModel`. When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys matter during loading. diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 53d00d86e0..b60bda6be4 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -149,11 +149,11 @@ def post_init_lora( # model_peft: PeftModel self.xlora_config = peft_config - if hasattr(model.config, "use_cache"): - assert not model.config.use_cache, "`use_cache` must be False" + if hasattr(model.config, "use_cache") and not model.config.use_cache: + raise ValueError("`use_cache` must be False") use_trainable_adapters = peft_config.use_trainable_adapters - adapters_items = iter(peft_config.adapters.items()) + adapters_items = peft_config.adapters.items() # For load_adapter to think we are a LoraModel model_peft.peft_type = PeftType.LORA @@ -263,7 +263,9 @@ def inject_adapter(self, model: nn.Module, adapter_name: str): # XLora only supports a single adapter. It wouldn't make sense to have multiple XLora adapters, # but multiple Lora adapters are essentially a requirement. if len(set(self.peft_config.keys()) | {adapter_name}) > 1: - raise ValueError("Trying to add a second XLora adapter, but {self.__class__.__name__} only supports a single adapter.") + raise ValueError( + "Trying to add a second XLora adapter, but {self.__class__.__name__} only supports a single adapter." 
+ ) return super().inject_adapter(model, adapter_name=adapter_name) def forward(self, *args, **kwargs): diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 87f0034124..b84ccfa61d 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -42,7 +42,6 @@ def test_functional(self, tmp_path): peft_type=PeftType.XLORA, hidden_size=model.config.hidden_size, xlora_depth=8, - device=self.device, adapters=adapters, ) model = get_peft_model(model, peft_config).to("cuda") From bb5e54615630f0098025d5ee0aada0a316c75a74 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 9 Apr 2024 14:16:11 -0400 Subject: [PATCH 107/182] Depend on use trainable adapters --- src/peft/tuners/xlora/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index b60bda6be4..43308a6cb7 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -231,7 +231,8 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: if not isinstance(self.peft_config[name], XLoraConfig): active_adapters.append(name) self.active_adapter = active_adapters - super()._mark_only_adapters_as_trainable(model) + if self.xlora_config.use_trainable_adapters: + super()._mark_only_adapters_as_trainable(model) self.active_adapter = copy From a4479b65c9c45ce374b356763ccab29de96969f8 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 05:45:45 -0400 Subject: [PATCH 108/182] Implement another test and restructure the model --- src/peft/tuners/xlora/model.py | 41 ++++++--------------- tests/test_xlora.py | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 30 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 43308a6cb7..a342ae443f 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -87,7 +87,7 @@ def convert_layers_to_xlora( return (total_swapped, device) -class XLoraModel(LoraModel): +class XLoraModel: """ Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. Currently, this X-LoRA implementation only works with models with a transformer architecture. 
@@ -137,7 +137,7 @@ def __init__( config: Union[dict[str, XLoraConfig], XLoraConfig], adapter_name: str, ) -> None: - super().__init__(model, config, adapter_name) + self.lora_model = LoraModel(model, config, adapter_name) def post_init_lora( self, @@ -161,9 +161,9 @@ def post_init_lora( for name, model_id in adapters_items: model_peft.load_adapter(model_id, name, is_trainable=use_trainable_adapters) - self.delete_adapter(adapter_name) + self.lora_model.delete_adapter(adapter_name) - self.set_adapter(list(peft_config.adapters.keys())) + self.lora_model.set_adapter(list(peft_config.adapters.keys())) model_peft.active_adapter = name model_peft.peft_type = PeftType.XLORA @@ -207,7 +207,7 @@ def hook(module, *args, **kwargs) -> None: xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped, device) # Setup the model internal state - self.__dict__["internal_xlora_classifier"] = xlora_classifier + self.internal_xlora_classifier = xlora_classifier self.internal_xlora_scalings = None # type: ignore def _freeze_all_adapters(self): @@ -226,15 +226,15 @@ def generate(self, *args, **kwargs): def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: active_adapters = [] - copy = self.active_adapters.copy() - for name in self.active_adapters: - if not isinstance(self.peft_config[name], XLoraConfig): + copy = self.lora_model.active_adapters.copy() + for name in self.lora_model.active_adapters: + if not isinstance(self.lora_model.peft_config[name], XLoraConfig): active_adapters.append(name) - self.active_adapter = active_adapters + self.lora_model.active_adapter = active_adapters if self.xlora_config.use_trainable_adapters: super()._mark_only_adapters_as_trainable(model) - self.active_adapter = copy + self.lora_model.active_adapter = copy def _save_pretrained_hook( self, @@ -243,8 +243,6 @@ def _save_pretrained_hook( is_main_process: bool = True, **kwargs: Any, ) -> None: - classifier: XLoraClassifier = self.internal_xlora_classifier - conf = self.xlora_config.__dict__.copy() # So that the adapters are unloadable and the user is forced to set them for from_pretrained @@ -252,25 +250,8 @@ def _save_pretrained_hook( with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) - if safe_serialization: - # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 - if is_main_process and safe_serialization: - save_model(classifier, os.path.join(save_directory, "xlora_classifier.safetensors")) - elif is_main_process: - state_dict = classifier.state_dict() - torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) - - def inject_adapter(self, model: nn.Module, adapter_name: str): - # XLora only supports a single adapter. It wouldn't make sense to have multiple XLora adapters, - # but multiple Lora adapters are essentially a requirement. - if len(set(self.peft_config.keys()) | {adapter_name}) > 1: - raise ValueError( - "Trying to add a second XLora adapter, but {self.__class__.__name__} only supports a single adapter." 
- ) - return super().inject_adapter(model, adapter_name=adapter_name) - def forward(self, *args, **kwargs): - return self.model(*args, **kwargs) # Important to *call* the model + return self.lora_model.model(*args, **kwargs) # Important to *call* the model def set_topk_lora(self, value: Optional[int]): """ diff --git a/tests/test_xlora.py b/tests/test_xlora.py index b84ccfa61d..283ba602de 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -54,3 +54,69 @@ def test_functional(self, tmp_path): ) text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) print(text[0]) + + def test_methods(self, tmp_path): + model_id = "facebook/opt-125m" + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") + + for i in range(1, 9): + torch.manual_seed(i) + lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) + model = AutoModelForCausalLM.from_pretrained(model_id) + peft_model = get_peft_model(model, lora_config) + peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") + print(f"finished {i} of 8") + + model = AutoModelForCausalLM.from_pretrained(model_id) + model.config.use_cache = False + adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to("cuda") + + model.enable_scalings_logging() + model.set_topk_lora(3) + assert model.internal_xlora_classifier.config.top_k_lora == 3 + + model.set_global_scaling_weight(1.5) + assert model.internal_xlora_classifier.config.set_global_scaling_weight == 1.5 + assert model.get_global_scaling_weight() == 1.5 + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + _ = model.get_latest_scalings() + assert len(model.get_scalings_log()) == 1 + + model.disable_scalings_logging() + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + assert len(model.get_scalings_log()) == 1 + + model.clear_scalings_log() + assert len(model.get_scalings_log()) == 1 + + model.flush_log_scalings("output") # writes to output.npy and a json file + + model.set_use_trainable_adapters() + assert model.xlora_config.use_trainable_adapters + + model.get_use_trainable_adapters() + assert not model.xlora_config.use_trainable_adapters From 81b069b6c8915cd9208be9a581b8cf90d4f44e1a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 05:49:25 -0400 Subject: [PATCH 109/182] Inherit --- src/peft/tuners/xlora/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index a342ae443f..f18e7df3de 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -22,6 +22,7 @@ from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel +from peft.tuners.tuners_utils import BaseTuner from peft.utils.peft_types import PeftType 
from .. import lora @@ -87,7 +88,7 @@ def convert_layers_to_xlora( return (total_swapped, device) -class XLoraModel: +class XLoraModel(BaseTuner): """ Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. Currently, this X-LoRA implementation only works with models with a transformer architecture. @@ -138,6 +139,7 @@ def __init__( adapter_name: str, ) -> None: self.lora_model = LoraModel(model, config, adapter_name) + super().__init__(model, config, adapter_name) def post_init_lora( self, From 33c2623241d8cc9245b5e3c65da121ddb738052d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 06:02:51 -0400 Subject: [PATCH 110/182] Impl some abstract methods and remove the special cases --- src/peft/tuners/lora/model.py | 13 +++++-------- src/peft/tuners/tuners_utils.py | 3 --- src/peft/tuners/xlora/model.py | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index c956620e43..1c4de9b379 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -408,14 +408,11 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - # Handle X-LoRA case - if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: - if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: - raise ValueError("Please specify `target_modules` in `peft_config`") - peft_config.target_modules = set( - TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]] - ) - return peft_config + if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: + raise ValueError("Please specify `target_modules` in `peft_config`") + peft_config.target_modules = set( + TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]] + ) def _unload_and_optionally_merge( self, diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index dd8b3df28b..d03eab9d8a 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -605,9 +605,6 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: `bool` | `re.Match[str]` | `None`: True of match object if key matches any target modules from config, False or None if no match found """ - # Handle X-LoRA case - if not hasattr(config, "target_modules"): - return False if isinstance(config.target_modules, str): target_module_found = re.fullmatch(config.target_modules, key) elif key in config.target_modules: diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index f18e7df3de..479d8e1adc 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -238,6 +238,28 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: self.lora_model.active_adapter = copy + @staticmethod + def _prepare_adapter_config(peft_config, _model_config): + # Handle X-LoRA case + return peft_config + + def _create_and_replace( + self, + lora_config, + adapter_name, + target, + target_name, + parent, + current_key, + ): + # Does nothing because XLoraModel has no target modules + pass + + @staticmethod + def _check_target_module_exists(lora_config, key): + # Does nothing because XLoraModel has no target modules + return False + def _save_pretrained_hook( self, save_directory: str, From 63f34188519af21f24d68dd780c9bd38d8bdc024 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: 
Wed, 10 Apr 2024 06:09:46 -0400 Subject: [PATCH 111/182] Fix return --- src/peft/tuners/lora/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 1c4de9b379..c214ff6b5a 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -413,6 +413,7 @@ def _prepare_adapter_config(peft_config, model_config): peft_config.target_modules = set( TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]] ) + return peft_config def _unload_and_optionally_merge( self, From 31863fa26487538a11ea1c91e0cee3d2f8540bca Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 08:35:52 -0400 Subject: [PATCH 112/182] Make some progress --- src/peft/tuners/lora/model.py | 6 ++++++ src/peft/tuners/tuners_utils.py | 2 +- src/peft/tuners/xlora/model.py | 35 +++++++++++++++++++++------------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index c214ff6b5a..a337f83796 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -179,6 +179,9 @@ def _create_and_replace( ): if current_key is None: raise ValueError("Current Key shouldn't be `None`") + # Handle X-LoRA case: + if not hasattr(lora_config, "rank_pattern"): + return # Regexp matching - Find key which matches current target_name in patterns provided pattern_keys = list(chain(lora_config.rank_pattern.keys(), lora_config.alpha_pattern.keys())) @@ -257,6 +260,9 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: p.requires_grad = False for active_adapter in self.active_adapters: + # Handle X-LoRA case + if not hasattr(self.peft_config[active_adapter], "bias"): + return bias = self.peft_config[active_adapter].bias if bias == "none": continue diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index d03eab9d8a..3787f45e94 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -168,7 +168,7 @@ def __init__( self.peft_config.update(peft_config) self.active_adapter = adapter_name - if peft_config[adapter_name] != PeftType.XLORA: + if peft_config != PeftType.XLORA or peft_config[adapter_name] != PeftType.XLORA: self.inject_adapter(self.model, adapter_name) # Copy the peft_config in the injected model. 
diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 479d8e1adc..cf25e70e71 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -138,7 +138,13 @@ def __init__( config: Union[dict[str, XLoraConfig], XLoraConfig], adapter_name: str, ) -> None: - self.lora_model = LoraModel(model, config, adapter_name) + if isinstance(config, dict): + conf = config[adapter_name] + else: + conf = config + self.__dict__["xlora_config"] = conf + self.__dict__["lora_model"] = LoraModel(model, config, adapter_name) + del conf.target_modules super().__init__(model, config, adapter_name) def post_init_lora( @@ -151,23 +157,17 @@ def post_init_lora( # model_peft: PeftModel self.xlora_config = peft_config - if hasattr(model.config, "use_cache") and not model.config.use_cache: + if hasattr(model.config, "use_cache") and model.config.use_cache: raise ValueError("`use_cache` must be False") - use_trainable_adapters = peft_config.use_trainable_adapters adapters_items = peft_config.adapters.items() - # For load_adapter to think we are a LoraModel - model_peft.peft_type = PeftType.LORA - for name, model_id in adapters_items: - model_peft.load_adapter(model_id, name, is_trainable=use_trainable_adapters) + self.lora_model.load_adapter(model_id, name) self.lora_model.delete_adapter(adapter_name) self.lora_model.set_adapter(list(peft_config.adapters.keys())) - model_peft.active_adapter = name - model_peft.peft_type = PeftType.XLORA def hook(module, *args, **kwargs) -> None: args_real = args[0] @@ -220,13 +220,22 @@ def _freeze_all_adapters(self): param.requires_grad = False def generate(self, *args, **kwargs): - # Rely on LoraModel.__getattr__ - res = self.model.generate(*args, **kwargs) # type: ignore + res = self.lora_model.generate(*args, **kwargs) # type: ignore # TODO(EricLBuehler): Evaluate effectiveness and performance degradation self._freeze_all_adapters() return res + def __getattr__(self, name: str): + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self.lora_model, name) + def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: + # Handle case during init + if not hasattr(self, "lora_model"): + return active_adapters = [] copy = self.lora_model.active_adapters.copy() for name in self.lora_model.active_adapters: @@ -323,14 +332,14 @@ def disable_scalings_logging(self): """ Disable scalings logging, without clearing the log. """ - classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.scalings_logging = False def clear_scalings_log(self): """ Clear the scalings log. 
""" - classifier: XLoraClassifier = self.model.internal_xlora_classifier # type: ignore + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.log_scalings.clear() def flush_log_scalings(self, path: str): From 34f7594ed2105ae38ce2776a7b9ec1d0038d034b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 08:39:48 -0400 Subject: [PATCH 113/182] Make some progress --- src/peft/tuners/xlora/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index cf25e70e71..c3f2c93159 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -146,6 +146,8 @@ def __init__( self.__dict__["lora_model"] = LoraModel(model, config, adapter_name) del conf.target_modules super().__init__(model, config, adapter_name) + del self.__dict__["xlora_config"] + del self.__dict__["lora_model"] def post_init_lora( self, From 7468114961a0e7d3693acc00ad631bac34e816fb Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 19:17:18 -0400 Subject: [PATCH 114/182] Working version and add some tests --- src/peft/tuners/xlora/classifier.py | 8 +-- src/peft/tuners/xlora/model.py | 11 +-- tests/test_xlora.py | 108 ++++++++++++++++++++-------- 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index c5d6de8e2d..4a6a604b65 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -65,7 +65,7 @@ def __init__( ): super().__init__() - self.model = model + self.__dict__["model"] = model # We want to hide this from Pytorch... self.n_classes = n_classes self.n_layers = n_layers self.config = config @@ -226,12 +226,12 @@ def _flush_log_scalings(self, path: str): seqlens_map: Dict[int, Tuple[List[int], List[torch.Tensor]]] = {} for i, scaling in enumerate(self.log_scalings): - seq_len = scaling.shape[0] + seq_len = scaling.shape[1] if seq_len not in seqlens_map: - seqlens_map[seq_len] = ([i], [scaling]) + seqlens_map[seq_len] = ([i], [scaling.cpu()]) else: seqlens_map[seq_len][0].append(i) - seqlens_map[seq_len][1].append(scaling) + seqlens_map[seq_len][1].append(scaling.cpu()) if len(seqlens_map) == 1: self._save_scalings(path, [scaling.unsqueeze(0) for scaling in self.log_scalings]) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index c3f2c93159..efc9fa5a1b 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -142,12 +142,15 @@ def __init__( conf = config[adapter_name] else: conf = config + l = LoraModel(model, config.copy(), adapter_name) self.__dict__["xlora_config"] = conf - self.__dict__["lora_model"] = LoraModel(model, config, adapter_name) - del conf.target_modules + del self.xlora_config.target_modules + self.__dict__["lora_model"] = l super().__init__(model, config, adapter_name) - del self.__dict__["xlora_config"] del self.__dict__["lora_model"] + del self.__dict__["xlora_config"] + self.lora_model = l + self.xlora_config = conf def post_init_lora( self, @@ -167,8 +170,6 @@ def post_init_lora( for name, model_id in adapters_items: self.lora_model.load_adapter(model_id, name) - self.lora_model.delete_adapter(adapter_name) - self.lora_model.set_adapter(list(peft_config.adapters.keys())) def hook(module, *args, **kwargs) -> None: diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 283ba602de..86500b84e0 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -14,25 +14,30 @@ import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer +import os from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model +model_id = "facebook/opt-125m" +tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") +tmp_path = "/tmp/xlora_test" +os.makedirs(tmp_path, exist_ok=True) + +for i in range(1, 9): + torch.manual_seed(i) + lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) + model = AutoModelForCausalLM.from_pretrained(model_id) + peft_model = get_peft_model(model, lora_config) + peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") + print(f"finished {i} of 8") + class TestXlora: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - def test_functional(self, tmp_path): - model_id = "facebook/opt-125m" + def test_functional(self): tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") - for i in range(1, 9): - torch.manual_seed(i) - lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) - model = AutoModelForCausalLM.from_pretrained(model_id) - peft_model = get_peft_model(model, lora_config) - peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") - print(f"finished {i} of 8") - model = AutoModelForCausalLM.from_pretrained(model_id) model.config.use_cache = False adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} @@ -55,18 +60,9 @@ def test_functional(self, tmp_path): text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) print(text[0]) - def test_methods(self, tmp_path): - model_id = "facebook/opt-125m" + def test_scalings_logging_methods(self): tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") - for i in range(1, 9): - torch.manual_seed(i) - lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) - model = AutoModelForCausalLM.from_pretrained(model_id) - peft_model = get_peft_model(model, lora_config) - peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") - print(f"finished {i} of 8") - model = AutoModelForCausalLM.from_pretrained(model_id) model.config.use_cache = False adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} @@ -81,12 +77,6 @@ def test_methods(self, tmp_path): model = get_peft_model(model, peft_config).to("cuda") model.enable_scalings_logging() - model.set_topk_lora(3) - assert model.internal_xlora_classifier.config.top_k_lora == 3 - - model.set_global_scaling_weight(1.5) - assert model.internal_xlora_classifier.config.set_global_scaling_weight == 1.5 - assert model.get_global_scaling_weight() == 1.5 inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -97,9 +87,10 @@ def test_methods(self, tmp_path): print(text[0]) _ = model.get_latest_scalings() - assert len(model.get_scalings_log()) == 1 + assert 32 >= len(model.get_scalings_log()) > 0 model.disable_scalings_logging() + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( input_ids=inputs.to("cuda"), @@ -108,15 +99,72 @@ def test_methods(self, tmp_path): text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) print(text[0]) - assert len(model.get_scalings_log()) == 1 + assert 32 >= len(model.get_scalings_log()) > 0 model.clear_scalings_log() - assert len(model.get_scalings_log()) == 1 + assert 
len(model.get_scalings_log()) == 0 + + def test_scalings_flush(self): + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") + + model = AutoModelForCausalLM.from_pretrained(model_id) + model.config.use_cache = False + adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to("cuda") + + model.enable_scalings_logging() + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) model.flush_log_scalings("output") # writes to output.npy and a json file - model.set_use_trainable_adapters() + def test_misc_methods(self): + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") + + model = AutoModelForCausalLM.from_pretrained(model_id) + model.config.use_cache = False + adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to("cuda") + + model.set_global_scaling_weight(1.5) + assert model.internal_xlora_classifier.config.global_scaling_weight == 1.5 + assert model.get_global_scaling_weight() == 1.5 + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + model.set_use_trainable_adapters(True) assert model.xlora_config.use_trainable_adapters + model.set_use_trainable_adapters(False) model.get_use_trainable_adapters() assert not model.xlora_config.use_trainable_adapters + + assert str(model) != None From e4053bc5ef6b238089a0bd05162a55316fa2e44b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 10 Apr 2024 19:56:16 -0400 Subject: [PATCH 115/182] Explain need for refreezing --- src/peft/tuners/xlora/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index efc9fa5a1b..7322956afb 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -200,7 +200,7 @@ def hook(module, *args, **kwargs) -> None: model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) - self._freeze_all_adapters() + self._maybe_freeze_all_adapters() total_swapped, device = convert_layers_to_xlora( model_peft, @@ -215,7 +215,7 @@ def hook(module, *args, **kwargs) -> None: self.internal_xlora_classifier = xlora_classifier self.internal_xlora_scalings = None # type: ignore - def _freeze_all_adapters(self): + def _maybe_freeze_all_adapters(self): self.eval() if not self.xlora_config.use_trainable_adapters: for name, param in self.named_parameters(): @@ -224,8 +224,8 @@ def _freeze_all_adapters(self): def generate(self, *args, **kwargs): res = self.lora_model.generate(*args, **kwargs) # type: ignore - # TODO(EricLBuehler): Evaluate effectiveness and performance 
degradation - self._freeze_all_adapters() + # This is necessary because we use PeftModel.disable_adapter() which reenables the adapters + self._maybe_freeze_all_adapters() return res def __getattr__(self, name: str): From cc0cfee1173f5819ffb4d1d9ed0e4ab9b88c06b4 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 07:51:47 -0400 Subject: [PATCH 116/182] Fix some ruff lints --- src/peft/peft_model.py | 2 +- src/peft/tuners/xlora/model.py | 11 ++++------- tests/test_xlora.py | 6 ++++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 58cb4c3cb6..1522817014 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -400,7 +400,7 @@ def from_pretrained( if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") if "adapters" not in kwargs: - raise ValueError(f"Expected adapters to be in kwargs") + raise ValueError("Expected adapters to be in kwargs") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device() config.adapters = kwargs["adapters"] diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7322956afb..4c2c97912e 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -18,15 +18,12 @@ import torch import torch.nn as nn -from safetensors.torch import save_model # type: ignore -from transformers import PreTrainedModel from peft.tuners.lora.model import LoraModel from peft.tuners.tuners_utils import BaseTuner -from peft.utils.peft_types import PeftType from .. import lora -from .classifier import InhibitorFlagPayload, Number, XLoraClassifier +from .classifier import InhibitorFlagPayload, XLoraClassifier from .config import XLoraConfig from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALinearLayer @@ -142,14 +139,14 @@ def __init__( conf = config[adapter_name] else: conf = config - l = LoraModel(model, config.copy(), adapter_name) + lora_model = LoraModel(model, config.copy(), adapter_name) self.__dict__["xlora_config"] = conf del self.xlora_config.target_modules - self.__dict__["lora_model"] = l + self.__dict__["lora_model"] = lora_model super().__init__(model, config, adapter_name) del self.__dict__["lora_model"] del self.__dict__["xlora_config"] - self.lora_model = l + self.lora_model = lora_model self.xlora_config = conf def post_init_lora( diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 86500b84e0..0d2b9e8055 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os + import torch from transformers import AutoModelForCausalLM, AutoTokenizer -import os from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model + model_id = "facebook/opt-125m" tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") tmp_path = "/tmp/xlora_test" @@ -167,4 +169,4 @@ def test_misc_methods(self): model.get_use_trainable_adapters() assert not model.xlora_config.use_trainable_adapters - assert str(model) != None + assert str(model) is not None From 9debbe3eb2b0c0db886ad6fafae0fef6ce0b2ad2 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 08:38:48 -0400 Subject: [PATCH 117/182] Get adapter names and handle gracefully with subfolders --- src/peft/peft_model.py | 30 ++++++++++++++++++++++++++---- src/peft/tuners/xlora/model.py | 15 ++++++++++++--- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 1522817014..1b2002c40e 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -22,13 +22,14 @@ from copy import deepcopy from typing import Any, Optional, Union +import huggingface_hub import packaging.version import torch import transformers from accelerate import dispatch_model, infer_auto_device_map from accelerate.hooks import AlignDevicesHook, add_hook_to_module, remove_hook_from_submodules from accelerate.utils import get_balanced_memory, named_module_tensors -from huggingface_hub import ModelCard, ModelCardData, hf_hub_download +from huggingface_hub import HfFileSystem, ModelCard, ModelCardData, hf_hub_download from safetensors import safe_open from safetensors.torch import save_file as safe_save_file from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss @@ -390,6 +391,30 @@ def from_pretrained( raise ValueError("Cannot set a prompt learning adapter to trainable when loading pretrained adapter.") else: config.inference_mode = not is_trainable + if isinstance(model.base_model, XLoraModel): + if not isinstance(config, XLoraConfig): + raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") + if "adapters" in kwargs: + config.adapters = kwargs["adapters"] + else: + # If the path is on HF hub, then we get the adapter names to create a subfolders list which tells + # `load_adapter` where the adapters are. 
+ if not os.path.exists(model_id): + s = HfFileSystem() + + # The names of the adapters which must be in folders + adapter_names = [ + file["name"][len(model_id) + 1 :] for file in s.ls(model_id) if file["type"] == "directory" + ] + # Prepare a dict of adapter paths, which really just point to the hf id; we will use the subfolders + adapter_paths = {} + for adapter_name in adapter_names: + adapter_paths[adapter_name] = os.path.join(model_id, model_id) + config.adapters = adapter_paths + config._subfolders = adapter_names + else: + if "adapters" not in kwargs: + raise ValueError("If model_id is a local path, then `adapters` must be passed in kwargs.") if config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys(): model = cls(model, config, adapter_name) @@ -399,11 +424,8 @@ def from_pretrained( if isinstance(model.base_model, XLoraModel): if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") - if "adapters" not in kwargs: - raise ValueError("Expected adapters to be in kwargs") device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device() - config.adapters = kwargs["adapters"] classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4c2c97912e..0e87930f0d 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -153,7 +153,6 @@ def post_init_lora( self, model: nn.Module, peft_config: XLoraConfig, - adapter_name: str, model_peft: nn.Module, ) -> None: # model_peft: PeftModel @@ -163,9 +162,17 @@ def post_init_lora( raise ValueError("`use_cache` must be False") adapters_items = peft_config.adapters.items() + if hasattr(self.xlora_config, "_subfolders"): + adapters_items = zip(peft_config.adapters.items(), self.xlora_config._subfolders) + else: + adapters_items = peft_config.adapters.items() - for name, model_id in adapters_items: - self.lora_model.load_adapter(model_id, name) + if hasattr(self.xlora_config, "_subfolders"): + for (adapter_name, model_id), subfolder in adapters_items: + self.lora_model.load_adapter(model_id, adapter_name, subfolder=subfolder) + else: + for adapter_name, model_id in adapters_items: + self.lora_model.load_adapter(model_id, adapter_name) self.lora_model.set_adapter(list(peft_config.adapters.keys())) @@ -280,6 +287,8 @@ def _save_pretrained_hook( # So that the adapters are unloadable and the user is forced to set them for from_pretrained conf["adapters"] = None + if hasattr(conf, "_subfolders"): + del conf["_subfolders"] # It may have been added in from_pretrained with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) From a283ea8b6d2e642e948730d05f841730b148f5d7 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 09:50:28 -0400 Subject: [PATCH 118/182] Fix unused --- src/peft/peft_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 1b2002c40e..8b06caaa74 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -22,7 +22,6 @@ from copy import deepcopy from typing import Any, Optional, Union -import huggingface_hub import packaging.version import torch import transformers From aca6044529f555dbba6768f34dd1dd12751e9f92 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:07:29 -0400 Subject: [PATCH 119/182] Rewrite 
the flush_log_scalings function with new functionality --- src/peft/peft_model.py | 2 +- src/peft/tuners/xlora/classifier.py | 55 +++----------- src/peft/tuners/xlora/model.py | 14 ++-- tests/test_xlora.py | 108 +++++++++++++--------------- 4 files changed, 64 insertions(+), 115 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 8b06caaa74..0e73a7ccd3 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -424,7 +424,7 @@ def from_pretrained( if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") - device = infer_device() # As in PeftModel.load_adapter, torch_device = infer_device() + device = device = kwargs.get("device") if kwargs.get("device") is not None else infer_device() classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 4a6a604b65..6c2a13c21d 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -159,7 +159,6 @@ def forward( hidden_states = result.hidden_states # type: ignore - assert hidden_states is not None hidden_state = hidden_states[-1] # Get the last hidden state ### Classifier run @@ -186,38 +185,12 @@ def forward( return scalings - def get_nb_trainable_parameters(self): - # https://github.com/huggingface/peft/blob/main/src/peft/mixed_model.py#L156 - r""" - Returns the number of trainable parameters and number of all parameters in the model. + def _get_bucketed_scalings(self) -> Dict[int, Tuple[List[int], List[torch.Tensor]]]: + """ + Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) + and the associated tensors. The positions are paired with the associated tensors and give the position + in the scaling log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)). 
""" - trainable_params = 0 - all_param = 0 - for _, param in self.named_parameters(): - num_params = param.numel() - # if using DS Zero 3 and the weights are initialized empty - if num_params == 0 and hasattr(param, "ds_numel"): - num_params = param.ds_numel # type: ignore - - # Due to the design of 4bit linear layers from bitsandbytes - # one needs to multiply the number of parameters by 2 to get - # the correct number of parameters - if param.__class__.__name__ == "Params4bit": - num_params = num_params * 2 - - all_param += num_params - if param.requires_grad: - trainable_params += num_params - - return trainable_params, all_param - - @staticmethod - def _save_scalings(file: str, scalings: List[torch.Tensor]): - result = torch.cat(scalings, dim=0) - npy = result.numpy() - numpy.save(file, npy) - - def _flush_log_scalings(self, path: str): if not self.scalings_logging: raise Exception("Scalings logging is disabled!") @@ -228,24 +201,12 @@ def _flush_log_scalings(self, path: str): for i, scaling in enumerate(self.log_scalings): seq_len = scaling.shape[1] if seq_len not in seqlens_map: - seqlens_map[seq_len] = ([i], [scaling.cpu()]) + seqlens_map[seq_len] = ([i], [scaling]) else: seqlens_map[seq_len][0].append(i) - seqlens_map[seq_len][1].append(scaling.cpu()) - - if len(seqlens_map) == 1: - self._save_scalings(path, [scaling.unsqueeze(0) for scaling in self.log_scalings]) - else: - indices_map: Dict[str, List[int]] = {} - for seq_len, (indices, scalings_list) in seqlens_map.items(): - indices_map[f"{path}-{seq_len}.npy"] = indices - - self._save_scalings(f"{path}-{seq_len}", [scaling.unsqueeze(0) for scaling in scalings_list]) - - with open(f"{path}-mapping.json", "w") as f: - f.write(json.dumps(indices_map)) + seqlens_map[seq_len][1].append(scaling) - self.log_scalings.clear() + return seqlens_map def _set_override_scaling_pass_value(self, value: Union[Number, None]): if value is None: diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 0e87930f0d..5f128c8bae 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -351,18 +351,14 @@ def clear_scalings_log(self): classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.log_scalings.clear() - def flush_log_scalings(self, path: str): + def get_bucketed_scalings_log(self) -> dict[int, tuple[list[int], list[torch.Tensor]]]: """ - Write the scalings log (a tensor of shape (num_logged, batch_size, seq_len, n_layers, n_classes)) to the specified path. - If the tensor cannot be constructed, multiple files are written containing tensors of shape - (num_logged, batch_size, seq_len, n_layers, n_classes) such that each file contains one sequence length. Additionally a JSON - file is outputted containing the mapping from each sequence log file to the index of the contained tensor so that one may reconstruct - the log order. - - The file specified should not contain an extension. + Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) + and the associated tensors. The positions are paired with the associated tensors and give the position + in the scaling log. 
""" classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - classifier._flush_log_scalings(path) + return classifier._get_bucketed_scalings() def set_use_trainable_adapters(self, use_trainable_adapters: bool): """ diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 0d2b9e8055..48c442a9a8 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -18,31 +18,39 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model - - -model_id = "facebook/opt-125m" -tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") -tmp_path = "/tmp/xlora_test" -os.makedirs(tmp_path, exist_ok=True) - -for i in range(1, 9): - torch.manual_seed(i) - lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) - model = AutoModelForCausalLM.from_pretrained(model_id) - peft_model = get_peft_model(model, lora_config) - peft_model.save_pretrained(f"{tmp_path}/checkpoint-{i}") - print(f"finished {i} of 8") +import pytest class TestXlora: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - def test_functional(self): - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") - - model = AutoModelForCausalLM.from_pretrained(model_id) + model_id = "facebook/opt-125m" + num_loras = 4 + + @pytest.fixture(scope="class") + def tmp_dir(self, tmp_path_factory): + # create a class-scoped temp directory + return tmp_path_factory.mktemp("xlora") + + @pytest.fixture(scope="class") + def saved_lora_adapters(self, tmp_dir): + file_names = [] + for i in range(1, self.num_loras + 1): + torch.manual_seed(i) + lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) + model = AutoModelForCausalLM.from_pretrained(self.model_id) + peft_model = get_peft_model(model, lora_config) + file_name = os.path.join(tmp_dir, f"checkpoint-{i}") + peft_model.save_pretrained(file_name) + file_names.append(file_name) + return file_names + + def test_functional(self, saved_lora_adapters): + tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.device) + + model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} peft_config = XLoraConfig( task_type=TaskType.CAUSAL_LM, @@ -51,7 +59,7 @@ def test_functional(self): xlora_depth=8, adapters=adapters, ) - model = get_peft_model(model, peft_config).to("cuda") + model = get_peft_model(model, peft_config).to(self.device) model.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") @@ -60,14 +68,14 @@ def test_functional(self): max_new_tokens=32, ) text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + # TODO: do any check on the text? 
- def test_scalings_logging_methods(self): - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") + def test_scalings_logging_methods(self, saved_lora_adapters): + tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map="cuda:0") - model = AutoModelForCausalLM.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} peft_config = XLoraConfig( task_type=TaskType.CAUSAL_LM, @@ -106,40 +114,24 @@ def test_scalings_logging_methods(self): model.clear_scalings_log() assert len(model.get_scalings_log()) == 0 - def test_scalings_flush(self): - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") - - model = AutoModelForCausalLM.from_pretrained(model_id) - model.config.use_cache = False - adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} - - peft_config = XLoraConfig( - task_type=TaskType.CAUSAL_LM, - peft_type=PeftType.XLORA, - hidden_size=model.config.hidden_size, - xlora_depth=8, - adapters=adapters, - ) - model = get_peft_model(model, peft_config).to("cuda") - - model.enable_scalings_logging() - - inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") - outputs = model.generate( - input_ids=inputs.to("cuda"), - max_new_tokens=32, - ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) - - model.flush_log_scalings("output") # writes to output.npy and a json file - - def test_misc_methods(self): - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, device_map="cuda:0") - - model = AutoModelForCausalLM.from_pretrained(model_id) + bucketed = model.get_bucketed_scalings_log() + keys = bucketed.keys() + assert len(bucketed) == 2 + # One bucket for prompt (which has 1 elem) + assert len(bucketed[max(keys)][0]) == 1 + assert len(bucketed[max(keys)][1]) == 1 + assert bucketed[max(keys)][0][0] == 0 + # One bucket for completions with bucket name 1 + assert len(bucketed[1][0]) > 1 + assert len(bucketed[1][1]) > 1 + assert bucketed[1][0][0] > 0 + + def test_misc_methods(self, saved_lora_adapters): + tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map="cuda:0") + + model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - adapters = {str(i): f"{tmp_path}/checkpoint-{i}" for i in range(1, 9)} + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} peft_config = XLoraConfig( task_type=TaskType.CAUSAL_LM, From d10f4eee58f4969072efc1a7f05ebd3c157ed409 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:34:23 -0400 Subject: [PATCH 120/182] Use some fixtures --- tests/test_xlora.py | 48 ++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 48c442a9a8..c9042b26e1 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -45,9 +45,13 @@ def saved_lora_adapters(self, tmp_dir): file_names.append(file_name) return file_names - def test_functional(self, saved_lora_adapters): + @pytest.fixture(scope="class") + def tokenizer(self): tokenizer = AutoTokenizer.from_pretrained(self.model_id, 
trust_remote_code=True, device_map=self.device) + return tokenizer + @pytest.fixture(scope="function") + def model(self, saved_lora_adapters): model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} @@ -60,7 +64,9 @@ def test_functional(self, saved_lora_adapters): adapters=adapters, ) model = get_peft_model(model, peft_config).to(self.device) + return model + def test_functional(self, tokenizer, model): model.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -68,24 +74,10 @@ def test_functional(self, saved_lora_adapters): max_new_tokens=32, ) text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - # TODO: do any check on the text? - - def test_scalings_logging_methods(self, saved_lora_adapters): - tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map="cuda:0") - - model = AutoModelForCausalLM.from_pretrained(self.model_id) - model.config.use_cache = False - adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} - - peft_config = XLoraConfig( - task_type=TaskType.CAUSAL_LM, - peft_type=PeftType.XLORA, - hidden_size=model.config.hidden_size, - xlora_depth=8, - adapters=adapters, - ) - model = get_peft_model(model, peft_config).to("cuda") + # No check on text because of sampling, untrained adapters etc. + print(text[0]) + def test_scalings_logging_methods(self, tokenizer, model): model.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") @@ -97,7 +89,8 @@ def test_scalings_logging_methods(self, saved_lora_adapters): print(text[0]) _ = model.get_latest_scalings() - assert 32 >= len(model.get_scalings_log()) > 0 + # 32 is the numeber of max scalings. 3 is the number of prompt tokens. 
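# [Editor's note, not part of the patch: the bound below is deliberately loose. generate() runs one
# forward pass over the 3-token prompt ("Python is a") plus at most max_new_tokens (32) further
# passes, and the classifier records one scalings tensor per model forward pass while logging is
# enabled, so the log can hold at most 32 + 3 entries here.]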
+ assert 32 + 3 >= len(model.get_scalings_log()) > 0 model.disable_scalings_logging() @@ -126,22 +119,7 @@ def test_scalings_logging_methods(self, saved_lora_adapters): assert len(bucketed[1][1]) > 1 assert bucketed[1][0][0] > 0 - def test_misc_methods(self, saved_lora_adapters): - tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map="cuda:0") - - model = AutoModelForCausalLM.from_pretrained(self.model_id) - model.config.use_cache = False - adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} - - peft_config = XLoraConfig( - task_type=TaskType.CAUSAL_LM, - peft_type=PeftType.XLORA, - hidden_size=model.config.hidden_size, - xlora_depth=8, - adapters=adapters, - ) - model = get_peft_model(model, peft_config).to("cuda") - + def test_misc_methods(self, tokenizer, model): model.set_global_scaling_weight(1.5) assert model.internal_xlora_classifier.config.global_scaling_weight == 1.5 assert model.get_global_scaling_weight() == 1.5 From 677c64e792922ed58aa0e7e42ddba061ac382fa1 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:46:47 -0400 Subject: [PATCH 121/182] Fix passing arg --- src/peft/peft_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 0e73a7ccd3..86040eb865 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -135,7 +135,7 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) if isinstance(self.base_model, XLoraModel): - self.base_model.post_init_lora(model, peft_config, adapter_name, self) + self.base_model.post_init_lora(model, peft_config, self) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): From 93db8bc42d98c17cbe62f3226770bcf5afc8a883 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:48:03 -0400 Subject: [PATCH 122/182] Remove unnecessary checks --- src/peft/tuners/xlora/classifier.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 6c2a13c21d..2530f55009 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -191,12 +191,6 @@ def _get_bucketed_scalings(self) -> Dict[int, Tuple[List[int], List[torch.Tensor and the associated tensors. The positions are paired with the associated tensors and give the position in the scaling log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)). 
""" - if not self.scalings_logging: - raise Exception("Scalings logging is disabled!") - - if len(self.log_scalings) == 0: - raise ValueError("No log scalings to flush.") - seqlens_map: Dict[int, Tuple[List[int], List[torch.Tensor]]] = {} for i, scaling in enumerate(self.log_scalings): seq_len = scaling.shape[1] From 99fff5c3b7856d822a48c157192007c9c6e1c2c9 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:48:48 -0400 Subject: [PATCH 123/182] Fix test --- tests/test_xlora.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index c9042b26e1..633ca64d63 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -104,9 +104,6 @@ def test_scalings_logging_methods(self, tokenizer, model): assert 32 >= len(model.get_scalings_log()) > 0 - model.clear_scalings_log() - assert len(model.get_scalings_log()) == 0 - bucketed = model.get_bucketed_scalings_log() keys = bucketed.keys() assert len(bucketed) == 2 @@ -119,6 +116,9 @@ def test_scalings_logging_methods(self, tokenizer, model): assert len(bucketed[1][1]) > 1 assert bucketed[1][0][0] > 0 + model.clear_scalings_log() + assert len(model.get_scalings_log()) == 0 + def test_misc_methods(self, tokenizer, model): model.set_global_scaling_weight(1.5) assert model.internal_xlora_classifier.config.global_scaling_weight == 1.5 From 42db87c1c1e764b4b5b5b6f4943908d5ff7b3e42 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:53:13 -0400 Subject: [PATCH 124/182] Fix a bug --- src/peft/tuners/xlora/layer.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index a86c60d590..4cef272535 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -78,11 +78,8 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: previous_dtype = x.dtype xlora_scalings = self.get_maybe_topk_scalings() - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: + # Ignore if disabled. We want to make sure this is always run. + if self.target.merged: result = self.target.base_layer(x, *args, **kwargs) else: result = self.target.base_layer(x, *args, **kwargs) @@ -124,12 +121,8 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: xlora_scalings = self.get_maybe_topk_scalings() - # TODO: no dtype conversion here, unlike in Linear, is that correct? - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: + # Ignore if disabled. We want to make sure this is always run. + if self.target.merged: result = self.target.base_layer(x, *args, **kwargs) else: result = self.target.base_layer(x, *args, **kwargs) @@ -169,11 +162,8 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: previous_dtype = x.dtype xlora_scalings = self.get_maybe_topk_scalings() - if self.target.disable_adapters: - if self.target.merged: - self.target.unmerge() - result = self.target.base_layer(x, *args, **kwargs) - elif self.target.merged: + # Ignore if disabled. We want to make sure this is always run. 
+ if self.target.merged: result = self.target.base_layer(x, *args, **kwargs) else: result = self.target.base_layer(x, *args, **kwargs) From 6787b9f65c51a4a0f920606bbdac299cbf49dbbb Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 11 Apr 2024 22:57:26 -0400 Subject: [PATCH 125/182] More sensible defaults --- src/peft/tuners/xlora/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index c51bd4dafa..fa5d438802 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -49,13 +49,13 @@ class XLoraConfig(PeftConfig): Depth of the X-LoRA classifier. xlora_size (`int`, *optional*, defaults to 2048): Hidden size of the X-LoRA classifier, irrelevant if `xlora_depth=1`. - enable_relu_and_dropout (`bool`, *optional*, defaults to `False`): + enable_relu_and_dropout (`bool`, *optional*, defaults to `True`): Enable ReLU activation and Dropout application of the X-LoRA classifier. use_bias (`bool`, *optional*, defaults to `True`): Enable bias in X-LoRA classifier. xlora_dropout_p (`float`, *optional*, defaults to 0.2): Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or - `enable_relu_and_dropout=True`. + `enable_relu_and_dropout=False`. use_trainable_adapters (`bool`, *optional*, defaults to False): Make the adapters trainable. scaling_pass_value (`float`, *optional*, defaults to 0): @@ -71,7 +71,7 @@ class XLoraConfig(PeftConfig): layerwise_scalings: bool = False xlora_depth: int = 1 xlora_size: int = 2048 - enable_relu_and_dropout: bool = False + enable_relu_and_dropout: bool = True use_bias: bool = True xlora_dropout_p: float = 0.2 use_trainable_adapters: bool = False From 6fe86d15f30d4eeb0a2d5d0ab17e2a64ae3cb232 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 07:53:22 -0400 Subject: [PATCH 126/182] Cast --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 5f128c8bae..0efcb84bc5 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -191,7 +191,7 @@ def hook(module, *args, **kwargs) -> None: self.internal_xlora_scalings = torch.full( # type: ignore (payload.batch_size, payload.seq_len, xlora_classifier.n_layers, xlora_classifier.n_classes), payload.override_scaling_pass_value, - ) + ).to(model_peft.device) return From f9488832efe42632c90505a7743c65311df7ce77 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 14:12:32 -0400 Subject: [PATCH 127/182] Refactor forward pass --- src/peft/tuners/xlora/classifier.py | 34 ++++++++++++ src/peft/tuners/xlora/layer.py | 2 +- src/peft/tuners/xlora/model.py | 82 ++++++++++++++++++----------- 3 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 2530f55009..48a6d5bc6e 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -118,6 +118,40 @@ def __init__( last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(device).to(dtype) self.layers = nn.Sequential(*layers, last) + def make_dummy_scalingss( + self, + input_ids: Optional[torch.LongTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + *args, + **kwargs, + ) -> torch.Tensor: + """ + Make some dummy scalings + """ + if input_ids is not None: + batch_size = input_ids.shape[0] + device = input_ids.device + else: + batch_size = 
inputs_embeds.shape[0] + device = input_ids.device + + if input_ids is not None: + seq_len = input_ids.shape[1] + device = input_ids.device + else: + seq_len = inputs_embeds.shape[1] + device = input_ids.device + + payload = InhibitorFlagPayload( + batch_size=batch_size, + seq_len=seq_len, + override_scaling_pass_value=self.override_scaling_pass_value, + ) + return torch.full( # type: ignore + (payload.batch_size, payload.seq_len, self.n_layers, self.n_classes), + payload.override_scaling_pass_value, + ).to(device) + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 4cef272535..8891da548c 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -39,7 +39,7 @@ def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: def get_maybe_topk_scalings(self) -> torch.Tensor: # xlora_scalings = [batch_size, seq_len, n_classes] - xlora_scalings: Tensor = self.model.internal_xlora_scalings[:, :, self.layer_number, :] # type: ignore + xlora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore if self.config.top_k_lora is not None: _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=-1) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 0efcb84bc5..a34d9f4984 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -25,7 +25,7 @@ from .. import lora from .classifier import InhibitorFlagPayload, XLoraClassifier from .config import XLoraConfig -from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALinearLayer +from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALayer, XLoRALinearLayer @staticmethod @@ -176,34 +176,6 @@ def post_init_lora( self.lora_model.set_adapter(list(peft_config.adapters.keys())) - def hook(module, *args, **kwargs) -> None: - args_real = args[0] - kwargs_real: dict = args[1] - kwargs_real.update(kwargs) - - xlora_classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore - - if "_xlora_classifier_inhibitor_flag" in kwargs_real: - payload: InhibitorFlagPayload = kwargs_real["_xlora_classifier_inhibitor_flag"] - - del kwargs_real["_xlora_classifier_inhibitor_flag"] - - self.internal_xlora_scalings = torch.full( # type: ignore - (payload.batch_size, payload.seq_len, xlora_classifier.n_layers, xlora_classifier.n_classes), - payload.override_scaling_pass_value, - ).to(model_peft.device) - - return - - xlora_scalings = xlora_classifier.forward( - *args_real, - **kwargs_real, - ) - # Set the scalings - self.internal_xlora_scalings = xlora_scalings - - model.register_forward_pre_hook(hook, with_kwargs=True, prepend=True) - self._maybe_freeze_all_adapters() total_swapped, device = convert_layers_to_xlora( @@ -293,7 +265,57 @@ def _save_pretrained_hook( json.dump(conf, f) def forward(self, *args, **kwargs): - return self.lora_model.model(*args, **kwargs) # Important to *call* the model + model = self.lora_model.model + + # =========================== Forward pass with "dummy" scalings ================== + + dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs) + + # Inject the dummy scalings to each layer + def hook(module, *args, **kwargs): + module.scalings = dummy_scalings + return args, kwargs + + handles = [] + + for module in model.modules(): + if isinstance(module, XLoRALayer): + handle = module.register_forward_pre_hook(hook, with_kwargs=True) + handles.append(handle) + + 
with torch.no_grad(): + with model.disable_adapter(): + scaling_pass_kwargs = kwargs.copy() + scaling_pass_kwargs["output_hidden_states"] = True + scaling_pass_kwargs["return_dict"] = True + try: + base_output = self.model(*args, **scaling_pass_kwargs) + finally: + for handle in handles: + handle.remove() + + xlora_scalings = self.xlora_classifier(**base_output) + + # =========================== Real forward pass with calculated scalings ================== + + # Inject the *real* scalings to each layer + def hook(module, *args, **kwargs): + module.scalings = xlora_scalings + return args, kwargs + + handles = [] + + for module in model.modules(): + if isinstance(module, XLoRALayer): + handle = module.register_forward_pre_hook(hook, with_kwargs=True) + handles.append(handle) + + try: + output = self.model(*args, **kwargs) + finally: + for handle in handles: + handle.remove() + return output def set_topk_lora(self, value: Optional[int]): """ From 83ecd3762eebcdbd30eee3c97b325dcedac0dc2d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 14:53:29 -0400 Subject: [PATCH 128/182] Finish refactoring of model forward --- src/peft/tuners/xlora/classifier.py | 22 +----- src/peft/tuners/xlora/model.py | 104 +++++++++++++--------------- 2 files changed, 51 insertions(+), 75 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 48a6d5bc6e..ba48ead06d 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -118,7 +118,7 @@ def __init__( last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(device).to(dtype) self.layers = nn.Sequential(*layers, last) - def make_dummy_scalingss( + def make_dummy_scalings( self, input_ids: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -154,6 +154,7 @@ def make_dummy_scalingss( def forward( self, + result, input_ids: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, *args, @@ -172,25 +173,6 @@ def forward( else: seq_len = inputs_embeds.shape[1] - # For type checking - model = self.model - with torch.no_grad(): - with model.disable_adapter(): - kwargs["output_hidden_states"] = True - kwargs["return_dict"] = True - - result: ModelOutput = model.forward( - *args, - input_ids=input_ids, - inputs_embeds=inputs_embeds, - _xlora_classifier_inhibitor_flag=InhibitorFlagPayload( - batch_size=batch_size, - seq_len=seq_len, - override_scaling_pass_value=self.override_scaling_pass_value, - ), - **kwargs, - ) - hidden_states = result.hidden_states # type: ignore hidden_state = hidden_states[-1] # Get the last hidden state diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index a34d9f4984..ac59d21f61 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -25,7 +25,7 @@ from .. import lora from .classifier import InhibitorFlagPayload, XLoraClassifier from .config import XLoraConfig -from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALayer, XLoRALinearLayer +from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALayer, XLoRALayer, XLoRALinearLayer @staticmethod @@ -40,11 +40,12 @@ def convert_layers_to_xlora( base: nn.Module, # PeftModel xloramodel: nn.Module, # XLoraModel config: XLoraConfig, -) -> (int, torch.device | None): +) -> tuple[int, torch.device | None, list[nn.Module]]: """ Returns the number of swapped layers. 
""" total_swapped = 0 + all_layers = [] device = None for module in base.modules(): @@ -57,6 +58,7 @@ def convert_layers_to_xlora( layer_number=total_swapped, config=config, ) + all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): @@ -68,6 +70,7 @@ def convert_layers_to_xlora( layer_number=total_swapped, config=config, ) + all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Conv2d): @@ -79,10 +82,11 @@ def convert_layers_to_xlora( layer_number=total_swapped, config=config, ) + all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - return (total_swapped, device) + return (total_swapped, device, all_layers) class XLoraModel(BaseTuner): @@ -178,12 +182,52 @@ def post_init_lora( self._maybe_freeze_all_adapters() - total_swapped, device = convert_layers_to_xlora( + total_swapped, device, all_layers = convert_layers_to_xlora( model_peft, self, peft_config, ) + # Now replace the old forward function with a new one that implements the X-LoRA architecture + old_model_forward = self.lora_model.model.forward + + def new_model_forward(*args, **kwargs) -> None: + # =========================== Forward pass with "dummy" scalings ================== + + dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs) + + for layer in all_layers: + layer.scalings = dummy_scalings + + with torch.no_grad(): + with model_peft.disable_adapter(): + scaling_pass_kwargs = kwargs.copy() + scaling_pass_kwargs["output_hidden_states"] = True + scaling_pass_kwargs["return_dict"] = True + try: + base_output = old_model_forward(*args, **scaling_pass_kwargs) + finally: + # Clean everything up + for layer in all_layers: + layer.scalings = None + + xlora_scalings = self.internal_xlora_classifier(result=base_output, *args, **kwargs) + + # =========================== Real forward pass with calculated scalings ================== + + for layer in all_layers: + layer.scalings = xlora_scalings + + try: + output = old_model_forward(*args, **kwargs) + finally: + # Clean everything up + for layer in all_layers: + layer.scalings = None + return output + + self.lora_model.model.forward = new_model_forward + n_classes = len(peft_config.adapters) xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped, device) @@ -265,57 +309,7 @@ def _save_pretrained_hook( json.dump(conf, f) def forward(self, *args, **kwargs): - model = self.lora_model.model - - # =========================== Forward pass with "dummy" scalings ================== - - dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs) - - # Inject the dummy scalings to each layer - def hook(module, *args, **kwargs): - module.scalings = dummy_scalings - return args, kwargs - - handles = [] - - for module in model.modules(): - if isinstance(module, XLoRALayer): - handle = module.register_forward_pre_hook(hook, with_kwargs=True) - handles.append(handle) - - with torch.no_grad(): - with model.disable_adapter(): - scaling_pass_kwargs = kwargs.copy() - scaling_pass_kwargs["output_hidden_states"] = True - scaling_pass_kwargs["return_dict"] = True - try: - base_output = self.model(*args, **scaling_pass_kwargs) - finally: - for handle in handles: - handle.remove() - - xlora_scalings = self.xlora_classifier(**base_output) - - # =========================== Real forward 
pass with calculated scalings ================== - - # Inject the *real* scalings to each layer - def hook(module, *args, **kwargs): - module.scalings = xlora_scalings - return args, kwargs - - handles = [] - - for module in model.modules(): - if isinstance(module, XLoRALayer): - handle = module.register_forward_pre_hook(hook, with_kwargs=True) - handles.append(handle) - - try: - output = self.model(*args, **kwargs) - finally: - for handle in handles: - handle.remove() - return output + return self.lora_model.model(*args, **kwargs) def set_topk_lora(self, value: Optional[int]): """ From ba4093a061e2764c9c048047efea67b95acadc69 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 14:55:42 -0400 Subject: [PATCH 129/182] Remove some dead code --- src/peft/tuners/xlora/classifier.py | 6 ------ src/peft/tuners/xlora/model.py | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index ba48ead06d..4ed75c5e0f 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -13,16 +13,11 @@ # limitations under the License. import builtins -import json from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Union -import numpy import torch import torch.nn as nn -from transformers.modeling_outputs import ( # type: ignore - ModelOutput, -) from .config import XLoraConfig @@ -65,7 +60,6 @@ def __init__( ): super().__init__() - self.__dict__["model"] = model # We want to hide this from Pytorch... self.n_classes = n_classes self.n_layers = n_layers self.config = config diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index ac59d21f61..c08045a21f 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -23,9 +23,9 @@ from peft.tuners.tuners_utils import BaseTuner from .. 
import lora -from .classifier import InhibitorFlagPayload, XLoraClassifier +from .classifier import XLoraClassifier from .config import XLoraConfig -from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALayer, XLoRALayer, XLoRALinearLayer +from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALinearLayer @staticmethod From e19cbc6247e667335c38cea4458300c6778fbee5 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 15:07:36 -0400 Subject: [PATCH 130/182] Run formatting --- tests/test_xlora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 633ca64d63..62281c7ad7 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -14,11 +14,11 @@ import os +import pytest import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model -import pytest class TestXlora: From cf54d6a7dd969300a81112b69c07a442ed470d41 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 12 Apr 2024 15:21:45 -0400 Subject: [PATCH 131/182] Add test for saving and loading --- tests/test_xlora.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 62281c7ad7..56ead1acdc 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -14,6 +14,7 @@ import os +from peft.peft_model import PeftModel import pytest import torch from transformers import AutoModelForCausalLM, AutoTokenizer @@ -140,3 +141,28 @@ def test_misc_methods(self, tokenizer, model): assert not model.xlora_config.use_trainable_adapters assert str(model) is not None + + def test_save_load_functional(self, tokenizer, model, tmp_dir): + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + model.save_pretrained(save_directory=tmp_dir) + + del model + + model = AutoModelForCausalLM.from_pretrained(self.model_id) + model.config.use_cache = False + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir) + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) From 750e7ace6f7157e926206ed53681341ecb774a3a Mon Sep 17 00:00:00 2001 From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:21:33 -0400 Subject: [PATCH 132/182] Fix determining adapter device for embedding Co-authored-by: Benjamin Bossan --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index c08045a21f..7d0e44261a 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -62,7 +62,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): - device = module.lora_A[next(iter(module.lora_A))].weight.device + device = module.lora_A[next(iter(module.lora_embedding_A))].weight.device new_layer = XLoRAEmbeddingLayer( model=xloramodel, target=module, From 
a7ac01ce363f51847c635f36348414d331a823cf Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 17 Apr 2024 08:45:11 -0400 Subject: [PATCH 133/182] Fix the save and load --- src/peft/tuners/xlora/model.py | 11 +++++++++++ src/peft/utils/save_and_load.py | 2 ++ tests/test_xlora.py | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7d0e44261a..6f41b95cc9 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -18,6 +18,7 @@ import torch import torch.nn as nn +from safetensors.torch import save_model # type: ignore from peft.tuners.lora.model import LoraModel from peft.tuners.tuners_utils import BaseTuner @@ -308,6 +309,16 @@ def _save_pretrained_hook( with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: json.dump(conf, f) + if safe_serialization: + # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 + if is_main_process and safe_serialization: + save_model( + self.internal_xlora_classifier, os.path.join(save_directory, "xlora_classifier.safetensors") + ) + elif is_main_process: + state_dict = self.internal_xlora_classifier.state_dict() + torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) + def forward(self, *args, **kwargs): return self.lora_model.model(*args, **kwargs) diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index 8b7654f3b3..ae8134f594 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py @@ -124,6 +124,8 @@ def get_peft_model_state_dict( to_return = {k: state_dict[k] for k in state_dict if "oft_" in k} elif config.peft_type == PeftType.POLY: to_return = {k: state_dict[k] for k in state_dict if "poly_" in k} + elif config.peft_type == PeftType.XLORA: + to_return = {} else: raise NotImplementedError if getattr(model, "modules_to_save", None) is not None: diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 56ead1acdc..350e95bb25 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -14,12 +14,12 @@ import os -from peft.peft_model import PeftModel import pytest import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model +from peft.peft_model import PeftModel class TestXlora: From 7fd76cacfa5bd8e25f99a98227382a37663a242f Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 17 Apr 2024 08:46:40 -0400 Subject: [PATCH 134/182] Fix the save and load --- tests/test_xlora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 350e95bb25..da5fa34bbf 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -157,7 +157,7 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir) + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.device) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( From dc8bd3b6aa577fb6c9b2dac0adb037a68779bd5d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 17 Apr 2024 08:58:05 -0400 Subject: [PATCH 135/182] Comment --- tests/test_xlora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index da5fa34bbf..8c1368e584 100644 --- a/tests/test_xlora.py +++ 
b/tests/test_xlora.py @@ -107,6 +107,7 @@ def test_scalings_logging_methods(self, tokenizer, model): bucketed = model.get_bucketed_scalings_log() keys = bucketed.keys() + # One bucket for prompt (seqlen=...) and one for the completion (seqlen=1) assert len(bucketed) == 2 # One bucket for prompt (which has 1 elem) assert len(bucketed[max(keys)][0]) == 1 From c379f70d055c1f808b282e42b2a38672bac913a8 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 17 Apr 2024 09:51:41 -0400 Subject: [PATCH 136/182] Update the docstrings --- src/peft/tuners/xlora/classifier.py | 6 +++--- src/peft/tuners/xlora/config.py | 16 ++++++++-------- src/peft/tuners/xlora/layer.py | 17 ++++++++--------- src/peft/tuners/xlora/model.py | 20 ++++++++++++-------- src/peft/tuners/xlora/util.py | 4 ++-- 5 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 4ed75c5e0f..c2c177bb5c 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -197,9 +197,9 @@ def forward( def _get_bucketed_scalings(self) -> Dict[int, Tuple[List[int], List[torch.Tensor]]]: """ - Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) - and the associated tensors. The positions are paired with the associated tensors and give the position - in the scaling log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)). + Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) and the + associated tensors. The positions are paired with the associated tensors and give the position in the scaling + log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)). """ seqlens_map: Dict[int, Tuple[List[int], List[torch.Tensor]]] = {} for i, scaling in enumerate(self.log_scalings): diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index fa5d438802..aa467a8046 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -23,22 +23,22 @@ @dataclass class XLoraConfig(PeftConfig): r""" - This is the configuration class to store the configuration of a `XLoraModel`. - When the config is reloaded, the paths of the `adapters` field is disregarded in favor of the - saved adapters. As such, only the keys matter during loading. + This is the configuration class to store the configuration of a `XLoraModel`. When the config is reloaded, the + paths of the `adapters` field is disregarded in favor of the saved adapters. As such, only the keys matter during + loading. Args: hidden_size (`int`): Hidden size of the base model. adapters (`dict`): - Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. - *They will be automatically loaded*, to use as LoRA experts. When using from_pretrained, pass the new adapters dict - as a keyword argument. + Mapping of adapter names to the LoRA adapter id, as per PeftModel.load_adapter. *They will be automatically + loaded*, to use as LoRA experts. When using from_pretrained, pass the new adapters dict as a keyword + argument. enable_softmax (`bool`, *optional*, defaults to `True`): Enable softmax application for the X-LoRA classifier. enable_softmax_topk (`bool`, *optional*, defaults to `False`): - Enable softmax application for the top-k LoRA adapters. Mutually exclusive to - `enable_softmax` and must only be set if `top_k_lora` is. + Enable softmax application for the top-k LoRA adapters. 
Mutually exclusive to `enable_softmax` and must + only be set if `top_k_lora` is. softmax_temperature (`float`, *optional*, defaults to 1.0): Softmax temperature, lower yields sharper predictions layerwise_scalings (`bool`, *optional*, defaults to `False`): diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 8891da548c..2c4a4f9c81 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -11,9 +11,8 @@ class XLoRALayer: """ - A XLoRALayer wraps any LoraLayer and performs the XLoRA operation on the LoRA adaptors specified. - Its primary API is the forward method, which uses the scalings to execute the - XLoRA algorithm. + A XLoRALayer wraps any LoraLayer and performs the XLoRA operation on the LoRA adaptors specified. Its primary API + is the forward method, which uses the scalings to execute the XLoRA algorithm. """ def __init__( @@ -71,8 +70,8 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ - This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoRALayer class). + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound + method must be created (bound to an instance of the XLoRALayer class). """ previous_dtype = x.dtype @@ -115,8 +114,8 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ - This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoRALayer class). + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound + method must be created (bound to an instance of the XLoRALayer class). """ xlora_scalings = self.get_maybe_topk_scalings() @@ -155,8 +154,8 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ - This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. - To use it, a bound method must be created (bound to an instance of the XLoRALayer class). + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound + method must be created (bound to an instance of the XLoRALayer class). """ previous_dtype = x.dtype diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 6f41b95cc9..7dc27b46d4 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -324,14 +324,16 @@ def forward(self, *args, **kwargs): def set_topk_lora(self, value: Optional[int]): """ - Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. This is reflected in the config. + Sparsely select the specified top_k LoRA experts instead of the default dense method. Set to None to use dense. + This is reflected in the config. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.top_k_lora = value def set_global_scaling_weight(self, weight: float): """ - Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This is reflected in the config. + Set the global LoRA weight, a scalar to multiply the output of each LoRA adapter by. This is by default 1. This + is reflected in the config. 
""" classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.global_scaling_weight = weight @@ -345,14 +347,16 @@ def get_global_scaling_weight(self) -> float: def get_latest_scalings(self) -> Optional[torch.Tensor]: """ - Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape (batch_size, seq_len, n_layers, n_classes). + Returns the latest scalings prediction, or None if no scalings have been predicted. The tensor is of shape + (batch_size, seq_len, n_layers, n_classes). """ return self.internal_xlora_scalings def get_scalings_log(self) -> List[torch.Tensor]: """ - Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. - The tensors are of shape (batch_size, seq_len, n_layers, n_classes). The seq_len dim may vary with input dimension. + Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. + Editing the list does not change the underlying log. The tensors are of shape (batch_size, seq_len, n_layers, + n_classes). The seq_len dim may vary with input dimension. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier.log_scalings.copy() @@ -380,9 +384,9 @@ def clear_scalings_log(self): def get_bucketed_scalings_log(self) -> dict[int, tuple[list[int], list[torch.Tensor]]]: """ - Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) - and the associated tensors. The positions are paired with the associated tensors and give the position - in the scaling log. + Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) and the + associated tensors. The positions are paired with the associated tensors and give the position in the scaling + log. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier._get_bucketed_scalings() diff --git a/src/peft/tuners/xlora/util.py b/src/peft/tuners/xlora/util.py index 6b7c58402f..25194610be 100644 --- a/src/peft/tuners/xlora/util.py +++ b/src/peft/tuners/xlora/util.py @@ -27,8 +27,8 @@ def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: r""" - A helper method to load the classifier weights from the HuggingFace Hub or locally. - This is essentially `load_peft_weights`, but with the safetensors names changed. + A helper method to load the classifier weights from the HuggingFace Hub or locally. This is essentially + `load_peft_weights`, but with the safetensors names changed. 
Args: model_id (`str`): From eaf505220dd63907699ba6bca25c5e16d2168010 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 17 Apr 2024 12:23:06 -0400 Subject: [PATCH 137/182] Remove the hacking --- src/peft/tuners/xlora/model.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 7dc27b46d4..621bd95f24 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -140,19 +140,16 @@ def __init__( config: Union[dict[str, XLoraConfig], XLoraConfig], adapter_name: str, ) -> None: + nn.Module.__init__(self) + if isinstance(config, dict): conf = config[adapter_name] else: conf = config lora_model = LoraModel(model, config.copy(), adapter_name) - self.__dict__["xlora_config"] = conf + self.xlora_config = conf del self.xlora_config.target_modules - self.__dict__["lora_model"] = lora_model - super().__init__(model, config, adapter_name) - del self.__dict__["lora_model"] - del self.__dict__["xlora_config"] self.lora_model = lora_model - self.xlora_config = conf def post_init_lora( self, From a510560cb4e151e5b04a33fa2c1426a51b3c482d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 22 Apr 2024 11:07:48 -0400 Subject: [PATCH 138/182] Remove the post_init_lora --- src/peft/peft_model.py | 2 -- src/peft/tuners/xlora/layer.py | 18 +++++------ src/peft/tuners/xlora/model.py | 59 ++++++++++++++++------------------ 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 907d2c159b..728d7782a5 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -138,8 +138,6 @@ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name self._peft_config = None cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type] self.base_model = cls(model, {adapter_name: peft_config}, adapter_name) - if isinstance(self.base_model, XLoraModel): - self.base_model.post_init_lora(model, peft_config, self) self.set_additional_trainable_modules(peft_config, adapter_name) if getattr(model, "is_gradient_checkpointing", True): diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 2c4a4f9c81..b65b2de134 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -9,10 +9,10 @@ from .config import XLoraConfig -class XLoRALayer: +class XLoraLayer: """ - A XLoRALayer wraps any LoraLayer and performs the XLoRA operation on the LoRA adaptors specified. Its primary API - is the forward method, which uses the scalings to execute the XLoRA algorithm. + A XLoraLayer wraps any LoraLayer and performs the XLora operation on the LoRA adaptors specified. Its primary API + is the forward method, which uses the scalings to execute the XLora algorithm. """ def __init__( @@ -57,7 +57,7 @@ def get_maybe_topk_scalings(self) -> torch.Tensor: return xlora_scalings -class XLoRALinearLayer(XLoRALayer): +class XLoraLinearLayer(XLoraLayer): def __init__( self, model: nn.Module, @@ -71,7 +71,7 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound - method must be created (bound to an instance of the XLoRALayer class). + method must be created (bound to an instance of the XLoraLayer class). 
""" previous_dtype = x.dtype @@ -101,7 +101,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return result -class XLoRAEmbeddingLayer(XLoRALayer): +class XLoraEmbeddingLayer(XLoraLayer): def __init__( self, model: nn.Module, @@ -115,7 +115,7 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound - method must be created (bound to an instance of the XLoRALayer class). + method must be created (bound to an instance of the XLoraLayer class). """ xlora_scalings = self.get_maybe_topk_scalings() @@ -141,7 +141,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: return result -class XLoRAConv2dLayer(XLoRALayer): +class XLoraConv2dLayer(XLoraLayer): def __init__( self, model: nn.Module, @@ -155,7 +155,7 @@ def __init__( def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound - method must be created (bound to an instance of the XLoRALayer class). + method must be created (bound to an instance of the XLoraLayer class). """ previous_dtype = x.dtype diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 621bd95f24..04e6ba243b 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -26,7 +26,7 @@ from .. import lora from .classifier import XLoraClassifier from .config import XLoraConfig -from .layer import XLoRAConv2dLayer, XLoRAEmbeddingLayer, XLoRALinearLayer +from .layer import XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer @staticmethod @@ -52,7 +52,7 @@ def convert_layers_to_xlora( for module in base.modules(): if isinstance(module, lora.Linear): device = module.lora_A[next(iter(module.lora_A))].weight.device - new_layer = XLoRALinearLayer( + new_layer = XLoraLinearLayer( model=xloramodel, target=module, target_forward=module.forward, @@ -64,7 +64,7 @@ def convert_layers_to_xlora( total_swapped += 1 elif isinstance(module, lora.Embedding): device = module.lora_A[next(iter(module.lora_embedding_A))].weight.device - new_layer = XLoRAEmbeddingLayer( + new_layer = XLoraEmbeddingLayer( model=xloramodel, target=module, target_forward=module.forward, @@ -76,7 +76,7 @@ def convert_layers_to_xlora( total_swapped += 1 elif isinstance(module, lora.Conv2d): device = module.lora_A[next(iter(module.lora_A))].weight.device - new_layer = XLoRAConv2dLayer( + new_layer = XLoraConv2dLayer( model=xloramodel, target=module, target_forward=module.forward, @@ -147,43 +147,34 @@ def __init__( else: conf = config lora_model = LoraModel(model, config.copy(), adapter_name) - self.xlora_config = conf - del self.xlora_config.target_modules + self.peft_config = conf + del self.peft_config.target_modules self.lora_model = lora_model - def post_init_lora( - self, - model: nn.Module, - peft_config: XLoraConfig, - model_peft: nn.Module, - ) -> None: - # model_peft: PeftModel - self.xlora_config = peft_config - if hasattr(model.config, "use_cache") and model.config.use_cache: raise ValueError("`use_cache` must be False") - adapters_items = peft_config.adapters.items() - if hasattr(self.xlora_config, "_subfolders"): - adapters_items = zip(peft_config.adapters.items(), self.xlora_config._subfolders) + adapters_items = self.peft_config.adapters.items() + if hasattr(self.peft_config, "_subfolders"): + adapters_items = zip(self.peft_config.adapters.items(), 
self.peft_config._subfolders) else: - adapters_items = peft_config.adapters.items() + adapters_items = self.peft_config.adapters.items() - if hasattr(self.xlora_config, "_subfolders"): + if hasattr(self.peft_config, "_subfolders"): for (adapter_name, model_id), subfolder in adapters_items: self.lora_model.load_adapter(model_id, adapter_name, subfolder=subfolder) else: for adapter_name, model_id in adapters_items: self.lora_model.load_adapter(model_id, adapter_name) - self.lora_model.set_adapter(list(peft_config.adapters.keys())) + self.lora_model.set_adapter(list(self.peft_config.adapters.keys())) self._maybe_freeze_all_adapters() total_swapped, device, all_layers = convert_layers_to_xlora( - model_peft, + model, self, - peft_config, + self.peft_config, ) # Now replace the old forward function with a new one that implements the X-LoRA architecture @@ -198,7 +189,9 @@ def new_model_forward(*args, **kwargs) -> None: layer.scalings = dummy_scalings with torch.no_grad(): - with model_peft.disable_adapter(): + self.lora_model.disable_adapters() + + try: scaling_pass_kwargs = kwargs.copy() scaling_pass_kwargs["output_hidden_states"] = True scaling_pass_kwargs["return_dict"] = True @@ -208,6 +201,8 @@ def new_model_forward(*args, **kwargs) -> None: # Clean everything up for layer in all_layers: layer.scalings = None + finally: + self.lora_model.enable_adapters() xlora_scalings = self.internal_xlora_classifier(result=base_output, *args, **kwargs) @@ -226,8 +221,8 @@ def new_model_forward(*args, **kwargs) -> None: self.lora_model.model.forward = new_model_forward - n_classes = len(peft_config.adapters) - xlora_classifier = XLoraClassifier(model_peft, peft_config, n_classes, total_swapped, device) + n_classes = len(self.peft_config.adapters) + xlora_classifier = XLoraClassifier(model, self.peft_config, n_classes, total_swapped, device) # Setup the model internal state self.internal_xlora_classifier = xlora_classifier @@ -235,7 +230,7 @@ def new_model_forward(*args, **kwargs) -> None: def _maybe_freeze_all_adapters(self): self.eval() - if not self.xlora_config.use_trainable_adapters: + if not self.peft_config.use_trainable_adapters: for name, param in self.named_parameters(): if "lora_" in name: param.requires_grad = False @@ -263,7 +258,7 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: if not isinstance(self.lora_model.peft_config[name], XLoraConfig): active_adapters.append(name) self.lora_model.active_adapter = active_adapters - if self.xlora_config.use_trainable_adapters: + if self.peft_config.use_trainable_adapters: super()._mark_only_adapters_as_trainable(model) self.lora_model.active_adapter = copy @@ -297,13 +292,13 @@ def _save_pretrained_hook( is_main_process: bool = True, **kwargs: Any, ) -> None: - conf = self.xlora_config.__dict__.copy() + conf = self.peft_config.__dict__.copy() # So that the adapters are unloadable and the user is forced to set them for from_pretrained conf["adapters"] = None if hasattr(conf, "_subfolders"): del conf["_subfolders"] # It may have been added in from_pretrained - with open(os.path.join(save_directory, "xlora_config.json"), "w") as f: + with open(os.path.join(save_directory, "peft_config.json"), "w") as f: json.dump(conf, f) if safe_serialization: @@ -398,10 +393,10 @@ def set_use_trainable_adapters(self, use_trainable_adapters: bool): if "lora_" in name: param.requires_grad = use_trainable_adapters - self.xlora_config.use_trainable_adapters = use_trainable_adapters + self.peft_config.use_trainable_adapters = use_trainable_adapters 
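[Editor's note, not part of the patch series: after this commit the X-LoRA forward path is a two-pass scheme. The condensed sketch below is illustrative only; `all_xlora_layers` and `base_forward` stand in for the `all_layers` list and the saved `old_model_forward` closure from the patch, and error handling and dtype details are omitted.]

import torch


def xlora_two_pass_forward(self, *args, **kwargs):
    # Pass 1: run the base model with adapters disabled and per-layer "dummy" scalings,
    # solely to obtain hidden states for the scalings classifier.
    dummy = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs)
    for layer in self.all_xlora_layers:
        layer.scalings = dummy
    with torch.no_grad():
        self.lora_model.disable_adapters()
        try:
            scaling_kwargs = {**kwargs, "output_hidden_states": True, "return_dict": True}
            base_output = self.base_forward(*args, **scaling_kwargs)
        finally:
            self.lora_model.enable_adapters()
            for layer in self.all_xlora_layers:
                layer.scalings = None

    # The classifier maps the last hidden state to per-token, per-layer, per-adapter scalings.
    xlora_scalings = self.internal_xlora_classifier(result=base_output, *args, **kwargs)

    # Pass 2: the real forward pass; every X-LoRA layer mixes its LoRA adapters according
    # to the predicted scalings.
    for layer in self.all_xlora_layers:
        layer.scalings = xlora_scalings
    try:
        return self.base_forward(*args, **kwargs)
    finally:
        for layer in self.all_xlora_layers:
            layer.scalings = None

[End of editor's note.]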
def get_use_trainable_adapters(self) -> bool: """ Get the trainable or not trainable state of the adapters. """ - return self.xlora_config.use_trainable_adapters + return self.peft_config.use_trainable_adapters From c9339a684e5f18210e81681e73f0dda31fc46c35 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 22 Apr 2024 11:18:19 -0400 Subject: [PATCH 139/182] Fix --- src/peft/tuners/xlora/layer.py | 30 +++++++++++++++--------------- src/peft/tuners/xlora/model.py | 34 ++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index b65b2de134..ad90e96083 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -37,24 +37,24 @@ def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: return x * scalings def get_maybe_topk_scalings(self) -> torch.Tensor: - # xlora_scalings = [batch_size, seq_len, n_classes] - xlora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore + # XLora_scalings = [batch_size, seq_len, n_classes] + XLora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore if self.config.top_k_lora is not None: - _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=-1) + _, topk_indices = torch.topk(XLora_scalings, k=self.config.top_k_lora, dim=-1) # Mask the topk to True, the rest to False - mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) + mask = torch.zeros_like(XLora_scalings, dtype=torch.bool) mask.scatter_(-1, topk_indices, True) - xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) + XLora_scalings = XLora_scalings * mask.to(XLora_scalings.dtype) if self.config.enable_softmax_topk: - nonzero_mask = xlora_scalings != 0 - softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) - xlora_scalings[nonzero_mask] = softmax_res_nonzero + nonzero_mask = XLora_scalings != 0 + softmax_res_nonzero = torch.softmax(XLora_scalings[nonzero_mask], dim=-1) + XLora_scalings[nonzero_mask] = softmax_res_nonzero - return xlora_scalings + return XLora_scalings class XLoraLinearLayer(XLoraLayer): @@ -75,7 +75,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() + XLora_scalings = self.get_maybe_topk_scalings() # Ignore if disabled. We want to make sure this is always run. if self.target.merged: @@ -94,7 +94,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight result = result.to(previous_dtype) @@ -118,7 +118,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: method must be created (bound to an instance of the XLoraLayer class). """ - xlora_scalings = self.get_maybe_topk_scalings() + XLora_scalings = self.get_maybe_topk_scalings() # Ignore if disabled. We want to make sure this is always run. 
if self.target.merged: @@ -134,7 +134,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: embedding_A = self.target.lora_embedding_A[active_adapter].T embedding_B = self.target.lora_embedding_B[active_adapter].T scaling = self.target.scaling[active_adapter] - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) after_A = self.target._embed(x_mod, embedding_A) # type: ignore result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight @@ -159,7 +159,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() + XLora_scalings = self.get_maybe_topk_scalings() # Ignore if disabled. We want to make sure this is always run. if self.target.merged: @@ -177,7 +177,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight result = result.to(previous_dtype) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 04e6ba243b..33a25d2952 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -147,34 +147,36 @@ def __init__( else: conf = config lora_model = LoraModel(model, config.copy(), adapter_name) - self.peft_config = conf - del self.peft_config.target_modules + self.xlora_config = conf + del self.xlora_config.target_modules self.lora_model = lora_model + peft_config = conf + if hasattr(model.config, "use_cache") and model.config.use_cache: raise ValueError("`use_cache` must be False") - adapters_items = self.peft_config.adapters.items() - if hasattr(self.peft_config, "_subfolders"): - adapters_items = zip(self.peft_config.adapters.items(), self.peft_config._subfolders) + adapters_items = peft_config.adapters.items() + if hasattr(self.xlora_config, "_subfolders"): + adapters_items = zip(peft_config.adapters.items(), self.xlora_config._subfolders) else: - adapters_items = self.peft_config.adapters.items() + adapters_items = peft_config.adapters.items() - if hasattr(self.peft_config, "_subfolders"): + if hasattr(self.xlora_config, "_subfolders"): for (adapter_name, model_id), subfolder in adapters_items: self.lora_model.load_adapter(model_id, adapter_name, subfolder=subfolder) else: for adapter_name, model_id in adapters_items: self.lora_model.load_adapter(model_id, adapter_name) - self.lora_model.set_adapter(list(self.peft_config.adapters.keys())) + self.lora_model.set_adapter(list(peft_config.adapters.keys())) self._maybe_freeze_all_adapters() total_swapped, device, all_layers = convert_layers_to_xlora( model, self, - self.peft_config, + peft_config, ) # Now replace the old forward function with a new one that implements the X-LoRA architecture @@ -221,8 +223,8 @@ def new_model_forward(*args, **kwargs) -> None: self.lora_model.model.forward = new_model_forward - n_classes = len(self.peft_config.adapters) - xlora_classifier = XLoraClassifier(model, self.peft_config, n_classes, total_swapped, device) + n_classes = len(peft_config.adapters) + xlora_classifier = XLoraClassifier(model, peft_config, n_classes, total_swapped, device) # Setup the model internal state 
self.internal_xlora_classifier = xlora_classifier @@ -230,7 +232,7 @@ def new_model_forward(*args, **kwargs) -> None: def _maybe_freeze_all_adapters(self): self.eval() - if not self.peft_config.use_trainable_adapters: + if not self.xlora_config.use_trainable_adapters: for name, param in self.named_parameters(): if "lora_" in name: param.requires_grad = False @@ -258,7 +260,7 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: if not isinstance(self.lora_model.peft_config[name], XLoraConfig): active_adapters.append(name) self.lora_model.active_adapter = active_adapters - if self.peft_config.use_trainable_adapters: + if self.xlora_config.use_trainable_adapters: super()._mark_only_adapters_as_trainable(model) self.lora_model.active_adapter = copy @@ -292,7 +294,7 @@ def _save_pretrained_hook( is_main_process: bool = True, **kwargs: Any, ) -> None: - conf = self.peft_config.__dict__.copy() + conf = self.xlora_config.__dict__.copy() # So that the adapters are unloadable and the user is forced to set them for from_pretrained conf["adapters"] = None @@ -393,10 +395,10 @@ def set_use_trainable_adapters(self, use_trainable_adapters: bool): if "lora_" in name: param.requires_grad = use_trainable_adapters - self.peft_config.use_trainable_adapters = use_trainable_adapters + self.xlora_config.use_trainable_adapters = use_trainable_adapters def get_use_trainable_adapters(self) -> bool: """ Get the trainable or not trainable state of the adapters. """ - return self.peft_config.use_trainable_adapters + return self.xlora_config.use_trainable_adapters From a6d92b0527647dc6650c73ee7365b18542367f78 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 22 Apr 2024 11:39:35 -0400 Subject: [PATCH 140/182] More tests --- src/peft/tuners/xlora/model.py | 16 ++++----- tests/test_xlora.py | 65 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 33a25d2952..0c456e20bb 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -29,14 +29,6 @@ from .layer import XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer -@staticmethod -def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: - # scalings_layer = [batch_size, seq_len, n_classes] - scalings = scalings_layer[:, :, adapter].unsqueeze(-1) - # scalings_layer = [batch_size, seq_len, 1] - return x * scalings - - def convert_layers_to_xlora( base: nn.Module, # PeftModel xloramodel: nn.Module, # XLoraModel @@ -332,6 +324,14 @@ def set_global_scaling_weight(self, weight: float): classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier.config.global_scaling_weight = weight + def set_scaling_pass_value(self, value: float | None): + """ + Set the scaling pass value, the value to set the scalings to during the scaling pass. If the value is None, + the scaling pass value will be 1/n where n is the number of adapters. + """ + classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore + classifier._set_override_scaling_pass_value(value) + def get_global_scaling_weight(self) -> float: """ Get the global LoRA weight. 
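(Illustration only, a minimal usage sketch of the knobs touched in this hunk, `set_global_scaling_weight` and the new `set_scaling_pass_value`; it assumes `model` is an X-LoRA `PeftModel` set up like the fixture used in the tests below, and is not part of the patch itself.)

    # Weight applied to every adapter's contribution during the real forward pass.
    model.set_global_scaling_weight(1.5)
    assert model.get_global_scaling_weight() == 1.5

    # Constant used to fill the scalings during the classifier's scaling (dummy) pass;
    # passing None restores the default of 1 / n_adapters.
    model.set_scaling_pass_value(0.0)
    model.set_scaling_pass_value(None)
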
diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 8c1368e584..09d2ec2d69 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -167,3 +167,68 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): ) text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) print(text[0]) + + def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + model.save_pretrained(save_directory=tmp_dir, safe_serialization=False) + + del model + + model = AutoModelForCausalLM.from_pretrained(self.model_id) + model.config.use_cache = False + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to(self.device) + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + def test_topk_lora(self, tokenizer, model): + model.set_topk_lora(2) + assert model.internal_xlora_classifier.config.top_k_lora == 2 + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + def test_softmax_topk(self, tokenizer, model): + # Just reach in to set the config + model.internal_xlora_classifier.config.top_k_lora = 2 + model.internal_xlora_classifier.config.enable_softmax = False + model.internal_xlora_classifier.config.enable_softmax_topk = True + + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) + print(text[0]) + + def test_set_override_scaling_pass_value(self, model): + # Defaults to 0 + assert model.internal_xlora_classifier.override_scaling_pass_value == 0.0 + + # Set it to 2 and make sure it actually is + model.set_scaling_pass_value(2) + assert model.internal_xlora_classifier.override_scaling_pass_value == 2 + assert model.internal_xlora_classifier.config.scaling_pass_value == 2 + + # Set it to 2 and make sure it is 1/a + model.set_scaling_pass_value(None) + assert model.internal_xlora_classifier.override_scaling_pass_value == 1 / self.num_loras + assert model.internal_xlora_classifier.config.scaling_pass_value == 1 / self.num_loras From 261b336f99fd50f27f41626c3e1ec7182ad044c8 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 22 Apr 2024 11:53:17 -0400 Subject: [PATCH 141/182] Remove custom load and save code --- src/peft/peft_model.py | 10 +++------- src/peft/tuners/xlora/model.py | 31 +------------------------------ src/peft/utils/save_and_load.py | 4 +++- 3 files changed, 7 insertions(+), 38 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 728d7782a5..a3575943e0 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -57,8 +57,6 @@ 
XLoraModel, ) from .tuners.tuners_utils import BaseTunerLayer -from .tuners.xlora.classifier import XLoraClassifier -from .tuners.xlora.util import _load_classifier_weights as xlora_load_classifier_weights from .utils import ( SAFETENSORS_WEIGHTS_NAME, TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING, @@ -294,9 +292,6 @@ def save_pretrained( peft_config.save_pretrained(output_dir, auto_mapping_dict=auto_mapping_dict) peft_config.inference_mode = inference_mode - if hasattr(self.base_model, "_save_pretrained_hook"): - self.base_model._save_pretrained_hook(save_directory, safe_serialization, is_main_process) - @classmethod def from_pretrained( cls, @@ -422,7 +417,7 @@ def from_pretrained( else: model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name) - if isinstance(model.base_model, XLoraModel): + """if isinstance(model.base_model, XLoraModel): if not isinstance(config, XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") @@ -431,7 +426,8 @@ def from_pretrained( classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: - model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) + """ + model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs) return model diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 0c456e20bb..8ef2f8fcbf 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import os -from typing import Any, List, Optional, Union +from typing import List, Optional, Union import torch import torch.nn as nn -from safetensors.torch import save_model # type: ignore from peft.tuners.lora.model import LoraModel from peft.tuners.tuners_utils import BaseTuner @@ -279,32 +276,6 @@ def _check_target_module_exists(lora_config, key): # Does nothing because XLoraModel has no target modules return False - def _save_pretrained_hook( - self, - save_directory: str, - safe_serialization: bool = True, - is_main_process: bool = True, - **kwargs: Any, - ) -> None: - conf = self.xlora_config.__dict__.copy() - - # So that the adapters are unloadable and the user is forced to set them for from_pretrained - conf["adapters"] = None - if hasattr(conf, "_subfolders"): - del conf["_subfolders"] # It may have been added in from_pretrained - with open(os.path.join(save_directory, "peft_config.json"), "w") as f: - json.dump(conf, f) - - if safe_serialization: - # https://github.com/huggingface/peft/blob/main/src/peft/peft_model.py#L223 - if is_main_process and safe_serialization: - save_model( - self.internal_xlora_classifier, os.path.join(save_directory, "xlora_classifier.safetensors") - ) - elif is_main_process: - state_dict = self.internal_xlora_classifier.state_dict() - torch.save(state_dict, os.path.join(save_directory, "xlora_classifier.pt")) - def forward(self, *args, **kwargs): return self.lora_model.model(*args, **kwargs) diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index 77c0ad4a2e..0212bb8a9e 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py @@ -160,7 +160,7 @@ def get_peft_model_state_dict( to_return["base_model.vera_B." + adapter_name] = state_dict["base_model.vera_B." 
+ adapter_name] elif config.peft_type == PeftType.XLORA: - to_return = {} + to_return = {k: state_dict[k] for k in state_dict if "internal_xlora_classifier" in k} else: raise ValueError(f"Unknown PEFT type passed: {config.peft_type}") @@ -341,6 +341,8 @@ def set_peft_model_state_dict( ) elif config.is_prompt_learning or config.peft_type == PeftType.ADAPTION_PROMPT: peft_model_state_dict = state_dict + elif config.peft_type == PeftType.XLORA: + peft_model_state_dict = state_dict else: raise NotImplementedError From 3c07d182a9e3ec8c61b46baeb78c16f10720f354 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:41:32 -0400 Subject: [PATCH 142/182] Improve tests --- src/peft/peft_model.py | 5 ++--- src/peft/tuners/xlora/model.py | 8 ++++---- tests/test_xlora.py | 37 +++++++++++++++------------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 49d2959ade..70ac8c3c12 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -481,15 +481,14 @@ def from_pretrained( device = device = kwargs.get("device") if kwargs.get("device") is not None else infer_device() - classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore - classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore + classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore + classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore else: """ model.load_adapter( model_id, adapter_name, is_trainable=is_trainable, autocast_adapter_dtype=autocast_adapter_dtype, **kwargs ) - return model def _setup_prompt_encoder(self, adapter_name: str): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 8ef2f8fcbf..1d336974f8 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -81,8 +81,8 @@ def convert_layers_to_xlora( class XLoraModel(BaseTuner): """ - Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. Currently, - this X-LoRA implementation only works with models with a transformer architecture. + Creates an X-LoRA (Mixture of LoRA experts), model from a pretrained transformers model. Currently, this X-LoRA + implementation only works with models with a transformer architecture. The method is described in detail in https://arxiv.org/abs/2402.07148. @@ -297,8 +297,8 @@ def set_global_scaling_weight(self, weight: float): def set_scaling_pass_value(self, value: float | None): """ - Set the scaling pass value, the value to set the scalings to during the scaling pass. If the value is None, - the scaling pass value will be 1/n where n is the number of adapters. + Set the scaling pass value, the value to set the scalings to during the scaling pass. If the value is None, the + scaling pass value will be 1/n where n is the number of adapters. """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore classifier._set_override_scaling_pass_value(value) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 09d2ec2d69..5e881c8cb2 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -74,9 +74,7 @@ def test_functional(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - # No check on text because of sampling, untrained adapters etc. 
- print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() def test_scalings_logging_methods(self, tokenizer, model): model.enable_scalings_logging() @@ -86,8 +84,7 @@ def test_scalings_logging_methods(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() _ = model.get_latest_scalings() # 32 is the numeber of max scalings. 3 is the number of prompt tokens. @@ -100,8 +97,7 @@ def test_scalings_logging_methods(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() assert 32 >= len(model.get_scalings_log()) > 0 @@ -131,8 +127,7 @@ def test_misc_methods(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() model.set_use_trainable_adapters(True) assert model.xlora_config.use_trainable_adapters @@ -149,8 +144,8 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + before_logits = outputs[: inputs.shape[1] :] + assert torch.isfinite(before_logits).all() model.save_pretrained(save_directory=tmp_dir) @@ -165,8 +160,9 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + after_logits = outputs + assert torch.isfinite(after_logits).all() + assert torch.equal(after_logits, before_logits) def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") @@ -174,8 +170,8 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + before_logits = outputs[: inputs.shape[1] :] + assert torch.isfinite(before_logits).all() model.save_pretrained(save_directory=tmp_dir, safe_serialization=False) @@ -190,8 +186,9 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + after_logits = outputs + assert torch.isfinite(after_logits).all() + assert torch.equal(after_logits, before_logits) def test_topk_lora(self, tokenizer, model): model.set_topk_lora(2) @@ -202,8 +199,7 @@ def test_topk_lora(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() def test_softmax_topk(self, tokenizer, model): # Just reach in to set the config @@ -216,8 
+212,7 @@ def test_softmax_topk(self, tokenizer, model): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - text = tokenizer.batch_decode(outputs[: inputs.shape[1] :].detach().cpu().numpy(), skip_special_tokens=True) - print(text[0]) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() def test_set_override_scaling_pass_value(self, model): # Defaults to 0 From 44a3bb93076393017d37d92e1f7e914fd33759c0 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:44:09 -0400 Subject: [PATCH 143/182] Remove InhibitorFlagPayload --- src/peft/tuners/xlora/classifier.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index c2c177bb5c..ded9b4ee48 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -13,7 +13,6 @@ # limitations under the License. import builtins -from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Union import torch @@ -38,13 +37,6 @@ def forward(self, logits): return self.softmax(scaled_logits) -@dataclass -class InhibitorFlagPayload: - batch_size: int - seq_len: int - override_scaling_pass_value: Number - - class XLoraClassifier(nn.Module): """ A classifier to select LoRA layers for XLora. @@ -136,14 +128,9 @@ def make_dummy_scalings( seq_len = inputs_embeds.shape[1] device = input_ids.device - payload = InhibitorFlagPayload( - batch_size=batch_size, - seq_len=seq_len, - override_scaling_pass_value=self.override_scaling_pass_value, - ) return torch.full( # type: ignore - (payload.batch_size, payload.seq_len, self.n_layers, self.n_classes), - payload.override_scaling_pass_value, + (batch_size, seq_len, self.n_layers, self.n_classes), + self.override_scaling_pass_value, ).to(device) def forward( From b9c80efc767b47d8aeed3096e45806159966ecd3 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:46:22 -0400 Subject: [PATCH 144/182] Improve the docstrings --- src/peft/tuners/xlora/classifier.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index ded9b4ee48..31199e1df1 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -50,6 +50,10 @@ def __init__( n_layers: int, device: torch.device, ): + """ + Construct an X-LoRA classifier from a model, config and some metadata. Note that n_layers is the number of LoRA + adapter layers, not the number of model layers. + """ super().__init__() self.n_classes = n_classes @@ -112,7 +116,9 @@ def make_dummy_scalings( **kwargs, ) -> torch.Tensor: """ - Make some dummy scalings + Make some dummy scalings for the scalings pass (the one to get the logits for the X-LoRA classifier). These are + of shape (batch_size, seq_len, n_layers, n_classes) and filled with the override scalings pass value. Note that + n_layers is the number of LoRA adapter layers, not the number of model layers. 
""" if input_ids is not None: batch_size = input_ids.shape[0] From 72f96102ed6bbab52281b8525da979c13391ce7f Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:49:53 -0400 Subject: [PATCH 145/182] Fix the docstrings and clean up a bit --- src/peft/tuners/xlora/classifier.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index 31199e1df1..edae6c4290 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -123,16 +123,11 @@ def make_dummy_scalings( if input_ids is not None: batch_size = input_ids.shape[0] device = input_ids.device - else: - batch_size = inputs_embeds.shape[0] - device = input_ids.device - - if input_ids is not None: seq_len = input_ids.shape[1] - device = input_ids.device else: + batch_size = inputs_embeds.shape[0] + device = inputs_embeds.device seq_len = inputs_embeds.shape[1] - device = input_ids.device return torch.full( # type: ignore (batch_size, seq_len, self.n_layers, self.n_classes), @@ -148,16 +143,13 @@ def forward( **kwargs, ) -> torch.Tensor: """ - Using the hidden states of the model, predict `n_classes` LoRA alpha values. Sets the scalings. + Using the hidden states of the model, predict `n_classes` LoRA alpha values. Returns the scalings. """ if input_ids is not None: batch_size = input_ids.shape[0] - else: - batch_size = inputs_embeds.shape[0] - - if input_ids is not None: seq_len = input_ids.shape[1] else: + batch_size = inputs_embeds.shape[0] seq_len = inputs_embeds.shape[1] hidden_states = result.hidden_states # type: ignore From f4b2df9884ef8d33c3db49118baa09c3ad545280 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:53:06 -0400 Subject: [PATCH 146/182] Remove some redundant config options --- src/peft/tuners/xlora/classifier.py | 32 ++++++++++++++--------------- src/peft/tuners/xlora/config.py | 9 +------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index edae6c4290..e2faefb4e0 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -66,46 +66,46 @@ def __init__( self.scalings_logging = False dtype = next(model.parameters()).dtype - bias_flag = config.use_bias + add_dropout = config.xlora_dropout_p > 0.0 layers = [] if self.config.xlora_depth == 1: if config.layerwise_scalings: # bias=False if we have just one layer - last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=True).to(device).to(dtype) else: - last = nn.Linear(config.hidden_size, n_classes, bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.hidden_size, n_classes, bias=True).to(device).to(dtype) elif self.config.xlora_depth == 2: - layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=True).to(device).to(dtype)) - if config.enable_relu_and_dropout: - layers.append(nn.ReLU()) + layers.append(nn.ReLU()) + if add_dropout: layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=True).to(device).to(dtype) else: - last = nn.Linear(config.xlora_size, n_classes, 
bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes, bias=True).to(device).to(dtype) else: if self.config.xlora_depth <= 0: raise ValueError("X-LoRA depth must be strictly positive.") - layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) + layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=True).to(device).to(dtype)) - if config.enable_relu_and_dropout: - layers.append(nn.ReLU()) + layers.append(nn.ReLU()) + if add_dropout: layers.append(nn.Dropout(p=config.xlora_dropout_p)) for _ in range(config.xlora_depth - 2): - layers.append(nn.Linear(config.xlora_size, config.xlora_size, bias=bias_flag).to(device).to(dtype)) + layers.append(nn.Linear(config.xlora_size, config.xlora_size, bias=True).to(device).to(dtype)) - if config.enable_relu_and_dropout: - layers.append(nn.ReLU()) + layers.append(nn.ReLU()) + if add_dropout: layers.append(nn.Dropout(p=config.xlora_dropout_p)) if config.layerwise_scalings: - last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=True).to(device).to(dtype) else: - last = nn.Linear(config.xlora_size, n_classes, bias=bias_flag).to(device).to(dtype) + last = nn.Linear(config.xlora_size, n_classes, bias=True).to(device).to(dtype) self.layers = nn.Sequential(*layers, last) def make_dummy_scalings( diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index aa467a8046..0f96577118 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -49,13 +49,8 @@ class XLoraConfig(PeftConfig): Depth of the X-LoRA classifier. xlora_size (`int`, *optional*, defaults to 2048): Hidden size of the X-LoRA classifier, irrelevant if `xlora_depth=1`. - enable_relu_and_dropout (`bool`, *optional*, defaults to `True`): - Enable ReLU activation and Dropout application of the X-LoRA classifier. - use_bias (`bool`, *optional*, defaults to `True`): - Enable bias in X-LoRA classifier. xlora_dropout_p (`float`, *optional*, defaults to 0.2): - Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1` or - `enable_relu_and_dropout=False`. + Dropout probability of the X-LoRA classifier, irrelevant if `xlora_depth=1`. use_trainable_adapters (`bool`, *optional*, defaults to False): Make the adapters trainable. 
scaling_pass_value (`float`, *optional*, defaults to 0): @@ -71,8 +66,6 @@ class XLoraConfig(PeftConfig): layerwise_scalings: bool = False xlora_depth: int = 1 xlora_size: int = 2048 - enable_relu_and_dropout: bool = True - use_bias: bool = True xlora_dropout_p: float = 0.2 use_trainable_adapters: bool = False softmax_temperature: float = 1.0 From a8fbf85127d77cbd00189b731afd54753ca0c799 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:54:14 -0400 Subject: [PATCH 147/182] Remove redundant case --- src/peft/tuners/xlora/classifier.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index e2faefb4e0..b502f6318e 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -74,17 +74,6 @@ def __init__( last = nn.Linear(config.hidden_size, n_classes * n_layers, bias=True).to(device).to(dtype) else: last = nn.Linear(config.hidden_size, n_classes, bias=True).to(device).to(dtype) - elif self.config.xlora_depth == 2: - layers.append(nn.Linear(config.hidden_size, config.xlora_size, bias=True).to(device).to(dtype)) - - layers.append(nn.ReLU()) - if add_dropout: - layers.append(nn.Dropout(p=config.xlora_dropout_p)) - - if config.layerwise_scalings: - last = nn.Linear(config.xlora_size, n_classes * n_layers, bias=True).to(device).to(dtype) - else: - last = nn.Linear(config.xlora_size, n_classes, bias=True).to(device).to(dtype) else: if self.config.xlora_depth <= 0: raise ValueError("X-LoRA depth must be strictly positive.") From 3fb0cd066950fda67b8d594d192574b30961717a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:55:59 -0400 Subject: [PATCH 148/182] Better docstrings --- src/peft/tuners/xlora/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 0f96577118..3677a2c43f 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -42,7 +42,8 @@ class XLoraConfig(PeftConfig): softmax_temperature (`float`, *optional*, defaults to 1.0): Softmax temperature, lower yields sharper predictions layerwise_scalings (`bool`, *optional*, defaults to `False`): - Generate scalings for each layer. + If True, generate scalings for each LoRA adapter (each layer). If this is False, then scalings will be + broadcasted, the same, to each layer. top_k_lora (`int`, *optional*, defaults to None): Sparsely select the top_k LoRA experts instead of the default dense method. xlora_depth (`int`, *optional*, defaults to 1): From 5e62d667a52500bab001fcb0f72718156d181341 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 05:57:55 -0400 Subject: [PATCH 149/182] Remove *et_use_trainable_adapters --- src/peft/tuners/xlora/model.py | 18 ------------------ tests/test_xlora.py | 7 ------- 2 files changed, 25 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 1d336974f8..8ac7f815b1 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -355,21 +355,3 @@ def get_bucketed_scalings_log(self) -> dict[int, tuple[list[int], list[torch.Ten """ classifier: XLoraClassifier = self.internal_xlora_classifier # type: ignore return classifier._get_bucketed_scalings() - - def set_use_trainable_adapters(self, use_trainable_adapters: bool): - """ - Set the adapters to trainable or not trainable. - - This is reflected in the config. 
- """ - for name, param in self.named_parameters(): - if "lora_" in name: - param.requires_grad = use_trainable_adapters - - self.xlora_config.use_trainable_adapters = use_trainable_adapters - - def get_use_trainable_adapters(self) -> bool: - """ - Get the trainable or not trainable state of the adapters. - """ - return self.xlora_config.use_trainable_adapters diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 5e881c8cb2..f27379155f 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -129,13 +129,6 @@ def test_misc_methods(self, tokenizer, model): ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() - model.set_use_trainable_adapters(True) - assert model.xlora_config.use_trainable_adapters - - model.set_use_trainable_adapters(False) - model.get_use_trainable_adapters() - assert not model.xlora_config.use_trainable_adapters - assert str(model) is not None def test_save_load_functional(self, tokenizer, model, tmp_dir): From 8faaa0f418425bab831b0a37c3d8c70c274e152b Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 21 May 2024 06:11:19 -0400 Subject: [PATCH 150/182] Remove method and clean up checks --- src/peft/tuners/xlora/layer.py | 52 ++++++++++++++++------------------ src/peft/tuners/xlora/model.py | 15 ---------- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index ad90e96083..633323c76c 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -37,24 +37,24 @@ def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: return x * scalings def get_maybe_topk_scalings(self) -> torch.Tensor: - # XLora_scalings = [batch_size, seq_len, n_classes] - XLora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore + # xlora_scalings = [batch_size, seq_len, n_classes] + xlora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore if self.config.top_k_lora is not None: - _, topk_indices = torch.topk(XLora_scalings, k=self.config.top_k_lora, dim=-1) + _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=-1) # Mask the topk to True, the rest to False - mask = torch.zeros_like(XLora_scalings, dtype=torch.bool) + mask = torch.zeros_like(xlora_scalings, dtype=torch.bool) mask.scatter_(-1, topk_indices, True) - XLora_scalings = XLora_scalings * mask.to(XLora_scalings.dtype) + xlora_scalings = xlora_scalings * mask.to(xlora_scalings.dtype) if self.config.enable_softmax_topk: - nonzero_mask = XLora_scalings != 0 - softmax_res_nonzero = torch.softmax(XLora_scalings[nonzero_mask], dim=-1) - XLora_scalings[nonzero_mask] = softmax_res_nonzero + nonzero_mask = xlora_scalings != 0 + softmax_res_nonzero = torch.softmax(xlora_scalings[nonzero_mask], dim=-1) + xlora_scalings[nonzero_mask] = softmax_res_nonzero - return XLora_scalings + return xlora_scalings class XLoraLinearLayer(XLoraLayer): @@ -75,14 +75,12 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ previous_dtype = x.dtype - XLora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings() - # Ignore if disabled. We want to make sure this is always run. - if self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) + result = self.target.base_layer(x, *args, **kwargs) + # Ignore if disabled. We want to make sure this is always run. 
+ if not self.target.merged: for adapter_n, active_adapter in enumerate(self.target.active_adapters): # TODO: implement X-LoRA with Lora+Dora layers if self.target.use_dora[active_adapter]: @@ -94,7 +92,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight result = result.to(previous_dtype) @@ -118,13 +116,12 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: method must be created (bound to an instance of the XLoraLayer class). """ - XLora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings() + + result = self.target.base_layer(x, *args, **kwargs) # Ignore if disabled. We want to make sure this is always run. - if self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) + if not self.target.merged: for adapter_n, active_adapter in enumerate(self.target.active_adapters): # TODO: implement X-LoRA with Lora+Dora layers if self.target.use_dora[active_adapter]: @@ -134,7 +131,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: embedding_A = self.target.lora_embedding_A[active_adapter].T embedding_B = self.target.lora_embedding_B[active_adapter].T scaling = self.target.scaling[active_adapter] - x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) after_A = self.target._embed(x_mod, embedding_A) # type: ignore result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight @@ -159,13 +156,12 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: """ previous_dtype = x.dtype - XLora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings() + + result = self.target.base_layer(x, *args, **kwargs) # Ignore if disabled. We want to make sure this is always run. 
- if self.target.merged: - result = self.target.base_layer(x, *args, **kwargs) - else: - result = self.target.base_layer(x, *args, **kwargs) + if not self.target.merged: for adapter_n, active_adapter in enumerate(self.target.active_adapters): # TODO: implement X-LoRA with Lora+Dora layers if self.target.use_dora[active_adapter]: @@ -177,7 +173,7 @@ def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, XLora_scalings, adapter_n) + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight result = result.to(previous_dtype) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 8ac7f815b1..ddc629ce3a 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -239,21 +239,6 @@ def __getattr__(self, name: str): except AttributeError: return getattr(self.lora_model, name) - def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: - # Handle case during init - if not hasattr(self, "lora_model"): - return - active_adapters = [] - copy = self.lora_model.active_adapters.copy() - for name in self.lora_model.active_adapters: - if not isinstance(self.lora_model.peft_config[name], XLoraConfig): - active_adapters.append(name) - self.lora_model.active_adapter = active_adapters - if self.xlora_config.use_trainable_adapters: - super()._mark_only_adapters_as_trainable(model) - - self.lora_model.active_adapter = copy - @staticmethod def _prepare_adapter_config(peft_config, _model_config): # Handle X-LoRA case From 6b542442e2d65c700bec44efa50c3352c9e0cb81 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 22 May 2024 08:31:22 -0400 Subject: [PATCH 151/182] Minor fixes --- src/peft/tuners/xlora/model.py | 21 +++++++++++++++++++++ tests/test_xlora.py | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index ddc629ce3a..4b5347d0de 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -244,6 +244,27 @@ def _prepare_adapter_config(peft_config, _model_config): # Handle X-LoRA case return peft_config + """ + Does nothing. X-LoRA needs adapters to be frozen. + """ + + def _mark_only_adapters_as_trainable(self) -> None: + ... + + """ + Does nothing. X-LoRA needs adapters to be frozen. + """ + + def enable_adapter_layers(self) -> None: + ... + + """ + Does nothing. X-LoRA needs adapters to be frozen. + """ + + def disable_adapter_layers(self) -> None: + ... 
+ def _create_and_replace( self, lora_config, diff --git a/tests/test_xlora.py b/tests/test_xlora.py index f27379155f..cbd6015c81 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -153,7 +153,7 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - after_logits = outputs + after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() assert torch.equal(after_logits, before_logits) @@ -179,7 +179,7 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): input_ids=inputs.to("cuda"), max_new_tokens=32, ) - after_logits = outputs + after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() assert torch.equal(after_logits, before_logits) From 0d7f9d362ee2b90c8d2e16aced0a88ee30c266ce Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 22 May 2024 08:41:57 -0400 Subject: [PATCH 152/182] Remove custom loading and saving code --- src/peft/peft_model.py | 10 ---- src/peft/tuners/xlora/util.py | 108 ---------------------------------- tests/test_xlora.py | 2 +- 3 files changed, 1 insertion(+), 119 deletions(-) delete mode 100644 src/peft/tuners/xlora/util.py diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 70ac8c3c12..83cd707908 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -475,16 +475,6 @@ def from_pretrained( model, config, adapter_name, autocast_adapter_dtype=autocast_adapter_dtype ) - """if isinstance(model.base_model, XLoraModel): - if not isinstance(config, XLoraConfig): - raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") - - device = device = kwargs.get("device") if kwargs.get("device") is not None else infer_device() - - classifier: XLoraClassifier = model.base_model.internal_xlora_classifier # type: ignore - classifier.load_state_dict(xlora_load_classifier_weights(model_id, device)) # type: ignore - else: - """ model.load_adapter( model_id, adapter_name, is_trainable=is_trainable, autocast_adapter_dtype=autocast_adapter_dtype, **kwargs ) diff --git a/src/peft/tuners/xlora/util.py b/src/peft/tuners/xlora/util.py deleted file mode 100644 index 25194610be..0000000000 --- a/src/peft/tuners/xlora/util.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2023-present the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Optional, Union - -import torch -from huggingface_hub import file_exists, hf_hub_download # type: ignore -from huggingface_hub.utils import EntryNotFoundError # type: ignore -from safetensors.torch import load_file as safe_load_file - -from peft.utils.other import ( - infer_device, -) - - -def _load_classifier_weights(model_id: str, device: Optional[str] = None, **hf_hub_download_kwargs) -> dict: - r""" - A helper method to load the classifier weights from the HuggingFace Hub or locally. This is essentially - `load_peft_weights`, but with the safetensors names changed. 
- - Args: - model_id (`str`): - The local path to the adapter weights or the name of the adapter to load from the HuggingFace Hub. - device (`str`): - The device to load the weights onto. - hf_hub_download_kwargs (`dict`): - Additional arguments to pass to the `hf_hub_download` method when loading from the HuggingFace Hub. - """ - path = ( - os.path.join(model_id, hf_hub_download_kwargs["subfolder"]) - if hf_hub_download_kwargs.get("subfolder", None) is not None - else model_id - ) - - SAFETENSORS_WEIGHTS_NAME = "xlora_classifier.safetensors" - WEIGHTS_NAME = "xlora_classifier.pt" - - if device is None: - device = infer_device() - - if os.path.exists(os.path.join(path, SAFETENSORS_WEIGHTS_NAME)): - filename = os.path.join(path, SAFETENSORS_WEIGHTS_NAME) - use_safetensors = True - elif os.path.exists(os.path.join(path, WEIGHTS_NAME)): - filename = os.path.join(path, WEIGHTS_NAME) - use_safetensors = False - else: - token = hf_hub_download_kwargs.get("token", None) - if token is None: - token = hf_hub_download_kwargs.get("use_auth_token", None) - - hub_filename = ( - os.path.join(hf_hub_download_kwargs["subfolder"], SAFETENSORS_WEIGHTS_NAME) - if hf_hub_download_kwargs.get("subfolder", None) is not None - else SAFETENSORS_WEIGHTS_NAME - ) - has_remote_safetensors_file = file_exists( - repo_id=model_id, - filename=hub_filename, - revision=hf_hub_download_kwargs.get("revision", None), - repo_type=hf_hub_download_kwargs.get("repo_type", None), - token=token, - ) - use_safetensors = has_remote_safetensors_file - - if has_remote_safetensors_file: - # Priority 1: load safetensors weights - filename = hf_hub_download( - model_id, - SAFETENSORS_WEIGHTS_NAME, - **hf_hub_download_kwargs, - ) - else: - try: - filename = hf_hub_download(model_id, WEIGHTS_NAME, **hf_hub_download_kwargs) - except EntryNotFoundError: - raise ValueError( - f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. " - f"Please check that the file {WEIGHTS_NAME} or {SAFETENSORS_WEIGHTS_NAME} is present at {model_id}." 
- ) - - if use_safetensors: - if hasattr(torch.backends, "mps") and (device == torch.device("mps")): - adapters_weights = safe_load_file(filename, device="cpu") - else: - adapters_weights = safe_load_file(filename, device=device) - else: - adapters_weights = torch.load(filename, map_location=torch.device(device)) - - return adapters_weights - - -def _get_file_path_dir(load_directory: Union[str, os.PathLike], name: str, dir: str) -> str: - if os.path.exists(os.path.join(load_directory, dir, name)): - return os.path.join(load_directory, dir, name) - return hf_hub_download(load_directory, filename=name, subfolder=dir) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index cbd6015c81..0cab2e4302 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -181,7 +181,7 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): ) after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() - assert torch.equal(after_logits, before_logits) + assert torch.equal(after_logits, before_logits), (after_logits, before_logits) def test_topk_lora(self, tokenizer, model): model.set_topk_lora(2) From 8c58e1c6f7cd4221da62af03edf757592df87455 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 23 May 2024 08:01:29 -0400 Subject: [PATCH 153/182] Update tests --- tests/test_xlora.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 0cab2e4302..4119cd16eb 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -131,7 +131,7 @@ def test_misc_methods(self, tokenizer, model): assert str(model) is not None - def test_save_load_functional(self, tokenizer, model, tmp_dir): + def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( input_ids=inputs.to("cuda"), @@ -140,13 +140,13 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): before_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(before_logits).all() - model.save_pretrained(save_directory=tmp_dir) + model.save_pretrained(save_directory=tmp_dir, safe_serialization=False) del model model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.device) + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to(self.device) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -155,9 +155,9 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): ) after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() - assert torch.equal(after_logits, before_logits) + assert torch.equal(after_logits, before_logits), (after_logits, before_logits) - def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): + def test_save_load_functional(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( input_ids=inputs.to("cuda"), @@ -166,13 +166,13 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): before_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(before_logits).all() - model.save_pretrained(save_directory=tmp_dir, safe_serialization=False) + model.save_pretrained(save_directory=tmp_dir) del model model = 
AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to(self.device) + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.device) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -181,7 +181,7 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): ) after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() - assert torch.equal(after_logits, before_logits), (after_logits, before_logits) + assert torch.equal(after_logits, before_logits) def test_topk_lora(self, tokenizer, model): model.set_topk_lora(2) From 430f9e4ad0b78eb2202091c920ebc813ebb8e4fc Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 23 May 2024 10:12:50 -0400 Subject: [PATCH 154/182] Remove monkey patching for scalings passing --- src/peft/tuners/xlora/layer.py | 25 +++++++++++++++++-------- src/peft/tuners/xlora/model.py | 31 +++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 633323c76c..6954719ae2 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -29,6 +29,10 @@ def __init__( self.layer_number = layer_number self.config = config + """ + Apply the scalings for the adapter. + """ + @staticmethod def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: int) -> torch.Tensor: # scalings_layer = [batch_size, seq_len, n_classes] @@ -36,9 +40,14 @@ def apply_scalings_to_x(x: torch.Tensor, scalings_layer: torch.Tensor, adapter: # scalings_layer = [batch_size, seq_len, 1] return x * scalings - def get_maybe_topk_scalings(self) -> torch.Tensor: + """ + Get the scalings for this layer, potentially applying topk and topk+softmax. This is called before + `apply_scalings_to_x` + """ + + def get_maybe_topk_scalings(self, scalings) -> torch.Tensor: # xlora_scalings = [batch_size, seq_len, n_classes] - xlora_scalings: Tensor = self.scalings[:, :, self.layer_number, :] # type: ignore + xlora_scalings: Tensor = scalings[:, :, self.layer_number, :] # type: ignore if self.config.top_k_lora is not None: _, topk_indices = torch.topk(xlora_scalings, k=self.config.top_k_lora, dim=-1) @@ -68,14 +77,14 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). """ previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) @@ -110,13 +119,13 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
""" - xlora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) @@ -149,14 +158,14 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). """ previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings() + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 4b5347d0de..79cd1642b9 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from functools import partial from typing import List, Optional, Union import torch import torch.nn as nn +from peft.tuners.lora.layer import LoraLayer from peft.tuners.lora.model import LoraModel from peft.tuners.tuners_utils import BaseTuner @@ -171,13 +173,22 @@ def __init__( # Now replace the old forward function with a new one that implements the X-LoRA architecture old_model_forward = self.lora_model.model.forward + def scalings_injection_hook(target, args, kwargs, scalings): + # pre-forward hook to inject the adapter_names argument when using mixed adapter batches inference + kwargs["scalings"] = scalings + return args, kwargs + def new_model_forward(*args, **kwargs) -> None: # =========================== Forward pass with "dummy" scalings ================== dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs) - for layer in all_layers: - layer.scalings = dummy_scalings + hook_handles = [] + for module in self.modules(): + if isinstance(module, LoraLayer): + pre_forward = partial(scalings_injection_hook, scalings=dummy_scalings) + handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) + hook_handles.append(handle) with torch.no_grad(): self.lora_model.disable_adapters() @@ -190,8 +201,8 @@ def new_model_forward(*args, **kwargs) -> None: base_output = old_model_forward(*args, **scaling_pass_kwargs) finally: # Clean everything up - for layer in all_layers: - layer.scalings = None + for handle in hook_handles: + handle.remove() finally: self.lora_model.enable_adapters() @@ -199,15 +210,19 @@ def new_model_forward(*args, **kwargs) -> None: # =========================== Real forward pass with calculated scalings ================== - for layer in all_layers: - layer.scalings = xlora_scalings + hook_handles = [] + for module in self.modules(): + if isinstance(module, LoraLayer): + pre_forward = partial(scalings_injection_hook, scalings=xlora_scalings) + handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) + hook_handles.append(handle) try: output = old_model_forward(*args, **kwargs) finally: # Clean everything up - for layer in all_layers: - layer.scalings = None + for handle in hook_handles: + handle.remove() return output self.lora_model.model.forward = new_model_forward From 53f0342b4c59e29da9330f906ae825016a7a6a1f Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 23 May 2024 11:07:09 -0400 Subject: [PATCH 
155/182] Add tests for disabling adapters

---
 src/peft/tuners/xlora/model.py | 21 +++++++++++++----
 tests/test_xlora.py | 42 ++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py
index 79cd1642b9..fbe9f4d177 100644
--- a/src/peft/tuners/xlora/model.py
+++ b/src/peft/tuners/xlora/model.py
@@ -178,7 +178,7 @@ def scalings_injection_hook(target, args, kwargs, scalings):
             kwargs["scalings"] = scalings
             return args, kwargs
 
-        def new_model_forward(*args, **kwargs) -> None:
+        def new_model_forward(*args, **kwargs):
             # =========================== Forward pass with "dummy" scalings ==================
             dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs)
 
@@ -190,6 +190,15 @@ def new_model_forward(*args, **kwargs):
                     handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True)
                     hook_handles.append(handle)
 
+            if self.disabled:
+                try:
+                    output = old_model_forward(*args, **kwargs)
+                finally:
+                    # Clean everything up
+                    for handle in hook_handles:
+                        handle.remove()
+                return output
+
             with torch.no_grad():
                 self.lora_model.disable_adapters()
 
@@ -233,6 +242,8 @@ def new_model_forward(*args, **kwargs) -> None:
         # Setup the model internal state
         self.internal_xlora_classifier = xlora_classifier
         self.internal_xlora_scalings = None  # type: ignore
+        # Controlled by enable_adapter_layers or disable_adapter_layers
+        self.disabled = False
 
     def _maybe_freeze_all_adapters(self):
         self.eval()
@@ -267,18 +278,18 @@ def _mark_only_adapters_as_trainable(self) -> None: ...
 
     """
-    Does nothing. X-LoRA needs adapters to be frozen.
+    This enables the X-LoRA adapter.
     """
 
     def enable_adapter_layers(self) -> None:
-        ...
+        self.disabled = False
 
     """
-    Does nothing. X-LoRA needs adapters to be frozen.
+    This disables the X-LoRA adapter.
    """
 
     def disable_adapter_layers(self) -> None:
-        ...
+ self.disabled = True def _create_and_replace( self, diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 4119cd16eb..897ec65b97 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -67,6 +67,23 @@ def model(self, saved_lora_adapters): model = get_peft_model(model, peft_config).to(self.device) return model + @pytest.fixture(scope="function") + def model_layerwise(self, saved_lora_adapters): + model = AutoModelForCausalLM.from_pretrained(self.model_id) + model.config.use_cache = False + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_adapters)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + layerwise_scalings=True, + ) + model = get_peft_model(model, peft_config).to(self.device) + return model + def test_functional(self, tokenizer, model): model.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") @@ -220,3 +237,28 @@ def test_set_override_scaling_pass_value(self, model): model.set_scaling_pass_value(None) assert model.internal_xlora_classifier.override_scaling_pass_value == 1 / self.num_loras assert model.internal_xlora_classifier.config.scaling_pass_value == 1 / self.num_loras + + def test_functional_layerwise(self, tokenizer, model_layerwise): + model_layerwise.enable_scalings_logging() + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = model_layerwise.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() + + def test_disable_adapter(self, tokenizer, model): + model.enable_scalings_logging() + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + with model.disable_adapter(): + outputs_disabled = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + outputs = model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all() + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() + assert not torch.equal(outputs, outputs_disabled) From ee19910b6b93b9c8973cd8dd7f03b9e596193fa6 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 23 May 2024 11:41:01 -0400 Subject: [PATCH 156/182] Add test for embedding model --- src/peft/mapping.py | 3 +++ tests/test_xlora.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/peft/mapping.py b/src/peft/mapping.py index 9e5bce642e..26f7f579f8 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -19,6 +19,8 @@ import torch +from peft.tuners.xlora.model import XLoraModel + from .config import PeftConfig from .mixed_model import PeftMixedModel from .peft_model import ( @@ -105,6 +107,7 @@ "POLY": PolyModel, "LN_TUNING": LNTuningModel, "VERA": VeraModel, + "XLORA": XLoraModel, } diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 897ec65b97..26d63dc75e 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -46,11 +46,42 @@ def saved_lora_adapters(self, tmp_dir): file_names.append(file_name) return file_names + @pytest.fixture(scope="class") + def saved_lora_embedding_adapters(self, tmp_dir): + file_names = [] + for i in range(1, self.num_loras + 1): + torch.manual_seed(i) + lora_config = LoraConfig( + task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_positions", "embed_tokens"] + ) + model = 
AutoModelForCausalLM.from_pretrained(self.model_id) + peft_model = get_peft_model(model, lora_config) + file_name = os.path.join(tmp_dir, f"checkpoint-{i}") + peft_model.save_pretrained(file_name) + file_names.append(file_name) + return file_names + @pytest.fixture(scope="class") def tokenizer(self): tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.device) return tokenizer + @pytest.fixture(scope="function") + def embedding_model(self, saved_lora_embedding_adapters): + model = AutoModelForCausalLM.from_pretrained(self.model_id) + model.config.use_cache = False + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_embedding_adapters)} + + peft_config = XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to(self.device) + return model + @pytest.fixture(scope="function") def model(self, saved_lora_adapters): model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -262,3 +293,11 @@ def test_disable_adapter(self, tokenizer, model): assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all() assert torch.isfinite(outputs[: inputs.shape[1] :]).all() assert not torch.equal(outputs, outputs_disabled) + + def test_functional_embedding(self, tokenizer, embedding_model): + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = embedding_model.generate( + input_ids=inputs.to("cuda"), + max_new_tokens=32, + ) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() From 1735deb7d1e02265b5712c7b21f03c7d2c21848e Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 29 May 2024 08:02:00 -0400 Subject: [PATCH 157/182] Use peft forward hook --- src/peft/tuners/xlora/model.py | 82 +++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index fbe9f4d177..106c3397a8 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from contextlib import contextmanager from functools import partial from typing import List, Optional, Union @@ -170,18 +171,47 @@ def __init__( peft_config, ) - # Now replace the old forward function with a new one that implements the X-LoRA architecture - old_model_forward = self.lora_model.model.forward + n_classes = len(peft_config.adapters) + xlora_classifier = XLoraClassifier(model, peft_config, n_classes, total_swapped, device) + + # Setup the model internal state + self.internal_xlora_classifier = xlora_classifier + self.internal_xlora_scalings = None # type: ignore + # Controlled by enable_adapter_layers or disable_adapter_layers + self.disabled = False + + def _maybe_freeze_all_adapters(self): + self.eval() + if not self.xlora_config.use_trainable_adapters: + for name, param in self.named_parameters(): + if "lora_" in name: + param.requires_grad = False + def generate(self, *args, **kwargs): + res = self.lora_model.generate(*args, **kwargs) # type: ignore + # This is necessary because we use PeftModel.disable_adapter() which reenables the adapters + self._maybe_freeze_all_adapters() + return res + + @contextmanager + def _enable_peft_forward_hooks(self, *generate_args, **generate_kwargs): def scalings_injection_hook(target, args, kwargs, scalings): # pre-forward hook to inject the adapter_names argument when using mixed adapter batches inference kwargs["scalings"] = scalings return args, kwargs - def new_model_forward(*args, **kwargs): + handles_to_remove = None + + def pre_forward(module, *args, **kwargs): + nonlocal handles_to_remove + # =========================== Forward pass with "dummy" scalings ================== - dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args, **kwargs) + args_real = args[0] + kwargs_real = args[1] + kwargs_real.update(kwargs) + + dummy_scalings = self.internal_xlora_classifier.make_dummy_scalings(*args_real, **kwargs_real) hook_handles = [] for module in self.modules(): @@ -192,7 +222,7 @@ def new_model_forward(*args, **kwargs): if self.disabled: try: - output = old_model_forward(*args, **kwargs) + output = self.lora_model.model.forward(*args_real, **kwargs_real) finally: # Clean everything up for handle in hook_handles: @@ -203,11 +233,11 @@ def new_model_forward(*args, **kwargs): self.lora_model.disable_adapters() try: - scaling_pass_kwargs = kwargs.copy() + scaling_pass_kwargs = kwargs_real.copy() scaling_pass_kwargs["output_hidden_states"] = True scaling_pass_kwargs["return_dict"] = True try: - base_output = old_model_forward(*args, **scaling_pass_kwargs) + base_output = self.lora_model.model.forward(*args_real, **scaling_pass_kwargs) finally: # Clean everything up for handle in hook_handles: @@ -215,7 +245,7 @@ def new_model_forward(*args, **kwargs): finally: self.lora_model.enable_adapters() - xlora_scalings = self.internal_xlora_classifier(result=base_output, *args, **kwargs) + xlora_scalings = self.internal_xlora_classifier(result=base_output, *args_real, **kwargs_real) # =========================== Real forward pass with calculated scalings ================== @@ -226,37 +256,17 @@ def new_model_forward(*args, **kwargs): handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) hook_handles.append(handle) - try: - output = old_model_forward(*args, **kwargs) - finally: - # Clean everything up - for handle in hook_handles: - handle.remove() - return output - - self.lora_model.model.forward = new_model_forward - - n_classes = len(peft_config.adapters) - xlora_classifier = XLoraClassifier(model, 
peft_config, n_classes, total_swapped, device) + handles_to_remove = hook_handles - # Setup the model internal state - self.internal_xlora_classifier = xlora_classifier - self.internal_xlora_scalings = None # type: ignore - # Controlled by enable_adapter_layers or disable_adapter_layers - self.disabled = False + forward_handle = self.lora_model.model.register_forward_pre_hook(pre_forward, with_kwargs=True) - def _maybe_freeze_all_adapters(self): - self.eval() - if not self.xlora_config.use_trainable_adapters: - for name, param in self.named_parameters(): - if "lora_" in name: - param.requires_grad = False + # Run the forward pass: first the scaling pass in the hook, and then with the base model + yield - def generate(self, *args, **kwargs): - res = self.lora_model.generate(*args, **kwargs) # type: ignore - # This is necessary because we use PeftModel.disable_adapter() which reenables the adapters - self._maybe_freeze_all_adapters() - return res + # TODO(EricLBuehler): If we get a forward exception, we may have multiple forward hooks. + for handle in handles_to_remove: + handle.remove() + forward_handle.remove() def __getattr__(self, name: str): """Forward missing attributes to the wrapped module.""" From d6612664faa202b4ac640a60fee1a567327d80b1 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 29 May 2024 08:10:57 -0400 Subject: [PATCH 158/182] Fix disable adapter test --- src/peft/tuners/xlora/layer.py | 45 ++++++++++++++++++++++++---------- src/peft/tuners/xlora/model.py | 21 ++++++---------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 6954719ae2..c8c04c7ba0 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -1,4 +1,4 @@ -from typing import Any, Callable +from typing import Any, Callable, Optional import torch import torch.nn as nn @@ -77,14 +77,15 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
""" previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings(scalings) + if scalings is not None: + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) @@ -101,8 +102,13 @@ def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Ten dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + if scalings is not None: + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + scaling_weight = self.config.global_scaling_weight + else: + x_mod = x + scaling_weight = 1 + result += lora_B(lora_A(dropout(x_mod))) * scaling * scaling_weight result = result.to(previous_dtype) return result @@ -119,13 +125,14 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). """ - xlora_scalings = self.get_maybe_topk_scalings(scalings) + if scalings is not None: + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) @@ -140,9 +147,14 @@ def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Ten embedding_A = self.target.lora_embedding_A[active_adapter].T embedding_B = self.target.lora_embedding_B[active_adapter].T scaling = self.target.scaling[active_adapter] - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + if scalings is not None: + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + scaling_weight = self.config.global_scaling_weight + else: + x_mod = x + scaling_weight = 1 after_A = self.target._embed(x_mod, embedding_A) # type: ignore - result += (after_A @ embedding_B) * scaling * self.config.global_scaling_weight + result += (after_A @ embedding_B) * scaling * scaling_weight return result @@ -158,14 +170,16 @@ def __init__( ) -> None: super().__init__(model, target, target_forward, layer_number, config) - def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Tensor: + def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kwargs: Any) -> Tensor: """ This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound method must be created (bound to an instance of the XLoraLayer class). 
""" previous_dtype = x.dtype - xlora_scalings = self.get_maybe_topk_scalings(scalings) + + if scalings is not None: + xlora_scalings = self.get_maybe_topk_scalings(scalings) result = self.target.base_layer(x, *args, **kwargs) @@ -182,8 +196,13 @@ def forward(self, x: Tensor, *args: Any, scalings: Tensor, **kwargs: Any) -> Ten dropout = self.target.lora_dropout[active_adapter] scaling = self.target.scaling[active_adapter] x = x.to(lora_A.weight.dtype) # type: ignore - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - result += lora_B(lora_A(dropout(x_mod))) * scaling * self.config.global_scaling_weight + if scalings is not None: + x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) + scaling_weight = self.config.global_scaling_weight + else: + x_mod = x + scaling_weight = 1 + result += lora_B(lora_A(dropout(x_mod))) * scaling * scaling_weight result = result.to(previous_dtype) return result diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 106c3397a8..3d4852e687 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -220,15 +220,6 @@ def pre_forward(module, *args, **kwargs): handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True) hook_handles.append(handle) - if self.disabled: - try: - output = self.lora_model.model.forward(*args_real, **kwargs_real) - finally: - # Clean everything up - for handle in hook_handles: - handle.remove() - return output - with torch.no_grad(): self.lora_model.disable_adapters() @@ -258,15 +249,17 @@ def pre_forward(module, *args, **kwargs): handles_to_remove = hook_handles - forward_handle = self.lora_model.model.register_forward_pre_hook(pre_forward, with_kwargs=True) + if not self.disabled: + forward_handle = self.lora_model.model.register_forward_pre_hook(pre_forward, with_kwargs=True) # Run the forward pass: first the scaling pass in the hook, and then with the base model yield - # TODO(EricLBuehler): If we get a forward exception, we may have multiple forward hooks. - for handle in handles_to_remove: - handle.remove() - forward_handle.remove() + if not self.disabled: + # TODO(EricLBuehler): If we get a forward exception, we may have multiple forward hooks. + for handle in handles_to_remove: + handle.remove() + forward_handle.remove() def __getattr__(self, name: str): """Forward missing attributes to the wrapped module.""" From 860183203b80d71e4b2d3f1e8a27d6cb98a63161 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 29 May 2024 08:23:37 -0400 Subject: [PATCH 159/182] Remove unnecessary var --- src/peft/tuners/xlora/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 3d4852e687..41b2515253 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -33,7 +33,7 @@ def convert_layers_to_xlora( base: nn.Module, # PeftModel xloramodel: nn.Module, # XLoraModel config: XLoraConfig, -) -> tuple[int, torch.device | None, list[nn.Module]]: +) -> tuple[int, torch.device | None]: """ Returns the number of swapped layers. 
""" @@ -79,7 +79,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - return (total_swapped, device, all_layers) + return (total_swapped, device) class XLoraModel(BaseTuner): @@ -165,7 +165,7 @@ def __init__( self._maybe_freeze_all_adapters() - total_swapped, device, all_layers = convert_layers_to_xlora( + total_swapped, device = convert_layers_to_xlora( model, self, peft_config, From a3b83c11341b53ba78ffdcccd5213818e6cb3de1 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 29 May 2024 11:52:13 -0400 Subject: [PATCH 160/182] Add futures annotations --- src/peft/tuners/xlora/classifier.py | 7 ++++--- src/peft/tuners/xlora/config.py | 5 +++-- src/peft/tuners/xlora/layer.py | 15 +++++++++++++++ src/peft/tuners/xlora/model.py | 5 +++-- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/peft/tuners/xlora/classifier.py b/src/peft/tuners/xlora/classifier.py index b502f6318e..dffebba08a 100644 --- a/src/peft/tuners/xlora/classifier.py +++ b/src/peft/tuners/xlora/classifier.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import builtins -from typing import Dict, List, Optional, Tuple, Union +from typing import Optional, Union import torch import torch.nn as nn @@ -169,13 +170,13 @@ def forward( return scalings - def _get_bucketed_scalings(self) -> Dict[int, Tuple[List[int], List[torch.Tensor]]]: + def _get_bucketed_scalings(self) -> dict[int, tuple[list[int], list[torch.Tensor]]]: """ Returns bucketed scalings, bucketed by seq_len. Each value consists of the positions (the first) and the associated tensors. The positions are paired with the associated tensors and give the position in the scaling log. Each scaling is a tensor of shape (batch_size, seq_len, n_layers, n_classes)). """ - seqlens_map: Dict[int, Tuple[List[int], List[torch.Tensor]]] = {} + seqlens_map: dict[int, tuple[list[int], list[torch.Tensor]]] = {} for i, scaling in enumerate(self.log_scalings): seq_len = scaling.shape[1] if seq_len not in seqlens_map: diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py index 3677a2c43f..06d19c1568 100644 --- a/src/peft/tuners/xlora/config.py +++ b/src/peft/tuners/xlora/config.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import warnings from dataclasses import dataclass -from typing import Dict, Optional +from typing import Optional from peft.config import PeftConfig from peft.utils.peft_types import PeftType @@ -61,7 +62,7 @@ class XLoraConfig(PeftConfig): """ hidden_size: int = None # type: ignore - adapters: Dict[str, str] = None # type: ignore + adapters: dict[str, str] = None # type: ignore enable_softmax: bool = True enable_softmax_topk: bool = False layerwise_scalings: bool = False diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index c8c04c7ba0..19e0577056 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -1,3 +1,18 @@ +# Copyright 2024-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + from typing import Any, Callable, Optional import torch diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 41b2515253..59dff91327 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations from contextlib import contextmanager from functools import partial -from typing import List, Optional, Union +from typing import Optional, Union import torch import torch.nn as nn @@ -352,7 +353,7 @@ def get_latest_scalings(self) -> Optional[torch.Tensor]: """ return self.internal_xlora_scalings - def get_scalings_log(self) -> List[torch.Tensor]: + def get_scalings_log(self) -> list[torch.Tensor]: """ Returns a shallow (only copying the list itself not the tensors) copy of the list containing the scalings log. Editing the list does not change the underlying log. The tensors are of shape (batch_size, seq_len, n_layers, From b0f3062c570f895c26d6e3458442af0b3455fc56 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 29 May 2024 12:36:34 -0400 Subject: [PATCH 161/182] Fix changes to lora prepare adapter config --- src/peft/tuners/lora/model.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 376188910e..152c7282e3 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -436,11 +436,12 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: - raise ValueError("Please specify `target_modules` in `peft_config`") - peft_config.target_modules = set( - TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]] - ) + if peft_config.target_modules is None: + if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: + raise ValueError("Please specify `target_modules` in `peft_config`") + peft_config.target_modules = set( + TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]] + ) return peft_config def _unload_and_optionally_merge( From 81e337d0b274b558932d96d0fcc091619f4b0fa3 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 30 May 2024 08:06:02 -0400 Subject: [PATCH 162/182] Fix tests --- src/peft/peft_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index d949765be4..1ad7f0504b 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -443,7 +443,7 @@ def from_pretrained( raise ValueError("Cannot set a prompt learning adapter to trainable when loading pretrained adapter.") else: config.inference_mode = not is_trainable - if isinstance(model.base_model, XLoraModel): + if isinstance(getattr(model, "base_model", None), XLoraModel): if not isinstance(config, 
XLoraConfig): raise TypeError(f"Expected 'XLoraConfig', got '{type(config)}' instead.") if "adapters" in kwargs: From 7ddd14b222df8e6340af1f1b3c6c88c3f08f8053 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 30 May 2024 11:49:44 -0400 Subject: [PATCH 163/182] Fix hacks --- src/peft/tuners/lora/model.py | 6 ++++-- src/peft/tuners/xlora/model.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 152c7282e3..de6e389113 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -155,7 +155,9 @@ def _check_new_adapter_config(self, config: LoraConfig) -> None: @staticmethod def _check_target_module_exists(lora_config, key): - return check_target_module_exists(lora_config, key) + if hasattr(lora_config, "target_modules"): + return check_target_module_exists(lora_config, key) + return False def _prepare_model(self, peft_config: LoraConfig, model: nn.Module): r""" @@ -436,7 +438,7 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: + if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: raise ValueError("Please specify `target_modules` in `peft_config`") peft_config.target_modules = set( diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 59dff91327..5adee309d0 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -141,7 +141,6 @@ def __init__( conf = config lora_model = LoraModel(model, config.copy(), adapter_name) self.xlora_config = conf - del self.xlora_config.target_modules self.lora_model = lora_model peft_config = conf From 7ea1a121d857fae7d87fdf007132e32581833ffc Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 30 May 2024 12:38:46 -0400 Subject: [PATCH 164/182] Use infer_device --- tests/test_xlora.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 26d63dc75e..9aaa373339 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -20,10 +20,11 @@ from peft import LoraConfig, PeftType, TaskType, XLoraConfig, get_peft_model from peft.peft_model import PeftModel +from peft.utils import infer_device class TestXlora: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + torch_device = infer_device() model_id = "facebook/opt-125m" num_loras = 4 @@ -119,7 +120,7 @@ def test_functional(self, tokenizer, model): model.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -129,7 +130,7 @@ def test_scalings_logging_methods(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -142,7 +143,7 @@ def test_scalings_logging_methods(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + 
input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -172,7 +173,7 @@ def test_misc_methods(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -182,7 +183,7 @@ def test_misc_methods(self, tokenizer, model): def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) before_logits = outputs[: inputs.shape[1] :] @@ -198,7 +199,7 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) after_logits = outputs[: inputs.shape[1] :] @@ -208,7 +209,7 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): def test_save_load_functional(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) before_logits = outputs[: inputs.shape[1] :] @@ -224,7 +225,7 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) after_logits = outputs[: inputs.shape[1] :] @@ -237,7 +238,7 @@ def test_topk_lora(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -250,7 +251,7 @@ def test_softmax_topk(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -273,7 +274,7 @@ def test_functional_layerwise(self, tokenizer, model_layerwise): model_layerwise.enable_scalings_logging() inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model_layerwise.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs[: inputs.shape[1] :]).all() @@ -283,11 +284,11 @@ def test_disable_adapter(self, tokenizer, model): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") with model.disable_adapter(): outputs_disabled = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) outputs = model.generate( - input_ids=inputs.to("cuda"), + input_ids=inputs.to(self.torch_device), max_new_tokens=32, ) assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all() @@ -297,7 +298,7 @@ def test_disable_adapter(self, tokenizer, 
model):
         model.enable_scalings_logging()
         inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt")
         with model.disable_adapter():
             outputs_disabled = model.generate(
-                input_ids=inputs.to("cuda"),
+                input_ids=inputs.to(self.torch_device),
                 max_new_tokens=32,
             )
         outputs = model.generate(
-            input_ids=inputs.to("cuda"),
+            input_ids=inputs.to(self.torch_device),
             max_new_tokens=32,
         )
         assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all()
         assert torch.isfinite(outputs[: inputs.shape[1] :]).all()
@@ -297,7 +298,7 @@ def test_disable_adapter(self, tokenizer, model):
 
     def test_functional_embedding(self, tokenizer, embedding_model):
         inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt")
         outputs = embedding_model.generate(
-            input_ids=inputs.to("cuda"),
+            input_ids=inputs.to(self.torch_device),
             max_new_tokens=32,
         )
         assert torch.isfinite(outputs[: inputs.shape[1] :]).all()

From 7058c15517995d32738893b50f9eede2862f598d Mon Sep 17 00:00:00 2001
From: EricLBuehler
Date: Fri, 31 May 2024 11:06:16 -0400
Subject: [PATCH 165/182] Fix the tests and provide fake default values

---
 src/peft/tuners/xlora/config.py | 10 ++++++++--
 tests/test_xlora.py | 14 ++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/peft/tuners/xlora/config.py b/src/peft/tuners/xlora/config.py
index 06d19c1568..0ca9c5e45d 100644
--- a/src/peft/tuners/xlora/config.py
+++ b/src/peft/tuners/xlora/config.py
@@ -79,9 +79,15 @@ def __post_init__(self):
         self.peft_type = PeftType.XLORA
 
         if self.hidden_size is None:
-            raise TypeError("Expected value for hidden size.")
+            warnings.warn(
+                "No value was provided for `hidden_size`. This will be set to 4096 by default, please ensure that this is correct."
+            )
+            self.hidden_size = 4096
         if self.adapters is None:
-            raise TypeError("Expected value for adapters.")
+            warnings.warn(
+                "No value was provided for `adapters`. This will be set to empty, please ensure that this is correct."
+            )
+            self.adapters = {}
 
         if self.enable_softmax_topk and self.top_k_lora is None:
             warnings.warn("`enable_softmax_topk` enabled `top_k_lora` is not set")
diff --git a/tests/test_xlora.py b/tests/test_xlora.py
index 9aaa373339..bff23ecf92 100644
--- a/tests/test_xlora.py
+++ b/tests/test_xlora.py
@@ -64,7 +64,7 @@ def saved_lora_embedding_adapters(self, tmp_dir):
 
     @pytest.fixture(scope="class")
     def tokenizer(self):
-        tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.device)
+        tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.torch_device)
         return tokenizer
 
     @pytest.fixture(scope="function")
@@ -80,7 +80,7 @@ def embedding_model(self, saved_lora_embedding_adapters):
             xlora_depth=8,
             adapters=adapters,
         )
-        model = get_peft_model(model, peft_config).to(self.device)
+        model = get_peft_model(model, peft_config).to(self.torch_device)
         return model
 
     @pytest.fixture(scope="function")
@@ -96,7 +96,7 @@ def model(self, saved_lora_adapters):
             xlora_depth=8,
             adapters=adapters,
         )
-        model = get_peft_model(model, peft_config).to(self.device)
+        model = get_peft_model(model, peft_config).to(self.torch_device)
         return model
 
     @pytest.fixture(scope="function")
@@ -113,7 +113,7 @@ def model_layerwise(self, saved_lora_adapters):
             adapters=adapters,
             layerwise_scalings=True,
         )
-        model = get_peft_model(model, peft_config).to(self.device)
+        model = get_peft_model(model, peft_config).to(self.torch_device)
         return model
 
     def test_functional(self, tokenizer, model):
@@ -195,7 +195,9 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir):
 
         model = AutoModelForCausalLM.from_pretrained(self.model_id)
         model.config.use_cache = False
-        model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to(self.device)
+        model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to(
+            self.torch_device
+        )
 
         inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt")
         outputs = model.generate(
@@ -221,7 +223,7 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir):
model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.device) + model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.torch_device) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( From 587c78a5b3f0edc12363d16b9e7466bfcea57a95 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Fri, 31 May 2024 11:36:43 -0400 Subject: [PATCH 166/182] Fix a mistake --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 5adee309d0..1c54a342c4 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -56,7 +56,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): - device = module.lora_A[next(iter(module.lora_embedding_A))].weight.device + device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].weight.device new_layer = XLoraEmbeddingLayer( model=xloramodel, target=module, From 6a809cbb859ec37c76f6d75c34ac71a3277b152a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sun, 23 Jun 2024 23:40:31 -0400 Subject: [PATCH 167/182] Fix .weight --- src/peft/tuners/lycoris_utils.py | 18 ++++++++++++------ src/peft/tuners/poly/router.py | 6 ++++-- src/peft/tuners/xlora/model.py | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index b70206497d..1d9f02a67a 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -79,7 +79,8 @@ def __init__(self, base_layer: nn.Module) -> None: @property @abstractmethod - def _available_adapters(self) -> set[str]: ... + def _available_adapters(self) -> set[str]: + ... def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the @@ -94,7 +95,8 @@ def _init_empty_weights(self, cls, *args, **kwargs) -> None: self.to_empty(device=final_device) @abstractmethod - def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): ... + def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): + ... # TODO: refactor LoRA to use the same approach @abstractmethod @@ -102,7 +104,8 @@ def _get_delta_activations(self, adapter_name: str, x: torch.Tensor, *args: Any, """Activations added on top of the base layer output (i.e. after the base layer forward pass)""" @abstractmethod - def get_delta_weight(self, adapter_name: str) -> torch.Tensor: ... + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: + ... def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None: """ @@ -140,7 +143,8 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N self.merged_adapters.append(active_adapter) @abstractmethod - def reset_adapter_parameters(self, adapter_name: str): ... + def reset_adapter_parameters(self, adapter_name: str): + ... def set_scale(self, adapter, scale): if adapter not in self._available_adapters: @@ -181,7 +185,8 @@ def unscale_layer(self, scale=None) -> None: self.scaling[active_adapter] /= scale @abstractmethod - def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
+ def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): + ... class LycorisTuner(BaseTuner): @@ -215,7 +220,8 @@ def _create_and_replace( target_name, parent, current_key, - ): ... + ): + ... @classmethod def _create_new_module(cls, config: LycorisConfig, adapter_name: str, target: nn.Module, **kwargs) -> LycorisLayer: diff --git a/src/peft/tuners/poly/router.py b/src/peft/tuners/poly/router.py index 3dda3e75e3..0249398a9f 100644 --- a/src/peft/tuners/poly/router.py +++ b/src/peft/tuners/poly/router.py @@ -37,10 +37,12 @@ def get_router(poly_config: PolyConfig) -> nn.Module: class Router(nn.Module, ABC): @abstractmethod - def reset(self): ... + def reset(self): + ... @abstractmethod - def forward(self, task_ids: torch.Tensor, input_ids: torch.Tensor): ... + def forward(self, task_ids: torch.Tensor, input_ids: torch.Tensor): + ... class PolyRouter(Router): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 1c54a342c4..1d62a5cec2 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -56,7 +56,7 @@ def convert_layers_to_xlora( module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 elif isinstance(module, lora.Embedding): - device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].weight.device + device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].device new_layer = XLoraEmbeddingLayer( model=xloramodel, target=module, From 164af9172a53d84db2b3709dc21f14146edee47d Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 25 Jun 2024 15:39:36 -0400 Subject: [PATCH 168/182] Format --- src/peft/tuners/lycoris_utils.py | 18 ++++++------------ src/peft/tuners/poly/router.py | 6 ++---- src/peft/tuners/xlora/model.py | 3 +-- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 1d9f02a67a..b70206497d 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -79,8 +79,7 @@ def __init__(self, base_layer: nn.Module) -> None: @property @abstractmethod - def _available_adapters(self) -> set[str]: - ... + def _available_adapters(self) -> set[str]: ... def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the @@ -95,8 +94,7 @@ def _init_empty_weights(self, cls, *args, **kwargs) -> None: self.to_empty(device=final_device) @abstractmethod - def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): - ... + def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): ... # TODO: refactor LoRA to use the same approach @abstractmethod @@ -104,8 +102,7 @@ def _get_delta_activations(self, adapter_name: str, x: torch.Tensor, *args: Any, """Activations added on top of the base layer output (i.e. after the base layer forward pass)""" @abstractmethod - def get_delta_weight(self, adapter_name: str) -> torch.Tensor: - ... + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: ... def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None: """ @@ -143,8 +140,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N self.merged_adapters.append(active_adapter) @abstractmethod - def reset_adapter_parameters(self, adapter_name: str): - ... + def reset_adapter_parameters(self, adapter_name: str): ... 
def set_scale(self, adapter, scale): if adapter not in self._available_adapters: @@ -185,8 +181,7 @@ def unscale_layer(self, scale=None) -> None: self.scaling[active_adapter] /= scale @abstractmethod - def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): - ... + def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... class LycorisTuner(BaseTuner): @@ -220,8 +215,7 @@ def _create_and_replace( target_name, parent, current_key, - ): - ... + ): ... @classmethod def _create_new_module(cls, config: LycorisConfig, adapter_name: str, target: nn.Module, **kwargs) -> LycorisLayer: diff --git a/src/peft/tuners/poly/router.py b/src/peft/tuners/poly/router.py index 0249398a9f..3dda3e75e3 100644 --- a/src/peft/tuners/poly/router.py +++ b/src/peft/tuners/poly/router.py @@ -37,12 +37,10 @@ def get_router(poly_config: PolyConfig) -> nn.Module: class Router(nn.Module, ABC): @abstractmethod - def reset(self): - ... + def reset(self): ... @abstractmethod - def forward(self, task_ids: torch.Tensor, input_ids: torch.Tensor): - ... + def forward(self, task_ids: torch.Tensor, input_ids: torch.Tensor): ... class PolyRouter(Router): diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 1d62a5cec2..e4877395fe 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -277,8 +277,7 @@ def _prepare_adapter_config(peft_config, _model_config): Does nothing. X-LoRA needs adapters to be frozen. """ - def _mark_only_adapters_as_trainable(self) -> None: - ... + def _mark_only_adapters_as_trainable(self) -> None: ... """ This enables the X-LoRA adapter. From fadc5d91897fdfff410c344465e716eb3de6b282 Mon Sep 17 00:00:00 2001 From: Eric Buehler <65165915+EricLBuehler@users.noreply.github.com> Date: Sat, 29 Jun 2024 18:57:20 -0400 Subject: [PATCH 169/182] Embedding does not support DoRA Co-authored-by: Benjamin Bossan --- src/peft/tuners/xlora/layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 19e0577056..5c01e90209 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -155,7 +155,7 @@ def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kw if not self.target.merged: for adapter_n, active_adapter in enumerate(self.target.active_adapters): # TODO: implement X-LoRA with Lora+Dora layers - if self.target.use_dora[active_adapter]: + if self.target.use_dora.get(active_adapter, False): raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") if active_adapter not in self.target.lora_embedding_A: continue From 013a0842e61ea3d4582e4fcbca8573f9151f16bc Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:06:39 -0400 Subject: [PATCH 170/182] Remove xlora for embedding layers --- src/peft/tuners/xlora/layer.py | 46 ---------------------------------- src/peft/tuners/xlora/model.py | 14 +---------- tests/test_xlora.py | 39 ---------------------------- 3 files changed, 1 insertion(+), 98 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 5c01e90209..5bb3ad5c8f 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -128,52 +128,6 @@ def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kw result = result.to(previous_dtype) return result - -class XLoraEmbeddingLayer(XLoraLayer): - def __init__( - self, - model: nn.Module, - target: lora.Embedding, - target_forward: 
Callable[..., Any], - layer_number: int, - config: XLoraConfig, - ) -> None: - super().__init__(model, target, target_forward, layer_number, config) - - def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kwargs: Any) -> Tensor: - """ - This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound - method must be created (bound to an instance of the XLoraLayer class). - """ - - if scalings is not None: - xlora_scalings = self.get_maybe_topk_scalings(scalings) - - result = self.target.base_layer(x, *args, **kwargs) - - # Ignore if disabled. We want to make sure this is always run. - if not self.target.merged: - for adapter_n, active_adapter in enumerate(self.target.active_adapters): - # TODO: implement X-LoRA with Lora+Dora layers - if self.target.use_dora.get(active_adapter, False): - raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") - if active_adapter not in self.target.lora_embedding_A: - continue - embedding_A = self.target.lora_embedding_A[active_adapter].T - embedding_B = self.target.lora_embedding_B[active_adapter].T - scaling = self.target.scaling[active_adapter] - if scalings is not None: - x_mod = self.apply_scalings_to_x(x, xlora_scalings, adapter_n) - scaling_weight = self.config.global_scaling_weight - else: - x_mod = x - scaling_weight = 1 - after_A = self.target._embed(x_mod, embedding_A) # type: ignore - result += (after_A @ embedding_B) * scaling * scaling_weight - - return result - - class XLoraConv2dLayer(XLoraLayer): def __init__( self, diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index e4877395fe..9a992a056d 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -27,7 +27,7 @@ from .. 
import lora from .classifier import XLoraClassifier from .config import XLoraConfig -from .layer import XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer +from .layer import XLoraConv2dLayer, XLoraLinearLayer def convert_layers_to_xlora( @@ -55,18 +55,6 @@ def convert_layers_to_xlora( all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - elif isinstance(module, lora.Embedding): - device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].device - new_layer = XLoraEmbeddingLayer( - model=xloramodel, - target=module, - target_forward=module.forward, - layer_number=total_swapped, - config=config, - ) - all_layers.append(new_layer) - module.forward = new_layer.forward # type: ignore[method-assign] - total_swapped += 1 elif isinstance(module, lora.Conv2d): device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoraConv2dLayer( diff --git a/tests/test_xlora.py b/tests/test_xlora.py index bff23ecf92..f210a8f908 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -47,42 +47,11 @@ def saved_lora_adapters(self, tmp_dir): file_names.append(file_name) return file_names - @pytest.fixture(scope="class") - def saved_lora_embedding_adapters(self, tmp_dir): - file_names = [] - for i in range(1, self.num_loras + 1): - torch.manual_seed(i) - lora_config = LoraConfig( - task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_positions", "embed_tokens"] - ) - model = AutoModelForCausalLM.from_pretrained(self.model_id) - peft_model = get_peft_model(model, lora_config) - file_name = os.path.join(tmp_dir, f"checkpoint-{i}") - peft_model.save_pretrained(file_name) - file_names.append(file_name) - return file_names - @pytest.fixture(scope="class") def tokenizer(self): tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.torch_device) return tokenizer - @pytest.fixture(scope="function") - def embedding_model(self, saved_lora_embedding_adapters): - model = AutoModelForCausalLM.from_pretrained(self.model_id) - model.config.use_cache = False - adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_embedding_adapters)} - - peft_config = XLoraConfig( - task_type=TaskType.CAUSAL_LM, - peft_type=PeftType.XLORA, - hidden_size=model.config.hidden_size, - xlora_depth=8, - adapters=adapters, - ) - model = get_peft_model(model, peft_config).to(self.torch_device) - return model - @pytest.fixture(scope="function") def model(self, saved_lora_adapters): model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -296,11 +265,3 @@ def test_disable_adapter(self, tokenizer, model): assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all() assert torch.isfinite(outputs[: inputs.shape[1] :]).all() assert not torch.equal(outputs, outputs_disabled) - - def test_functional_embedding(self, tokenizer, embedding_model): - inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") - outputs = embedding_model.generate( - input_ids=inputs.to(self.torch_device), - max_new_tokens=32, - ) - assert torch.isfinite(outputs[: inputs.shape[1] :]).all() From 14efafaf98e675f9073ad3322f1fcbc405bacba9 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:09:56 -0400 Subject: [PATCH 171/182] Change order of scalings application --- src/peft/tuners/xlora/layer.py | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index 
5bb3ad5c8f..cd4c69257c 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -128,6 +128,52 @@ def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kw result = result.to(previous_dtype) return result + +class XLoraEmbeddingLayer(XLoraLayer): + def __init__( + self, + model: nn.Module, + target: lora.Embedding, + target_forward: Callable[..., Any], + layer_number: int, + config: XLoraConfig, + ) -> None: + super().__init__(model, target, target_forward, layer_number, config) + + def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kwargs: Any) -> Tensor: + """ + This method is designed to be a drop-in-replacement for the LoRA layers' .forward method. To use it, a bound + method must be created (bound to an instance of the XLoraLayer class). + """ + + if scalings is not None: + xlora_scalings = self.get_maybe_topk_scalings(scalings) + + result = self.target.base_layer(x, *args, **kwargs) + + # Ignore if disabled. We want to make sure this is always run. + if not self.target.merged: + for adapter_n, active_adapter in enumerate(self.target.active_adapters): + # TODO: implement X-LoRA with Lora+Dora layers + if self.target.use_dora.get(active_adapter, False): + raise ValueError("X-LoRA currently does not support LoRA layers with DoRA") + if active_adapter not in self.target.lora_embedding_A: + continue + embedding_A = self.target.lora_embedding_A[active_adapter].T + embedding_B = self.target.lora_embedding_B[active_adapter].T + scaling = self.target.scaling[active_adapter] + after_A = self.target._embed(x_mod, embedding_A) # type: ignore + if scalings is not None: + after_A_mod = self.apply_scalings_to_x(after_A, xlora_scalings, adapter_n) + scaling_weight = self.config.global_scaling_weight + else: + after_A_mod = after_A + scaling_weight = 1 + result += (after_A_mod @ embedding_B) * scaling * scaling_weight + + return result + + class XLoraConv2dLayer(XLoraLayer): def __init__( self, From 624d316d211c236f1614524f20d5b6eca51bc9f2 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:11:15 -0400 Subject: [PATCH 172/182] Add tests --- tests/test_xlora.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index f210a8f908..bff23ecf92 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -47,11 +47,42 @@ def saved_lora_adapters(self, tmp_dir): file_names.append(file_name) return file_names + @pytest.fixture(scope="class") + def saved_lora_embedding_adapters(self, tmp_dir): + file_names = [] + for i in range(1, self.num_loras + 1): + torch.manual_seed(i) + lora_config = LoraConfig( + task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_positions", "embed_tokens"] + ) + model = AutoModelForCausalLM.from_pretrained(self.model_id) + peft_model = get_peft_model(model, lora_config) + file_name = os.path.join(tmp_dir, f"checkpoint-{i}") + peft_model.save_pretrained(file_name) + file_names.append(file_name) + return file_names + @pytest.fixture(scope="class") def tokenizer(self): tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.torch_device) return tokenizer + @pytest.fixture(scope="function") + def embedding_model(self, saved_lora_embedding_adapters): + model = AutoModelForCausalLM.from_pretrained(self.model_id) + model.config.use_cache = False + adapters = {str(i): file_name for i, file_name in enumerate(saved_lora_embedding_adapters)} + + peft_config = 
XLoraConfig( + task_type=TaskType.CAUSAL_LM, + peft_type=PeftType.XLORA, + hidden_size=model.config.hidden_size, + xlora_depth=8, + adapters=adapters, + ) + model = get_peft_model(model, peft_config).to(self.torch_device) + return model + @pytest.fixture(scope="function") def model(self, saved_lora_adapters): model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -265,3 +296,11 @@ def test_disable_adapter(self, tokenizer, model): assert torch.isfinite(outputs_disabled[: inputs.shape[1] :]).all() assert torch.isfinite(outputs[: inputs.shape[1] :]).all() assert not torch.equal(outputs, outputs_disabled) + + def test_functional_embedding(self, tokenizer, embedding_model): + inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") + outputs = embedding_model.generate( + input_ids=inputs.to(self.torch_device), + max_new_tokens=32, + ) + assert torch.isfinite(outputs[: inputs.shape[1] :]).all() From 1505f31c9ee49d55bd0e52b8d9a41bbaef066fd4 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:12:24 -0400 Subject: [PATCH 173/182] Handle case when inserting --- src/peft/tuners/xlora/model.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 9a992a056d..e4877395fe 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -27,7 +27,7 @@ from .. import lora from .classifier import XLoraClassifier from .config import XLoraConfig -from .layer import XLoraConv2dLayer, XLoraLinearLayer +from .layer import XLoraConv2dLayer, XLoraEmbeddingLayer, XLoraLinearLayer def convert_layers_to_xlora( @@ -55,6 +55,18 @@ def convert_layers_to_xlora( all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 + elif isinstance(module, lora.Embedding): + device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].device + new_layer = XLoraEmbeddingLayer( + model=xloramodel, + target=module, + target_forward=module.forward, + layer_number=total_swapped, + config=config, + ) + all_layers.append(new_layer) + module.forward = new_layer.forward # type: ignore[method-assign] + total_swapped += 1 elif isinstance(module, lora.Conv2d): device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoraConv2dLayer( From 0b2dcd020ad22732ba95ef4c33fd6ed47882d467 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:29:18 -0400 Subject: [PATCH 174/182] Check exact type --- src/peft/tuners/xlora/layer.py | 2 +- src/peft/tuners/xlora/model.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/peft/tuners/xlora/layer.py b/src/peft/tuners/xlora/layer.py index cd4c69257c..a5035456d4 100644 --- a/src/peft/tuners/xlora/layer.py +++ b/src/peft/tuners/xlora/layer.py @@ -162,7 +162,7 @@ def forward(self, x: Tensor, *args: Any, scalings: Optional[Tensor] = None, **kw embedding_A = self.target.lora_embedding_A[active_adapter].T embedding_B = self.target.lora_embedding_B[active_adapter].T scaling = self.target.scaling[active_adapter] - after_A = self.target._embed(x_mod, embedding_A) # type: ignore + after_A = self.target._embed(x, embedding_A) # type: ignore if scalings is not None: after_A_mod = self.apply_scalings_to_x(after_A, xlora_scalings, adapter_n) scaling_weight = self.config.global_scaling_weight diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index e4877395fe..e2f22cf860 100644 --- 
a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -43,7 +43,8 @@ def convert_layers_to_xlora( device = None for module in base.modules(): - if isinstance(module, lora.Linear): + # Check the exact type because classes like OPTLearnedPositionalEmbedding inherit from nn.Embedding + if type(module) == lora.Linear: device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoraLinearLayer( model=xloramodel, @@ -55,7 +56,7 @@ def convert_layers_to_xlora( all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - elif isinstance(module, lora.Embedding): + elif type(module) == lora.Embedding: device = module.lora_embedding_A[next(iter(module.lora_embedding_A))].device new_layer = XLoraEmbeddingLayer( model=xloramodel, @@ -67,7 +68,7 @@ def convert_layers_to_xlora( all_layers.append(new_layer) module.forward = new_layer.forward # type: ignore[method-assign] total_swapped += 1 - elif isinstance(module, lora.Conv2d): + elif type(module) == lora.Conv2d: device = module.lora_A[next(iter(module.lora_A))].weight.device new_layer = XLoraConv2dLayer( model=xloramodel, From abdcb50916aec2883e91e80ba7eff29ba7bdf4dd Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Sat, 29 Jun 2024 19:33:41 -0400 Subject: [PATCH 175/182] Fix target modules --- tests/test_xlora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index bff23ecf92..e6f876a422 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -53,7 +53,7 @@ def saved_lora_embedding_adapters(self, tmp_dir): for i in range(1, self.num_loras + 1): torch.manual_seed(i) lora_config = LoraConfig( - task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_positions", "embed_tokens"] + task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_tokens"] ) model = AutoModelForCausalLM.from_pretrained(self.model_id) peft_model = get_peft_model(model, lora_config) From aecd4927506d2c4ad2aa66eb6f6fdc4792c73a7a Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 1 Jul 2024 05:06:04 -0400 Subject: [PATCH 176/182] Somehow it didn't get formatted --- tests/test_xlora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index e6f876a422..136a6776e5 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -52,9 +52,7 @@ def saved_lora_embedding_adapters(self, tmp_dir): file_names = [] for i in range(1, self.num_loras + 1): torch.manual_seed(i) - lora_config = LoraConfig( - task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_tokens"] - ) + lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_tokens"]) model = AutoModelForCausalLM.from_pretrained(self.model_id) peft_model = get_peft_model(model, lora_config) file_name = os.path.join(tmp_dir, f"checkpoint-{i}") From 68e5f2f49b843ba51756dbc301c0c434a2b0d750 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 1 Jul 2024 07:19:32 -0400 Subject: [PATCH 177/182] Make tmp dir and tokenizer function scoped --- tests/test_xlora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 136a6776e5..12552a25aa 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -29,7 +29,7 @@ class TestXlora: model_id = "facebook/opt-125m" num_loras = 4 - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") def tmp_dir(self, tmp_path_factory): # create a 
class-scoped temp directory return tmp_path_factory.mktemp("xlora") @@ -60,7 +60,7 @@ def saved_lora_embedding_adapters(self, tmp_dir): file_names.append(file_name) return file_names - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") def tokenizer(self): tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True, device_map=self.torch_device) return tokenizer From 1628227469df5eaee2ebba1d4f47429ab4de7930 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Mon, 1 Jul 2024 07:48:31 -0400 Subject: [PATCH 178/182] Scope the lora adapters too --- tests/test_xlora.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 12552a25aa..0f5efffe0c 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -31,10 +31,9 @@ class TestXlora: @pytest.fixture(scope="function") def tmp_dir(self, tmp_path_factory): - # create a class-scoped temp directory return tmp_path_factory.mktemp("xlora") - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") def saved_lora_adapters(self, tmp_dir): file_names = [] for i in range(1, self.num_loras + 1): @@ -47,7 +46,7 @@ def saved_lora_adapters(self, tmp_dir): file_names.append(file_name) return file_names - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") def saved_lora_embedding_adapters(self, tmp_dir): file_names = [] for i in range(1, self.num_loras + 1): From 5a889be460675314bf4faaf15f1b1ac72bf9c027 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Tue, 2 Jul 2024 09:19:23 -0400 Subject: [PATCH 179/182] Use unique temp dirs for lora adapters, all tests --- tests/test_xlora.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/test_xlora.py b/tests/test_xlora.py index 0f5efffe0c..8e8fa2ab41 100644 --- a/tests/test_xlora.py +++ b/tests/test_xlora.py @@ -30,31 +30,35 @@ class TestXlora: num_loras = 4 @pytest.fixture(scope="function") - def tmp_dir(self, tmp_path_factory): - return tmp_path_factory.mktemp("xlora") + def lora_dir(self, tmp_path_factory): + return tmp_path_factory.mktemp("lora") @pytest.fixture(scope="function") - def saved_lora_adapters(self, tmp_dir): + def lora_embedding_dir(self, tmp_path_factory): + return tmp_path_factory.mktemp("lora_embedding") + + @pytest.fixture(scope="function") + def saved_lora_adapters(self, lora_dir): file_names = [] for i in range(1, self.num_loras + 1): torch.manual_seed(i) lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False) model = AutoModelForCausalLM.from_pretrained(self.model_id) peft_model = get_peft_model(model, lora_config) - file_name = os.path.join(tmp_dir, f"checkpoint-{i}") + file_name = os.path.join(lora_dir, f"checkpoint-{i}") peft_model.save_pretrained(file_name) file_names.append(file_name) return file_names @pytest.fixture(scope="function") - def saved_lora_embedding_adapters(self, tmp_dir): + def saved_lora_embedding_adapters(self, lora_embedding_dir): file_names = [] for i in range(1, self.num_loras + 1): torch.manual_seed(i) lora_config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["embed_tokens"]) model = AutoModelForCausalLM.from_pretrained(self.model_id) peft_model = get_peft_model(model, lora_config) - file_name = os.path.join(tmp_dir, f"checkpoint-{i}") + file_name = os.path.join(lora_embedding_dir, f"checkpoint-{i}") peft_model.save_pretrained(file_name) file_names.append(file_name) return file_names @@ -177,7 +181,7 @@ def test_misc_methods(self, 
tokenizer, model): assert str(model) is not None - def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): + def test_save_load_functional(self, tokenizer, model, tmp_path): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( input_ids=inputs.to(self.torch_device), @@ -186,15 +190,13 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): before_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(before_logits).all() - model.save_pretrained(save_directory=tmp_dir, safe_serialization=False) + model.save_pretrained(save_directory=tmp_path) del model model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir, safe_serialization=False).to( - self.torch_device - ) + model = PeftModel.from_pretrained(model=model, model_id=tmp_path).to(self.torch_device) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -203,9 +205,9 @@ def test_save_load_functional_pt(self, tokenizer, model, tmp_dir): ) after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() - assert torch.equal(after_logits, before_logits), (after_logits, before_logits) + assert torch.equal(after_logits, before_logits) - def test_save_load_functional(self, tokenizer, model, tmp_dir): + def test_save_load_functional_pt(self, tokenizer, model, tmp_path): inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( input_ids=inputs.to(self.torch_device), @@ -214,13 +216,15 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): before_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(before_logits).all() - model.save_pretrained(save_directory=tmp_dir) + model.save_pretrained(save_directory=tmp_path, safe_serialization=False) del model model = AutoModelForCausalLM.from_pretrained(self.model_id) model.config.use_cache = False - model = PeftModel.from_pretrained(model=model, model_id=tmp_dir).to(self.torch_device) + model = PeftModel.from_pretrained(model=model, model_id=tmp_path, safe_serialization=False).to( + self.torch_device + ) inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt") outputs = model.generate( @@ -229,7 +233,7 @@ def test_save_load_functional(self, tokenizer, model, tmp_dir): ) after_logits = outputs[: inputs.shape[1] :] assert torch.isfinite(after_logits).all() - assert torch.equal(after_logits, before_logits) + assert torch.equal(after_logits, before_logits), (after_logits, before_logits) def test_topk_lora(self, tokenizer, model): model.set_topk_lora(2) From 072987ee74c16d104aedc95a2763a5063380a8c4 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 3 Jul 2024 10:38:36 -0400 Subject: [PATCH 180/182] Seperation of concerns for xlora and lora --- src/peft/tuners/lora/model.py | 17 ++++------------- src/peft/tuners/tuners_utils.py | 6 ++++++ src/peft/tuners/xlora/model.py | 12 +++++++++++- src/peft/utils/constants.py | 1 + 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 8a94315eea..83153a8cf6 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -155,9 +155,7 @@ def _check_new_adapter_config(self, config: LoraConfig) -> None: @staticmethod def _check_target_module_exists(lora_config, key): - if hasattr(lora_config, 
"target_modules"): - return check_target_module_exists(lora_config, key) - return False + return check_target_module_exists(lora_config, key) def _prepare_model(self, peft_config: LoraConfig, model: nn.Module): r""" @@ -169,8 +167,7 @@ def _prepare_model(self, peft_config: LoraConfig, model: nn.Module): model (`nn.Module`): The model that is going to be adapted. """ - # Handle X-LoRA case - if hasattr(peft_config, "layer_replication") and peft_config.layer_replication: + if peft_config.layer_replication: replicate_layers(model, peft_config.layer_replication) def _create_and_replace( @@ -184,9 +181,6 @@ def _create_and_replace( ): if current_key is None: raise ValueError("Current Key shouldn't be `None`") - # Handle X-LoRA case: - if not hasattr(lora_config, "rank_pattern"): - return # Regexp matching - Find key which matches current target_name in patterns provided pattern_keys = list(chain(lora_config.rank_pattern.keys(), lora_config.alpha_pattern.keys())) @@ -276,9 +270,6 @@ def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: p.requires_grad = False for active_adapter in self.active_adapters: - # Handle X-LoRA case - if not hasattr(self.peft_config[active_adapter], "bias"): - return bias = self.peft_config[active_adapter].bias if bias == "none": continue @@ -460,7 +451,7 @@ def _check_merge_allowed(self): @staticmethod def _prepare_adapter_config(peft_config, model_config): - if hasattr(peft_config, "target_modules") and peft_config.target_modules is None: + if peft_config.target_modules is None: if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING: raise ValueError("Please specify `target_modules` in `peft_config`") peft_config.target_modules = set( @@ -913,4 +904,4 @@ def subtract_mutated_init(self, output_state_dict: dict[str, torch.Tensor], adap [output_state_dict[name], -mutated_init_state_dict[".".join(name.split(".")[1:])]], dim=1 ) - return tensors_lora + return tensors_lora \ No newline at end of file diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index d1f202e855..97e5cfbe90 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -30,6 +30,7 @@ from transformers.pytorch_utils import Conv1D from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND +from peft.utils.constants import DUMMY_TARGET_MODULES from peft.utils.peft_types import PeftType from ..config import PeftConfig @@ -396,6 +397,11 @@ def inject_adapter(self, model: nn.Module, adapter_name: str, autocast_adapter_d is_target_modules_in_base_model = False key_list = [key for key, _ in model.named_modules()] + if getattr(peft_config, "target_modules", None) == DUMMY_TARGET_MODULES: + # dummy adapter, we allow not matching any module + key_list = [] + is_target_modules_in_base_model = True + # update peft_config.target_modules if required peft_config = _maybe_include_all_linear_layers(peft_config, model) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index e2f22cf860..48101482d3 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import copy from contextlib import contextmanager from functools import partial from typing import Optional, Union @@ -23,6 +24,7 @@ from peft.tuners.lora.layer import LoraLayer from peft.tuners.lora.model import LoraModel from peft.tuners.tuners_utils import BaseTuner +from peft.utils.constants import DUMMY_TARGET_MODULES from .. 
import lora from .classifier import XLoraClassifier @@ -140,7 +142,15 @@ def __init__( conf = config[adapter_name] else: conf = config - lora_model = LoraModel(model, config.copy(), adapter_name) + + # Create an empty LoraModel + base_lora_config = copy.copy(conf) + base_lora_config.target_modules = DUMMY_TARGET_MODULES + # Imitate a LoraConfig, fields might need to be updated if LoraConfig is updated + base_lora_config.layer_replication = None + base_lora_config.bias = "none" + lora_model = LoraModel(model, base_lora_config, adapter_name) + self.xlora_config = conf self.lora_model = lora_model diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py index 98df496275..40441f5850 100644 --- a/src/peft/utils/constants.py +++ b/src/peft/utils/constants.py @@ -213,3 +213,4 @@ def starcoder_model_postprocess_past_key_value(past_key_values): EMBEDDING_LAYER_NAMES = ["embed_tokens", "lm_head"] INCLUDE_LINEAR_LAYERS_SHORTHAND = "all-linear" TOKENIZER_CONFIG_NAME = "tokenizer_config.json" +DUMMY_TARGET_MODULES = "dummy-target-modules" From 77fb6b02cc883409191f41a518c759ea63495665 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Wed, 3 Jul 2024 12:12:30 -0400 Subject: [PATCH 181/182] Prevent inf recursion as per 1892 --- src/peft/tuners/lora/model.py | 2 +- src/peft/tuners/xlora/model.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index 2ba92af494..692dda7b9a 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -905,4 +905,4 @@ def subtract_mutated_init(self, output_state_dict: dict[str, torch.Tensor], adap [output_state_dict[name], -mutated_init_state_dict[".".join(name.split(".")[1:])]], dim=1 ) - return tensors_lora \ No newline at end of file + return tensors_lora diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index 48101482d3..c7f83d1497 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -277,6 +277,8 @@ def __getattr__(self, name: str): try: return super().__getattr__(name) # defer to nn.Module's logic except AttributeError: + if name == "base_model": # see #1892: prevent infinite recursion if class is not initialized + raise return getattr(self.lora_model, name) @staticmethod From 2f85f91fc7685133432e6666e3d392a0d5092723 Mon Sep 17 00:00:00 2001 From: EricLBuehler Date: Thu, 4 Jul 2024 06:02:18 -0400 Subject: [PATCH 182/182] Prevent inf recursion on lora_model --- src/peft/tuners/xlora/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/xlora/model.py b/src/peft/tuners/xlora/model.py index c7f83d1497..b71065164c 100644 --- a/src/peft/tuners/xlora/model.py +++ b/src/peft/tuners/xlora/model.py @@ -277,7 +277,7 @@ def __getattr__(self, name: str): try: return super().__getattr__(name) # defer to nn.Module's logic except AttributeError: - if name == "base_model": # see #1892: prevent infinite recursion if class is not initialized + if name == "lora_model": # see #1892: prevent infinite recursion if class is not initialized raise return getattr(self.lora_model, name)