
Commit

Merge branch 'huggingface:main' into feature/vera-quantization-support
ZiadHelal authored Oct 4, 2024
2 parents 3b2c017 + 8d9ecbe commit 640874f
Showing 14 changed files with 300 additions and 47 deletions.
3 changes: 3 additions & 0 deletions src/peft/tuners/adalora/config.py
@@ -61,6 +61,9 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
# if target_modules is a regex expression, then layers_to_transform should be None
if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")
21 changes: 17 additions & 4 deletions src/peft/tuners/boft/config.py
@@ -15,8 +15,10 @@
# The implementation is based on "Parameter-Efficient Orthogonal Finetuning
# via Butterfly Factorization" (https://arxiv.org/abs/2311.06243) in ICLR 2024.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional, Union
from typing import Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType
@@ -32,6 +34,10 @@ class BOFTConfig(PeftConfig):
boft_block_num (`int`): Number of BOFT blocks per injected layer.
boft_n_butterfly_factor (`int`): Number of butterfly factors across different layers.
target_modules (`Union[List[str],str]`): The names of the modules to apply the adapter to.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
boft_dropout (`float`):
The multiplicative dropout probability, by setting OFT blocks to identity during training, similar to the
dropout layer in LoRA.
@@ -76,13 +82,17 @@ class BOFTConfig(PeftConfig):
),
},
)
target_modules: Optional[Union[List[str], str]] = field(
target_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={
"help": "List of module names or regex expression of the module names to replace with BOFT.",
"example": "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' ",
},
)
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from BOFT."},
)
boft_dropout: float = field(
default=0.0,
metadata={
@@ -94,7 +104,7 @@ class BOFTConfig(PeftConfig):
metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"},
)
bias: str = field(default="none", metadata={"help": "Bias type for BOFT. Can be 'none', 'all' or 'boft_only'"})
modules_to_save: Optional[List[str]] = field(
modules_to_save: Optional[list[str]] = field(
default=None,
metadata={
"help": "List of modules apart from BOFT layers to be set as trainable and saved in the final checkpoint. ",
@@ -113,7 +123,7 @@ class BOFTConfig(PeftConfig):
),
},
)
layers_to_transform: Optional[Union[List[int], int]] = field(
layers_to_transform: Optional[Union[list[int], int]] = field(
default=None,
metadata={
"help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index."
@@ -131,6 +141,9 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
if self.boft_block_size == 0 and self.boft_block_num == 0:
raise ValueError(
f"Either `boft_block_size` or `boft_block_num` must be non-zero. Currently, boft_block_size = {self.boft_block_size} and boft_block_num = {self.boft_block_num}."
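For context, a minimal usage sketch of the new `exclude_modules` field (an illustration, not part of this commit); it assumes a PEFT build that already contains this change, and the model name and module names below are only examples.

    from transformers import AutoModelForCausalLM
    from peft import BOFTConfig, get_peft_model

    # Illustrative base model; any causal LM with q_proj/v_proj linear layers works the same way.
    base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

    config = BOFTConfig(
        boft_block_size=4,
        target_modules=["q_proj", "v_proj"],          # adapt these projections
        exclude_modules=r".*decoder\.layers\.0\..*",  # a string is treated as a regex: skip matches inside layer 0
    )

    peft_model = get_peft_model(base_model, config)
    peft_model.print_trainable_parameters()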
11 changes: 11 additions & 0 deletions src/peft/tuners/fourierft/config.py
@@ -52,6 +52,10 @@ class FourierFTConfig(PeftConfig):
target_modules (`Union[list[str],str]`):
List of module names or regex expression of the module names to replace with FourierFT. For example, ['q',
'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
fan_in_fan_out (`bool`):
Set this to True if the layer to replace stores weight like (fan_in, fan_out).
bias (`str`):
@@ -123,6 +127,10 @@ class FourierFTConfig(PeftConfig):
)
},
)
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from fourierft."},
)
bias: str = field(
default="none", metadata={"help": "Bias type for FourierFT. Can be 'none', 'all' or 'fourier_only'."}
)
@@ -179,6 +187,9 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
# if target_modules is a regex expression, then layers_to_transform should be None
if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")
21 changes: 17 additions & 4 deletions src/peft/tuners/hra/config.py
@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional, Union
from typing import Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType
@@ -38,6 +40,10 @@ class HRAConfig(PeftConfig):
the output layer. If this is not specified, modules will be chosen according to the model architecture. If
the architecture is not known, an error will be raised -- in this case, you should specify the target
modules manually.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
init_weights (`bool`):
Whether to perform initialization of HRA weights.
layers_to_transform (`Union[List[int], int]`):
@@ -64,13 +70,17 @@ class HRAConfig(PeftConfig):
default=False,
metadata={"help": "Whether to apply Gram-Schmidt orthogonalization or not."},
)
target_modules: Optional[Union[List[str], str]] = field(
target_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={
"help": "List of module names or regex expression of the module names to replace with HRA.",
"example": "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' ",
},
)
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from HRA."},
)
init_weights: bool = field(
default=True,
metadata={
@@ -80,7 +90,7 @@ class HRAConfig(PeftConfig):
),
},
)
layers_to_transform: Optional[Union[List[int], int]] = field(
layers_to_transform: Optional[Union[list[int], int]] = field(
default=None,
metadata={
"help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index."
@@ -93,7 +103,7 @@ class HRAConfig(PeftConfig):
},
)
bias: str = field(default="none", metadata={"help": "Bias type for HRA. Can be 'none', 'all' or 'hra_only'"})
modules_to_save: Optional[List[str]] = field(
modules_to_save: Optional[list[str]] = field(
default=None,
metadata={
"help": "List of modules apart from HRA layers to be set as trainable and saved in the final checkpoint. "
@@ -107,6 +117,9 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
# if target_modules is a regex expression, then layers_to_transform should be None
if isinstance(self.target_modules, str) and self.layers_to_transform is not None:
raise ValueError("`layers_to_transform` cannot be used when `target_modules` is a str.")
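The exclusion rule described in these docstrings (regex match for a string, exact or suffix match for a list) can be summarized with a small standalone sketch. `is_excluded` is a hypothetical helper written here for illustration, not a PEFT API; it only mirrors the documented behaviour.

    import re

    def is_excluded(module_name: str, exclude_modules) -> bool:
        if not exclude_modules:
            # No exclusions configured: nothing is excluded.
            return False
        if isinstance(exclude_modules, str):
            # A single string is treated as a regex over the full module name.
            return re.fullmatch(exclude_modules, module_name) is not None
        # A list/set of strings: exact match, or the module name ends with ".<entry>".
        return any(
            module_name == entry or module_name.endswith(f".{entry}")
            for entry in exclude_modules
        )

    # is_excluded("model.decoder.layers.0.self_attn.q_proj", ["q_proj"])         -> True
    # is_excluded("model.decoder.layers.0.self_attn.q_proj", r".*layers\.1\..*") -> False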
21 changes: 17 additions & 4 deletions src/peft/tuners/ia3/config.py
@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional, Union
from typing import Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType
@@ -33,6 +35,10 @@ class IA3Config(PeftConfig):
excluding the output layer. If this is not specified, modules will be chosen according to the model
architecture. If the architecture is not known, an error will be raised -- in this case, you should specify
the target modules manually.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
feedforward_modules (`Optional[Union[List[str], str]]`):
The names of the modules to be treated as feedforward modules, as in the original paper. These modules will
have (IA)³ vectors multiplied to the input, instead of the output. `feedforward_modules` must be a name or
@@ -47,7 +53,7 @@
discouraged.
"""

target_modules: Optional[Union[List[str], str]] = field(
target_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={
"help": (
@@ -59,7 +65,11 @@ class IA3Config(PeftConfig):
),
},
)
feedforward_modules: Optional[Union[List[str], str]] = field(
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from (IA)³."},
)
feedforward_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={
"help": "List of module names or a regex expression of module names which are feedforward"
@@ -70,7 +80,7 @@ class IA3Config(PeftConfig):
default=False,
metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"},
)
modules_to_save: Optional[List[str]] = field(
modules_to_save: Optional[list[str]] = field(
default=None,
metadata={
"help": "List of modules apart from (IA)^3 layers to be set as trainable and saved in the final checkpoint. "
@@ -88,6 +98,9 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
self.feedforward_modules = (
set(self.feedforward_modules) if isinstance(self.feedforward_modules, list) else self.feedforward_modules
)
8 changes: 8 additions & 0 deletions src/peft/tuners/ln_tuning/config.py
@@ -31,6 +31,10 @@ class LNTuningConfig(PeftConfig):
'.*decoder.*' or '.*encoder.*'. If this is not specified, modules will be chosen according to the model
architecture. If the architecture is not known, an error will be raised -- in this case, you should specify
the target modules manually.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
modules_to_save (`Optional[Union[List[str], str]]`):
List of modules to be set as trainable and saved in the final checkpoint. For example, in Sequence
Classification or Token Classification tasks, the final layer `classifier/score` are randomly initialized
@@ -48,6 +52,10 @@
),
},
)
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from LNTuning."},
)
modules_to_save: Optional[Union[list[str], str]] = field(
default=None,
metadata={
20 changes: 16 additions & 4 deletions src/peft/tuners/loha/config.py
@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional, Union
from typing import Optional, Union

from peft.tuners.lycoris_utils import LycorisConfig
from peft.utils import PeftType
@@ -43,6 +44,10 @@ class LoHaConfig(LycorisConfig):
excluding the output layer. If this is not specified, modules will be chosen according to the model
architecture. If the architecture is not known, an error will be raised -- in this case, you should specify
the target modules manually.
exclude_modules (`Optional[Union[List[str], str]]`):
The names of the modules to not apply the adapter to. When passing a string, a regex match will be performed.
When passing a list of strings, either an exact match will be performed or it is checked if the name of the
module ends with any of the passed strings.
init_weights (`bool`):
Whether to perform initialization of adapter weights. This defaults to `True`, passing `False` is
discouraged.
@@ -76,14 +81,18 @@ class LoHaConfig(LycorisConfig):
"help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)'
},
)
target_modules: Optional[Union[List[str], str]] = field(
target_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={
"help": "List of module names or regex expression of the module names to replace with LoHa."
"For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' "
"This can also be a wildcard 'all-linear' which matches all linear/Conv1D layers except the output layer."
},
)
exclude_modules: Optional[Union[list[str], str]] = field(
default=None,
metadata={"help": "List of module names or regex expression of the module names to exclude from LoHa."},
)
init_weights: bool = field(
default=True,
metadata={
@@ -93,7 +102,7 @@ class LoHaConfig(LycorisConfig):
),
},
)
layers_to_transform: Optional[Union[List[int], int]] = field(
layers_to_transform: Optional[Union[list[int], int]] = field(
default=None,
metadata={
"help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index."
@@ -105,7 +114,7 @@
"help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern."
},
)
modules_to_save: Optional[List[str]] = field(
modules_to_save: Optional[list[str]] = field(
default=None,
metadata={
"help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. "
@@ -119,3 +128,6 @@ def __post_init__(self):
self.target_modules = (
set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
)
self.exclude_modules = (
set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
)
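Two recurring details in the hunks above: each file adds `from __future__ import annotations` so the built-in lowercase generics (`list[str]`, `list[int]`) can be used in the field annotations, and each `__post_init__` normalizes a list-valued `exclude_modules` into a set, mirroring the existing handling of `target_modules`. A tiny standalone sketch of that normalization (a hypothetical stand-in class, not PEFT code):

    from __future__ import annotations

    from dataclasses import dataclass, field
    from typing import Optional, Union


    @dataclass
    class TinyConfig:
        # Stand-in for the config classes above, showing only the list -> set step.
        exclude_modules: Optional[Union[list[str], str]] = field(default=None)

        def __post_init__(self):
            self.exclude_modules = (
                set(self.exclude_modules) if isinstance(self.exclude_modules, list) else self.exclude_modules
            )


    cfg = TinyConfig(exclude_modules=["lm_head", "lm_head", "score"])
    print(cfg.exclude_modules)  # {'lm_head', 'score'} -- duplicates collapse and order no longer matters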