Upgrade Transformers to v4.33.3 #586

Merged 6 commits on Oct 5, 2023
2 changes: 1 addition & 1 deletion docs/installation.md
@@ -1,7 +1,7 @@
# Installation

The `adapters` package is designed as an add-on for Hugging Face's Transformers library.
-It currently supports Python 3.7+ and PyTorch 1.3.1+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.
+It currently supports Python 3.8+ and PyTorch 1.10+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.

```{eval-rst}
.. important::
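For reference, a minimal sketch of checking the updated minimums at runtime before installing the add-on; the version parsing below is an illustration, not part of this PR:

```python
# Minimal sketch: assert the documented minimums (Python 3.8+, PyTorch 1.10+)
# before installing/importing the adapters add-on.
import sys

import torch
from packaging import version

assert sys.version_info >= (3, 8), "adapters requires Python 3.8+"
assert version.parse(torch.__version__).release >= (1, 10), "adapters requires PyTorch 1.10+"
```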
2 changes: 1 addition & 1 deletion hf_transformers
Submodule hf_transformers updated 1003 files
6 changes: 3 additions & 3 deletions setup.py
@@ -21,7 +21,7 @@
# We try to follow their general layout wherever sensible.

_deps = [
-    "accelerate>=0.20.1",
+    "accelerate>=0.20.3",
    "black==22.3", # after updating to black 2023, also update Python version in pyproject.toml to 3.7
    "datasets!=2.5.0",
    "dill<0.3.5",
@@ -60,8 +60,8 @@
    "sphinx-intl",
    "sphinx-multiversion",
    "timeout-decorator",
-    "torch>=1.7,!=1.12.0",
-    "transformers==4.31.0",
+    "torch>=1.10,!=1.12.0",
+    "transformers==4.33.3",
    "beautifulsoup4",
]

14 changes: 14 additions & 0 deletions src/adapters/head_utils.py
@@ -483,6 +483,20 @@
        },
        "layers": [None, "qa_outputs"],
    },
+    "T5ForSequenceClassification": {
+        "config": {
+            "head_type": "classification",
+            "layers": 2,
+            "activation_function": "tanh",
+        },
+        "layers": [
+            None,
+            "classification_head.dense",
+            None,
+            None,
+            "classification_head.out_proj",
+        ],
+    },
    "DebertaV2ForSequenceClassification": {
        "config": {
            "head_type": "classification",
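The `config` block added above describes a two-layer classification head with tanh activation, mirroring how Transformers lays out the static `T5ForSequenceClassification` head. A rough sketch of the equivalent flexible head on the adapters side (model class, checkpoint, and head name are illustrative, not taken from this diff):

```python
# Illustrative sketch: a flex head matching the static-to-flex mapping above,
# i.e. two layers with tanh activation; the None entries in "layers" have no
# weight-bearing module to map.
from adapters import T5AdapterModel

model = T5AdapterModel.from_pretrained("t5-small")
model.add_classification_head("cls", num_labels=2, layers=2, activation_function="tanh")
```

With this entry in place, checkpoints saved with the static `T5ForSequenceClassification` head are meant to be convertible to such a flexible head when loaded.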
47 changes: 28 additions & 19 deletions src/adapters/heads/base.py
@@ -554,9 +554,9 @@ def tie_weights(self):
                self = getattr(self, self.base_model_prefix)
            self._tie_encoder_decoder_weights(self.encoder, self.decoder, self.base_model_prefix)

-    def _resize_token_embeddings(self, new_num_tokens):
+    def _resize_token_embeddings(self, new_num_tokens, pad_to_multiple_of=None):
        old_embeddings = self.get_input_embeddings()
-        new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
+        new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens, pad_to_multiple_of)
        self.set_input_embeddings(new_embeddings)

        # if word embeddings are not tied, make sure that lm head is resized as well
@@ -730,6 +730,27 @@ def delete_head(self, head_name: str):
        if self.active_head == head_name:
            self.active_head = None

+    def _get_used_heads(self, head_name: str = None):
+        if head_name:
+            used_heads = [head_name]
+        # together with context, check if we have heads at all to allow for models without heads
+        elif len(self.heads) > 0 and AdapterSetup.get_context_head_setup():
+            used_heads = AdapterSetup.get_context_head_setup()
+            if isinstance(used_heads, str):
+                used_heads = [used_heads]
+        elif self._active_heads:
+            used_heads = self._active_heads
+        else:
+            return []
+
+        head_modules = []
+        for head in used_heads:
+            if head not in self.heads:
+                raise ValueError("Unknown head_name '{}'".format(head))
+            head_modules.append(self.heads[head])
+
+        return head_modules
+
    def forward_head(
        self, all_outputs, head_name=None, cls_output=None, attention_mask=None, return_dict=False, **kwargs
    ):
@@ -750,16 +771,8 @@ def forward_head(
            return_dict (bool): Whether or not to return a ``ModelOutput`` instead of a plain tuple.
            **kwargs: Additional keyword arguments passed to the forward pass of the head.
        """
-        if head_name:
-            used_heads = [head_name]
-        # together with context, check if we have heads at all to allow for models without heads
-        elif len(self.heads) > 0 and AdapterSetup.get_context_head_setup():
-            used_heads = AdapterSetup.get_context_head_setup()
-            if isinstance(used_heads, str):
-                used_heads = [used_heads]
-        elif self._active_heads:
-            used_heads = self._active_heads
-        else:
+        used_head_modules = self._get_used_heads(head_name)
+        if len(used_head_modules) == 0:
            logger.debug("No prediction head is used.")
            return all_outputs

@@ -787,9 +800,6 @@ def _get_head_input(outputs, cls_out, batch):
        if inv_adapter:
            kwargs["invertible_adapter"] = inv_adapter

-        for head in used_heads:
-            if head not in self.heads:
-                raise ValueError("Unknown head_name '{}'".format(head))
        if isinstance(self.active_head, BatchSplit):
            if sum(self.active_head.batch_sizes) != all_outputs[0].size()[0]:
                raise ValueError(
@@ -830,14 +840,13 @@ def _get_head_input(outputs, cls_out, batch):
                else None
            )
            return_output = MultiHeadOutput(head_outputs=head_outputs, loss=combined_loss)
-        elif len(used_heads) > 1:
+        elif len(used_head_modules) > 1:
            head_outputs = []
-            for head in used_heads:
-                head_module = self.heads[head]
+            for head_module in used_head_modules:
                head_outputs.append(head_module(all_outputs, cls_output, attention_mask, return_dict, **kwargs))
            return_output = MultiHeadOutput(head_outputs=head_outputs)
        else:
-            head_module = self.heads[used_heads[0]]
+            head_module = used_head_modules[0]
            return_output = head_module(all_outputs, cls_output, attention_mask, return_dict, **kwargs)

        if isinstance(return_output, ModelOutput):
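With `_get_used_heads()` factored out, an explicit `head_name`, a head set through the `AdapterSetup` context, and the model's active head all resolve through the same lookup before `forward_head()` runs the head modules. A hedged usage sketch of the three paths (model, adapter, and head names are illustrative, not part of this diff):

```python
# Sketch of the three head-resolution paths now centralized in _get_used_heads().
from adapters import AdapterSetup, AutoAdapterModel
from transformers import AutoTokenizer

model = AutoAdapterModel.from_pretrained("bert-base-uncased")
model.add_adapter("my_adapter")
model.add_classification_head("my_head", num_labels=2)

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
batch = tokenizer("Adapters are neat.", return_tensors="pt")

outputs = model(**batch, head="my_head")             # 1) explicit head_name
with AdapterSetup("my_adapter", head_setup="my_head"):
    outputs = model(**batch)                          # 2) AdapterSetup context
model.active_head = "my_head"
outputs = model(**batch)                              # 3) model's active head(s)
```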
5 changes: 4 additions & 1 deletion src/adapters/models/__init__.py
@@ -15,7 +15,8 @@
    CLIPVisionModelAdaptersMixin,
)
from .distilbert.mixin_distilbert import DistilBertModelAdaptersMixin, DistilBertTransformerAdaptersMixin
-from .gptj.mixin_gptj import GPTJMLPAdaptersMixin
+from .gpt2.mixin_gpt2 import GPT2ModelAdapterMixin
+from .gptj.mixin_gptj import GPTJMLPAdaptersMixin, GPTJModelAdapterMixin
from .llama.mixin_llama import LlamaModelAdapterMixin
from .t5.mixin_t5 import T5BlockAdaptersMixin, T5ModelAdaptersMixin, T5ModelAdaptersWithHeadsMixin
from .vit.mixin_vit import ViTIntermediateAdaptersMixin, ViTModelAdaptersMixin
@@ -49,7 +50,9 @@
    "MBartDecoder": BartDecoderAdaptersMixin,
    "MBartDecoderWrapper": BartDecoderWrapperAdaptersMixin,
    "MBartModel": BartModelAdaptersMixin,
+    "GPT2Model": GPT2ModelAdapterMixin,
    "GPTJMLP": GPTJMLPAdaptersMixin,
+    "GPTJModel": GPTJModelAdapterMixin,
    "RobertaLayer": BertLayerAdaptersMixin,
    "RobertaModel": BertModelAdaptersMixin,
    "T5Block": T5BlockAdaptersMixin,
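Registering `GPT2Model` and `GPTJModel` in this mapping is what lets plain Transformers GPT-2/GPT-J instances pick up the adapter mixins when they are wrapped. A rough sketch of the intended usage (the adapter name is illustrative):

```python
# Sketch: adapters.init() applies the mixin mapping above, so a vanilla GPT2Model
# gains adapter methods via GPT2ModelAdapterMixin.
import adapters
from transformers import GPT2Model

model = GPT2Model.from_pretrained("gpt2")
adapters.init(model)
model.add_adapter("bottleneck_adapter")
model.train_adapter("bottleneck_adapter")
```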
4 changes: 2 additions & 2 deletions src/adapters/models/gpt2/mixin_gpt2.py
@@ -5,7 +5,7 @@
from ...layer import AdapterLayer
from ...lora import Linear as LoRALinear
from ...lora import MergedLinear as LoRAMergedLinear
-from ...model_mixin import EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelAdaptersMixin
+from ...model_mixin import EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin
from ...prefix_tuning import PrefixTuningShim


@@ -54,7 +54,7 @@ def init_adapters(self, model_config, adapters_config):
        self.output_adapters = AdapterLayer("output_adapter")


-class GPT2ModelAdapterMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelAdaptersMixin):
+class GPT2ModelAdapterMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin):
    def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]:
        for i, layer in enumerate(self.base_model.h):
            yield i, layer
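Since `GPT2ModelAdapterMixin` now builds on `ModelBaseAdaptersMixin`, the adapter machinery locates GPT-2's transformer blocks through `iter_layers()`. A small sketch of what that iteration yields (assumed usage, for illustration only):

```python
# Sketch: after wrapping, iter_layers() walks model.h, one entry per GPT2Block.
import adapters
from transformers import GPT2Model

model = GPT2Model.from_pretrained("gpt2")
adapters.init(model)
for idx, block in model.iter_layers():
    print(idx, type(block).__name__)  # 0..11 and "GPT2Block" for the 12-layer "gpt2"
```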