Merge branch 'main' into dev/interface
calpt committed Dec 23, 2024
2 parents 89e7aa7 + 7550099 commit 31c8b2a
Showing 42 changed files with 2,643 additions and 421 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/tests_torch.yml
@@ -63,7 +63,7 @@ jobs:
- name: Install
run: |
pip install torch==2.3
- pip install .[sklearn,testing,sentencepiece]
+ pip install .[sklearn,testing,sentencepiece,torchvision]
- name: Test
run: |
make test-adapter-methods
@@ -86,7 +86,7 @@ jobs:
- name: Install
run: |
pip install torch==2.3
- pip install .[sklearn,testing,sentencepiece]
+ pip install .[sklearn,testing,sentencepiece,torchvision]
- name: Test
run: |
make test-adapter-models
@@ -109,7 +109,7 @@ jobs:
- name: Install
run: |
pip install torch==2.3
- pip install .[sklearn,testing,sentencepiece]
+ pip install .[sklearn,testing,sentencepiece,torchvision]
pip install conllu seqeval
- name: Test Examples
run: |
3 changes: 3 additions & 0 deletions docs/classes/adapter_config.rst
@@ -34,6 +34,9 @@ Single (bottleneck) adapters
.. autoclass:: adapters.CompacterPlusPlusConfig
:members:

+ .. autoclass:: adapters.AdapterPlusConfig
+    :members:

Prefix Tuning
~~~~~~~~~~~~~~~~~~~~~~~

3 changes: 2 additions & 1 deletion docs/methods.md
@@ -42,7 +42,7 @@ A visualization of further configuration options related to the adapter structure
- [`DoubleSeqBnConfig`](adapters.DoubleSeqBnConfig), as proposed by [Houlsby et al. (2019)](https://arxiv.org/pdf/1902.00751.pdf) places adapter layers after both the multi-head attention and feed-forward block in each Transformer layer.
- [`SeqBnConfig`](adapters.SeqBnConfig), as proposed by [Pfeiffer et al. (2020)](https://arxiv.org/pdf/2005.00052.pdf) places an adapter layer only after the feed-forward block in each Transformer layer.
- [`ParBnConfig`](adapters.ParBnConfig), as proposed by [He et al. (2021)](https://arxiv.org/pdf/2110.04366.pdf) places adapter layers in parallel to the original Transformer layers.

+ - [`AdapterPlusConfig`](adapters.AdapterPlusConfig), as proposed by [Steitz and Roth (2024)](https://arxiv.org/pdf/2406.06820), places an adapter layer only after the multi-head attention block and adds channel-wise scaling and Houlsby weight initialization (a usage sketch follows this file's diff).
_Example_:
```python
from adapters import BnConfig
@@ -56,6 +56,7 @@ _Papers:_
* [Parameter-Efficient Transfer Learning for NLP](https://arxiv.org/pdf/1902.00751.pdf) (Houlsby et al., 2019)
* [Simple, Scalable Adaptation for Neural Machine Translation](https://arxiv.org/pdf/1909.08478.pdf) (Bapna and Firat, 2019)
* [AdapterFusion: Non-Destructive Task Composition for Transfer Learning](https://aclanthology.org/2021.eacl-main.39.pdf) (Pfeiffer et al., 2021)
+ * [Adapters Strike Back](https://arxiv.org/pdf/2406.06820) (Steitz and Roth, 2024)
* [AdapterHub: A Framework for Adapting Transformers](https://arxiv.org/pdf/2007.07779.pdf) (Pfeiffer et al., 2020)

## Language Adapters - Invertible Adapters
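For orientation, here is a minimal usage sketch of the `AdapterPlusConfig` documented above. The model checkpoint is illustrative; the calls follow the `adapters` API that this commit itself uses (e.g. `adapters.init` in run_qa.py):

```python
import adapters
from adapters import AdapterPlusConfig
from transformers import AutoModel

model = AutoModel.from_pretrained("roberta-base")
adapters.init(model)  # make the plain Transformers model adapter-capable

# AdapterPlus: a bottleneck adapter placed after multi-head attention,
# with channel-wise scaling and Houlsby weight initialization
model.add_adapter("bottleneck_plus", config=AdapterPlusConfig())
model.train_adapter("bottleneck_plus")  # freeze base weights, train only the adapter
```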
168 changes: 114 additions & 54 deletions examples/pytorch/language-modeling/run_clm.py

Large diffs are not rendered by default.

193 changes: 136 additions & 57 deletions examples/pytorch/language-modeling/run_mlm.py

Large diffs are not rendered by default.

17 changes: 10 additions & 7 deletions examples/pytorch/multiple-choice/run_swag.py
@@ -45,11 +45,11 @@
)
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer_utils import get_last_checkpoint
- from transformers.utils import PaddingStrategy, check_min_version
+ from transformers.utils import PaddingStrategy, check_min_version, send_example_telemetry


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.26.0")
check_min_version("4.44.0")

logger = logging.getLogger(__name__)

@@ -94,9 +94,9 @@ class ModelArguments:
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
" should only be set to `True` for repositories you trust and in which you have read the code, as it"
" will execute code present on the Hub on your local machine."
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
)
@@ -239,6 +239,10 @@ def main():
else:
model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses()

+     # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
+     # information sent is the one passed as arguments along with your Python/PyTorch versions.
+     send_example_telemetry("run_swag", model_args, data_args)

# Setup logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -260,8 +264,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+ f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training:"
f" {training_args.fp16}"
+ f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

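Several of these example scripts share the four-dataclass argument-parsing pattern visible above. Here is a minimal, self-contained sketch of it; the `ModelArguments` dataclass below is a stripped-down stand-in for the ones in the scripts:

```python
from dataclasses import dataclass, field

from adapters import AdapterArguments
from transformers import HfArgumentParser, TrainingArguments


@dataclass
class ModelArguments:
    # Stripped-down stand-in for the ModelArguments dataclass in the example scripts
    model_name_or_path: str = field(metadata={"help": "Path to pretrained model or model identifier."})


parser = HfArgumentParser((ModelArguments, TrainingArguments, AdapterArguments))
# Populates each dataclass from command-line flags, e.g.
#   python script.py --model_name_or_path bert-base-uncased --output_dir out --train_adapter
model_args, training_args, adapter_args = parser.parse_args_into_dataclasses()
```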
80 changes: 59 additions & 21 deletions examples/pytorch/question-answering/run_qa.py
@@ -21,6 +21,7 @@
import logging
import os
import sys
+ import warnings
from dataclasses import dataclass, field
from typing import Optional

@@ -45,13 +46,13 @@
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint
- from transformers.utils import check_min_version
+ from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version
from utils_qa import postprocess_qa_predictions


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.26.0")
check_min_version("4.44.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

@@ -81,12 +82,22 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
- use_auth_token: bool = field(
+ token: str = field(
+     default=None,
+     metadata={
+         "help": (
+             "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+             "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+         )
+     },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script "
"with private models)."
"Whether to trust the execution of code from datasets/models defined on the Hub."
" This option should only be set to `True` for repositories you trust and in which you have read the"
" code, as it will execute code present on the Hub on your local machine."
)
},
)
@@ -231,13 +242,21 @@ def main():
else:
model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses()

+     # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
+     # information sent is the one passed as arguments along with your Python/PyTorch versions.
+     send_example_telemetry("run_qa", model_args, data_args)

# Setup logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)

+     if training_args.should_log:
+         # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+         transformers.utils.logging.set_verbosity_info()

log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -247,8 +266,8 @@ def main():

# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+ f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

@@ -285,7 +304,8 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
-         use_auth_token=True if model_args.use_auth_token else None,
+         token=model_args.token,
+         trust_remote_code=model_args.trust_remote_code,
)
else:
data_files = {}
Expand All @@ -304,10 +324,10 @@ def main():
data_files=data_files,
field="data",
cache_dir=model_args.cache_dir,
-         use_auth_token=True if model_args.use_auth_token else None,
+         token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
# https://huggingface.co/docs/datasets/loading_datasets.

# Load pretrained model and tokenizer
#
@@ -318,25 +338,27 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
-         use_auth_token=True if model_args.use_auth_token else None,
+         token=model_args.token,
+         trust_remote_code=model_args.trust_remote_code,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
-         use_auth_token=True if model_args.use_auth_token else None,
+         token=model_args.token,
+         trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForQuestionAnswering.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
-         use_auth_token=True if model_args.use_auth_token else None,
+         token=model_args.token,
+         trust_remote_code=model_args.trust_remote_code,
)

# Convert the model into an adapter model
adapters.init(model)

# Tokenizer check: this script requires a fast tokenizer.
@@ -348,7 +370,7 @@ def main():
)

# Preprocessing the datasets.
-     # Preprocessing is slighlty different for training and evaluation.
+     # Preprocessing is slightly different for training and evaluation.
if training_args.do_train:
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:
@@ -364,7 +386,7 @@

if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -404,7 +426,12 @@ def prepare_train_features(examples):
for i, offsets in enumerate(offset_mapping):
# We will label impossible answers with the index of the CLS token.
input_ids = tokenized_examples["input_ids"][i]
-             cls_index = input_ids.index(tokenizer.cls_token_id)
+             if tokenizer.cls_token_id in input_ids:
+                 cls_index = input_ids.index(tokenizer.cls_token_id)
+             elif tokenizer.bos_token_id in input_ids:
+                 cls_index = input_ids.index(tokenizer.bos_token_id)
+             else:
+                 cls_index = 0

# Grab the sequence corresponding to that example (to know what is the context and what is the question).
sequence_ids = tokenized_examples.sequence_ids(i)
@@ -589,21 +616,32 @@ def post_processing_function(examples, features, predictions, stage="eval"):
# Format the result to the format the metric expects.
if data_args.version_2_with_negative:
formatted_predictions = [
{"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
{"id": str(k), "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
]
else:
formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]
formatted_predictions = [{"id": str(k), "prediction_text": v} for k, v in predictions.items()]

references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
references = [{"id": str(ex["id"]), "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-     metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
+     if data_args.version_2_with_negative:
+         accepted_best_metrics = ("exact", "f1", "HasAns_exact", "HasAns_f1")
+     else:
+         accepted_best_metrics = ("exact_match", "f1")
+
+     if training_args.load_best_model_at_end and training_args.metric_for_best_model not in accepted_best_metrics:
+         warnings.warn(f"--metric_for_best_model should be set to one of {accepted_best_metrics}")
+
+     metric = evaluate.load(
+         "squad_v2" if data_args.version_2_with_negative else "squad", cache_dir=model_args.cache_dir
+     )

def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)

# Setup adapters
setup_adapter_training(model, adapter_args, data_args.dataset_name or "squad")

# Initialize our Trainer
trainer_class = QuestionAnsweringAdapterTrainer if adapter_args.train_adapter else QuestionAnsweringTrainer
trainer = trainer_class(
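To clarify the adapter-specific wiring at the end of run_qa.py, here is a minimal sketch of what happens when the script is run with `--train_adapter`. This is hypothetical standalone usage, assuming `setup_adapter_training` and `AdapterArguments` are imported from `adapters` as in these examples; the checkpoint name is illustrative:

```python
import adapters
from adapters import AdapterArguments, setup_adapter_training
from transformers import AutoModelForQuestionAnswering

model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")
adapters.init(model)  # mirrors the adapters.init(model) call in run_qa.py

# Equivalent to passing --train_adapter on the command line
adapter_args = AdapterArguments(train_adapter=True)

# Adds and activates an adapter named "squad" and freezes the base model;
# run_qa.py then selects QuestionAnsweringAdapterTrainer so only adapter
# (and head) weights are updated during training.
setup_adapter_training(model, adapter_args, "squad")
```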