diff --git a/.gitignore b/.gitignore
index 3e563d1d..83339037 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,13 @@
 __pycache__
 dist
 .venv
+# Byte-compiled / optimized / DLL files
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
 # Log
 *.log
 *.log.*
@@ -33,4 +40,157 @@
 tests/state_of_the_union.txt
 # Build
 build
-!dummy_file
\ No newline at end of file
+!dummy_file
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/llm_rl/requirements.txt b/llm_rl/requirements.txt
index 17482bd2..840d2f2d 100644
--- a/llm_rl/requirements.txt
+++ b/llm_rl/requirements.txt
@@ -1,11 +1,9 @@
-packaging
-wheel
 torch>=1.13.1
 transformers>=4.31.0,<4.35.0
 datasets>=2.12.0
 accelerate>=0.21.0
 peft>=0.4.0
-trl==0.7.2
+trl>=0.7.2
 gradio>=3.38.0,<4.0.0
 scipy
 sentencepiece
@@ -20,8 +18,3 @@ pydantic
 fastapi
 sse-starlette
 matplotlib
-py-cpuinfo
-deepspeed
-bitsandbytes>=0.39.0
-flash-attn
-wandb
\ No newline at end of file
diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh
index 873b07a5..3068fb43 100644
--- a/llm_rl/reward_model.sh
+++ b/llm_rl/reward_model.sh
@@ -1,15 +1,16 @@
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+python src/train_bash.py \
     --stage rm \
-    --model_name_or_path meta-llama/Llama-2-13b-hf \
+    --model_name_or_path meta-llama/Llama-2-13b \
     --do_train \
     --dataset comparison_gpt4_en \
     --template default \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --output_dir ./llama-2-13b-rm_cache \
-    --per_device_train_batch_size 8 \
-    --gradient_accumulation_steps 8 \
+    --checkpoint_dir ./llama-2-13b-rm \
+    --output_dir ./llama-2-13b-rm \
+    --per_device_train_batch_size 2 \
+    --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine \
     --logging_steps 10 \
     --save_steps 1000 \
@@ -17,7 +18,4 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --num_train_epochs 1.0 \
     --plot_loss \
     --fp16 \
-    --use_auth_token True \
-    --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
-    --hf_auth_token "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" \
-    --deepspeed ./deepspeed_config_s2.json
+    --hf_auth_token "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"
\ No newline at end of file
diff --git a/llm_rl/src/llmtuner/extras/template.py b/llm_rl/src/llmtuner/extras/template.py
index 508c2ce0..401750ce 100644
--- a/llm_rl/src/llmtuner/extras/template.py
+++ b/llm_rl/src/llmtuner/extras/template.py
@@ -499,19 +499,6 @@ def get_template_and_fix_tokenizer(
     sep=[]
 )
 
-register_template(
-    name="llama2-sotopia",
-    prefix=[
-        "{{system}}"
-    ],
-    prompt=[
-        "[INST] {{query}} [/INST]"
-    ],
-    system=(
-        ""
-    ),
-    sep=[]
-)
 
 r"""
 Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
diff --git a/llm_rl/src/llmtuner/hparams/finetuning_args.py b/llm_rl/src/llmtuner/hparams/finetuning_args.py
index 4479ae6b..d5ef323d 100644
--- a/llm_rl/src/llmtuner/hparams/finetuning_args.py
+++ b/llm_rl/src/llmtuner/hparams/finetuning_args.py
@@ -1,4 +1,3 @@
-from typing import List
 import json
 from typing import Literal, Optional
 from dataclasses import asdict, dataclass, field
@@ -52,10 +51,6 @@ class FinetuningArguments:
                     Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
                     LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
     )
-    lora_bias: Optional[str] = field(
-        default="none",
-        metadata={"help": "The lora_bias option from bitsandbytes."}
-    )
     additional_target: Optional[str] = field(
         default=None,
         metadata={"help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."}
@@ -88,18 +83,6 @@ class FinetuningArguments:
         default=0,
         metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."}
     )
-    wandb_token: Optional[str] = field(
-        default=None,
-        metadata={"help": "The login api token for wandb."}
-    )
-    wandb_project: Optional[str] = field(
-        default=None,
-        metadata={"help": "The project name for the current wandb log."}
-    )
-    wandb_tags: Optional[List[str]] = field(
-        default=None,
-        metadata={"help": "The tag for the current wandb run."}
-    )
 
     def __post_init__(self):
         if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA
diff --git a/llm_rl/src/llmtuner/hparams/model_args.py b/llm_rl/src/llmtuner/hparams/model_args.py
index 531e37a6..7c25fad1 100644
--- a/llm_rl/src/llmtuner/hparams/model_args.py
+++ b/llm_rl/src/llmtuner/hparams/model_args.py
@@ -1,4 +1,3 @@
-from torch import float16, bfloat16, float32
 from typing import Literal, Optional
 from dataclasses import dataclass, field
 
@@ -75,21 +74,9 @@ class ModelArguments:
         default=None,
         metadata={"help": "Path to the directory to save the exported model."}
     )
-    qlora_compute_dtype: Optional[str] = field(
-        default="fp32",
-        metadata={"help": "The compute_dtype option from bitsandbytes."}
-    )
-    use_custom_callback: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Whether enable custom callbacks."}
-    )
-    call_back_save_epochs: Optional[int] = field(
-        default=1,
-        metadata={"help": "The number of epochs before each call back save."}
-    )
-
 
     def __post_init__(self):
+        self.compute_dtype = None
         self.model_max_length = None
 
         if self.split_special_tokens and self.use_fast_tokenizer:
@@ -104,10 +91,3 @@ def __post_init__(self):
         if self.use_auth_token == True and self.hf_auth_token is not None:
             from huggingface_hub.hf_api import HfFolder # lazy load
             HfFolder.save_token(self.hf_auth_token)
-
-        if self.qlora_compute_dtype == "bf16":
-            self.compute_dtype = bfloat16
-        elif self.qlora_compute_dtype == "fp16":
-            self.compute_dtype = float16
-        else:
-            self.compute_dtype = float32
\ No newline at end of file
diff --git a/llm_rl/src/llmtuner/tuner/core/adapter.py b/llm_rl/src/llmtuner/tuner/core/adapter.py
index 22633d54..4fcc6e62 100644
--- a/llm_rl/src/llmtuner/tuner/core/adapter.py
+++ b/llm_rl/src/llmtuner/tuner/core/adapter.py
@@ -89,7 +89,6 @@ def init_adapter(
                 lora_alpha=finetuning_args.lora_alpha,
                 lora_dropout=finetuning_args.lora_dropout,
                 target_modules=target_modules,
-                bias=finetuning_args.lora_bias,
                 modules_to_save=finetuning_args.additional_target
             )
             model = get_peft_model(model, lora_config)
diff --git a/llm_rl/src/llmtuner/tuner/core/loader.py b/llm_rl/src/llmtuner/tuner/core/loader.py
index 2df9baf1..e77c4945 100644
--- a/llm_rl/src/llmtuner/tuner/core/loader.py
+++ b/llm_rl/src/llmtuner/tuner/core/loader.py
@@ -74,7 +74,6 @@ def load_model_and_tokenizer(
         padding_side="right", # training with left-padded tensors in fp16 precision may cause overflow
         **config_kwargs
     )
-    tokenizer.pad_token = tokenizer.unk_token
 
     if finetuning_args.finetuning_type != "lora" and model_args.checkpoint_dir is not None:
         model_to_load = model_args.checkpoint_dir[0]
diff --git a/llm_rl/src/llmtuner/tuner/core/utils.py b/llm_rl/src/llmtuner/tuner/core/utils.py
index 03043e20..d9a1aac9 100644
--- a/llm_rl/src/llmtuner/tuner/core/utils.py
+++ b/llm_rl/src/llmtuner/tuner/core/utils.py
@@ -9,16 +9,10 @@
     from transformers.modeling_utils import PreTrainedModel
     from llmtuner.hparams import FinetuningArguments
 
-import torch.distributed as dist
-import os
 
 logger = get_logger(__name__)
 
-def is_first_node():
-    world_rank = dist.get_rank() if torch.distributed.is_initialized() else 0
-    local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
-    return world_rank == local_rank == 0
-
+
 def find_all_linear_modules(
     model: "PreTrainedModel",
     quantization_bit: Optional[int] = None,
diff --git a/llm_rl/src/llmtuner/tuner/rm/trainer.py b/llm_rl/src/llmtuner/tuner/rm/trainer.py
index 94549f18..80502937 100644
--- a/llm_rl/src/llmtuner/tuner/rm/trainer.py
+++ b/llm_rl/src/llmtuner/tuner/rm/trainer.py
@@ -38,7 +38,7 @@ def compute_loss(
         See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
         """
         # Compute rewards
-        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) # (lm_logits, loss, value)
+        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
         if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2
             values = torch.transpose(values, 0, 1)
 
diff --git a/llm_rl/src/llmtuner/tuner/sft/workflow.py b/llm_rl/src/llmtuner/tuner/sft/workflow.py
index 332ceff6..8d53605d 100644
--- a/llm_rl/src/llmtuner/tuner/sft/workflow.py
+++ b/llm_rl/src/llmtuner/tuner/sft/workflow.py
@@ -10,8 +10,6 @@
 from llmtuner.tuner.core import load_model_and_tokenizer
 from llmtuner.tuner.sft.metric import ComputeMetrics
 from llmtuner.tuner.sft.trainer import CustomSeq2SeqTrainer
-from llmtuner.tuner.core.utils import is_first_node
-from llmtuner.tuner.sft.custom_callback import SaveModelCallback
 
 if TYPE_CHECKING:
     from transformers import TrainerCallback
@@ -29,11 +27,6 @@ def run_sft(
     dataset = get_dataset(model_args, data_args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft")
     dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="sft")
-
-    if training_args.gradient_checkpointing:
-        model.enable_input_require_grads()
-        model.gradient_checkpointing_enable()
-        model.config.use_cache = False
 
     if training_args.predict_with_generate:
         tokenizer.padding_side = "left" # use left-padding in generation
@@ -51,11 +44,6 @@ def run_sft(
         generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams
     ))
     training_args = Seq2SeqTrainingArguments(**training_args_dict)
-    if is_first_node():
-        training_args.report_to = ["wandb"]
-
-    if model_args.use_custom_callback:
-        callbacks.append(SaveModelCallback(model_args.call_back_save_epochs, training_args.output_dir))
 
     # Initialize our Trainer
     trainer = CustomSeq2SeqTrainer(
diff --git a/llm_rl/src/llmtuner/tuner/tune.py b/llm_rl/src/llmtuner/tuner/tune.py
index 054a6b1c..4eb7f78f 100644
--- a/llm_rl/src/llmtuner/tuner/tune.py
+++ b/llm_rl/src/llmtuner/tuner/tune.py
@@ -9,9 +9,6 @@
 from llmtuner.tuner.ppo import run_ppo
 from llmtuner.tuner.dpo import run_dpo
 
-from llmtuner.tuner.core.utils import is_first_node
-import wandb
-
 if TYPE_CHECKING:
     from transformers import TrainerCallback
 
@@ -22,10 +19,7 @@
 def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None):
     model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args)
     callbacks = [LogCallback()] if callbacks is None else callbacks
-    if is_first_node():
-        wandb.login(key=finetuning_args.wandb_token)
-        wandb.init(project=finetuning_args.wandb_project, tags=[*finetuning_args.wandb_tags] if finetuning_args.wandb_tags else None)
-
+
     if finetuning_args.stage == "pt":
         run_pt(model_args, data_args, training_args, finetuning_args, callbacks)
     elif finetuning_args.stage == "sft":