added llama-factory under llm_rl (#87)
(cherry picked from commit a74862e)
Jasonqi146 authored and lwaekfjlk committed Nov 7, 2023
1 parent 2c8c6b0 commit 523975c
Showing 12 changed files with 173 additions and 98 deletions.
162 changes: 161 additions & 1 deletion .gitignore
@@ -5,6 +5,13 @@ __pycache__
dist
.venv

# Byte-compiled / optimized / DLL files
*.py[cod]
*$py.class

# C extensions
*.so

# Log
*.log
*.log.*
@@ -33,4 +40,157 @@ tests/state_of_the_union.txt

# Build
build
!dummy_file
!dummy_file

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
9 changes: 1 addition & 8 deletions llm_rl/requirements.txt
@@ -1,11 +1,9 @@
packaging
wheel
torch>=1.13.1
transformers>=4.31.0,<4.35.0
datasets>=2.12.0
accelerate>=0.21.0
peft>=0.4.0
trl==0.7.2
trl>=0.7.2
gradio>=3.38.0,<4.0.0
scipy
sentencepiece
@@ -20,8 +18,3 @@ pydantic
fastapi
sse-starlette
matplotlib
py-cpuinfo
deepspeed
bitsandbytes>=0.39.0
flash-attn
wandb
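
Since the trl pin is loosened from an exact match to a lower bound, a quick environment check can confirm the installed version still satisfies it. A minimal sketch (not part of the repo), reusing the packaging dependency already listed above:

import trl
from packaging import version

# Illustrative check that the loosened requirement (trl>=0.7.2) is met locally.
assert version.parse(trl.__version__) >= version.parse("0.7.2"), trl.__version__
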
16 changes: 7 additions & 9 deletions llm_rl/reward_model.sh
@@ -1,23 +1,21 @@
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
python src/train_bash.py \
--stage rm \
--model_name_or_path meta-llama/Llama-2-13b-hf \
--model_name_or_path meta-llama/Llama-2-13b \
--do_train \
--dataset comparison_gpt4_en \
--template default \
--finetuning_type lora \
--lora_target q_proj,v_proj \
--resume_lora_training False \
--output_dir ./llama-2-13b-rm_cache \
--per_device_train_batch_size 8 \
--gradient_accumulation_steps 8 \
--checkpoint_dir ./llama-2-13b-rm \
--output_dir ./llama-2-13b-rm \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
--learning_rate 1e-6 \
--num_train_epochs 1.0 \
--plot_loss \
--fp16 \
--use_auth_token True \
--wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
--hf_auth_token "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" \
--deepspeed ./deepspeed_config_s2.json
--hf_auth_token "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"
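
As a rough arithmetic check on the new batch settings (per device, ignoring data parallelism, since the DeepSpeed config and the CUDA_VISIBLE_DEVICES pinning are dropped here), a sketch:

# Effective examples per optimizer step, per device (illustrative only).
old_effective_batch = 8 * 8   # per_device_train_batch_size * gradient_accumulation_steps = 64
new_effective_batch = 2 * 4   # = 8
print(old_effective_batch, new_effective_batch)
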
13 changes: 0 additions & 13 deletions llm_rl/src/llmtuner/extras/template.py
@@ -499,19 +499,6 @@ def get_template_and_fix_tokenizer(
sep=[]
)

register_template(
name="llama2-sotopia",
prefix=[
"{{system}}"
],
prompt=[
"[INST] {{query}} [/INST]"
],
system=(
""
),
sep=[]
)

r"""
Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
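
If a downstream script still depends on the deleted llama2-sotopia template, it can be re-registered locally with the same call that was removed above. A sketch, assuming register_template can be imported from llmtuner.extras.template (the module this diff edits):

from llmtuner.extras.template import register_template

# Re-create the template removed in this commit (copied from the deleted block above).
register_template(
    name="llama2-sotopia",
    prefix=["{{system}}"],
    prompt=["[INST] {{query}} [/INST]"],
    system="",
    sep=[]
)
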
17 changes: 0 additions & 17 deletions llm_rl/src/llmtuner/hparams/finetuning_args.py
@@ -1,4 +1,3 @@
from typing import List
import json
from typing import Literal, Optional
from dataclasses import asdict, dataclass, field
@@ -52,10 +51,6 @@ class FinetuningArguments:
Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
)
lora_bias: Optional[str] = field(
default="none",
metadata={"help": "The lora_bias option from bitsandbytes."}
)
additional_target: Optional[str] = field(
default=None,
metadata={"help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."}
@@ -88,18 +83,6 @@ class FinetuningArguments:
default=0,
metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."}
)
wandb_token: Optional[str] = field(
default=None,
metadata={"help": "The login api token for wandb."}
)
wandb_project: Optional[str] = field(
default=None,
metadata={"help": "The project name for the current wandb log."}
)
wandb_tags: Optional[List[str]] = field(
default=None,
metadata={"help": "The tag for the current wandb run."}
)

def __post_init__(self):
if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA
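
With the wandb_token, wandb_project, and wandb_tags fields removed, the usual alternative is to configure Weights & Biases through its standard environment variables before launching training. A minimal sketch (the variable names are standard wandb settings, not part of this repo; values are placeholders):

import os

# Standard wandb environment variables, set before the trainer initializes its loggers.
os.environ["WANDB_API_KEY"] = "<your-wandb-api-key>"   # replaces the removed --wandb_token
os.environ["WANDB_PROJECT"] = "llm_rl-reward-model"    # replaces the removed --wandb_project (name is illustrative)
os.environ["WANDB_TAGS"] = "rm,lora"                   # replaces the removed --wandb_tags (comma-separated)
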
22 changes: 1 addition & 21 deletions llm_rl/src/llmtuner/hparams/model_args.py
@@ -1,4 +1,3 @@
from torch import float16, bfloat16, float32
from typing import Literal, Optional
from dataclasses import dataclass, field

@@ -75,21 +74,9 @@ class ModelArguments:
default=None,
metadata={"help": "Path to the directory to save the exported model."}
)
qlora_compute_dtype: Optional[str] = field(
default="fp32",
metadata={"help": "The compute_dtype option from bitsandbytes."}
)
use_custom_callback: Optional[bool] = field(
default=False,
metadata={"help": "Whether enable custom callbacks."}
)
call_back_save_epochs: Optional[int] = field(
default=1,
metadata={"help": "The number of epochs before each call back save."}
)


def __post_init__(self):
self.compute_dtype = None
self.model_max_length = None

if self.split_special_tokens and self.use_fast_tokenizer:
@@ -104,10 +91,3 @@ def __post_init__(self):
if self.use_auth_token == True and self.hf_auth_token is not None:
from huggingface_hub.hf_api import HfFolder # lazy load
HfFolder.save_token(self.hf_auth_token)

if self.qlora_compute_dtype == "bf16":
self.compute_dtype = bfloat16
elif self.qlora_compute_dtype == "fp16":
self.compute_dtype = float16
else:
self.compute_dtype = float32
1 change: 0 additions & 1 deletion llm_rl/src/llmtuner/tuner/core/adapter.py
@@ -89,7 +89,6 @@ def init_adapter(
lora_alpha=finetuning_args.lora_alpha,
lora_dropout=finetuning_args.lora_dropout,
target_modules=target_modules,
bias=finetuning_args.lora_bias,
modules_to_save=finetuning_args.additional_target
)
model = get_peft_model(model, lora_config)
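
For reference, peft's LoraConfig defaults bias to "none", which matches the default of the removed lora_bias field, so dropping the explicit argument keeps the previous default behavior. A standalone sketch of the same LoRA setup (model name and hyperparameter values are illustrative, not this repo's defaults):

from transformers import AutoModelForCausalLM
from peft import LoraConfig, TaskType, get_peft_model

# Illustrative values; the gated Llama-2 checkpoint requires Hugging Face access.
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-13b-hf")
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],   # matches --lora_target in reward_model.sh
    modules_to_save=None,                  # bias no longer passed; peft's default is "none"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
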
1 change: 0 additions & 1 deletion llm_rl/src/llmtuner/tuner/core/loader.py
@@ -74,7 +74,6 @@ def load_model_and_tokenizer(
padding_side="right", # training with left-padded tensors in fp16 precision may cause overflow
**config_kwargs
)
tokenizer.pad_token = tokenizer.unk_token

if finetuning_args.finetuning_type != "lora" and model_args.checkpoint_dir is not None:
model_to_load = model_args.checkpoint_dir[0]
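
With the unconditional pad-token assignment removed, a tokenizer that ships without a pad token has to be handled by the caller or by the collator's padding defaults. A common fallback, shown only as a hedged sketch and not as what this repo does after the change:

from transformers import AutoTokenizer

# Illustrative: Llama-2 tokenizers have no pad token by default (gated checkpoint; access required).
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", padding_side="right")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # or tokenizer.unk_token, as the deleted line did
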
8 changes: 1 addition & 7 deletions llm_rl/src/llmtuner/tuner/core/utils.py
@@ -9,16 +9,10 @@
from transformers.modeling_utils import PreTrainedModel
from llmtuner.hparams import FinetuningArguments

import torch.distributed as dist
import os

logger = get_logger(__name__)

def is_first_node():
world_rank = dist.get_rank() if torch.distributed.is_initialized() else 0
local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
return world_rank == local_rank == 0


def find_all_linear_modules(
model: "PreTrainedModel",
quantization_bit: Optional[int] = None,
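
Callers that relied on the deleted is_first_node helper can reproduce the same check with torch.distributed directly. A sketch equivalent to the removed function (no longer part of the repo after this commit):

import os
import torch.distributed as dist

def is_first_node() -> bool:
    # Same logic as the removed helper: rank 0 on the first node is the main process.
    world_rank = dist.get_rank() if dist.is_initialized() else 0
    local_rank = int(os.environ.get("LOCAL_RANK", 0))
    return world_rank == 0 and local_rank == 0
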
2 changes: 1 addition & 1 deletion llm_rl/src/llmtuner/tuner/rm/trainer.py
@@ -38,7 +38,7 @@ def compute_loss(
See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
"""
# Compute rewards
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True) # (lm_logits, loss, value)
_, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2
values = torch.transpose(values, 0, 1)

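
For context on how the value-head outputs above typically become a training signal: a pairwise reward model scores the chosen and rejected responses and minimizes a Bradley-Terry style ranking loss. The following is an illustrative, self-contained sketch of that objective, not the repository's exact compute_loss implementation:

import torch
import torch.nn.functional as F

# Toy rewards for three chosen/rejected pairs (illustrative numbers only).
chosen_rewards = torch.tensor([1.3, 0.2, 0.9])
rejected_rewards = torch.tensor([0.4, 0.5, -0.1])

# Pairwise ranking loss: push each chosen reward above its rejected counterpart.
loss = -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
print(float(loss))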