support llama2 13b train and inference pipeline in fastchat (#73)
* support qlora mistral training

* added DeepSpeed to requirements

* temporary save for switching disk region

* added shuffle and access token

* finished training pipeline; need to fix inference

* finished training pipeline; need to fix inference

* fixed inference pipeline

* committing to test deepspeed

* added feature to remove sequences longer than 2048

* try to merge

* minor changes

* minor changes

---------

Co-authored-by: lwaekfjlk <1125027232@qq.com>
Co-authored-by: zqi2cmu <zqi2@andrew.cmu.edu>
3 people authored Oct 25, 2023
1 parent 1e44e74 commit 0e8d939
Showing 14 changed files with 140 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -33,4 +33,4 @@ tests/state_of_the_union.txt

# Build
build
!dummy_file
!dummy_file
12 changes: 12 additions & 0 deletions llm_ft/data/create_dummy.py
@@ -0,0 +1,12 @@
import json

dummy_qa = {"id": "", "conversations": [{"from": "human", "value": "How old is Haofei?"}, {"from": "gpt", "value": "He is one year old."}]}

res = []
for i in range(1000):
    new_qa = dict(dummy_qa)  # shallow copy; only the top-level "id" is rewritten below
    new_qa["id"] = f"identity_{i}"
    res.append(new_qa)

with open("./dummy_convs.json", "w") as f:
    json.dump(res, f, indent=4)
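For reference, the first generated record serializes as:

{
    "id": "identity_0",
    "conversations": [
        {"from": "human", "value": "How old is Haofei?"},
        {"from": "gpt", "value": "He is one year old."}
    ]
}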
28 changes: 28 additions & 0 deletions llm_ft/data/data_filter_out_long.py
@@ -0,0 +1,28 @@
import json
import transformers

INPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json"
OUTPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-no-long.json"
MODEL_CHECKPOINT = "meta-llama/Llama-2-13b-chat-hf"
HF_TOKEN = "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"

with open(INPUT_PATH, 'r') as f:
    data = json.load(f)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    MODEL_CHECKPOINT,
    padding=False,
    truncation=False,
    token=HF_TOKEN,
)

res = []
for d in data:
    # Keep a conversation only if every human turn fits within 2048 tokens.
    keep = True
    for conv in d['conversations']:
        if conv['from'] == "human":
            # tokenizer(...) returns a BatchEncoding; count tokens via .input_ids
            if len(tokenizer(conv['value']).input_ids) > 2048:
                keep = False
                break
    if keep:
        res.append(d)

with open(OUTPUT_PATH, 'w') as f:
    json.dump(res, f, indent=4)
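A quick way to confirm the filter's effect after running it (a sketch, not part of the commit, assuming both JSON files sit in the working directory):

import json

with open("fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json") as f:
    before = json.load(f)
with open("fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-no-long.json") as f:
    after = json.load(f)
print(f"kept {len(after)} of {len(before)} conversations")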
16 changes: 16 additions & 0 deletions llm_ft/data/data_keep_only_speak.py
@@ -0,0 +1,16 @@
import json

INPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial.json"
OUTPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json"

with open(INPUT_PATH, 'r') as f:
    data = json.load(f)

res = []
for d in data:
    # Keep each conversation at most once: it qualifies if any gpt turn
    # carries a 'speak' action.
    if any(
        conv['from'] == "gpt" and "'action_type': 'speak'" in conv['value']
        for conv in d['conversations']
    ):
        res.append(d)

with open(OUTPUT_PATH, 'w') as f:
    json.dump(res, f, indent=4)
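The substring test above assumes each gpt turn stores a stringified action dict in its value field. A hypothetical turn that would qualify a conversation (the real data's exact fields may differ):

conv = {
    "from": "gpt",
    "value": "{'action_type': 'speak', 'argument': 'Hi, nice to meet you!'}",  # hypothetical example
}
assert conv["from"] == "gpt" and "'action_type': 'speak'" in conv["value"]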
8 changes: 7 additions & 1 deletion llm_ft/fastchat/model/model_adapter.py
@@ -60,10 +60,12 @@ def load_model(self, model_path: str, from_pretrained_kwargs: dict):
            use_fast=self.use_fast_tokenizer,
            revision=revision,
            trust_remote_code=True,
            token=from_pretrained_kwargs.get("token"),
        )
    except TypeError:
        tokenizer = AutoTokenizer.from_pretrained(
            model_path, use_fast=False, revision=revision, trust_remote_code=True
            model_path, use_fast=False, revision=revision, trust_remote_code=True,
            token=from_pretrained_kwargs.get("token"),
        )
    try:
        model = AutoModelForCausalLM.from_pretrained(
@@ -154,6 +156,7 @@ def load_model(
    awq_config: Optional[AWQConfig] = None,
    revision: str = "main",
    debug: bool = False,
    hf_access_token: Optional[str] = None,
):
    """Load a model from Hugging Face."""
    # get model adapter
@@ -280,6 +283,9 @@ def load_model(

    if dtype is not None:  # Overwrite dtype if it is provided in the arguments.
        kwargs["torch_dtype"] = dtype

    if hf_access_token:
        kwargs["token"] = hf_access_token

    # Load model
    model, tokenizer = adapter.load_model(model_path, kwargs)
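With this change a caller can thread a Hugging Face token through load_model. A minimal sketch, assuming the remaining keyword arguments keep their FastChat defaults:

from fastchat.model.model_adapter import load_model

# hf_access_token is copied into kwargs["token"] and forwarded to
# from_pretrained, which gated checkpoints such as Llama-2-13b-chat-hf require.
model, tokenizer = load_model(
    "meta-llama/Llama-2-13b-chat-hf",
    device="cuda",
    revision="main",
    debug=False,
    hf_access_token="hf_...",  # your own token
)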
4 changes: 4 additions & 0 deletions llm_ft/fastchat/serve/cli.py
@@ -236,6 +236,7 @@ def main(args):
            judge_sent_end=args.judge_sent_end,
            debug=args.debug,
            history=not args.no_history,
            hf_access_token=args.hf_access_token,
        )
    except KeyboardInterrupt:
        print("exit...")
@@ -281,5 +282,8 @@ def main(args):
        action="store_true",
        help="Print useful debug information (e.g., prompts)",
    )
    parser.add_argument(
        "--hf-access-token", type=str, default=None, help="Optional access token for Hugging Face."
    )
    args = parser.parse_args()
    main(args)
2 changes: 2 additions & 0 deletions llm_ft/fastchat/serve/inference.py
@@ -308,6 +308,7 @@ def chat_loop(
    judge_sent_end: bool = True,
    debug: bool = True,
    history: bool = True,
    hf_access_token: Optional[str] = None,
):
    # Model
    model, tokenizer = load_model(
@@ -322,6 +323,7 @@
        awq_config=awq_config,
        revision=revision,
        debug=debug,
        hf_access_token=hf_access_token,
    )
    generate_stream_func = get_generate_stream_function(model, model_path)

32 changes: 25 additions & 7 deletions llm_ft/fastchat/train/train.py
@@ -18,6 +18,7 @@
import json
import math
import pathlib
import random
from typing import Dict, Optional, Sequence

import numpy as np
@@ -36,6 +37,7 @@
@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    hf_access_token: Optional[str] = field(default=None)


@dataclass
@@ -47,6 +49,8 @@ class DataArguments:
        default=None, metadata={"help": "Path to the evaluation data."}
    )
    lazy_preprocess: bool = False
    shuffle: bool = True
    abort_long_seq: bool = False


@dataclass
@@ -83,6 +87,7 @@ def trainer_save_model_safe(trainer: transformers.Trainer):
def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
    abort_long_seq: bool = False,
) -> Dict:
    conv = get_conversation_template("vicuna")
    roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
@@ -100,7 +105,15 @@
            assert role == conv.roles[j % 2], f"{i}"
            conv.append_message(role, sentence["value"])
        conversations.append(conv.get_prompt())

    if abort_long_seq:
        # Drop any conversation whose full prompt exceeds the tokenizer's max length.
        new_conversations = []
        for temp_conv in conversations:
            token_len = tokenizer(
                temp_conv, return_tensors="pt", padding=False, truncation=False
            ).input_ids.size(1)
            if token_len <= tokenizer.model_max_length:
                new_conversations.append(temp_conv)
        conversations = new_conversations
        print(
            f"Aborted conversations longer than {tokenizer.model_max_length} tokens; "
            f"{len(conversations)} conversations remain"
        )

    # Tokenize conversations
    input_ids = tokenizer(
        conversations,
@@ -151,7 +164,6 @@ def preprocess(
f"WARNING: tokenization mismatch: {cur_len} vs. {total_len}."
f" (ignored)"
)

return dict(
input_ids=input_ids,
labels=targets,
@@ -162,12 +174,12 @@
class SupervisedDataset(Dataset):
"""Dataset for supervised fine-tuning."""

def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer):
def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer, abort_long_seq: bool = False):
super(SupervisedDataset, self).__init__()

rank0_print("Formatting inputs...")
sources = [example["conversations"] for example in raw_data]
data_dict = preprocess(sources, tokenizer)
data_dict = preprocess(sources, tokenizer, abort_long_seq=abort_long_seq)

self.input_ids = data_dict["input_ids"]
self.labels = data_dict["labels"]
@@ -187,9 +199,10 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]:
class LazySupervisedDataset(Dataset):
"""Dataset for supervised fine-tuning."""

def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer):
def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer, abort_long_seq: bool = False):
super(LazySupervisedDataset, self).__init__()
self.tokenizer = tokenizer
self.abort_long_seq = abort_long_seq

rank0_print("Formatting inputs...Skip in lazy mode")
self.tokenizer = tokenizer
@@ -224,13 +237,18 @@ def make_supervised_data_module(
rank0_print("Loading data...")

train_json = json.load(open(data_args.data_path, "r"))
train_dataset = dataset_cls(train_json, tokenizer=tokenizer)
if data_args.shuffle: random.shuffle(train_json)

train_dataset = dataset_cls(train_json, tokenizer=tokenizer, abort_long_seq = data_args.abort_long_seq)

if data_args.eval_data_path:
eval_json = json.load(open(data_args.eval_data_path, "r"))
eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer)
if data_args.shuffle: random.shuffle(train_json)

eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer, abort_long_seq = data_args.abort_long_seq)
else:
eval_dataset = None


return dict(train_dataset=train_dataset, eval_dataset=eval_dataset)

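Because train.py parses its dataclasses with transformers.HfArgumentParser, the new fields surface directly as command-line flags. A minimal sketch under that assumption:

import transformers

# ModelArguments and DataArguments are the dataclasses shown above; flags such
# as --hf_access_token, --shuffle and --abort_long_seq parse straight into them.
parser = transformers.HfArgumentParser((ModelArguments, DataArguments))
model_args, data_args = parser.parse_args_into_dataclasses()
print(model_args.hf_access_token, data_args.shuffle, data_args.abort_long_seq)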
8 changes: 7 additions & 1 deletion llm_ft/fastchat/train/train_lora.py
@@ -20,6 +20,11 @@
import pathlib
import typing
import os
import sys

# Make the repository root importable when this file is run directly.
current_directory = os.path.dirname(os.path.abspath(__file__))
root_directory = os.path.join(current_directory, '..', '..')
sys.path.append(root_directory)

from deepspeed import zero
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
@@ -35,7 +40,6 @@
)



@dataclass
class TrainingArguments(transformers.TrainingArguments):
    cache_dir: typing.Optional[str] = field(default=None)
@@ -137,6 +141,7 @@ def train():
        )
        if lora_args.q_lora
        else None,
        token=model_args.hf_access_token or None,
    )
    lora_config = LoraConfig(
        r=lora_args.lora_r,
@@ -176,6 +181,7 @@ def train():
        model_max_length=training_args.model_max_length,
        padding_side="right",
        use_fast=False,
        token=model_args.hf_access_token or None,
    )
    tokenizer.pad_token = tokenizer.unk_token

1 change: 0 additions & 1 deletion llm_ft/inference.sh

This file was deleted.

1 change: 1 addition & 0 deletions llm_ft/llama2-13b_inference.sh
@@ -0,0 +1 @@
python3 -m fastchat.serve.cli --model-path ./checkpoint-shuffle/checkpoint-161 --hf-access-token "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" --conv-template "vicuna_v1.1"
36 changes: 36 additions & 0 deletions llm_ft/llama2-13b_qlora_train.sh
@@ -0,0 +1,36 @@
deepspeed --num_gpus=1 fastchat/train/train_lora.py \
    --model_name_or_path meta-llama/Llama-2-13b-chat-hf \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --data_path ./data/fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-drop-long.json \
    --shuffle True \
    --bf16 True \
    --output_dir ./checkpoint-shuffle-speak-drop-long \
    --num_train_epochs 4 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 32 \
    --evaluation_strategy "no" \
    --save_strategy "epoch" \
    --save_total_limit 6 \
    --learning_rate 5e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --model_max_length 2048 \
    --q_lora True \
    --deepspeed ./deepspeed_config_s2.json \
    --hf_access_token "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" \
    --tf32 True \
    --flash_attn True \
    --abort_long_seq True

# Possible other options (not already enabled above):
# --save_strategy "steps" \
# --save_steps 1200 \
# --lazy_preprocess True \
Empty file removed llm_ft/vicuna-7b-1.5/dummy_file
1 change: 1 addition & 0 deletions requirements.txt
@@ -16,3 +16,4 @@ datasets
names
together
pydantic==1.10.12
deepspeed