support llama2 13b train and inference pipeline in fastchat (#73)
* support qlora mistral training

* added DeepSpeed to requirements

* temporary save for switching disk region

* added shuffle and access token

* finished training pipeline; need to fix inference

* finished training pipeline; need to fix inference

* fixed inference pipeline

* committing to test deepspeed

* added feature to remove sequences longer than 2048

* try to merge

* minor changes

* minor changes

---------

Co-authored-by: lwaekfjlk <1125027232@qq.com>
Co-authored-by: zqi2cmu <zqi2@andrew.cmu.edu>
3 people authored Oct 25, 2023
1 parent 1e44e74 commit 0e8d939
Showing 14 changed files with 140 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -33,4 +33,4 @@ tests/state_of_the_union.txt

# Build
build
!dummy_file
!dummy_file
12 changes: 12 additions & 0 deletions llm_ft/data/create_dummy.py
@@ -0,0 +1,12 @@
import json

dummy_qa = {"id": "", "conversations": [{"from": "human", "value": "How old is Haofei?"}, {"from": "gpt", "value": "He is one year old."}]}

res = []
for i in range(1000):
    new_qa = dict(dummy_qa)  # shallow copy; only the top-level "id" is rewritten below
    new_qa["id"] = f"identity_{i}"
    res.append(new_qa)

with open("./dummy_convs.json", "w") as f:
    json.dump(res, f, indent=4)
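For reference, the first generated record serializes as:

{
    "id": "identity_0",
    "conversations": [
        {"from": "human", "value": "How old is Haofei?"},
        {"from": "gpt", "value": "He is one year old."}
    ]
}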
28 changes: 28 additions & 0 deletions llm_ft/data/data_filter_out_long.py
@@ -0,0 +1,28 @@
import json
import transformers

INPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json"
OUTPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-no-long.json"
MODEL_CHECKPOINT = "meta-llama/Llama-2-13b-chat-hf"
HF_TOKEN = "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"

with open(INPUT_PATH, 'r') as f:
    data = json.load(f)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    MODEL_CHECKPOINT,
    padding=False,
    truncation=False,
    token=HF_TOKEN,
)

res = []
for d in data:
    # Keep a conversation only if every human turn fits within 2048 tokens.
    keep = True
    for conv in d['conversations']:
        if conv['from'] == "human":
            # tokenizer(...) returns a BatchEncoding; count tokens via .input_ids
            if len(tokenizer(conv['value']).input_ids) > 2048:
                keep = False
                break
    if keep:
        res.append(d)

with open(OUTPUT_PATH, 'w') as f:
    json.dump(res, f, indent=4)
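A quick way to confirm the filter's effect after running it (a sketch, not part of the commit, assuming both JSON files sit in the working directory):

import json

with open("fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json") as f:
    before = json.load(f)
with open("fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-no-long.json") as f:
    after = json.load(f)
print(f"kept {len(after)} of {len(before)} conversations")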
16 changes: 16 additions & 0 deletions llm_ft/data/data_keep_only_speak.py
@@ -0,0 +1,16 @@
import json

INPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial.json"
OUTPUT_PATH = "fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak.json"

with open(INPUT_PATH, 'r') as f:
    data = json.load(f)

res = []
for d in data:
    # Keep each conversation at most once: it qualifies if any gpt turn
    # carries a 'speak' action.
    if any(
        conv['from'] == "gpt" and "'action_type': 'speak'" in conv['value']
        for conv in d['conversations']
    ):
        res.append(d)

with open(OUTPUT_PATH, 'w') as f:
    json.dump(res, f, indent=4)
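The substring test above assumes each gpt turn stores a stringified action dict in its value field. A hypothetical turn that would qualify a conversation (the real data's exact fields may differ):

conv = {
    "from": "gpt",
    "value": "{'action_type': 'speak', 'argument': 'Hi, nice to meet you!'}",  # hypothetical example
}
assert conv["from"] == "gpt" and "'action_type': 'speak'" in conv["value"]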
8 changes: 7 additions & 1 deletion llm_ft/fastchat/model/model_adapter.py
@@ -60,10 +60,12 @@ def load_model(self, model_path: str, from_pretrained_kwargs: dict):
            use_fast=self.use_fast_tokenizer,
            revision=revision,
            trust_remote_code=True,
            token=from_pretrained_kwargs.get("token"),
        )
    except TypeError:
        tokenizer = AutoTokenizer.from_pretrained(
            model_path, use_fast=False, revision=revision, trust_remote_code=True
            model_path, use_fast=False, revision=revision, trust_remote_code=True,
            token=from_pretrained_kwargs.get("token"),
        )
    try:
        model = AutoModelForCausalLM.from_pretrained(
@@ -154,6 +156,7 @@ def load_model(
    awq_config: Optional[AWQConfig] = None,
    revision: str = "main",
    debug: bool = False,
    hf_access_token: Optional[str] = None,
):
    """Load a model from Hugging Face."""
    # get model adapter
@@ -280,6 +283,9 @@ def load_model(

    if dtype is not None:  # Overwrite dtype if it is provided in the arguments.
        kwargs["torch_dtype"] = dtype

    if hf_access_token:
        kwargs["token"] = hf_access_token

    # Load model
    model, tokenizer = adapter.load_model(model_path, kwargs)
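With this change a caller can thread a Hugging Face token through load_model. A minimal sketch, assuming the remaining keyword arguments keep their FastChat defaults:

from fastchat.model.model_adapter import load_model

# hf_access_token is copied into kwargs["token"] and forwarded to
# from_pretrained, which gated checkpoints such as Llama-2-13b-chat-hf require.
model, tokenizer = load_model(
    "meta-llama/Llama-2-13b-chat-hf",
    device="cuda",
    revision="main",
    debug=False,
    hf_access_token="hf_...",  # your own token
)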
4 changes: 4 additions & 0 deletions llm_ft/fastchat/serve/cli.py
@@ -236,6 +236,7 @@ def main(args):
            judge_sent_end=args.judge_sent_end,
            debug=args.debug,
            history=not args.no_history,
            hf_access_token=args.hf_access_token,
        )
    except KeyboardInterrupt:
        print("exit...")
@@ -281,5 +282,8 @@ def main(args):
        action="store_true",
        help="Print useful debug information (e.g., prompts)",
    )
    parser.add_argument(
        "--hf-access-token", type=str, default=None, help="Optional access token for Hugging Face."
    )
    args = parser.parse_args()
    main(args)
2 changes: 2 additions & 0 deletions llm_ft/fastchat/serve/inference.py
@@ -308,6 +308,7 @@ def chat_loop(
    judge_sent_end: bool = True,
    debug: bool = True,
    history: bool = True,
    hf_access_token: Optional[str] = None,
):
    # Model
    model, tokenizer = load_model(
@@ -322,6 +323,7 @@
        awq_config=awq_config,
        revision=revision,
        debug=debug,
        hf_access_token=hf_access_token,
    )
    generate_stream_func = get_generate_stream_function(model, model_path)

32 changes: 25 additions & 7 deletions llm_ft/fastchat/train/train.py
@@ -18,6 +18,7 @@
import json
import math
import pathlib
import random
from typing import Dict, Optional, Sequence

import numpy as np
@@ -36,6 +37,7 @@
@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    hf_access_token: Optional[str] = field(default=None)


@dataclass
@@ -47,6 +49,8 @@ class DataArguments:
        default=None, metadata={"help": "Path to the evaluation data."}
    )
    lazy_preprocess: bool = False
    shuffle: bool = True
    abort_long_seq: bool = False


@dataclass
@@ -83,6 +87,7 @@ def trainer_save_model_safe(trainer: transformers.Trainer):
def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
    abort_long_seq: bool = False,
) -> Dict:
    conv = get_conversation_template("vicuna")
    roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
@@ -100,7 +105,15 @@
            assert role == conv.roles[j % 2], f"{i}"
            conv.append_message(role, sentence["value"])
        conversations.append(conv.get_prompt())

    if abort_long_seq:
        # Drop any conversation whose full prompt exceeds the tokenizer's max length.
        new_conversations = []
        for temp_conv in conversations:
            token_len = tokenizer(
                temp_conv, return_tensors="pt", padding=False, truncation=False
            ).input_ids.size(1)
            if token_len <= tokenizer.model_max_length:
                new_conversations.append(temp_conv)
        conversations = new_conversations
        print(
            f"Aborted conversations longer than {tokenizer.model_max_length} tokens; "
            f"{len(conversations)} conversations remain"
        )

    # Tokenize conversations
    input_ids = tokenizer(
        conversations,
@@ -151,7 +164,6 @@ def preprocess(
f"WARNING: tokenization mismatch: {cur_len} vs. {total_len}."
f" (ignored)"
)

return dict(
input_ids=input_ids,
labels=targets,
@@ -162,12 +174,12 @@
class SupervisedDataset(Dataset):
"""Dataset for supervised fine-tuning."""

def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer):
def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer, abort_long_seq: bool = False):
super(SupervisedDataset, self).__init__()

rank0_print("Formatting inputs...")
sources = [example["conversations"] for example in raw_data]
data_dict = preprocess(sources, tokenizer)
data_dict = preprocess(sources, tokenizer, abort_long_seq=abort_long_seq)

self.input_ids = data_dict["input_ids"]
self.labels = data_dict["labels"]
@@ -187,9 +199,10 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]:
class LazySupervisedDataset(Dataset):
"""Dataset for supervised fine-tuning."""

def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer):
def __init__(self, raw_data, tokenizer: transformers.PreTrainedTokenizer, abort_long_seq: bool = False):
super(LazySupervisedDataset, self).__init__()
self.tokenizer = tokenizer
self.abort_long_seq = abort_long_seq

rank0_print("Formatting inputs...Skip in lazy mode")
self.tokenizer = tokenizer
@@ -224,13 +237,18 @@ def make_supervised_data_module(
rank0_print("Loading data...")

train_json = json.load(open(data_args.data_path, "r"))
train_dataset = dataset_cls(train_json, tokenizer=tokenizer)
if data_args.shuffle: random.shuffle(train_json)

train_dataset = dataset_cls(train_json, tokenizer=tokenizer, abort_long_seq = data_args.abort_long_seq)

if data_args.eval_data_path:
eval_json = json.load(open(data_args.eval_data_path, "r"))
eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer)
if data_args.shuffle: random.shuffle(train_json)

eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer, abort_long_seq = data_args.abort_long_seq)
else:
eval_dataset = None


return dict(train_dataset=train_dataset, eval_dataset=eval_dataset)

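Because train.py parses its dataclasses with transformers.HfArgumentParser, the new fields surface directly as command-line flags. A minimal sketch under that assumption:

import transformers

# ModelArguments and DataArguments are the dataclasses shown above; flags such
# as --hf_access_token, --shuffle and --abort_long_seq parse straight into them.
parser = transformers.HfArgumentParser((ModelArguments, DataArguments))
model_args, data_args = parser.parse_args_into_dataclasses()
print(model_args.hf_access_token, data_args.shuffle, data_args.abort_long_seq)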
8 changes: 7 additions & 1 deletion llm_ft/fastchat/train/train_lora.py
@@ -20,6 +20,11 @@
import pathlib
import typing
import os
import sys

# Make the repository root importable when this file is run directly.
current_directory = os.path.dirname(os.path.abspath(__file__))
root_directory = os.path.join(current_directory, '..', '..')
sys.path.append(root_directory)

from deepspeed import zero
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
@@ -35,7 +40,6 @@
)



@dataclass
class TrainingArguments(transformers.TrainingArguments):
    cache_dir: typing.Optional[str] = field(default=None)
@@ -137,6 +141,7 @@ def train():
        )
        if lora_args.q_lora
        else None,
        token=model_args.hf_access_token or None,
    )
    lora_config = LoraConfig(
        r=lora_args.lora_r,
@@ -176,6 +181,7 @@ def train():
        model_max_length=training_args.model_max_length,
        padding_side="right",
        use_fast=False,
        token=model_args.hf_access_token or None,
    )
    tokenizer.pad_token = tokenizer.unk_token

1 change: 0 additions & 1 deletion llm_ft/inference.sh

This file was deleted.

1 change: 1 addition & 0 deletions llm_ft/llama2-13b_inference.sh
@@ -0,0 +1 @@
python3 -m fastchat.serve.cli --model-path ./checkpoint-shuffle/checkpoint-161 --hf-access-token "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" --conv-template "vicuna_v1.1"
36 changes: 36 additions & 0 deletions llm_ft/llama2-13b_qlora_train.sh
@@ -0,0 +1,36 @@
deepspeed --num_gpus=1 fastchat/train/train_lora.py \
    --model_name_or_path meta-llama/Llama-2-13b-chat-hf \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --data_path ./data/fastchat-ft-gpt4-gpt4-easy-2-side-partial-speak-drop-long.json \
    --shuffle True \
    --bf16 True \
    --output_dir ./checkpoint-shuffle-speak-drop-long \
    --num_train_epochs 4 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 32 \
    --evaluation_strategy "no" \
    --save_strategy "epoch" \
    --save_total_limit 6 \
    --learning_rate 5e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --model_max_length 2048 \
    --q_lora True \
    --deepspeed ./deepspeed_config_s2.json \
    --hf_access_token "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" \
    --tf32 True \
    --flash_attn True \
    --abort_long_seq True

# Possible other options (not already enabled above):
# --save_strategy "steps" \
# --save_steps 1200 \
# --lazy_preprocess True \
Empty file removed llm_ft/vicuna-7b-1.5/dummy_file
1 change: 1 addition & 0 deletions requirements.txt
@@ -16,3 +16,4 @@ datasets
names
together
pydantic==1.10.12
deepspeed