diff --git a/llm_rl/finetune-llama-2-13b.sh b/llm_rl/finetune-llama-2-13b.sh index 3fd321bb..ee13f7b6 100644 --- a/llm_rl/finetune-llama-2-13b.sh +++ b/llm_rl/finetune-llama-2-13b.sh @@ -1,9 +1,8 @@ deepspeed src/train_bash.py \ --stage sft \ --model_name_or_path meta-llama/Llama-2-13b-hf \ - --dataset sotopia_easy_sft \ + --dataset fastchat-sft \ --dataset_dir ./data/ \ - --val_size 0.1 \ --cutoff_len 4096 \ --template llama2-sotopia \ --wandb_project "llama-factory-sft" \ @@ -11,31 +10,26 @@ deepspeed src/train_bash.py \ --use_fast_tokenizer False \ --do_train \ --num_train_epochs 15.0 \ - --per_device_train_batch_size 8 \ - --gradient_accumulation_steps 8 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ --finetuning_type lora \ --lora_target q_proj,v_proj \ - --lora_rank 8 \ - --lora_alpha 16 \ - --lora_dropout 0.05 \ + --qlora_compute_dtype bf16 \ --learning_rate 5e-5 \ --lr_scheduler_type cosine \ --weight_decay 0. \ --warmup_ratio 0.03 \ --quantization_bit 4 \ --quantization_type nf4 \ - --double_quantization \ + --double_quantization True \ --flash_attn True \ --gradient_checkpointing True \ - --bf16 \ + --bf16 True \ --cache_dir ./model_cache \ --overwrite_cache \ --output_dir ./llama2-13b-sft_cache \ --overwrite_output_dir \ --logging_steps 1 \ - --evaluation_strategy "steps" \ - --per_device_eval_batch_size 32 \ - --eval_accumulation_steps 32 \ --save_strategy "epoch" \ --save_total_limit 5 \ --use_auth_token True \ @@ -43,4 +37,11 @@ deepspeed src/train_bash.py \ --hf_auth_token "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" \ --deepspeed ./deepspeed_config_s2.json - # --dataset alpaca_gpt4_en \ \ No newline at end of file + # --dataset alpaca_gpt4_en \ + # --val_size 0.1 \ + # --evaluation_strategy "steps" \ + # --per_device_eval_batch_size 32 \ + # --eval_accumulation_steps 32 \ + # --lora_rank 8 \ + # --lora_alpha 16 \ + # --lora_dropout 0.05 \ \ No newline at end of file diff --git a/llm_rl/finetune-mistral-7b.sh b/llm_rl/finetune-mistral-7b.sh index d2007e55..4f8120ec 100644 --- a/llm_rl/finetune-mistral-7b.sh +++ b/llm_rl/finetune-mistral-7b.sh @@ -1,9 +1,8 @@ deepspeed src/train_bash.py \ --stage sft \ --model_name_or_path mistralai/Mistral-7B-v0.1 \ - --dataset dummy_convs \ + --dataset sotopia_no_slide_no_filter_format_sft \ --dataset_dir ./data/ \ - --val_size 0.1 \ --cutoff_len 4096 \ --template llama2-sotopia \ --wandb_project "llama-factory-sft" \ @@ -11,8 +10,8 @@ deepspeed src/train_bash.py \ --use_fast_tokenizer False \ --do_train \ --num_train_epochs 15.0 \ - --per_device_train_batch_size 8 \ - --gradient_accumulation_steps 8 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 32 \ --finetuning_type lora \ --lora_target q_proj,v_proj \ --learning_rate 5e-5 \ @@ -21,19 +20,15 @@ deepspeed src/train_bash.py \ --warmup_ratio 0.03 \ --quantization_bit 4 \ --quantization_type nf4 \ - --double_quantization \ + --double_quantization True \ --flash_attn True \ --gradient_checkpointing True \ - --bf16 \ - --tf32 True \ + --bf16 True \ --cache_dir ./model_cache \ --overwrite_cache \ --output_dir ./mistral-7b-sft_cache \ --overwrite_output_dir \ --logging_steps 1 \ - --evaluation_strategy "steps" \ - --per_device_eval_batch_size 32 \ - --eval_accumulation_steps 32 \ --save_strategy "epoch" \ --save_total_limit 5 \ --use_auth_token True \ @@ -42,6 +37,10 @@ deepspeed src/train_bash.py \ --deepspeed ./deepspeed_config_s2.json # --dataset alpaca_gpt4_en \ + # --val_size 0.1 \ + # --evaluation_strategy "steps" \ + # --per_device_eval_batch_size 32 \ + # --eval_accumulation_steps 32 \ # --lora_rank 8 \ # --lora_alpha 16 \ # --lora_dropout 0.05 \ \ No newline at end of file diff --git a/llm_rl/preprocess/create_sft_data.py b/llm_rl/preprocess/create_sft_data.py index b50ca7c5..6d2785ad 100644 --- a/llm_rl/preprocess/create_sft_data.py +++ b/llm_rl/preprocess/create_sft_data.py @@ -22,6 +22,6 @@ def join_json_files(directory_path): joined_data.append(new_data) return joined_data -joined_data = join_json_files("./GPT4-4_Redis_Easy_No_Slide/") -with open("../data/GPT4-4_Redis_Easy_No_Slide.json", "w") as f: +joined_data = join_json_files("./GPT4-4_Redis_Easy_No_Slide_No_Filter_Format/") +with open("../data/GPT4-4_Redis_Easy_No_Slide_No_Filter_Format.json", "w") as f: json.dump(joined_data, f) \ No newline at end of file diff --git a/llm_rl/requirements.txt b/llm_rl/requirements.txt index 03ac2855..cc376589 100644 --- a/llm_rl/requirements.txt +++ b/llm_rl/requirements.txt @@ -1,3 +1,5 @@ +packaging +wheel torch>=1.13.1 transformers>=4.31.0,<4.35.0 datasets>=2.12.0 @@ -17,7 +19,6 @@ uvicorn pydantic fastapi sse-starlette -packaging matplotlib py-cpuinfo deepspeed diff --git a/llm_rl/src/llmtuner/hparams/finetuning_args.py b/llm_rl/src/llmtuner/hparams/finetuning_args.py index d8f2d299..4479ae6b 100644 --- a/llm_rl/src/llmtuner/hparams/finetuning_args.py +++ b/llm_rl/src/llmtuner/hparams/finetuning_args.py @@ -52,6 +52,10 @@ class FinetuningArguments: Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \ LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."} ) + lora_bias: Optional[str] = field( + default="none", + metadata={"help": "The lora_bias option from bitsandbytes."} + ) additional_target: Optional[str] = field( default=None, metadata={"help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."} diff --git a/llm_rl/src/llmtuner/hparams/model_args.py b/llm_rl/src/llmtuner/hparams/model_args.py index 7c25fad1..edad7e33 100644 --- a/llm_rl/src/llmtuner/hparams/model_args.py +++ b/llm_rl/src/llmtuner/hparams/model_args.py @@ -1,3 +1,4 @@ +from torch import float16, bfloat16, float32 from typing import Literal, Optional from dataclasses import dataclass, field @@ -74,9 +75,12 @@ class ModelArguments: default=None, metadata={"help": "Path to the directory to save the exported model."} ) + qlora_compute_dtype: Optional[str] = field( + default="fp32", + metadata={"help": "The compute_dtype option from bitsandbytes."} + ) def __post_init__(self): - self.compute_dtype = None self.model_max_length = None if self.split_special_tokens and self.use_fast_tokenizer: @@ -91,3 +95,10 @@ def __post_init__(self): if self.use_auth_token == True and self.hf_auth_token is not None: from huggingface_hub.hf_api import HfFolder # lazy load HfFolder.save_token(self.hf_auth_token) + + if self.qlora_compute_dtype == "bf16": + self.compute_dtype = bfloat16 + elif self.qlora_compute_dtype == "fp16": + self.compute_dtype = float16 + else: + self.compute_dtype = float32 \ No newline at end of file diff --git a/llm_rl/src/llmtuner/tuner/core/adapter.py b/llm_rl/src/llmtuner/tuner/core/adapter.py index 4fcc6e62..22633d54 100644 --- a/llm_rl/src/llmtuner/tuner/core/adapter.py +++ b/llm_rl/src/llmtuner/tuner/core/adapter.py @@ -89,6 +89,7 @@ def init_adapter( lora_alpha=finetuning_args.lora_alpha, lora_dropout=finetuning_args.lora_dropout, target_modules=target_modules, + bias=finetuning_args.lora_bias, modules_to_save=finetuning_args.additional_target ) model = get_peft_model(model, lora_config) diff --git a/llm_rl/src/llmtuner/tuner/core/loader.py b/llm_rl/src/llmtuner/tuner/core/loader.py index e77c4945..2df9baf1 100644 --- a/llm_rl/src/llmtuner/tuner/core/loader.py +++ b/llm_rl/src/llmtuner/tuner/core/loader.py @@ -74,6 +74,7 @@ def load_model_and_tokenizer( padding_side="right", # training with left-padded tensors in fp16 precision may cause overflow **config_kwargs ) + tokenizer.pad_token = tokenizer.unk_token if finetuning_args.finetuning_type != "lora" and model_args.checkpoint_dir is not None: model_to_load = model_args.checkpoint_dir[0] diff --git a/llm_rl/src/llmtuner/tuner/sft/workflow.py b/llm_rl/src/llmtuner/tuner/sft/workflow.py index 171fc5da..1b44f775 100644 --- a/llm_rl/src/llmtuner/tuner/sft/workflow.py +++ b/llm_rl/src/llmtuner/tuner/sft/workflow.py @@ -31,6 +31,8 @@ def run_sft( if training_args.gradient_checkpointing: model.enable_input_require_grads() + model.gradient_checkpointing_enable() + model.config.use_cache = False if training_args.predict_with_generate: tokenizer.padding_side = "left" # use left-padding in generation