From e113dd70af3273bced2766052e190fcf4a9869b1 Mon Sep 17 00:00:00 2001
From: Jasonqi146
Date: Wed, 8 Nov 2023 06:51:21 +0000
Subject: [PATCH] starting mistral 7b pipeline

---
 llm_rl/finetune-mistral-7b.sh | 47 +++++++++++++++++++++++++++++++++++
 llm_rl/reward_model.sh        |  2 +-
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 llm_rl/finetune-mistral-7b.sh

diff --git a/llm_rl/finetune-mistral-7b.sh b/llm_rl/finetune-mistral-7b.sh
new file mode 100644
index 00000000..d2007e55
--- /dev/null
+++ b/llm_rl/finetune-mistral-7b.sh
@@ -0,0 +1,47 @@
+deepspeed src/train_bash.py \
+    --stage sft \
+    --model_name_or_path mistralai/Mistral-7B-v0.1 \
+    --dataset dummy_convs \
+    --dataset_dir ./data/ \
+    --val_size 0.1 \
+    --cutoff_len 4096 \
+    --template llama2-sotopia \
+    --wandb_project "llama-factory-sft" \
+    --wandb_tags "['mistral-7b']" \
+    --use_fast_tokenizer False \
+    --do_train \
+    --num_train_epochs 15.0 \
+    --per_device_train_batch_size 8 \
+    --gradient_accumulation_steps 8 \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --learning_rate 5e-5 \
+    --lr_scheduler_type cosine \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --quantization_bit 4 \
+    --quantization_type nf4 \
+    --double_quantization \
+    --flash_attn True \
+    --gradient_checkpointing True \
+    --bf16 \
+    --tf32 True \
+    --cache_dir ./model_cache \
+    --overwrite_cache \
+    --output_dir ./mistral-7b-sft_cache \
+    --overwrite_output_dir \
+    --logging_steps 1 \
+    --evaluation_strategy "steps" \
+    --per_device_eval_batch_size 32 \
+    --eval_accumulation_steps 32 \
+    --save_strategy "epoch" \
+    --save_total_limit 5 \
+    --use_auth_token True \
+    --wandb_token "${WANDB_TOKEN:?set WANDB_TOKEN in the environment}" \
+    --hf_auth_token "${HF_TOKEN:?set HF_TOKEN in the environment}" \
+    --deepspeed ./deepspeed_config_s2.json
+
+    # --dataset alpaca_gpt4_en \
+    # --lora_rank 8 \
+    # --lora_alpha 16 \
+    # --lora_dropout 0.05 \
\ No newline at end of file
diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh
index fa5424df..873b07a5 100644
--- a/llm_rl/reward_model.sh
+++ b/llm_rl/reward_model.sh
@@ -7,7 +7,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --output_dir ./llama-2-13b-rm \
+    --output_dir ./llama-2-13b-rm_cache \
     --per_device_train_batch_size 8 \
     --gradient_accumulation_steps 8 \
     --lr_scheduler_type cosine \