From e113dd70af3273bced2766052e190fcf4a9869b1 Mon Sep 17 00:00:00 2001
From: Jasonqi146
Date: Wed, 8 Nov 2023 06:51:21 +0000
Subject: [PATCH] starting mistral 7b pipeline

---
 llm_rl/finetune-mistral-7b.sh | 47 +++++++++++++++++++++++++++++++++++
 llm_rl/reward_model.sh        |  2 +-
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 llm_rl/finetune-mistral-7b.sh

diff --git a/llm_rl/finetune-mistral-7b.sh b/llm_rl/finetune-mistral-7b.sh
new file mode 100644
index 00000000..d2007e55
--- /dev/null
+++ b/llm_rl/finetune-mistral-7b.sh
@@ -0,0 +1,47 @@
+deepspeed src/train_bash.py \
+    --stage sft \
+    --model_name_or_path mistralai/Mistral-7B-v0.1 \
+    --dataset dummy_convs \
+    --dataset_dir ./data/ \
+    --val_size 0.1 \
+    --cutoff_len 4096 \
+    --template llama2-sotopia \
+    --wandb_project "llama-factory-sft" \
+    --wandb_tags "['mistral-7b']" \
+    --use_fast_tokenizer False \
+    --do_train \
+    --num_train_epochs 15.0 \
+    --per_device_train_batch_size 8 \
+    --gradient_accumulation_steps 8 \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --learning_rate 5e-5 \
+    --lr_scheduler_type cosine \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --quantization_bit 4 \
+    --quantization_type nf4 \
+    --double_quantization \
+    --flash_attn True \
+    --gradient_checkpointing True \
+    --bf16 \
+    --tf32 True \
+    --cache_dir ./model_cache \
+    --overwrite_cache \
+    --output_dir ./mistral-7b-sft_cache \
+    --overwrite_output_dir \
+    --logging_steps 1 \
+    --evaluation_strategy "steps" \
+    --per_device_eval_batch_size 32 \
+    --eval_accumulation_steps 32 \
+    --save_strategy "epoch" \
+    --save_total_limit 5 \
+    --use_auth_token True \
+    --wandb_token "${WANDB_TOKEN:?set WANDB_TOKEN in the environment}" \
+    --hf_auth_token "${HF_TOKEN:?set HF_TOKEN in the environment}" \
+    --deepspeed ./deepspeed_config_s2.json
+
+    # --dataset alpaca_gpt4_en \
+    # --lora_rank 8 \
+    # --lora_alpha 16 \
+    # --lora_dropout 0.05 \
\ No newline at end of file
diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh
index fa5424df..873b07a5 100644
--- a/llm_rl/reward_model.sh
+++ b/llm_rl/reward_model.sh
@@ -7,7 +7,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --output_dir ./llama-2-13b-rm \
+    --output_dir ./llama-2-13b-rm_cache \
     --per_device_train_batch_size 8 \
     --gradient_accumulation_steps 8 \
     --lr_scheduler_type cosine \