diff --git a/llm_rl/example_ppo.sh b/llm_rl/example_ppo.sh
index 0e609310..ef69912e 100644
--- a/llm_rl/example_ppo.sh
+++ b/llm_rl/example_ppo.sh
@@ -1,15 +1,18 @@
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --stage ppo \
-    --model_name_or_path path_to_llama_model \
+    --model_name_or_path mistralai/Mistral-7B-v0.1 \
     --do_train \
     --dataset alpaca_gpt4_en \
     --template default \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --checkpoint_dir path_to_sft_checkpoint \
-    --reward_model path_to_rm_checkpoint \
-    --output_dir path_to_ppo_checkpoint \
+    --checkpoint_dir /workspace/sotopia-llm/llm_rl/mistral-7b-sft_cache/checkpoint-10 \
+    --reward_model /workspace/sotopia-llm/llm_rl/mistral-7b-rm_cache/checkpoint-10 \
+    --cache_dir ./model_cache \
+    --overwrite_cache \
+    --output_dir ./mistral-7b-ppo_cache \
+    --overwrite_output_dir \
     --per_device_train_batch_size 2 \
     --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine \
@@ -18,4 +21,5 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --learning_rate 1e-5 \
     --num_train_epochs 1.0 \
     --plot_loss \
-    --fp16
\ No newline at end of file
+    --bf16
+
diff --git a/llm_rl/example_reward_model.sh b/llm_rl/example_rm.sh
similarity index 57%
rename from llm_rl/example_reward_model.sh
rename to llm_rl/example_rm.sh
index 873b07a5..c2295018 100644
--- a/llm_rl/example_reward_model.sh
+++ b/llm_rl/example_rm.sh
@@ -1,23 +1,27 @@
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+CUDA_VISIBLE_DEVICES=0 deepspeed src/train_bash.py \
     --stage rm \
-    --model_name_or_path meta-llama/Llama-2-13b-hf \
+    --model_name_or_path mistralai/Mistral-7B-v0.1 \
     --do_train \
     --dataset comparison_gpt4_en \
     --template default \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --output_dir ./llama-2-13b-rm_cache \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 1 \
     --gradient_accumulation_steps 8 \
     --lr_scheduler_type cosine \
     --logging_steps 10 \
-    --save_steps 1000 \
+    --save_steps 10 \
    --learning_rate 1e-6 \
     --num_train_epochs 1.0 \
     --plot_loss \
-    --fp16 \
+    --bf16 \
+    --cache_dir ./model_cache \
+    --overwrite_cache \
+    --output_dir ./mistral-7b-rm_cache \
+    --overwrite_output_dir \
     --use_auth_token True \
     --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
     --hf_auth_token "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" \
-    --deepspeed ./deepspeed_config_s2.json
+    --deepspeed ./deepspeed_config_s2.json \
+    --gradient_checkpointing True
diff --git a/llm_rl/example_sft.sh b/llm_rl/example_sft.sh
new file mode 100644
index 00000000..e8656af3
--- /dev/null
+++ b/llm_rl/example_sft.sh
@@ -0,0 +1,24 @@
+CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+    --stage sft \
+    --model_name_or_path mistralai/Mistral-7B-v0.1 \
+    --do_train \
+    --dataset alpaca_gpt4_en \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --resume_lora_training False \
+    --cache_dir ./model_cache \
+    --overwrite_cache \
+    --output_dir ./mistral-7b-sft_cache \
+    --overwrite_output_dir \
+    --per_device_train_batch_size 4 \
+    --gradient_accumulation_steps 4 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 1000 \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --plot_loss \
+    --bf16 \
+    --gradient_checkpointing True \
+    --save_steps 10
\ No newline at end of file
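For context, the three example scripts are meant to run in sequence (a sketch of the assumed workflow, not part of the diff): example_sft.sh writes LoRA checkpoints to ./mistral-7b-sft_cache, example_rm.sh writes reward-model checkpoints to ./mistral-7b-rm_cache, and example_ppo.sh then loads both via --checkpoint_dir and --reward_model. Note that example_sft.sh passes --save_steps twice (1000, then 10); with an argparse-based parser the last value should win, so both earlier stages save every 10 steps, which is what makes the checkpoint-10 directories exist. The PPO script hard-codes /workspace/sotopia-llm/llm_rl/... for those paths, so they only line up if the repo is checked out at /workspace/sotopia-llm.

    cd llm_rl
    bash example_sft.sh   # SFT LoRA adapter  -> ./mistral-7b-sft_cache/checkpoint-10
    bash example_rm.sh    # reward model      -> ./mistral-7b-rm_cache/checkpoint-10
    bash example_ppo.sh   # PPO from the SFT adapter, scored by the reward model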