diff --git a/llm_rl/finetune-llama-2-13b.sh b/llm_rl/finetune-llama-2-13b.sh
index 636c213f..ee13f7b6 100644
--- a/llm_rl/finetune-llama-2-13b.sh
+++ b/llm_rl/finetune-llama-2-13b.sh
@@ -10,8 +10,8 @@ deepspeed src/train_bash.py \
     --use_fast_tokenizer False \
     --do_train \
     --num_train_epochs 15.0 \
-    --per_device_train_batch_size 8 \
-    --gradient_accumulation_steps 4 \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 32 \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --qlora_compute_dtype bf16 \
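
Note (not part of the patch itself): the change trades per-step batch size for more accumulation steps, so the effective per-device batch is unchanged while peak activation memory drops, the usual remedy for GPU OOM when fine-tuning a 13B model. A minimal sanity check of the arithmetic, assuming single-node training with only the two flags shown changing:

# effective batch per device = per_device_train_batch_size * gradient_accumulation_steps
# before: 8 * 4  = 32
# after:  1 * 32 = 32
echo "$(( 8 * 4 )) vs $(( 1 * 32 ))"   # prints: 32 vs 32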