[LLaMa] Adding support converting checkpoint from mds to hf (#432)
* add support converting checkpoint from hf to mds

* Fix PP issue

* update
billishyahao authored Aug 10, 2024
1 parent 61350c5 commit f132876
Showing 4 changed files with 280 additions and 58 deletions.
8 changes: 1 addition & 7 deletions examples_deepspeed/finetune_hf_llama/ds_config.json
@@ -1,11 +1,5 @@
 {
   "train_batch_size" : 256,
   "train_micro_batch_size_per_gpu": 16,
-  "steps_per_print": 100,
-  "zero_optimization": {
-    "stage": 0
-  },
-  "bf16": {
-    "enabled": true
-  }
+  "steps_per_print": 1
 }
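
The JSON is reduced to a stub because finetune_llama.sh (next file) appears to regenerate the full DeepSpeed config at launch time through the `cat <<EOT > $DS_CONFIG` heredoc visible in that script's second hunk below. A minimal sketch of the pattern, not the commit's actual heredoc: GLOBAL_BATCH_SIZE and the exact field list are assumptions for illustration, only DS_CONFIG and MICRO_BATCH_SIZE=16 come from the diff.

# Sketch: regenerate the DeepSpeed config before launching training.
DS_CONFIG=./examples_deepspeed/finetune_hf_llama/ds_config.json
MICRO_BATCH_SIZE=16          # value from the script diff below
GLOBAL_BATCH_SIZE=256        # assumed variable name/value for illustration
cat <<EOT > $DS_CONFIG
{
  "train_batch_size": $GLOBAL_BATCH_SIZE,
  "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
  "steps_per_print": 1,
  "bf16": { "enabled": true }
}
EOT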
23 changes: 17 additions & 6 deletions examples_deepspeed/finetune_hf_llama/finetune_llama.sh
@@ -1,8 +1,8 @@
 DS_CONFIG=./examples_deepspeed/finetune_hf_llama/ds_config.json
-DATASET_PATH=./alpaca_data.json
+DATASET_PATH=./examples_deepspeed/finetune_hf_llama/alpaca_data.json
 # dataset link: https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json
 
-HF_LLAMA_PATH=/data/llama-7b/
+HF_LLAMA_PATH=/data/llama-2-7b-hf/
 # weights link: https://huggingface.co/huggyllama/llama-7b
 
 MICRO_BATCH_SIZE=16
@@ -44,11 +44,20 @@ cat <<EOT > $DS_CONFIG
 EOT
 
 
-covert_args="deepspeed tools/hf2megads_weight_converter.py \
+covert_hf2mds_args="deepspeed tools/hf2megads_weight_converter.py \
 --hf-ckpt-num-shards 2 \
---origin-hf-ckpt-dir $HF_LLAMA_PATH \
+--hf-ckpt-dir $HF_LLAMA_PATH \
+--load-mode auto \
 --save $MEGA_DS_LLAMA_PATH"
+
+covert_mds2hf_args="deepspeed tools/hf2megads_weight_converter.py \
+--hf-ckpt-num-shards 2 \
+--hf-ckpt-dir $HF_LLAMA_PATH \
+--load-mode auto \
+--to-hf-ckpt \
+--load $MEGA_DS_LLAMA_PATH \
+--save $HF_LLAMA_PATH'-hf-out' "
 
 finetune_args="deepspeed finetune_llama.py \
 --load $MEGA_DS_LLAMA_PATH"
 
@@ -98,8 +107,10 @@ comm_args="--tensor-model-parallel-size $TP \
 --no-gradient-accumulation-fusion \
 --repeated-dataloader"
 
-if [ "$1" = "convert" ]; then
-task_args="$covert_args"
+if [ "$1" = "convert_hf2mds" ]; then
+task_args="$covert_hf2mds_args"
+elif [ "$1" = "convert_mds2hf" ]; then
+task_args="$covert_mds2hf_args"
 else
 task_args="$finetune_args"
 fi
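
With the dispatch above, the script's first positional argument selects the task. A usage sketch, assuming the commands are run from the Megatron-DeepSpeed repository root so the script's relative paths resolve:

# HF checkpoint -> Megatron-DeepSpeed checkpoint
bash examples_deepspeed/finetune_hf_llama/finetune_llama.sh convert_hf2mds
# Megatron-DeepSpeed -> HF checkpoint (added by this commit); output goes to a
# directory named after $HF_LLAMA_PATH with an -hf-out suffix
bash examples_deepspeed/finetune_hf_llama/finetune_llama.sh convert_mds2hf
# any other (or no) argument falls through to finetuning
bash examples_deepspeed/finetune_hf_llama/finetune_llama.sh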
1 change: 1 addition & 0 deletions megatron/global_vars.py
@@ -175,6 +175,7 @@ def _set_wandb_writer(args):
               'project or experiment name provided, '
               'therefore WANDB logs will be written '
               'according to random generated project or experiment name.', flush=True)
+        return
 
     try:
         import wandb
Fourth changed file: diff not loaded in this view; given the converter flags added above (--load-mode, --to-hf-ckpt), it is presumably tools/hf2megads_weight_converter.py, which carries the remaining additions and deletions of this commit.
