[dist]add benchmark scripts check #2633

Open · wants to merge 1 commit into base: develop
162 changes: 162 additions & 0 deletions distributed/benchmark_scripts/check.sh
@@ -0,0 +1,162 @@
#!/usr/bin/env bash

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################
export dir_name=$1 # /path/to/demo-model_name
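# Example invocation (using the gpt_auto model directory added in this PR):
#   bash distributed/benchmark_scripts/check.sh ./distributed/benchmark_scripts/gpt_auto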

# Check whether shellcheck is installed
echo "==============shellcheck=============="
if command -v shellcheck &> /dev/null; then
echo "shellcheck is installed"
else
echo "shellcheck is not installed, install it now"
echo "apt install shellcheck or yum install shellcheck, exit 1"
exit 1
fi

check_case_name(){
file=$1
echo "=========校验文件名称格式========="
file_temp=${file##*/}
file_name=${file_temp%.sh}
file_model_item=${file_name%%_bs*}
file_global_batch_size=$(echo "$file_name" | grep -oP '_bs\K\d+')
file_fp_item=$(echo "$file_name" | grep -oP 'bf[^_]*|fp[^_]*')
file_run_mode=$(echo "$file_name" | sed 's/.*_//')

model_item=$(grep -oP 'model_item=\K[^"]*' "$file" | sed 's/ *$//')
global_batch_size=$(grep -oP 'global_batch_size=\K\d+' "$file" | sed 's/ *$//')
fp_item=$(grep -oP 'fp_item=\K[^"]*' "$file" | sed 's/ *$//')
run_mode=$(grep -oP 'run_mode=\K[^"]*' "$file" | sed 's/ *$//')
model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode}
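# Example (values taken from generate/params.yaml in this PR): a case file named
#   gpt_auto_bs16_fp16O2_DP2-MP2-PP2-SD2-stage3.sh
# must set model_item=gpt_auto, global_batch_size=16, fp_item=fp16O2 and
# run_mode=DP2-MP2-PP2-SD2-stage3 so that the reconstructed model_name matches the file name.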

if [[ $file_name != "$model_name" ]]; then
echo "Abnormal exit: the file name does not match the concatenated model_name. file name: $file_name, concatenated model_name: $model_name"
exit 1
fi
if [[ $file_model_item != "$model_item" ]]; then
echo "Abnormal exit: model_item mismatch. model_item from file name: $file_model_item, model_item inside the file: $model_item"
exit 1
fi
if [[ $file_global_batch_size != "$global_batch_size" ]]; then
echo "Abnormal exit: global_batch_size mismatch. global_batch_size from file name: $file_global_batch_size, global_batch_size inside the file: $global_batch_size"
exit 1
fi
# Skip this check when fp_item is empty
if [[ $file_fp_item != "$fp_item" ]] && [[ -n "$fp_item" ]]; then
echo "Abnormal exit: fp_item mismatch. fp_item from file name: $file_fp_item, fp_item inside the file: $fp_item"
exit 1
fi
if [[ $file_run_mode != "$run_mode" ]]; then
echo "Abnormal exit: run_mode mismatch. run_mode from file name: $file_run_mode, run_mode inside the file: $run_mode"
exit 1
fi
echo "=========Case file name format check passed========="
}
check_param_mode(){
file=$1
file_content=$(cat "$file")
if [[ $file_content == *'param+='* ]]; then
echo "匹配param模式校验"
if grep -qE '^param="[^"]+' "$file"; then
echo "param=存在"
else
echo "异常退出,文件内容不包含'param='"
exit 1
fi
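# Expected shape of the param lines (taken from the gpt_auto case script in this PR):
#   param="model_item=gpt_auto "
#   param+="global_batch_size=16 "
# i.e. each value keeps a trailing space before the closing double quote, so that
# concatenating param yields space-separated key=value pairs for run_benchmark.sh.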
# Iterate over every line of the file and confirm that lines containing param end with a space
while IFS= read -r line; do
# Check whether the line contains param+= or param=
if [[ "$line" == *"param+="* ]] || [[ "$line" == *"param="* ]]; then
# Check whether the line contains a space and a double quote
if [[ "$line" == *" "* ]] && [[ "$line" == *'"'* ]]; then
continue
else
echo "Abnormal exit: a param=|param+= line does not end with a space and a double quote: $line"
exit 1
fi
fi
done < "$file"
echo "All param=|param+= lines end with a space and a double quote, as expected"
fi
}
check_run_mode(){
file=$1
run_mode=$(grep -oP 'run_mode=\K[^"]*' "$file" | sed 's/ *$//')
run_benchmark=$(dirname "$file")/../benchmark_common/run_benchmark.sh
if grep -qE "{run_mode} in" "$run_benchmark"; then
echo "匹配run_mode模式校验"
if grep -qE "$run_mode" "$run_benchmark"; then
echo "run_mode匹配成功"
else
echo "异常退出,文件内容不包含$run_mode"
exit 1
fi
fi
}
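# Note: the "{run_mode} in" grep above looks for a dispatch on run_mode inside
# run_benchmark.sh, roughly of the form (a sketch; run_benchmark.sh itself is not
# part of this diff):
#   case ${run_mode} in
#     DP1-MP1-PP8-SD1-stage1) ... ;;
#   esac
# so the run_mode declared by a case script must appear as one of those branches.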

check_args(){
file=$1
if ! grep -qP '^model_item=[^/]*$' "$file"; then
echo "Abnormal exit: the file does not contain model_item, or model_item contains a / character"
exit 1
fi
if ! grep -qP '^global_batch_size=' "$file"; then
echo "Abnormal exit: the file does not contain global_batch_size"
exit 1
fi
if ! grep -qP '^fp_item=' "$file"; then
echo "Abnormal exit: the file does not contain fp_item"
exit 1
fi
if ! grep -qP '^run_mode=' "$file"; then
echo "Abnormal exit: the file does not contain run_mode"
exit 1
fi
model_name_str=$(grep -oP 'model_item=\K[^"]*' "$file" | sed 's/ *$//')
}

# Recursive function used to traverse a directory tree
traverse_folder() {
local folder=$1

# Iterate over all files and subdirectories in the current directory
for item in "$folder"/*; do
# Check whether the entry is a directory
if [[ -d "$item" ]]; then
traverse_folder "$item" # Recurse into the subdirectory
else
echo "File: ${item}"
# Run shellcheck and keep only error-level findings
shellcheck --format=gcc "${item}" | grep -v '^$' | grep -v '^#' | grep error
if [[ ${item} == *"N"*"C"*".sh" ]]; then
# Check the case file name format
check_case_name "${item}"
# Check the param mode
check_param_mode "${item}"
# Check that run_mode matches run_benchmark.sh
check_run_mode "${item}"
elif [[ ${item} == *"run_benchmark.sh" ]]; then
check_args "${item}"
else
echo "Other scripts need a manual check"
fi

fi
done
}

# Start the traversal by calling the recursive function
traverse_folder "$dir_name"
117 changes: 117 additions & 0 deletions distributed/benchmark_scripts/generate/generate_sh.py
@@ -0,0 +1,117 @@
import yaml
import os
import sys
# After benchmark_common and one case script have been written, adapt generate_sh.py and update params.yaml according to the case configuration
# Run from the root directory of the model suite: cd /path/to/repo && python /path/to/generate_sh.py paddle /path/to/params.yaml
def generate_paddle_case(arg_params):
    test_case_template = """
#!/usr/bin/env bash
param="model_item={model_item_temp} "
param+="global_batch_size={global_batch_size_temp} "
param+="fp_item={fp_item_temp} "
param+="run_mode={run_mode_temp} "
param+="device_num={device_num_temp} "
param+="micro_batch_size={micro_batch_size_temp} "
# Optional parameters for run_benchmark.sh below
param+="dp_degree={dp_degree_temp} "
param+="mp_degree={mp_degree_temp} "
param+="pp_degree={pp_degree_temp} "
param+="sharding_degree={sharding_degree_temp} "
param+="sharding_stage={sharding_stage_temp} "
param+="level={level_temp} "
param+="local_batch_size={local_batch_size_temp} "
param+="workerlog_id={workerlog_id_temp} "

cd ./benchmarks
# get data
bash {model_item_script_path_temp}/benchmark_common/prepare.sh
# run
bash -c "${{param}} bash {model_item_script_path_temp}/benchmark_common/run_benchmark.sh"
"""

    # Fill in the template with the configured parameters and generate the test case
    print(arg_params)
    model_item = arg_params['model_item']
    base_batch_size = arg_params['global_batch_size']
    fp_item = arg_params['fp_item']
    run_mode = arg_params['run_mode']
    device_num = arg_params['device_num']
    test_case = test_case_template.format(
        model_item_temp=model_item,
        global_batch_size_temp=base_batch_size,
        fp_item_temp=fp_item,
        run_mode_temp=run_mode,
        device_num_temp=device_num,
        micro_batch_size_temp=arg_params['micro_batch_size'],
        dp_degree_temp=arg_params['dp_degree'],
        mp_degree_temp=arg_params['mp_degree'],
        pp_degree_temp=arg_params['pp_degree'],
        sharding_degree_temp=arg_params['sharding_degree'],
        sharding_stage_temp=arg_params['sharding_stage'],
        level_temp=arg_params['level'],
        local_batch_size_temp=arg_params['local_batch_size'],
        workerlog_id_temp=arg_params['workerlog_id'],
        model_item_script_path_temp=arg_params['model_item_script_path'],
    )

    # Create the output directory and write the case script
    os.makedirs(os.path.join(arg_params['benchmark_path'], arg_params['model_item_script_path'], device_num), exist_ok=True)
    with open(os.path.join(arg_params['benchmark_path'], arg_params['model_item_script_path'],
                           device_num, f'{model_item}_bs{base_batch_size}_{fp_item}_{run_mode}.sh'), 'w') as f:
        f.write(test_case)
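    # With the sample generate/params.yaml in this PR, this sketch would place the case
    # script under <benchmark_path>/<model_item_script_path>/N1C8/ and name it
    # gpt_auto_bs16_fp16O2_DP2-MP2-PP2-SD2-stage3.sh (values not verified here).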



def generate_pytorch_case(arg_params):
    test_case_template = """
#!/usr/bin/env bash
model_item={model_item_temp}
bs_item={base_batch_size_temp}
fp_item={fp_item_temp}
run_mode={run_mode_temp}
device_num={device_num_temp}
max_iter={max_iter_temp}
num_workers={num_workers_temp}
# get data
bash prepare.sh
# run
bash run_benchmark.sh ${{model_item}} ${{bs_item}} ${{fp_item}} ${{run_mode}} ${{device_num}} ${{max_iter}} ${{num_workers}} 2>&1;
"""

    # Fill in the template with the configured parameters and generate the test case
    print(arg_params)
    model_item = arg_params['model_item']
    base_batch_size = arg_params['base_batch_size']
    fp_item = arg_params['fp_item']
    run_mode = arg_params['run_mode']
    device_num = arg_params['device_num']
    max_iter = arg_params['max_iter']
    num_workers = arg_params['num_workers']
    test_case = test_case_template.format(
        model_item_temp=model_item,
        base_batch_size_temp=base_batch_size,
        fp_item_temp=fp_item,
        run_mode_temp=run_mode,
        device_num_temp=device_num,
        max_iter_temp=max_iter,
        num_workers_temp=num_workers,
        model_item_script_path_temp=arg_params['model_item_script_path'],
    )
    # Create the output directory and write the case script
    os.makedirs(os.path.join(arg_params['model_item_script_path'], model_item, device_num), exist_ok=True)
    with open(os.path.join(arg_params['model_item_script_path'], model_item, device_num, f'{model_item}_bs{base_batch_size}_{fp_item}_{run_mode}.sh'), 'w') as f:
        f.write(test_case)


if __name__ == "__main__":
    # Read the command-line arguments
    frame = sys.argv[1]
    config_path = sys.argv[2]
    # Read the parameters from the YAML config file
    with open(config_path, 'r') as file:
        arg_params = yaml.safe_load(file)
    if frame == 'paddle':
        generate_paddle_case(arg_params)
    else:
        generate_pytorch_case(arg_params)

16 changes: 16 additions & 0 deletions distributed/benchmark_scripts/generate/params.yaml
@@ -0,0 +1,16 @@
model_item: gpt_auto
global_batch_size: 16
fp_item: fp16O2
run_mode: DP2-MP2-PP2-SD2-stage3
device_num: N1C8
micro_batch_size: 4
dp_degree: 2
mp_degree: 2
pp_degree: 2
sharding_degree: 2
sharding_stage: 3
level: o2
local_batch_size: 8
workerlog_id: 2
benchmark_path: ./model_zoo/gpt-3/benchmarks/ # Directory where the benchmark scripts live, usually the suite root directory; e.g. ./tests
model_item_script_path: ./test_tipc/gpt/static/auto_parallel # Directory where the model_item scripts live; e.g. ./test_tipc/dygraph/hybrid_parallelism/gpt
@@ -0,0 +1,37 @@
#!/usr/bin/env bash

# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Required parameters for run_benchmark.sh below
param="model_item=gpt_auto "
param+="global_batch_size=16 "
param+="fp_item=fp16O2 "
param+="run_mode=DP1-MP1-PP8-SD1-stage1 "
param+="device_num=N1C8 "
param+="micro_batch_size=2 "
# Optional parameters for run_benchmark.sh below
param+="dp_degree=1 "
param+="mp_degree=1 "
param+="pp_degree=8 "
param+="sharding_degree=1 "
param+="sharding_stage=1 "
param+="level=o2 "
param+="local_batch_size=16 "
param+="workerlog_id=7 "

cd ./benchmarks
bash ./test_tipc/gpt/static/auto_parallel/benchmark_common/prepare.sh
# run
bash -c "${param} bash ./test_tipc/gpt/static/auto_parallel/benchmark_common/run_benchmark.sh"
22 changes: 22 additions & 0 deletions distributed/benchmark_scripts/gpt_auto/benchmark_common/prepare.sh
@@ -0,0 +1,22 @@
#!/bin/bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

python -m pip install -r ../requirements.txt
# get data
cd ../ || return
rm -rf data
mkdir data
wget -O data/gpt_en_dataset_300m_ids.npy https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
wget -O data/gpt_en_dataset_300m_idx.npz https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz