From 66b9412fbebafce1c0a95992ce9b0d466be837e9 Mon Sep 17 00:00:00 2001
From: Wenxin Zhang
Date: Tue, 14 May 2024 09:41:25 +0800
Subject: [PATCH] update

Signed-off-by: Wenxin Zhang
---
 .github/workflows/model_test.yml              |  80 +++++-
 .../workflows/scripts/models/collect_log.sh   |   7 +-
 .../scripts/models/generate_report.sh         | 254 ++++++++++++++++++
 .../workflows/scripts/models/model_test.sh    |  29 +-
 4 files changed, 348 insertions(+), 22 deletions(-)
 create mode 100644 .github/workflows/scripts/models/generate_report.sh

diff --git a/.github/workflows/model_test.yml b/.github/workflows/model_test.yml
index 4f18c630..44d417be 100644
--- a/.github/workflows/model_test.yml
+++ b/.github/workflows/model_test.yml
@@ -36,7 +36,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - modelName: "facebook/opt-125m"
+          - modelName: "opt-125m"
             datasets: "piqa"
             device: "cpu"
             tasks: "text-generation"
@@ -45,6 +45,11 @@ jobs:
     steps:
       - name: Clean Up Working Directory
         run: sudo rm -rf ${{github.workspace}}/*
+
+      - name: Load environment variables
+        if: ${{ matrix.device == 'cpu' }}  # comparison must sit inside the expression; text outside it yields a non-empty string that is always true
+        run:
+          cat ~/actions-runner4/.env >> $GITHUB_ENV
 
       - name: Checkout out Repo
         uses: actions/checkout@v4
@@ -54,7 +59,7 @@ jobs:
       # We need this because GitHub needs to clone the branch to pipeline
       - name: Docker Build
         run: |
-          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
 
- name: Docker Run run: | @@ -64,6 +69,7 @@ jobs: fi docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \ -v ${{ github.workspace }}:/GenAIEval \ + -e http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" -e https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \ ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} - name: Binary build @@ -86,9 +92,6 @@ jobs: # skip_unpack: false # if_no_artifact_found: warn - #- name: Display structure of downloaded files - # run: ls -R - - name: Evaluation run: | docker exec ${{ env.CONTAINER_NAME }} \ @@ -102,7 +105,7 @@ jobs: && bash -x collect_log.sh --model=${{ matrix.modelName }} \ --device=${{ matrix.device }} \ --datasets=${{ matrix.datasets }} \ - --tasks=${{ matrix.tasks }} + --tasks=${{ matrix.tasks }}" - name: Publish pipeline artifact uses: actions/upload-artifact@v4 @@ -110,7 +113,68 @@ jobs: with: name: ${{ matrix.device }}-${{ matrix.modelName }} path: | - ${{ github.workspace }}/${{ matrix.device }}/${{ matrix.modelName }} - ${{ github.workspace }}/.summary.log + ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log + ${{ github.workspace }}/summary.log if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` retention-days: 60 # 1 <= retention-days <= 90 + + Genreate-Report: + runs-on: ubuntu-latest + needs: [Evaluation-Workflow] + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Download Summary Log + uses: actions/download-artifact@v4 + with: + path: ${{ env.OUT_SCRIPT_PATH }}/log + - name: Display structure of downloaded files + run: ls -R + - name: Analysis Summary + run: | + cd ${{ env.OUT_SCRIPT_PATH }} + ls -R + + - name: Download Reference Artifact + id: download-artifact + uses: dawidd6/action-download-artifact@v3.1.2 + with: + workflow: model-test.yml + name: FinalReport + run_id: ${{ vars.ModelTest_REF_ID }} + path: ${{ env.OUT_SCRIPT_PATH }} + 
name_is_regexp: true + repo: ${{ github.repository }} + check_artifacts: false + search_artifacts: false + skip_unpack: false + if_no_artifact_found: warn + + - name: Display structure of downloaded files + run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R + + - name: Generate report + run: | + echo "------ Generating final report.html ------" + cd ${{ env.OUT_SCRIPT_PATH }} + /usr/bin/bash generate_report.sh + env: + RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }} + BUILD_NUMBER: ${{ github.run_id }} + JOB_STATUS: succeed + + - name: Publish Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: FinalReport + path: ${{ env.OUT_SCRIPT_PATH }}/generated + + #- name: Specify performance regression + # if: ${{ !cancelled() }} + # run: | + # if [ ${{ env.is_perf_reg }} == 'true' ]; then + # echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." + # exit 1 + # fi diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh index a197393a..e4e878ef 100644 --- a/.github/workflows/scripts/models/collect_log.sh +++ b/.github/workflows/scripts/models/collect_log.sh @@ -14,6 +14,7 @@ # limitations under the License. set -eo pipefail +set -x source /GenAIEval/.github/workflows/scripts/change_color WORKSPACE="/GenAIEval" # get parameters @@ -34,7 +35,7 @@ for i in "$@"; do esac done -log_file="/GenAIEval/${device}/${model}/${device}-${model}-${tasks}-${datasets}.log" +log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log" $BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET echo "working in" @@ -42,6 +43,6 @@ pwd if [[ ! 
-f ${log_file} ]]; then echo "${device};${model};${tasks};${datasets};;${logfile}" >> ${WORKSPACE}/summary.log else - acc=$(grep -Po "Accuracy .* is:\\s+(\\d+(\\.\\d+)?)" ${log_file} | head -n 1 | sed 's/.*://;s/[^0-9.]//g') - echo "${device};${model};${tasks};${datasets};${acc};${logfile}" >> ${WORKSPACE}/summary.log + acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $2}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g') + echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log fi diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh new file mode 100644 index 00000000..4cb54808 --- /dev/null +++ b/.github/workflows/scripts/models/generate_report.sh @@ -0,0 +1,254 @@ +#!/bin/bash +set -x +WORKSPACE=generated +last_log_path=FinalReport +summaryLog=${WORKSPACE}/summary.log +summaryLogLast=${last_log_path}/summary.log +PATTERN='[-a-zA-Z0-9_]*=' + +function main { + echo "summaryLog: ${summaryLog}" + echo "summaryLogLast: ${summaryLogLast}" + echo "is_perf_reg=false" >> "$GITHUB_ENV" + + generate_html_head + generate_html_overview + if [[ -f ${summaryLog} ]]; then + generate_results + fi + generate_html_footer +} + +function generate_html_overview { + Test_Info_Title="Test Branch Commit ID " + Test_Info="${MR_source_branch} ${ghprbActualCommit} " + + cat >>${WORKSPACE}/report.html < +
+

ITREX Tests + [ Job-${BUILD_NUMBER} ]

+

Test Status: ${JOB_STATUS}

+

Summary

+ + + + ${Test_Info_Title} + + + + ${Test_Info} + +
Repo
ITREX
+eof
+}
+
+function generate_results {
+    cat >>${WORKSPACE}/report.html <Performance
+
+
+
+
+
+
+
+
+eof
+
+    devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++')  # summary.log fields are ';'-separated, as written by collect_log.sh
+    for device in ${devices[@]}; do
+        models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++')
+        for model in ${models[@]}; do
+            tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++')
+            for task in ${tasks[@]}; do
+                datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++')
+                for dataset in ${datasets[@]}; do
+                    benchmark_pattern="${device};${model};${task};${dataset};"
+                    acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
+                    link=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f6 | awk '!a[$0]++')
+                    if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") == 0 ]; then
+                        acc_last=nan
+                        link_last=nan
+                    else
+                        acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
+                        link_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f6 | awk '!a[$0]++')
+                    fi
+                    generate_core
+                done
+            done
+        done
+    done
+    cat >>${WORKSPACE}/report.html <
+eof
+}
+
+function generate_core {
+    echo "" >>${WORKSPACE}/report.html
+    echo | awk -v acc=${acc} -v link=${link} -v acc_l=${acc_last} -v link_l=${link_last} '
+        function show_benchmark(a,b) {
+            if(a ~/[1-9]/) {
+                printf("\n",b,a);
+            }else {
+                if(a == "") {
+                    printf("\n",b,a);
+                }else{
+                    printf("\n");
+                }
+            }
+        }
+        function compare_new_last(a,b){
+            if(a ~/[1-9]/ && b ~/[1-9]/) {
+                target = a / b;  # New/Last ratio: a is the new accuracy, b the previous one
+                if(target >= 0.945) {
+                    status_png = "background-color:#90EE90";
+                }else {
+                    status_png = "background-color:#FFD2D2";
+                    job_status = "fail"
+                }
+                printf("", status_png, target);
+            }else{
+                if(a == ""){
+                    job_status = "fail"
+                    status_png = "background-color:#FFD2D2";
+                    printf("", status_png);
+                }else{
+                    printf("");
+                }
+            }
+        }
+        BEGIN {
+            job_status = "pass"
+        }{
+            //
current + show_benchmark(acc,link) + // Last + printf("\n") + show_benchmark(acc_l,link_l) + // current vs last + printf("\n"); + compare_new_last(acc,acc_l) + printf("\n"); + } END{ + printf("\n%s", job_status); + } + ' >>${WORKSPACE}/report.html + job_state=$(tail -1 ${WORKSPACE}/report.html) + sed -i '$s/.*//' ${WORKSPACE}/report.html + if [ ${job_state} == 'fail' ]; then + echo "is_perf_reg=true" >> "$GITHUB_ENV" + fi +} + +function generate_html_head { + cat >${WORKSPACE}/report.html < + + + + + + + Daily Tests - TensorFlow - Jenkins + + +eof +} + +function generate_html_footer { + cat >>${WORKSPACE}/report.html < + + +eof +} + +main diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh index fb420086..b0c2cba4 100644 --- a/.github/workflows/scripts/models/model_test.sh +++ b/.github/workflows/scripts/models/model_test.sh @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set -eo pipefail
+set -o pipefail
+set -x
 source /GenAIEval/.github/workflows/scripts/change_color
-
+git config --global --add safe.directory /GenAIEval
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
@@ -34,11 +35,7 @@ for i in "$@"; do
     esac
 done
 
-log_dir="/GenAIEval/${device}/${model}"
-mkdir -p ${log_dir}
 working_dir=""
-$BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
-
 main() {
     case ${tasks} in
         "text-generation")
@@ -48,7 +45,16 @@ main() {
         *)
             echo "Not suppotted task"; exit 1;;
     esac
+    if [[ ${model} == *"opt"* ]]; then
+        pretrained="facebook/${model}"
+    else
+        pretrained="${model}"
+    fi
+    log_dir="/log/${device}/${model}"
+    mkdir -p ${log_dir}
+    $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
     run_benchmark
+    cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/
 }
 
 function prepare() {
@@ -69,15 +75,16 @@ function prepare() {
 
 function run_benchmark() {
     cd ${working_dir}
-    overall_log="${log_dir}/${device}-${model}-${tasks}-${datasets}.log"
+    overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log"
     python main.py \
         --model hf \
-        --model_args pretrained=${model} \
+        --model_args pretrained=${pretrained} \
         --tasks ${datasets} \
         --device ${device} \
-        --batch_size 112
-    2>&1 | tee ${overall_log}
-
-    status=$?
+        --batch_size 112 2>&1 | tee ${overall_log}
+    status=$?  # capture the pipeline status (pipefail is on) before echo/cat below overwrite $?
+
+    echo "print log content:"
+    cat ${overall_log}
     if [ ${status} != 0 ]; then
         echo "Evaluation process returned non-zero exit code."
DeviceTasksModelDatasetsAccuracy
${device}${model}${task}${dataset}New%.2f%s%.2f
Last
New/Last