# .github/workflows/model_test_cpu.yml


# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Workflow-level settings: triggers, run concurrency, token permissions and
# the environment variables shared by every job below.
name: Model Test on CPU

on:
  pull_request:
    branches:
      - main
    # `ready_for_review` is listed so a PR leaving draft state starts a run
    # (original note: drafts are skipped; that skip is not configured here).
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # Only changes to this workflow, the evaluation/metrics sources or
    # setup.py start a run; Markdown files are excluded.
    paths:
      - '.github/workflows/model_test_cpu.yml'
      - 'evals/evaluation/**'
      - 'evals/metrics/**'
      - 'setup.py'
      - '!**.md'
  workflow_dispatch:

# A newer run for the same pull request (or the same ref when there is no
# pull request) cancels the run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

# NOTE(review): `write-all` grants write access on every permission scope;
# consider narrowing it once the scopes the scripts need are confirmed.
permissions: write-all

env:
  # Model scripts directory as seen from the runner's workspace.
  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
  # Scripts directory as seen from inside the container, where the workspace
  # is mounted at /GenAIEval.
  SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
  # Image name/tag built by the evaluation job and the container started from it.
  DOCKER_NAME: 'genaieval'
  DOCKER_TAG: 'latest'
  CONTAINER_NAME: 'modelTest'


jobs:
  # Builds the evaluation image, runs the model test inside a container on the
  # `aise-cluster-cpu` runner and uploads the resulting logs as an artifact.
  Evaluation-Workflow:
    runs-on: aise-cluster-cpu
    strategy:
      fail-fast: true
      matrix:
        include:
          - modelName: 'opt-125m'
            datasets: 'lambada_openai'
            device: 'cpu'
            tasks: 'text-generation'

    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      # The branch has to be cloned into the workspace first: the image build
      # reads the dockerfile from it and the container mounts it.
      - name: Checkout out Repo
        uses: actions/checkout@v4
        with:
          fetch-tags: true
          submodules: 'recursive'

      - name: Docker Build
        run: |
          docker build \
            -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile \
            -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} \
            .

      # Removes a leftover container with the same name before starting a new
      # one with the workspace mounted at /GenAIEval.
      - name: Docker Run
        run: |
          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
            docker stop ${{ env.CONTAINER_NAME }}
            docker rm -vf ${{ env.CONTAINER_NAME }} || true
          fi
          docker run -dit --disable-content-trust --privileged \
            --name=${{ env.CONTAINER_NAME }} \
            -v /dev/shm:/dev/shm \
            -v ${{ github.workspace }}:/GenAIEval \
            ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}

      # Installs the requirements and the package itself inside the container.
      - name: Binary build
        run: |
          docker exec ${{ env.CONTAINER_NAME }} \
            bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"

      - name: Evaluation
        run: |
          docker exec ${{ env.CONTAINER_NAME }} \
            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
              && bash -x model_test.sh \
                --model=${{ matrix.modelName }} \
                --device=${{ matrix.device }} \
                --datasets=${{ matrix.datasets }} \
                --tasks=${{ matrix.tasks }}"

      - name: Collect Log
        run: |
          docker exec ${{ env.CONTAINER_NAME }} \
            bash -c "cd /GenAIEval/.github/workflows/scripts/models \
              && bash -x collect_log.sh \
                --model=${{ matrix.modelName }} \
                --device=${{ matrix.device }} \
                --datasets=${{ matrix.datasets }} \
                --tasks=${{ matrix.tasks }}"

      # Runs even when an earlier step failed, but not when the run was cancelled.
      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
          path: |
            ${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
            ${{ github.workspace }}/summary.log
          if-no-files-found: ignore # alternatives: `warn` (the default) or `error`
          retention-days: 60 # allowed range: 1 <= retention-days <= 90

  # Gathers the logs uploaded by Evaluation-Workflow, renders the final report
  # and fails the job when a performance regression was flagged.
  # NOTE(review): the job id is misspelled ("Genreate"); it is left unchanged
  # because status checks or other tooling may reference it.
  Genreate-Report:
    runs-on: ubuntu-latest
    needs: [Evaluation-Workflow]
    steps:
      - name: Checkout out Repo
        uses: actions/checkout@v4

      # No artifact `name` is given; with download-artifact this is expected
      # to fetch every artifact of the current run into the log directory.
      - name: Download Summary Log
        uses: actions/download-artifact@v4
        with:
          path: ${{ env.OUT_SCRIPT_PATH }}/log
      - name: Display structure of downloaded files
        run: ls -R
      - name: Analysis Summary
        run: |
            cd ${{ env.OUT_SCRIPT_PATH }}
            ls -R

      # Fetches the `FinalReport` artifact of the reference run selected by the
      # `ModelTest_CPU_REF_ID` variable; a missing artifact only raises a warning.
      - name: Download Reference Artifact
        id: download-artifact
        uses: dawidd6/action-download-artifact@v3.1.2
        with:
          workflow: model_test_cpu.yml
          name: FinalReport
          run_id: ${{ vars.ModelTest_CPU_REF_ID }}
          path: ${{ env.OUT_SCRIPT_PATH }}
          name_is_regexp: true
          repo: ${{ github.repository }}
          check_artifacts: false
          search_artifacts: false
          skip_unpack: false
          if_no_artifact_found: warn

      - name: Display structure of downloaded files
        run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R

      - name: Generate report
        run: |
          echo "------ Generating final report.html ------"
          cd ${{ env.OUT_SCRIPT_PATH }}
          mkdir -p generated
          /usr/bin/bash -x generate_report.sh
        env:
          # Link back to this run. Built from the run's own server and
          # repository so it is also valid on forks (the previous value had a
          # doubled `https://` scheme and a hard-coded repository).
          RUN_DISPLAY_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
          BUILD_NUMBER: ${{ github.run_id }}
          JOB_STATUS: succeed

      - name: Publish Report
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: FinalReport
          path: ${{ env.OUT_SCRIPT_PATH }}/generated

      # `is_perf_reg` is read from the job environment; presumably it is
      # exported by generate_report.sh — TODO confirm. The expansion is quoted
      # so the test stays well-formed when the variable is unset or empty.
      - name: Specify performance regression
        if: ${{ !cancelled() }}
        run: |
          if [ "${{ env.is_perf_reg }}" = "true" ]; then
            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
            exit 1
          fi