Skip to content

Fix gibberish token problem for prefill also (#723) #2

Fix gibberish token problem for prefill also (#723)

Fix gibberish token problem for prefill also (#723) #2

# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: Llama Benchmarking 8B Tests
on:
workflow_dispatch:
pull_request:
push:
branches:
- main
concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit). The workflow name is prepended to avoid conflicts between
# different workflows.
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true
jobs:
test_llama_quick:
name: "Llama Benchmarking 8B Tests"
strategy:
matrix:
version: [3.11]
fail-fast: false
runs-on: llama-mi300x-1
defaults:
run:
shell: bash
env:
VENV_DIR: ${{ github.workspace }}/.venv
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Get Current Date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{matrix.version}}
- name: Create Python venv
run: python -m venv ${VENV_DIR}
- name: Install pip deps
run: |
source ${VENV_DIR}/bin/activate
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first. Installing the PyTorch CPU
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile -r pytorch-cpu-requirements.txt
# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
pip freeze
- name: Run llama 8b f16 decomposed test
run: |
source ${VENV_DIR}/bin/activate
pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --iree-device=hip://0 --run-quick-llama-test
- name: Upload llama executable files
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: llama-files
path: ${{ github.workspace }}/${{ steps.date.outputs.date }}