forked from nod-ai/shark-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
78 lines (66 loc) · 2.68 KB
/
ci-sglang-integration-tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: SGLang Llama Integration Tests

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Run every 4 hours, at minute 0. Running on a fixed cadence is intended
    # to catch regressions early and to keep the set of commits that has to
    # be triaged for any given regression small.
    - cron: '0 */4 * * *'
concurrency:
  # Runs are grouped by workflow name plus the pull request number or, when
  # there is none, the commit hash. Prefixing the workflow name keeps this
  # group from colliding with groups used by other workflows.
  # NOTE(review): the only triggers visible in this file are `schedule` and
  # `workflow_dispatch`, so the PR-number term is presumably always empty
  # here and the group is effectively keyed on the commit hash - confirm.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  # A newly started run cancels any queued or in-progress run that belongs
  # to the same group.
  cancel-in-progress: true
jobs:
  # Builds a fresh virtualenv, installs the project packages plus SGLang into
  # it, and then runs the SGLang integration tests with pytest.
  sglang_bench_serve:
    name: "SGLang Integration Tests"
    strategy:
      matrix:
        # Quoted so the version stays a string. An unquoted value is parsed
        # as a YAML float, which would silently turn e.g. 3.10 into 3.1.
        version: ["3.11"]
      fail-fast: false
    runs-on: mi300x-4
    defaults:
      run:
        shell: bash
    env:
      # Location of the virtualenv shared by all steps below.
      VENV_DIR: ${{ github.workspace }}/.venv
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # v5.3.0
        with:
          python-version: ${{ matrix.version }}

      - name: Create Python venv
        # VENV_DIR is derived from the workspace path, so it is quoted to keep
        # bash from word-splitting a path that contains spaces.
        run: |
          python -m venv "${VENV_DIR}"

      - name: Install pip deps
        run: |
          source "${VENV_DIR}/bin/activate"
          python -m pip install --no-compile --upgrade pip

          # Note: We install in separate steps in order to satisfy
          # requirements from non-default locations first. Installing the
          # PyTorch CPU wheels saves multiple minutes and a lot of bandwidth
          # on runner setup.
          pip install --no-compile -r pytorch-cpu-requirements.txt

          # Use the newest possible releases to be able to track commits that
          # may cause errors.
          pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
            iree-base-compiler \
            iree-base-runtime \
            iree-turbine \
            "numpy<2.0"

          pip install --no-compile -r requirements.txt -e sharktank/ shortfin/

          # Install SGLang and sentence_transformers.
          pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python"
          pip install sentence_transformers

          # Record the resolved package versions in the job log.
          pip freeze

      - name: Run Integration Tests
        run: |
          source "${VENV_DIR}/bin/activate"
          pytest -v app_tests/integration_tests/llm/sglang --log-cli-level=INFO