From 3c21be026a2595c57db5d705babe877e2e7d6e19 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 01:12:42 +0000 Subject: [PATCH 01/38] Add benchmark using sglang server, Add sgl server benchmark to workflow file, Restructure `app_tests/benchmark_tests` --- .github/workflows/ci-sglang-benchmark.yml | 113 ++++++++++++++++-- .../llm/sglang_benchmarks/__init__.py | 5 + .../llm/{ => sglang_benchmarks}/conftest.py | 18 ++- .../sglang_benchmark_test.py | 66 ++++++++++ .../shortfin_benchmark_test.py} | 21 ++-- .../llm/{ => sglang_benchmarks}/utils.py | 13 ++ app_tests/integration_tests/llm/utils.py | 2 +- 7 files changed, 210 insertions(+), 28 deletions(-) create mode 100644 app_tests/benchmark_tests/llm/sglang_benchmarks/__init__.py rename app_tests/benchmark_tests/llm/{ => sglang_benchmarks}/conftest.py (77%) create mode 100644 app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py rename app_tests/benchmark_tests/llm/{sglang_benchmark_test.py => sglang_benchmarks/shortfin_benchmark_test.py} (87%) rename app_tests/benchmark_tests/llm/{ => sglang_benchmarks}/utils.py (84%) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 504e7e5e3..c05d651fd 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -7,10 +7,14 @@ name: SGLang Llama Benchmarking Tests on: + # TODO: Remove PR trigger after verification + pull_request: workflow_dispatch: schedule: - # Weekdays at 4:00 AM UTC = 9:00 PM PST. - - cron: "0 4 * * 1-5" + # Weekdays at 6:00 AM UTC = 11:00 PM PST. + # This is a pretty GPU intensive test, so want to avoid conflicting + # with other potentially scheduled tests. + - cron: "0 6 * * 1-5" concurrency: # A PR number if a pull request and otherwise the commit hash. 
This cancels @@ -21,9 +25,9 @@ concurrency: cancel-in-progress: true jobs: - sglang_bench_serve: + benchmark_shortfin: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} - name: "SGLang Serving Benchmark Tests" + name: "SGLang Serving Benchmark With Shortfin" strategy: matrix: version: [3.11] @@ -77,13 +81,98 @@ jobs: - name: Install SGLang run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - - name: Launch Shortfin Server - run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmark_test.py --log-cli-level=INFO --html=out/llm/sglang/index.html + - name: Run Shortfin Benchmark Tests + run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html - - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + # TODO: Uncomment after verification + # - name: Deploy to GitHub Pages + # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + # with: + # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + # publish_dir: ./out/llm/sgl_benchmark/shortfin + # destination_dir: ./llm/sgl_benchmark/shortfin + # keep_files: true + + benchmark_sglang: + if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + name: "SGLang Serving Benchmark With SGLang" + needs: benchmark_shortfin + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: llama-mi300x-3 + defaults: + run: + shell: bash + env: + PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + steps: + - name: Get Current Date + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: - github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/sglang - destination_dir: ./llm/sglang - 
keep_files: true + python-version: ${{matrix.version}} + + - name: Install SGLang + run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Instruction for SGLang image sourced from here: + # https://sgl-project.github.io/start/install.html#method-3-using-docker + # We have to run in a docker container due to their vLLM dependency. + # From their pyproject.toml: + # HIP (Heterogeneous-computing Interface for Portability) for AMD + # => base docker rocm/vllm-dev:20241022, not from public vllm whl + # srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"] + - name: Pull SGLang Image (Had issues with sglang:v0.3.5.post1-rocm620) + run: | + docker pull lmsysorg/sglang:v0.3.5.post1-rocm620 + + - name: Run SGLang Server + run: | + docker run -d --rm \ + --device=/dev/kfd \ + --device=/dev/dri \ + --ipc=host \ + --shm-size 16G \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -v $HOME/dockerx:/dockerx \ + -v /data:/data \ + -p 30000:30000 \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HF_TOKEN={{ secrets.HF_TOKEN }}" \ + lmsysorg/sglang:v0.3.5.post1-rocm620 \ + python3 -m sglang.launch_server \ + --model-path meta-llama/Llama-3.1-8b \ + --host 0.0.0.0 \ + --port 30000 \ + --tp 1 \ + --dtype float16 + + - name: Run SGLang Benchmark Tests + run: | + pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py --port 30000 --log-cli-level=INFO --html=out/llm/sglang/index.html + + - name: Stop sglang-server + run: docker stop sglang-server || true # Stop container if it's running + + - name: Cleanup SGLang Image + run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 + + # TODO: Uncomment after verifying + # - name: Deploy to GitHub Pages + # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + # with: + # github_token: ${{ 
secrets.SHARK_PLATFORM_GH_TOKEN }} + # publish_dir: ./out/llm/sgl_benchmark/sglang + # destination_dir: ./llm/sgl_benchmark/sglang + # keep_files: true diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/__init__.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/__init__.py new file mode 100644 index 000000000..a85ba359d --- /dev/null +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/__init__.py @@ -0,0 +1,5 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/app_tests/benchmark_tests/llm/conftest.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py similarity index 77% rename from app_tests/benchmark_tests/llm/conftest.py rename to app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py index cc354b7eb..eceaec857 100644 --- a/app_tests/benchmark_tests/llm/conftest.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py @@ -9,7 +9,9 @@ import pytest import sys -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +) from integration_tests.llm.utils import compile_model, export_paged_llm_v1 @@ -44,3 +46,17 @@ def pre_process_model(request, tmp_path_factory): compile_model(mlir_path, vmfb_path, settings) return tmp_dir + + +def pytest_addoption(parser): + parser.addoption( + "--port", + action="store", + default="30000", + help="Port that SGLang server is running on", + ) + + +@pytest.fixture(scope="module") +def sglang_args(request): + return request.config.getoption("--port") diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py new file mode 100644 index 000000000..fe6b28cee --- /dev/null +++ 
b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py @@ -0,0 +1,66 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import logging +from pathlib import Path +import pytest +import time +from unittest.mock import patch + +pytest.importorskip("sglang") +from sglang import bench_serving + +from .utils import SGLangBenchmarkArgs, log_jsonl_result + +from integration_tests.llm.utils import wait_for_server + +logger = logging.getLogger(__name__) + +TOKENIZER_DIR = Path("/data/llama3.1/8b/") + + +@pytest.mark.parametrize( + "request_rate", + [1, 2, 4, 8, 16, 32], +) +def test_sglang_benchmark(request_rate, sglang_args, tmp_path_factory): + tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test") + logger.info("Beginning SGLang benchmark test...") + + port = sglang_args + base_url = f"http://localhost:{port}" + + # Setting a high timeout gives enough time for downloading model artifacts + # and starting up server... Takes a little longer than shortfin. 
+ wait_for_server(base_url, timeout=600) + + benchmark_args = SGLangBenchmarkArgs( + backend="sglang", + num_prompt=10, + base_url=f"http://localhost:{port}", + tokenizer=TOKENIZER_DIR, + request_rate=request_rate, + ) + output_file = ( + tmp_dir + / f"{benchmark_args.backend}_{benchmark_args.num_prompt}_{benchmark_args.request_rate}.jsonl" + ) + benchmark_args.output_file = output_file + + logger.info("Running SGLang Benchmark with the following args:") + logger.info(benchmark_args) + + try: + start = time.time() + with patch.object(bench_serving, "print", side_effect=logger.info): + bench_serving.run_benchmark( + benchmark_args.as_namespace(), + ) + logger.info(f"Benchmark run completed in {str(time.time() - start)} seconds") + logger.info("======== RESULTS ========") + log_jsonl_result(benchmark_args.output_file) + except Exception as e: + logger.error(e) diff --git a/app_tests/benchmark_tests/llm/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py similarity index 87% rename from app_tests/benchmark_tests/llm/sglang_benchmark_test.py rename to app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py index 0de775795..e9750fa5a 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py @@ -4,7 +4,6 @@ # See https://llvm.org/LICENSE.txt for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import json import logging import multiprocessing import os @@ -16,14 +15,17 @@ pytest.importorskip("sglang") from sglang import bench_serving -from utils import SGLangBenchmarkArgs +from app_tests.benchmark_tests.llm.sglang_benchmarks.utils import ( + SGLangBenchmarkArgs, + log_jsonl_result, +) from integration_tests.llm.utils import ( find_available_port, start_llm_server, ) -logger = logging.getLogger("__name__") +logger = logging.getLogger(__name__) device_settings = { "device_flags": [ @@ -38,15 +40,6 @@ TOKENIZER_DIR = Path("/data/llama3.1/8b/") -def log_jsonl_result(file_path): - with open(file_path, "r") as file: - json_string = file.readline().strip() - - json_data = json.loads(json_string) - for key, val in json_data.items(): - logger.info(f"{key.upper()}: {val}") - - @pytest.mark.parametrize( "request_rate", [1, 2, 4, 8, 16, 32], @@ -64,7 +57,7 @@ def log_jsonl_result(file_path): ], indirect=True, ) -def test_sglang_benchmark_server(request_rate, pre_process_model): +def test_shortfin_benchmark(request_rate, pre_process_model): # TODO: Remove when multi-device is fixed os.environ["ROCR_VISIBLE_DEVICES"] = "1" @@ -116,7 +109,7 @@ def test_sglang_benchmark_server(request_rate, pre_process_model): logger.info("======== RESULTS ========") log_jsonl_result(benchmark_args.output_file) except Exception as e: - logger.info(e) + logger.error(e) server_process.terminate() server_process.wait() diff --git a/app_tests/benchmark_tests/llm/utils.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/utils.py similarity index 84% rename from app_tests/benchmark_tests/llm/utils.py rename to app_tests/benchmark_tests/llm/sglang_benchmarks/utils.py index 55b01da04..47cea4d76 100644 --- a/app_tests/benchmark_tests/llm/utils.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/utils.py @@ -6,8 +6,12 @@ from argparse import Namespace from dataclasses import dataclass +import json +import logging from pathlib import 
Path +logger = logging.getLogger(__name__) + @dataclass class SGLangBenchmarkArgs: @@ -54,3 +58,12 @@ def __repr__(self): f"Tokenizer: {self.tokenizer}\n" f"Request Rate: {self.request_rate}" ) + + +def log_jsonl_result(file_path): + with open(file_path, "r") as file: + json_string = file.readline().strip() + + json_data = json.loads(json_string) + for key, val in json_data.items(): + logger.info(f"{key.upper()}: {val}") diff --git a/app_tests/integration_tests/llm/utils.py b/app_tests/integration_tests/llm/utils.py index 05712039e..80b5b3c09 100644 --- a/app_tests/integration_tests/llm/utils.py +++ b/app_tests/integration_tests/llm/utils.py @@ -15,7 +15,7 @@ import requests from transformers import AutoTokenizer -logger = logging.getLogger("__name__") +logger = logging.getLogger(__name__) class AccuracyValidationException(RuntimeError): From 31398a5ce6f0355e5e40abd74e2d046aa8f71b87 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 14:56:26 +0000 Subject: [PATCH 02/38] Fix import path in `shortfin_benchmark_test`, Temporarily comment out shortfin job to verify sglang benchmark job --- .github/workflows/ci-sglang-benchmark.yml | 120 +++++++++--------- .../shortfin_benchmark_test.py | 2 +- 2 files changed, 62 insertions(+), 60 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index c05d651fd..5d2125952 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -25,64 +25,65 @@ concurrency: cancel-in-progress: true jobs: - benchmark_shortfin: - if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} - name: "SGLang Serving Benchmark With Shortfin" - strategy: - matrix: - version: [3.11] - fail-fast: false - runs-on: llama-mi300x-3 - defaults: - run: - shell: bash - env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" - steps: - - name: Get Current Date - id: date - run: echo "::set-output name=date::$(date 
+'%Y-%m-%d')" - - - name: "Setting up Python" - id: setup_python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: ${{matrix.version}} - - - name: "Checkout Code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} - - - name: Install pip deps - run: | - python -m pip install --no-compile --upgrade pip - # Note: We install in three steps in order to satisfy requirements - # from non default locations first. Installing the PyTorch CPU - # wheels saves multiple minutes and a lot of bandwidth on runner setup. - pip install --no-compile -r pytorch-cpu-requirements.txt - pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # Try with the latest nightly releases, not what iree-turbine pins. - # We could also pin to a known working or stable version. - # This should eventually stabilize. Do the best we can for now. 
- pip install -f https://iree.dev/pip-release-links.html --upgrade \ - iree-base-compiler==3.0.0rc20241118 \ - iree-base-runtime==3.0.0rc20241118 \ - "numpy<2.0" - - - name: Install SGLang - run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - - - name: Run Shortfin Benchmark Tests - run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html + # TODO: Uncomment after verification + # benchmark_shortfin: + # if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + # name: "SGLang Serving Benchmark With Shortfin" + # strategy: + # matrix: + # version: [3.11] + # fail-fast: false + # runs-on: llama-mi300x-3 + # defaults: + # run: + # shell: bash + # env: + # PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + # steps: + # - name: Get Current Date + # id: date + # run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + # - name: "Setting up Python" + # id: setup_python + # uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + # with: + # python-version: ${{matrix.version}} + + # - name: "Checkout Code" + # uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + # - name: Cache Pip Packages + # uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + # id: cache-pip + # with: + # path: ${{ env.PIP_CACHE_DIR }} + # key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + + # - name: Install pip deps + # run: | + # python -m pip install --no-compile --upgrade pip + # # Note: We install in three steps in order to satisfy requirements + # # from non default locations first. Installing the PyTorch CPU + # # wheels saves multiple minutes and a lot of bandwidth on runner setup. 
+ # pip install --no-compile -r pytorch-cpu-requirements.txt + # pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ + # -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + # pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ + + # # Try with the latest nightly releases, not what iree-turbine pins. + # # We could also pin to a known working or stable version. + # # This should eventually stabilize. Do the best we can for now. + # pip install -f https://iree.dev/pip-release-links.html --upgrade \ + # iree-base-compiler==3.0.0rc20241118 \ + # iree-base-runtime==3.0.0rc20241118 \ + # "numpy<2.0" + + # - name: Install SGLang + # run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + # - name: Run Shortfin Benchmark Tests + # run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html # TODO: Uncomment after verification # - name: Deploy to GitHub Pages @@ -96,7 +97,8 @@ jobs: benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "SGLang Serving Benchmark With SGLang" - needs: benchmark_shortfin + # TODO: Uncomment after verifying + # needs: benchmark_shortfin strategy: matrix: version: [3.11] diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py index e9750fa5a..0c49642c8 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py @@ -15,7 +15,7 @@ pytest.importorskip("sglang") from sglang import bench_serving -from app_tests.benchmark_tests.llm.sglang_benchmarks.utils import ( +from .utils import ( SGLangBenchmarkArgs, log_jsonl_result, ) From fc7828484b4c0db1b1244cb4189144faa237c9d3 Mon Sep 17 00:00:00 2001 From: Stephen 
Baione Date: Fri, 22 Nov 2024 22:15:24 +0000 Subject: [PATCH 03/38] Change `ci-sglang-benchmark/integration` to use `mi300x-4`, Update benchmark tests to download model on demand --- .github/workflows/ci-sglang-benchmark.yml | 4 ++-- .../workflows/ci-sglang-integration-tests.yml | 2 +- .../llm/sglang_benchmarks/conftest.py | 12 ++++++++-- .../sglang_benchmark_test.py | 16 +++++++------ .../shortfin_benchmark_test.py | 23 ++++++++++--------- 5 files changed, 34 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 5d2125952..265e9edae 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -33,7 +33,7 @@ jobs: # matrix: # version: [3.11] # fail-fast: false - # runs-on: llama-mi300x-3 + # runs-on: llama-mi300x-4 # defaults: # run: # shell: bash @@ -103,7 +103,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-3 + runs-on: llama-mi300x-4 defaults: run: shell: bash diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index 1c382617d..7d51da0eb 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -29,7 +29,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-3 + runs-on: llama-mi300x-4 defaults: run: shell: bash diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py index eceaec857..95d628bf1 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py @@ -12,14 +12,19 @@ sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) ) -from integration_tests.llm.utils import compile_model, export_paged_llm_v1 +from integration_tests.llm.utils import ( + compile_model, + 
export_paged_llm_v1, + download_with_hf_datasets, +) @pytest.fixture(scope="module") def pre_process_model(request, tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test") - model_path = request.param["model_path"] + model_name = request.param["model_name"] + model_param_file_name = request.param["model_param_file_name"] settings = request.param["settings"] batch_sizes = request.param["batch_sizes"] @@ -27,6 +32,9 @@ def pre_process_model(request, tmp_path_factory): config_path = tmp_dir / "config.json" vmfb_path = tmp_dir / "model.vmfb" + model_path = tmp_dir / model_param_file_name + download_with_hf_datasets(tmp_dir, model_name) + export_paged_llm_v1(mlir_path, config_path, model_path, batch_sizes) config = { diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py index fe6b28cee..43f13abb2 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py @@ -15,19 +15,21 @@ from .utils import SGLangBenchmarkArgs, log_jsonl_result -from integration_tests.llm.utils import wait_for_server +from integration_tests.llm.utils import wait_for_server, download_with_hf_datasets logger = logging.getLogger(__name__) -TOKENIZER_DIR = Path("/data/llama3.1/8b/") - @pytest.mark.parametrize( - "request_rate", - [1, 2, 4, 8, 16, 32], + "request_rate,model_name", + [(req_rate, "llama3_8b_f16") for req_rate in [1, 2, 4, 8, 16, 32]], ) -def test_sglang_benchmark(request_rate, sglang_args, tmp_path_factory): +def test_sglang_benchmark(request_rate, model_name, sglang_args, tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test") + + # Download tokenizer for llama3_8b_f16 + download_with_hf_datasets(tmp_dir, model_name) + logger.info("Beginning SGLang benchmark test...") port = sglang_args @@ -41,7 +43,7 @@ def 
test_sglang_benchmark(request_rate, sglang_args, tmp_path_factory): backend="sglang", num_prompt=10, base_url=f"http://localhost:{port}", - tokenizer=TOKENIZER_DIR, + tokenizer=tmp_dir, request_rate=request_rate, ) output_file = ( diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py index 0c49642c8..33c21b104 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py @@ -35,21 +35,21 @@ "device": "hip", } -# TODO: Download on demand instead of assuming files exist at this path -MODEL_PATH = Path("/data/llama3.1/8b/llama8b_f16.irpa") -TOKENIZER_DIR = Path("/data/llama3.1/8b/") - @pytest.mark.parametrize( - "request_rate", - [1, 2, 4, 8, 16, 32], + "request_rate,model_param_file_name", + [ + (req_rate, "meta-llama-3.1-8b-instruct.f16.gguf") + for req_rate in [1, 2, 4, 8, 16, 32] + ], ) @pytest.mark.parametrize( "pre_process_model", [ ( { - "model_path": MODEL_PATH, + "model_name": "llama3_8B_fp16", + "model_param_file_name": "meta-llama-3.1-8b-instruct.f16.gguf", "settings": device_settings, "batch_sizes": [1, 4], } @@ -57,7 +57,7 @@ ], indirect=True, ) -def test_shortfin_benchmark(request_rate, pre_process_model): +def test_shortfin_benchmark(request_rate, model_param_file_name, pre_process_model): # TODO: Remove when multi-device is fixed os.environ["ROCR_VISIBLE_DEVICES"] = "1" @@ -65,7 +65,8 @@ def test_shortfin_benchmark(request_rate, pre_process_model): config_path = tmp_dir / "config.json" vmfb_path = tmp_dir / "model.vmfb" - tokenizer_path = TOKENIZER_DIR / "tokenizer.json" + tokenizer_path = tmp_dir / "tokenizer.json" + model_path = tmp_dir / model_param_file_name # Start shortfin llm server port = find_available_port() @@ -74,7 +75,7 @@ def test_shortfin_benchmark(request_rate, pre_process_model): tokenizer_path, config_path, vmfb_path, - 
MODEL_PATH, + model_path, device_settings, timeout=30, ) @@ -84,7 +85,7 @@ def test_shortfin_benchmark(request_rate, pre_process_model): backend="shortfin", num_prompt=10, base_url=f"http://localhost:{port}", - tokenizer=TOKENIZER_DIR, + tokenizer=tmp_dir, request_rate=request_rate, ) output_file = ( From 0909e8f1f4893711a964fb903f5ad71f14947978 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 22:21:44 +0000 Subject: [PATCH 04/38] Fix github runner label --- .github/workflows/ci-sglang-benchmark.yml | 6 +++--- .github/workflows/ci-sglang-integration-tests.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 265e9edae..a115702fa 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -14,7 +14,7 @@ on: # Weekdays at 6:00 AM UTC = 11:00 PM PST. # This is a pretty GPU intensive test, so want to avoid conflicting # with other potentially scheduled tests. - - cron: "0 6 * * 1-5" + - cron: "0 4 * * 1-5" concurrency: # A PR number if a pull request and otherwise the commit hash. 
This cancels @@ -33,7 +33,7 @@ jobs: # matrix: # version: [3.11] # fail-fast: false - # runs-on: llama-mi300x-4 + # runs-on: mi300x-4 # defaults: # run: # shell: bash @@ -103,7 +103,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-4 + runs-on: mi300x-4 defaults: run: shell: bash diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index 7d51da0eb..c61756d78 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -29,7 +29,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-4 + runs-on: mi300x-4 defaults: run: shell: bash From d7cc53925c8c59e9fcf0b1f5acc027a403b6b0aa Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 22:39:19 +0000 Subject: [PATCH 05/38] Add installation steps, since test does require some functionality from shortfin/sharktank --- .github/workflows/ci-sglang-benchmark.yml | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index a115702fa..975c4c127 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -120,6 +120,35 @@ jobs: with: python-version: ${{matrix.version}} + - name: "Checkout Code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cache Pip Packages + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + id: cache-pip + with: + path: ${{ env.PIP_CACHE_DIR }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + + - name: Install pip deps + run: | + python -m pip install --no-compile --upgrade pip + # Note: We install in three steps in order to satisfy requirements + # from non default locations first. 
Installing the PyTorch CPU + # wheels saves multiple minutes and a lot of bandwidth on runner setup. + pip install --no-compile -r pytorch-cpu-requirements.txt + pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ + -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ + + # Try with the latest nightly releases, not what iree-turbine pins. + # We could also pin to a known working or stable version. + # This should eventually stabilize. Do the best we can for now. + pip install -f https://iree.dev/pip-release-links.html --upgrade \ + iree-base-compiler==3.0.0rc20241118 \ + iree-base-runtime==3.0.0rc20241118 \ + "numpy<2.0" + - name: Install SGLang run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" From cf16e541a7969b7c08c223c91f64929b959eb555 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 22:51:34 +0000 Subject: [PATCH 06/38] Fix typo in model names --- .github/workflows/ci-sglang-benchmark.yml | 2 +- .../llm/sglang_benchmarks/sglang_benchmark_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 975c4c127..ad511f0be 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -183,7 +183,7 @@ jobs: --env "HF_TOKEN={{ secrets.HF_TOKEN }}" \ lmsysorg/sglang:v0.3.5.post1-rocm620 \ python3 -m sglang.launch_server \ - --model-path meta-llama/Llama-3.1-8b \ + --model-path meta-llama/Llama-3.1-8B-Instruct \ --host 0.0.0.0 \ --port 30000 \ --tp 1 \ diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py index 43f13abb2..b4006c4e6 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py +++ 
b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py @@ -22,12 +22,12 @@ @pytest.mark.parametrize( "request_rate,model_name", - [(req_rate, "llama3_8b_f16") for req_rate in [1, 2, 4, 8, 16, 32]], + [(req_rate, "llama3_8B_fp16") for req_rate in [1, 2, 4, 8, 16, 32]], ) def test_sglang_benchmark(request_rate, model_name, sglang_args, tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test") - # Download tokenizer for llama3_8b_f16 + # Download tokenizer for llama3_8B_fp16 download_with_hf_datasets(tmp_dir, model_name) logger.info("Beginning SGLang benchmark test...") From 86058b8f7190b1d18e4b3a172013ec9fd5d2949c Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 23:23:36 +0000 Subject: [PATCH 07/38] Add container name, Add disable-cuda-graph option to allow server to properly run --- .github/workflows/ci-sglang-benchmark.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index ad511f0be..8ed2eb631 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -169,6 +169,7 @@ jobs: - name: Run SGLang Server run: | docker run -d --rm \ + --name=sglang-server \ --device=/dev/kfd \ --device=/dev/dri \ --ipc=host \ @@ -187,7 +188,8 @@ jobs: --host 0.0.0.0 \ --port 30000 \ --tp 1 \ - --dtype float16 + --dtype float16 \ + --disable-cuda-graph - name: Run SGLang Benchmark Tests run: | From acbedb0eba6d677b65b289077dc4adda2c5aa32e Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 23:30:31 +0000 Subject: [PATCH 08/38] Temporarily remove `--rm` to try and obtain container logs after failure --- .github/workflows/ci-sglang-benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 8ed2eb631..a9f4877e3 100644 --- 
a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -168,7 +168,7 @@ jobs: - name: Run SGLang Server run: | - docker run -d --rm \ + docker run -d \ --name=sglang-server \ --device=/dev/kfd \ --device=/dev/dri \ From 34c8410bb5f926e04ae73994bad8db975881b063 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Fri, 22 Nov 2024 23:54:35 +0000 Subject: [PATCH 09/38] Remove quotes around HF_TOKEN --- .github/workflows/ci-sglang-benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index a9f4877e3..0310fd14e 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -181,7 +181,7 @@ jobs: -v /data:/data \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HF_TOKEN={{ secrets.HF_TOKEN }}" \ + --env HF_TOKEN={{ secrets.HF_TOKEN }} \ lmsysorg/sglang:v0.3.5.post1-rocm620 \ python3 -m sglang.launch_server \ --model-path meta-llama/Llama-3.1-8B-Instruct \ From 0d5574d519df20e879b0b85967ad335d9a781882 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Sat, 23 Nov 2024 00:28:55 +0000 Subject: [PATCH 10/38] Try using env var for HF_SECRET --- .github/workflows/ci-sglang-benchmark.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 0310fd14e..fa51b655b 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -110,6 +110,9 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: + - name: Set HF_TOKEN + run: echo "HF_TOKEN=${{ secrets.HF_TOKEN }}" >> $GITHUB_ENV + - name: Get Current Date id: date run: echo "::set-output name=date::$(date +'%Y-%m-%d')" @@ -181,7 +184,7 @@ jobs: -v /data:/data \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env HF_TOKEN={{ 
secrets.HF_TOKEN }} \ + --env HF_TOKEN=$HF_TOKEN \ lmsysorg/sglang:v0.3.5.post1-rocm620 \ python3 -m sglang.launch_server \ --model-path meta-llama/Llama-3.1-8B-Instruct \ From c9f4d338abf9bc0c655590b5256ad15a92579642 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 13:02:48 +0000 Subject: [PATCH 11/38] Move secrets.HF_TOKEN back to command --- .github/workflows/ci-sglang-benchmark.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index fa51b655b..47eabc01d 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -110,9 +110,6 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: - - name: Set HF_TOKEN - run: echo "HF_TOKEN=${{ secrets.HF_TOKEN }}" >> $GITHUB_ENV - - name: Get Current Date id: date run: echo "::set-output name=date::$(date +'%Y-%m-%d')" @@ -184,7 +181,7 @@ jobs: -v /data:/data \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env HF_TOKEN=$HF_TOKEN \ + --env HF_TOKEN=${{ secrets.HF_TOKEN }} \ lmsysorg/sglang:v0.3.5.post1-rocm620 \ python3 -m sglang.launch_server \ --model-path meta-llama/Llama-3.1-8B-Instruct \ From 4fa094cbc43c76589fe053ab015c1096ccf43f8d Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 13:54:19 +0000 Subject: [PATCH 12/38] Add temporary command to see if HF_TOKEN is being set properly --- .github/workflows/ci-sglang-benchmark.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 47eabc01d..2dc55e61f 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -162,10 +162,21 @@ jobs: # HIP (Heterogeneous-computing Interface for Portability) for AMD # => base docker rocm/vllm-dev:20241022, not from public vllm whl # srt_hip = 
["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"] - - name: Pull SGLang Image (Had issues with sglang:v0.3.5.post1-rocm620) + - name: Pull SGLang Image (Had issues with sglang:v0.3.5.post2-rocm620) run: | docker pull lmsysorg/sglang:v0.3.5.post1-rocm620 + - name: Check HF_TOKEN + run: | + if [ -z "$HF_TOKEN" ]; then + echo "Error: HF_TOKEN is not set or empty." + exit 1 + else + echo "HF_TOKEN is set" + fi + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + - name: Run SGLang Server run: | docker run -d \ From c33ef750995ac60bfe7cd5ed1a23767f7b3d1628 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 14:53:16 +0000 Subject: [PATCH 13/38] Add back command to rm container once stopped --- .github/workflows/ci-sglang-benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 2dc55e61f..4aa36897f 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -179,7 +179,7 @@ jobs: - name: Run SGLang Server run: | - docker run -d \ + docker run --rm -d \ --name=sglang-server \ --device=/dev/kfd \ --device=/dev/dri \ From fea26559dc1e0a6cf128a907efa3d46c2f113ce5 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 15:25:16 +0000 Subject: [PATCH 14/38] Allow for full e2e verification --- .github/workflows/ci-sglang-benchmark.yml | 135 ++++++++++------------ 1 file changed, 60 insertions(+), 75 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 4aa36897f..cd266e43c 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -11,9 +11,7 @@ on: pull_request: workflow_dispatch: schedule: - # Weekdays at 6:00 AM UTC = 11:00 PM PST. - # This is a pretty GPU intensive test, so want to avoid conflicting - # with other potentially scheduled tests. 
+ # Weekdays at 4:00 AM UTC = 9:00 PM PST. - cron: "0 4 * * 1-5" concurrency: @@ -25,65 +23,64 @@ concurrency: cancel-in-progress: true jobs: - # TODO: Uncomment after verification - # benchmark_shortfin: - # if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} - # name: "SGLang Serving Benchmark With Shortfin" - # strategy: - # matrix: - # version: [3.11] - # fail-fast: false - # runs-on: mi300x-4 - # defaults: - # run: - # shell: bash - # env: - # PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" - # steps: - # - name: Get Current Date - # id: date - # run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - # - name: "Setting up Python" - # id: setup_python - # uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - # with: - # python-version: ${{matrix.version}} - - # - name: "Checkout Code" - # uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - # - name: Cache Pip Packages - # uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - # id: cache-pip - # with: - # path: ${{ env.PIP_CACHE_DIR }} - # key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} - - # - name: Install pip deps - # run: | - # python -m pip install --no-compile --upgrade pip - # # Note: We install in three steps in order to satisfy requirements - # # from non default locations first. Installing the PyTorch CPU - # # wheels saves multiple minutes and a lot of bandwidth on runner setup. - # pip install --no-compile -r pytorch-cpu-requirements.txt - # pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - # -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - # pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # # Try with the latest nightly releases, not what iree-turbine pins. - # # We could also pin to a known working or stable version. - # # This should eventually stabilize. 
Do the best we can for now. - # pip install -f https://iree.dev/pip-release-links.html --upgrade \ - # iree-base-compiler==3.0.0rc20241118 \ - # iree-base-runtime==3.0.0rc20241118 \ - # "numpy<2.0" - - # - name: Install SGLang - # run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - - # - name: Run Shortfin Benchmark Tests - # run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html + benchmark_shortfin: + if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + name: "SGLang Serving Benchmark With Shortfin" + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: mi300x-4 + defaults: + run: + shell: bash + env: + PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + steps: + - name: Get Current Date + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: ${{matrix.version}} + + - name: "Checkout Code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cache Pip Packages + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + id: cache-pip + with: + path: ${{ env.PIP_CACHE_DIR }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + + - name: Install pip deps + run: | + python -m pip install --no-compile --upgrade pip + # Note: We install in three steps in order to satisfy requirements + # from non default locations first. Installing the PyTorch CPU + # wheels saves multiple minutes and a lot of bandwidth on runner setup. 
+ pip install --no-compile -r pytorch-cpu-requirements.txt + pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ + -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ + + # Try with the latest nightly releases, not what iree-turbine pins. + # We could also pin to a known working or stable version. + # This should eventually stabilize. Do the best we can for now. + pip install -f https://iree.dev/pip-release-links.html --upgrade \ + iree-base-compiler==3.0.0rc20241118 \ + iree-base-runtime==3.0.0rc20241118 \ + "numpy<2.0" + + - name: Install SGLang + run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + - name: Run Shortfin Benchmark Tests + run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html # TODO: Uncomment after verification # - name: Deploy to GitHub Pages @@ -97,8 +94,7 @@ jobs: benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "SGLang Serving Benchmark With SGLang" - # TODO: Uncomment after verifying - # needs: benchmark_shortfin + needs: benchmark_shortfin strategy: matrix: version: [3.11] @@ -166,17 +162,6 @@ jobs: run: | docker pull lmsysorg/sglang:v0.3.5.post1-rocm620 - - name: Check HF_TOKEN - run: | - if [ -z "$HF_TOKEN" ]; then - echo "Error: HF_TOKEN is not set or empty." 
- exit 1 - else - echo "HF_TOKEN is set" - fi - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - - name: Run SGLang Server run: | docker run --rm -d \ From 3641445e7e4e2f50caf6a0cb8644b5f75c520e9f Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 16:47:51 +0000 Subject: [PATCH 15/38] Update hash for pip cache in benchmark and integration tests --- .github/workflows/ci-sglang-benchmark.yml | 4 ++-- .github/workflows/ci-sglang-integration-tests.yml | 2 +- .github/workflows/ci-shark-ai.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index cd266e43c..f03936699 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -55,7 +55,7 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | @@ -124,7 +124,7 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index c61756d78..20b829918 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -54,7 +54,7 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + key: pip-${{ 
steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | diff --git a/.github/workflows/ci-shark-ai.yml b/.github/workflows/ci-shark-ai.yml index bf8007e65..fc85a76a7 100644 --- a/.github/workflows/ci-shark-ai.yml +++ b/.github/workflows/ci-shark-ai.yml @@ -49,7 +49,7 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | From d82d9dffe1f69cdf1d063a95f4acbf3663407a69 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 25 Nov 2024 21:38:54 +0000 Subject: [PATCH 16/38] Remove version pinning for `iree-base-compiler` and `iree-base-runtime` --- .github/workflows/ci-sglang-benchmark.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index f03936699..5b173baa5 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -72,8 +72,8 @@ jobs: # We could also pin to a known working or stable version. # This should eventually stabilize. Do the best we can for now. pip install -f https://iree.dev/pip-release-links.html --upgrade \ - iree-base-compiler==3.0.0rc20241118 \ - iree-base-runtime==3.0.0rc20241118 \ + iree-base-compiler \ + iree-base-runtime \ "numpy<2.0" - name: Install SGLang @@ -141,8 +141,8 @@ jobs: # We could also pin to a known working or stable version. # This should eventually stabilize. Do the best we can for now. 
pip install -f https://iree.dev/pip-release-links.html --upgrade \ - iree-base-compiler==3.0.0rc20241118 \ - iree-base-runtime==3.0.0rc20241118 \ + iree-base-compiler \ + iree-base-runtime \ "numpy<2.0" - name: Install SGLang From e8432819a1c6b808ffff6e6fd5a7359ad61228a2 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 26 Nov 2024 11:42:02 +0000 Subject: [PATCH 17/38] Add `--pre` to iree installations in SGLang tests --- .github/workflows/ci-sglang-benchmark.yml | 4 ++-- .github/workflows/ci-sglang-integration-tests.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 5b173baa5..555e880eb 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -71,7 +71,7 @@ jobs: # Try with the latest nightly releases, not what iree-turbine pins. # We could also pin to a known working or stable version. # This should eventually stabilize. Do the best we can for now. - pip install -f https://iree.dev/pip-release-links.html --upgrade \ + pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ iree-base-compiler \ iree-base-runtime \ "numpy<2.0" @@ -140,7 +140,7 @@ jobs: # Try with the latest nightly releases, not what iree-turbine pins. # We could also pin to a known working or stable version. # This should eventually stabilize. Do the best we can for now. 
- pip install -f https://iree.dev/pip-release-links.html --upgrade \ + pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ iree-base-compiler \ iree-base-runtime \ "numpy<2.0" diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index 20b829918..8ff74a094 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -69,7 +69,7 @@ jobs: # Use newest possible releases to be able to track commits that may # cause errors. - pip install -f https://iree.dev/pip-release-links.html --upgrade \ + pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ iree-base-compiler \ iree-base-runtime \ "numpy<2.0" From 01da13c507cb6635fcea7b2028cc3d01b4f82816 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 19:47:05 +0000 Subject: [PATCH 18/38] Slightly lower threshold in integration tests, to allow still valid, but differing answers to be accepted --- app_tests/integration_tests/llm/sglang/sglang_frontend_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py index efab14ea7..72b3d4052 100644 --- a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py +++ b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py @@ -29,7 +29,7 @@ "device": "hip", } -ACCEPTED_THRESHOLD = 0.8 +ACCEPTED_THRESHOLD = 0.7 def compute_similarity(model: SentenceTransformer, sentence_1: str, sentence_2: str): From ed37ef1a9c49580e96bacbc472b765b9ca11ac69 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 20:57:21 +0000 Subject: [PATCH 19/38] Fix `publish_dir` in `Deploy to Github Pages` step --- .github/workflows/ci-sglang-benchmark.yml | 32 ++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git 
a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 555e880eb..435ba8a2f 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -7,8 +7,6 @@ name: SGLang Llama Benchmarking Tests on: - # TODO: Remove PR trigger after verification - pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. @@ -82,14 +80,13 @@ jobs: - name: Run Shortfin Benchmark Tests run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html - # TODO: Uncomment after verification - # - name: Deploy to GitHub Pages - # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - # with: - # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - # publish_dir: ./out/llm/sgl_benchmark/shortfin - # destination_dir: ./llm/sgl_benchmark/shortfin - # keep_files: true + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + publish_dir: ./out/llm/shortfin + destination_dir: ./llm/sgl_benchmark/shortfin + keep_files: true benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} @@ -197,11 +194,10 @@ jobs: - name: Cleanup SGLang Image run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 - # TODO: Uncomment after verifying - # - name: Deploy to GitHub Pages - # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - # with: - # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - # publish_dir: ./out/llm/sgl_benchmark/sglang - # destination_dir: ./llm/sgl_benchmark/sglang - # keep_files: true + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + 
publish_dir: ./out/llm/sglang + destination_dir: ./llm/sgl_benchmark/sglang + keep_files: true From d29c7bb94d7eb919a970bd3bc597a172b956d9df Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 22:55:38 +0000 Subject: [PATCH 20/38] Remove unneeded deps for SGLang benchmark, Get rid of unneeded `Get Current Date` step, Use `matrix.version` for Pip Cache, Leave docker image cached on runner, Temporarily enable PR trigger and disable shortfin half for CI validation --- .github/workflows/ci-sglang-benchmark.yml | 177 ++++++++++------------ 1 file changed, 81 insertions(+), 96 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 435ba8a2f..7a2627e63 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -7,6 +7,8 @@ name: SGLang Llama Benchmarking Tests on: + # TODO: Temporarily setting PR trigger for CI validation + pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. 
@@ -21,72 +23,73 @@ concurrency: cancel-in-progress: true jobs: - benchmark_shortfin: - if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} - name: "SGLang Serving Benchmark With Shortfin" - strategy: - matrix: - version: [3.11] - fail-fast: false - runs-on: mi300x-4 - defaults: - run: - shell: bash - env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" - steps: - - name: Get Current Date - id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - - name: "Setting up Python" - id: setup_python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: ${{matrix.version}} - - - name: "Checkout Code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - - - name: Install pip deps - run: | - python -m pip install --no-compile --upgrade pip - # Note: We install in three steps in order to satisfy requirements - # from non default locations first. Installing the PyTorch CPU - # wheels saves multiple minutes and a lot of bandwidth on runner setup. - pip install --no-compile -r pytorch-cpu-requirements.txt - pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # Try with the latest nightly releases, not what iree-turbine pins. - # We could also pin to a known working or stable version. - # This should eventually stabilize. Do the best we can for now. 
- pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ - iree-base-compiler \ - iree-base-runtime \ - "numpy<2.0" - - - name: Install SGLang - run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - - - name: Run Shortfin Benchmark Tests - run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html - - - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - with: - github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/shortfin - destination_dir: ./llm/sgl_benchmark/shortfin - keep_files: true + # TODO: Temporarily disabling this half for CI validation + # benchmark_shortfin: + # if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + # name: "SGLang Serving Benchmark With Shortfin" + # strategy: + # matrix: + # version: [3.11] + # fail-fast: false + # runs-on: mi300x-4 + # defaults: + # run: + # shell: bash + # env: + # PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + # steps: + # - name: Get Current Date + # id: date + # run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + # - name: "Setting up Python" + # id: setup_python + # uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + # with: + # python-version: ${{matrix.version}} + + # - name: "Checkout Code" + # uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + # - name: Cache Pip Packages + # uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + # id: cache-pip + # with: + # path: ${{ env.PIP_CACHE_DIR }} + # key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + + # - name: Install pip deps + # run: | + # python -m pip install --no-compile --upgrade pip + # # Note: We install in three steps 
in order to satisfy requirements + # # from non default locations first. Installing the PyTorch CPU + # # wheels saves multiple minutes and a lot of bandwidth on runner setup. + # pip install --no-compile -r pytorch-cpu-requirements.txt + # pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ + # -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + # pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ + + # # Try with the latest nightly releases, not what iree-turbine pins. + # # We could also pin to a known working or stable version. + # # This should eventually stabilize. Do the best we can for now. + # pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ + # iree-base-compiler \ + # iree-base-runtime \ + # "numpy<2.0" + + # - name: Install SGLang + # run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + # - name: Run Shortfin Benchmark Tests + # run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html + + # - name: Deploy to GitHub Pages + # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + # with: + # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + # publish_dir: ./out/llm/shortfin + # destination_dir: ./llm/sgl_benchmark/shortfin + # keep_files: true benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} @@ -103,10 +106,6 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: - - name: Get Current Date - id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - name: "Setting up Python" id: setup_python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 @@ -121,26 +120,14 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ 
hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + key: pip-${{ matrix.version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | python -m pip install --no-compile --upgrade pip - # Note: We install in three steps in order to satisfy requirements - # from non default locations first. Installing the PyTorch CPU - # wheels saves multiple minutes and a lot of bandwidth on runner setup. - pip install --no-compile -r pytorch-cpu-requirements.txt - pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # Try with the latest nightly releases, not what iree-turbine pins. - # We could also pin to a known working or stable version. - # This should eventually stabilize. Do the best we can for now. - pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ - iree-base-compiler \ - iree-base-runtime \ - "numpy<2.0" + # Note: Only sharktank is required to use `hf_datasets` script + # for downloading model weights. 
+ pip install --no-compile -r requirements.txt -e sharktank/ - name: Install SGLang run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" @@ -191,13 +178,11 @@ jobs: - name: Stop sglang-server run: docker stop sglang-server || true # Stop container if it's running - - name: Cleanup SGLang Image - run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 - - - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - with: - github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/sglang - destination_dir: ./llm/sgl_benchmark/sglang - keep_files: true + # TODO: Temporarily disabling for CI Validation + # - name: Deploy to GitHub Pages + # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + # with: + # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + # publish_dir: ./out/llm/sglang + # destination_dir: ./llm/sgl_benchmark/sglang + # keep_files: true From 4529e097c02b9709da79d3bdf074d621a68e43c9 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 23:01:02 +0000 Subject: [PATCH 21/38] Comment out `needs` line for CI validation --- .github/workflows/ci-sglang-benchmark.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 7a2627e63..ade855825 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -94,7 +94,8 @@ jobs: benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "SGLang Serving Benchmark With SGLang" - needs: benchmark_shortfin + # TODO: Temporarily commenting out for CI validation + # needs: benchmark_shortfin strategy: matrix: version: [3.11] From 09e0fb6d91e0b09ab5b59e06e9f9dc0399176f3a Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 23:07:04 +0000 Subject: [PATCH 22/38] 
Remove temporary disablements, Add back step to clean up docker image --- .github/workflows/ci-sglang-benchmark.yml | 157 +++++++++++----------- 1 file changed, 78 insertions(+), 79 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index ade855825..b0bca9bb0 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -7,8 +7,6 @@ name: SGLang Llama Benchmarking Tests on: - # TODO: Temporarily setting PR trigger for CI validation - pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. @@ -23,79 +21,77 @@ concurrency: cancel-in-progress: true jobs: - # TODO: Temporarily disabling this half for CI validation - # benchmark_shortfin: - # if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} - # name: "SGLang Serving Benchmark With Shortfin" - # strategy: - # matrix: - # version: [3.11] - # fail-fast: false - # runs-on: mi300x-4 - # defaults: - # run: - # shell: bash - # env: - # PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" - # steps: - # - name: Get Current Date - # id: date - # run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - # - name: "Setting up Python" - # id: setup_python - # uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - # with: - # python-version: ${{matrix.version}} - - # - name: "Checkout Code" - # uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - # - name: Cache Pip Packages - # uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - # id: cache-pip - # with: - # path: ${{ env.PIP_CACHE_DIR }} - # key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - - # - name: Install pip deps - # run: | - # python -m pip install --no-compile --upgrade pip - # # Note: We install in three steps in order to satisfy 
requirements - # # from non default locations first. Installing the PyTorch CPU - # # wheels saves multiple minutes and a lot of bandwidth on runner setup. - # pip install --no-compile -r pytorch-cpu-requirements.txt - # pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - # -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - # pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # # Try with the latest nightly releases, not what iree-turbine pins. - # # We could also pin to a known working or stable version. - # # This should eventually stabilize. Do the best we can for now. - # pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ - # iree-base-compiler \ - # iree-base-runtime \ - # "numpy<2.0" - - # - name: Install SGLang - # run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - - # - name: Run Shortfin Benchmark Tests - # run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html - - # - name: Deploy to GitHub Pages - # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - # with: - # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - # publish_dir: ./out/llm/shortfin - # destination_dir: ./llm/sgl_benchmark/shortfin - # keep_files: true + benchmark_shortfin: + if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + name: "SGLang Serving Benchmark With Shortfin" + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: mi300x-4 + defaults: + run: + shell: bash + env: + PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + steps: + - name: Get Current Date + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: 
${{matrix.version}} + + - name: "Checkout Code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cache Pip Packages + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + id: cache-pip + with: + path: ${{ env.PIP_CACHE_DIR }} + key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + + - name: Install pip deps + run: | + python -m pip install --no-compile --upgrade pip + # Note: We install in three steps in order to satisfy requirements + # from non default locations first. Installing the PyTorch CPU + # wheels saves multiple minutes and a lot of bandwidth on runner setup. + pip install --no-compile -r pytorch-cpu-requirements.txt + pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ + -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ + + # Try with the latest nightly releases, not what iree-turbine pins. + # We could also pin to a known working or stable version. + # This should eventually stabilize. Do the best we can for now. 
+ pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ + iree-base-compiler \ + iree-base-runtime \ + "numpy<2.0" + + - name: Install SGLang + run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + - name: Run Shortfin Benchmark Tests + run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + publish_dir: ./out/llm/shortfin + destination_dir: ./llm/sgl_benchmark/shortfin + keep_files: true benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "SGLang Serving Benchmark With SGLang" - # TODO: Temporarily commenting out for CI validation - # needs: benchmark_shortfin + needs: benchmark_shortfin strategy: matrix: version: [3.11] @@ -179,11 +175,14 @@ jobs: - name: Stop sglang-server run: docker stop sglang-server || true # Stop container if it's running - # TODO: Temporarily disabling for CI Validation - # - name: Deploy to GitHub Pages - # uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 - # with: - # github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - # publish_dir: ./out/llm/sglang - # destination_dir: ./llm/sgl_benchmark/sglang - # keep_files: true + # Deleting image after run due to large disk space requirement (83 GB) + - name: Cleanup SGLang Image + run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + publish_dir: ./out/llm/sglang + destination_dir: ./llm/sgl_benchmark/sglang + keep_files: true From 422729ffe6a5e02a503aa1b7ba4e01d6cab6704e Mon Sep 17 00:00:00 2001 From: 
Stephen Baione Date: Mon, 2 Dec 2024 23:34:09 +0000 Subject: [PATCH 23/38] Remove `Get Current Date` step in shortfin benchmark job --- .github/workflows/ci-sglang-benchmark.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index b0bca9bb0..08a28992d 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -35,10 +35,6 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: - - name: Get Current Date - id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - name: "Setting up Python" id: setup_python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 From 969b608b26988ec79d3204b945fd955541a0f081 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Mon, 2 Dec 2024 23:48:59 +0000 Subject: [PATCH 24/38] Add `README` description to top of CI file --- .github/workflows/ci-sglang-benchmark.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 08a28992d..0fc111a8f 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -4,6 +4,18 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# =================================== README =================================== +# The `benchmark_sglang` job in this CI is mostly dependent on code outside +# of the `shark-ai` repo itself. By including it here, we are able to maintain +# an apples-to-apples comparison between shortfin and SGLang performance in a +# centralized location, as we place more effort in shortfin LLM performance, and +# WHILE WE WORK TOWARDS A BETTER ALTERNATIVE. 
+ +# We should not be generally repeating this pattern, and should never repeat +# this pattern outside of specifically benchmarking shortfin apps against +# external projects, as part of an organized and clearly defined effort. +# ============================================================================== + name: SGLang Llama Benchmarking Tests on: From f67e399aeed6ef2d627c99a1208c8425e931bc43 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 15:42:30 +0000 Subject: [PATCH 25/38] Add job to merge html reports from both benchmark jobs and upload to gh-pages --- .github/workflows/ci-sglang-benchmark.yml | 61 +++++++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 0fc111a8f..e2574d533 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -19,6 +19,8 @@ name: SGLang Llama Benchmarking Tests on: + # TODO: Temporary trigger to validate workflow + pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. 
@@ -61,7 +63,7 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + key: pip-${{ matrix.version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - name: Install pip deps run: | @@ -86,15 +88,13 @@ jobs: run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - name: Run Shortfin Benchmark Tests - run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=out/llm/shortfin/index.html + run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=shortfin_index.html --self-contained-html - - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + - name: Upload pytest report + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 with: - github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/shortfin - destination_dir: ./llm/sgl_benchmark/shortfin - keep_files: true + name: sglang_reports + path: shortfin_index.html benchmark_sglang: if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} @@ -178,7 +178,7 @@ jobs: - name: Run SGLang Benchmark Tests run: | - pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py --port 30000 --log-cli-level=INFO --html=out/llm/sglang/index.html + pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py --port 30000 --log-cli-level=INFO --html=sglang_index.html --self-contained-html - name: Stop sglang-server run: docker stop sglang-server || true # Stop container if it's running @@ -187,10 +187,49 @@ jobs: - name: Cleanup SGLang Image run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 + - name: 
Upload pytest report + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 + with: + name: sglang_reports + path: sglang_index.html + + merge_and_upload_reports: + if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + name: "Merge and upload benchmark reports" + needs: [benchmark_shortfin, benchmark_sglang] + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: ubuntu-24.04 + defaults: + run: + shell: bash + steps: + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: ${{matrix.version}} + + - name: Install pytest-html-merger + run: pip install pytest-html-merger + - name: Download reports + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 + with: + name: slgang_reports + path: reports + + - name: Create merged report directory + run: mkdir merged_reports + + - name: Merge html reports + run: pytest_html_merger -i reports -o merged_reports/index.html + - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/sglang - destination_dir: ./llm/sgl_benchmark/sglang + publish_dir: merged_reports + destination_dir: ./llm/sglang keep_files: true From 6909edcf38eaddea3eea13ac4fce383d120c9931 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 15:55:58 +0000 Subject: [PATCH 26/38] Fix upload/download paths --- .github/workflows/ci-sglang-benchmark.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index e2574d533..2594c3aad 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -93,7 +93,7 @@ jobs: - name: Upload pytest report uses: 
actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 with: - name: sglang_reports + name: shortfin_benchmark path: shortfin_index.html benchmark_sglang: @@ -190,7 +190,7 @@ jobs: - name: Upload pytest report uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 with: - name: sglang_reports + name: sglang_benchmark path: sglang_index.html merge_and_upload_reports: @@ -214,17 +214,20 @@ jobs: - name: Install pytest-html-merger run: pip install pytest-html-merger + - name: Download reports uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 with: - name: slgang_reports + name: | + shortfin_benchmark + sglang_benchmark path: reports - name: Create merged report directory run: mkdir merged_reports - name: Merge html reports - run: pytest_html_merger -i reports -o merged_reports/index.html + run: pytest_html_merger -i reports/*/*.html -o merged_reports/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 From aa35176fd3c51a0178e246a096b7fa767d3eb0e5 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 16:11:28 +0000 Subject: [PATCH 27/38] Split download into two steps --- .github/workflows/ci-sglang-benchmark.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 2594c3aad..491a76557 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -215,13 +215,17 @@ jobs: - name: Install pytest-html-merger run: pip install pytest-html-merger - - name: Download reports + - name: Download shortfin report uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 with: - name: | - shortfin_benchmark - sglang_benchmark - path: reports + name: shortfin_benchmark + path: reports/shortfin + + - name: Download sglang report + uses: 
actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 + with: + name: sglang_benchmark + path: reports/sglang - name: Create merged report directory run: mkdir merged_reports From 9578acc7cb87bf5c76e718db9737a38d7ba31964 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 16:34:23 +0000 Subject: [PATCH 28/38] Ensure all html files are in same dir --- .github/workflows/ci-sglang-benchmark.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 491a76557..629e309ff 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -219,19 +219,19 @@ jobs: uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 with: name: shortfin_benchmark - path: reports/shortfin + path: reports - name: Download sglang report uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 with: name: sglang_benchmark - path: reports/sglang + path: reports - name: Create merged report directory run: mkdir merged_reports - name: Merge html reports - run: pytest_html_merger -i reports/*/*.html -o merged_reports/index.html + run: pytest_html_merger -i reports -o merged_reports/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 From b9b9ea518014c1ee62f8df5a28e1844de431ea74 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 16:58:59 +0000 Subject: [PATCH 29/38] Remove PR trigger --- .github/workflows/ci-sglang-benchmark.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 629e309ff..8c32415af 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -19,8 +19,6 @@ name: SGLang Llama Benchmarking Tests on: - # TODO: Temporary trigger to validate workflow - 
pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. From 526194f68ac1b8a5188492b512a5d831f3933e79 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 19:56:20 +0000 Subject: [PATCH 30/38] Remove `sharktank` installation from SGLang benchmark, Always use python3.11 for merging reports, Make merging reports one step, Temporarily enable PR trigger for validation --- .github/workflows/ci-sglang-benchmark.yml | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 8c32415af..8930a441d 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -19,6 +19,8 @@ name: SGLang Llama Benchmarking Tests on: + # TODO: Remove after validating in CI + pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. @@ -95,7 +97,6 @@ jobs: path: shortfin_index.html benchmark_sglang: - if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "SGLang Serving Benchmark With SGLang" needs: benchmark_shortfin strategy: @@ -128,9 +129,6 @@ jobs: - name: Install pip deps run: | python -m pip install --no-compile --upgrade pip - # Note: Only sharktank is required to use `hf_datasets` script - # for downloading model weights. 
- pip install --no-compile -r requirements.txt -e sharktank/ - name: Install SGLang run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" @@ -192,13 +190,8 @@ jobs: path: sglang_index.html merge_and_upload_reports: - if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} name: "Merge and upload benchmark reports" needs: [benchmark_shortfin, benchmark_sglang] - strategy: - matrix: - version: [3.11] - fail-fast: false runs-on: ubuntu-24.04 defaults: run: @@ -208,7 +201,7 @@ jobs: id: setup_python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: - python-version: ${{matrix.version}} + python-version: 3.11 - name: Install pytest-html-merger run: pip install pytest-html-merger @@ -225,11 +218,10 @@ jobs: name: sglang_benchmark path: reports - - name: Create merged report directory - run: mkdir merged_reports - - name: Merge html reports - run: pytest_html_merger -i reports -o merged_reports/index.html + run: | + mkdir merged_reports + pytest_html_merger -i reports -o merged_reports/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 From 57babdf922dd8ffb66e4ff0f1afa2a078759ccc8 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 20:04:30 +0000 Subject: [PATCH 31/38] Use hf to download tokenizer in `sglang_benchmark_test` --- .../llm/sglang_benchmarks/sglang_benchmark_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py index a678f92c1..675f9ef54 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py @@ -14,20 +14,20 @@ from .utils import SGLangBenchmarkArgs, log_jsonl_result -from integration_tests.llm.utils 
import wait_for_server, download_with_hf_datasets +from integration_tests.llm.utils import download_tokenizer, wait_for_server logger = logging.getLogger(__name__) @pytest.mark.parametrize( - "request_rate,model_name", - [(req_rate, "llama3_8B_fp16") for req_rate in [1, 2, 4, 8, 16, 32]], + "request_rate,tokenizer_id", + [(req_rate, "NousResearch/Meta-Llama-3-8B") for req_rate in [1, 2, 4, 8, 16, 32]], ) -def test_sglang_benchmark(request_rate, model_name, sglang_args, tmp_path_factory): +def test_sglang_benchmark(request_rate, tokenizer_id, sglang_args, tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test") # Download tokenizer for llama3_8B_fp16 - download_with_hf_datasets(tmp_dir, model_name) + download_tokenizer(tmp_dir, tokenizer_id) logger.info("Beginning SGLang benchmark test...") From 8f7f0fbc55e4833f5e53d7ae985090378e7f4463 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 21:25:56 +0000 Subject: [PATCH 32/38] Small cleanup of sglang ci deps section --- .github/workflows/ci-sglang-benchmark.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 8930a441d..d5fd2ba12 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -124,14 +124,12 @@ jobs: id: cache-pip with: path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ matrix.version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }} + key: pip-${{ matrix.version }} - - name: Install pip deps + - name: Install SGLang run: | python -m pip install --no-compile --upgrade pip - - - name: Install SGLang - run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From 305c4b0f811b186593d9d31d9a30749f396467fb Mon Sep 17 00:00:00 2001 From: 
Stephen Baione Date: Tue, 3 Dec 2024 21:31:33 +0000 Subject: [PATCH 33/38] Make shortfin/sglang benchmark such that they still run sequentially, but are not dependent on each other's success --- .github/workflows/ci-sglang-benchmark.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index d5fd2ba12..1063291e4 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -99,6 +99,7 @@ jobs: benchmark_sglang: name: "SGLang Serving Benchmark With SGLang" needs: benchmark_shortfin + if: always() strategy: matrix: version: [3.11] @@ -209,12 +210,14 @@ jobs: with: name: shortfin_benchmark path: reports + continue-on-error: true - name: Download sglang report uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 with: name: sglang_benchmark path: reports + continue-on-error: true - name: Merge html reports run: | From d78ab73d1679850c6b9a0725861225e90c366612 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 22:19:07 +0000 Subject: [PATCH 34/38] Remove dep on shortfin benchmark in sgl benchmark, Make `merge_and_upload_reports` run conditionally on either succeeding --- .github/workflows/ci-sglang-benchmark.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 1063291e4..5ee9f08a2 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -98,8 +98,6 @@ jobs: benchmark_sglang: name: "SGLang Serving Benchmark With SGLang" - needs: benchmark_shortfin - if: always() strategy: matrix: version: [3.11] @@ -190,7 +188,7 @@ jobs: merge_and_upload_reports: name: "Merge and upload benchmark reports" - needs: [benchmark_shortfin, benchmark_sglang] + if: success() || needs.benchmark_shortfin.result == 'success' || needs.benchmark_sglang.result == 'success'
runs-on: ubuntu-24.04 defaults: run: From 29a82216fbf1a74aaee1ce2687907b9b2d65609f Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 22:22:16 +0000 Subject: [PATCH 35/38] Make sure `merge_and_upload_reports` waits for prior jobs to finish --- .github/workflows/ci-sglang-benchmark.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 5ee9f08a2..0b8359adc 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -188,7 +188,8 @@ jobs: merge_and_upload_reports: name: "Merge and upload benchmark reports" - if: success() || needs.benchmark_shortfin.result == 'success' || needs.benchmark_sglang.result == 'success' + needs: [benchmark_shortfin, benchmark_sglang] + if: needs.benchmark_shortfin.result == 'success' || needs.benchmark_sglang.result == 'success' runs-on: ubuntu-24.04 defaults: run: From 7efecb81ca4530cb815ba13e4ac6fcbae2c56f1c Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 22:27:27 +0000 Subject: [PATCH 36/38] Move code checkout to first step in `benchmark_sglang` --- .github/workflows/ci-sglang-benchmark.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 0b8359adc..daea21890 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -109,15 +109,15 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: + - name: "Checkout Code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: "Setting up Python" id: setup_python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - name: "Checkout Code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Cache Pip 
Packages uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 id: cache-pip From 1e905736f65a96f8f18a49f3518a663e3c48b7f9 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Tue, 3 Dec 2024 23:01:40 +0000 Subject: [PATCH 37/38] Remove PR trigger --- .github/workflows/ci-sglang-benchmark.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 6d1978244..7afec9336 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -19,8 +19,6 @@ name: SGLang Llama Benchmarking Tests on: - # TODO: Remove after validating in CI - pull_request: workflow_dispatch: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. From b1ec485416d1990ce86080d972e8af2daaf51cd2 Mon Sep 17 00:00:00 2001 From: Stephen Baione Date: Wed, 4 Dec 2024 16:20:15 +0000 Subject: [PATCH 38/38] Repin `iree-base-compiler` and `iree-base-runtime` due to `abort` issue --- .github/workflows/ci-sglang-benchmark.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index e301ed2fd..f8f19402a 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -77,8 +77,8 @@ jobs: # We could also pin to a known working or stable version. # This should eventually stabilize. Do the best we can for now. 
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ - iree-base-compiler \ - iree-base-runtime \ + iree-base-compiler==3.0.0rc20241118 \ + iree-base-runtime==3.0.0rc20241118 \ "numpy<2.0" - name: Install SGLang @@ -106,8 +106,7 @@ jobs: env: PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: - - name: "Checkout Code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: "Setting up Python" id: setup_python