Skip to content

Merge pull request #12189 from gitccl/issue_12181 #1

Merge pull request #12189 from gitccl/issue_12181

Merge pull request #12189 from gitccl/issue_12181 #1

Workflow file for this run

# Regression workflow: triggers, concurrency policy and the environment
# shared by every job.
name: Regression

on:
  # Manual runs and API-triggered dispatch events.
  workflow_dispatch:
  repository_dispatch:

  # Pushes to every branch except `main` and `feature`.
  push:
    branches:
      - '**'
      - '!main'
      - '!feature'
    paths-ignore:
      - '**.md'
      - 'tools/**'
      - '!tools/pythonpkg/**'
      - '.github/patches/duckdb-wasm/**'
      - '.github/workflows/**'
      - '!.github/workflows/Regression.yml'

  # Pull requests: only these activity types start a run.
  # NOTE(review): new commits on a PR branch are not listed here; presumably
  # they are picked up by the push trigger above - confirm.
  pull_request:
    types:
      - opened
      - reopened
      - ready_for_review
    paths-ignore:
      - '**.md'
      - 'tools/**'
      - '!tools/pythonpkg/**'
      - '.github/patches/duckdb-wasm/**'
      - '.github/workflows/**'
      - '!.github/workflows/Regression.yml'

# Runs sharing a group cancel each other. The last component of the group is
# the commit SHA on `main` and `true` on every other ref, so each commit on
# `main` gets a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
  cancel-in-progress: true

env:
  GH_TOKEN: ${{ secrets.GH_TOKEN }}
  # Branch the jobs build as the comparison baseline: the PR base branch when
  # there is one, otherwise `feature` for refs ending in `_feature`, else `main`.
  BASE_BRANCH: ${{ github.base_ref || (endsWith(github.ref, '_feature') && 'feature' || 'main') }}

jobs:
# Benchmark regression job: builds the checked-out revision and the base
# branch, then runs scripts/regression_test_runner.py once per benchmark list
# with the base build as --old and the checked-out build as --new.
regression-test-benchmark-runner:
  name: Regression Tests
  runs-on: ubuntu-20.04
  env:
    CC: gcc-10
    CXX: g++-10
    GEN: ninja
    BUILD_BENCHMARK: 1
    BUILD_TPCH: 1
    BUILD_TPCDS: 1
    BUILD_HTTPFS: 1
    BUILD_JEMALLOC: 1
  steps:
    # fetch-depth: 0 fetches full history; the last step diffs against
    # origin/main.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install
      shell: bash
      run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build && pip install requests

    # `save` is true on refs/heads/main, or in any repository other than
    # duckdb/duckdb. The action tracks its `main` branch, not a pinned release.
    - name: Setup Ccache
      uses: hendrikmuhs/ccache-action@main
      with:
        key: ${{ github.job }}
        save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }}

    # Build the checked-out tree, then a shallow clone of the base branch in
    # ./duckdb. The branch name is read from the BASE_BRANCH environment
    # variable instead of being spliced into the script text.
    - name: Build
      shell: bash
      run: |
        make
        git clone --branch "$BASE_BRANCH" https://github.com/duckdb/duckdb.git --depth=1
        cd duckdb
        make
        cd ..

    # Copy this revision's benchmark directory into the base-branch clone.
    - name: Set up benchmarks
      shell: bash
      run: |
        cp -r benchmark duckdb/

    # Every benchmark step below uses `!cancelled()`: it still runs when an
    # earlier step failed, but stops once the run has been cancelled (this
    # workflow cancels superseded runs via its concurrency settings).
    - name: Regression Test Micro
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2

    - name: Regression Test Ingestion Perf
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/ingestion.csv --verbose --threads=2

    - name: Regression Test TPCH
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpch.csv --verbose --threads=2

    - name: Regression Test TPCH-PARQUET
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpch_parquet.csv --verbose --threads=2

    - name: Regression Test TPCDS
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds.csv --verbose --threads=2

    - name: Regression Test H2OAI
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/h2oai.csv --verbose --threads=2

    - name: Regression Test IMDB
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/imdb.csv --verbose --threads=2

    - name: Regression Test CSV
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/csv.csv --verbose --threads=2

    # Collect files that differ from origin/main and match benchmark/micro
    # into a benchmark list; run the regression only if that list is
    # non-empty. `|| true` keeps the pipeline from failing when grep finds
    # nothing.
    - name: Regression Test new micro benchmark
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        git diff --name-only origin/main | (grep "benchmark/micro/*" || true) > .github/regression/updated_micro_benchmarks.csv
        if [ $(wc -l < .github/regression/updated_micro_benchmarks.csv) -gt 0 ]; then
          echo "Detected the following modified benchmarks. Running a regression test"
          cat .github/regression/updated_micro_benchmarks.csv
          python scripts/regression_test_runner.py --old=duckdb/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/updated_micro_benchmarks.csv --verbose --threads=2
        else
          echo "No modified micro benchmarks"
        fi
# Storage regression job: compares database size between the base-branch
# build and the checked-out build, then checks storage-format compatibility
# in whichever direction applies.
regression-test-storage:
  name: Storage Size Regression Test
  runs-on: ubuntu-20.04
  env:
    CC: gcc-10
    CXX: g++-10
    GEN: ninja
    BUILD_TPCH: 1
    BUILD_TPCDS: 1
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install
      shell: bash
      run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build && pip install requests

    # `save` is true on refs/heads/main, or in any repository other than
    # duckdb/duckdb.
    - name: Setup Ccache
      uses: hendrikmuhs/ccache-action@main
      with:
        key: ${{ github.job }}
        save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }}

    # Build the checked-out tree, then a shallow clone of the base branch in
    # ./duckdb. The branch name is read from the BASE_BRANCH environment
    # variable instead of being spliced into the script text.
    - name: Build
      shell: bash
      run: |
        make
        git clone --branch "$BASE_BRANCH" https://github.com/duckdb/duckdb.git --depth=1
        cd duckdb
        make
        cd ..

    - name: Regression Test
      shell: bash
      run: |
        python scripts/regression_test_storage_size.py --old=duckdb/build/release/duckdb --new=build/release/duckdb

    # Byte-compare the storage_version.db of both trees and record the result
    # in the `storage_changed` environment variable for the steps below.
    - name: Test for incompatibility
      shell: bash
      run: |
        if (cmp test/sql/storage_version/storage_version.db duckdb/test/sql/storage_version/storage_version.db); then
          echo "storage_changed=false" >> $GITHUB_ENV
        else
          echo "storage_changed=true" >> $GITHUB_ENV
        fi

    # Runs when both trees ship an identical storage_version.db. The second
    # half executes inside ./duckdb, so the database path there is relative
    # to that directory.
    - name: Regression Compatibility Test (testing bidirectional compatibility)
      shell: bash
      if: env.storage_changed == 'false'
      run: |
        # Regenerate test/sql/storage_version/storage_version.db with newer version -> read with older version
        python3 scripts/generate_storage_version.py
        ./duckdb/build/release/duckdb test/sql/storage_version/storage_version.db
        # Regenerate test/sql/storage_version/storage_version.db with older version -> read with newer version (already performed as part of test.slow)
        cd duckdb
        python3 ../scripts/generate_storage_version.py
        ../build/release/duckdb test/sql/storage_version/storage_version.db
        cd ..

    # Runs when the committed databases differ. Each tree regenerates its own
    # database, then 12 bytes starting at offset 8 are compared; equal bytes
    # mean the storage version was not bumped.
    # NOTE(review): the compared range is presumably the storage-version part
    # of the file header - confirm.
    - name: Regression Compatibility Test (testing storage version has been bumped)
      shell: bash
      if: env.storage_changed == 'true'
      run: |
        python3 scripts/generate_storage_version.py
        cd duckdb
        python3 scripts/generate_storage_version.py
        cd ..
        if (cmp -i 8 -n 12 test/sql/storage_version/storage_version.db duckdb/test/sql/storage_version/storage_version.db); then
          echo "Expected storage format to be bumped, but this is not the case"
          echo "This might fail spuriously if changes to content of test database / generation script happened"
          exit 1
        else
          echo "Storage bump detected, all good!"
        fi
# Python client regression job: installs the checked-out Python package and
# records timings in new.csv, then installs the base-branch package and
# records timings in current.csv, and finally compares the two files.
regression-test-python:
  name: Regression Test (Python Client)
  runs-on: ubuntu-20.04
  env:
    CC: gcc-10
    CXX: g++-10
    GEN: ninja
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install
      shell: bash
      run: |
        sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build
        pip install numpy pytest pandas mypy psutil "pyarrow<=11.0.0"

    # `save` is true on refs/heads/main, or in any repository other than
    # duckdb/duckdb.
    - name: Setup Ccache
      uses: hendrikmuhs/ccache-action@main
      with:
        key: ${{ github.job }}
        save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }}

    # "New" is the checked-out revision; "Current" is the base branch.
    - name: Build New Version
      shell: bash
      run: |
        cd tools/pythonpkg
        pip install --use-pep517 . --user
        cd ../..

    - name: Run New Version
      shell: bash
      run: |
        python scripts/regression_test_python.py --threads=2 --out-file=new.csv

    - name: Cleanup New Version
      shell: bash
      run: |
        cd tools/pythonpkg
        ./clean.sh
        cd ../..

    # Shallow-clone the base branch into ./duckdb and install its Python
    # package over the previous install. The branch name is read from the
    # BASE_BRANCH environment variable instead of being spliced into the
    # script text.
    - name: Build Current Version
      shell: bash
      run: |
        git clone --branch "$BASE_BRANCH" https://github.com/duckdb/duckdb.git --depth=1
        cd duckdb/tools/pythonpkg
        pip install --use-pep517 . --user
        cd ../../..

    - name: Run Current Version
      shell: bash
      run: |
        python scripts/regression_test_python.py --threads=2 --out-file=current.csv

    # Compare the two result files: base branch as --old, checked-out
    # revision as --new.
    - name: Regression Test
      shell: bash
      run: |
        cp -r benchmark duckdb/
        python scripts/regression_check.py --old=current.csv --new=new.csv
# Join-order plan-cost job: builds the checked-out revision and the base
# branch, then runs scripts/plan_cost_runner.py over the IMDB and TPCH
# plan-cost benchmark directories with the base build as --old.
regression-test-plan-cost:
  name: Regression Test Join Order Plan Cost
  runs-on: ubuntu-20.04
  env:
    CC: gcc-10
    CXX: g++-10
    GEN: ninja
    BUILD_TPCH: 1
    BUILD_HTTPFS: 1
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install
      shell: bash
      run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build && pip install tqdm

    # `save` is true on refs/heads/main, or in any repository other than
    # duckdb/duckdb.
    - name: Setup Ccache
      uses: hendrikmuhs/ccache-action@main
      with:
        key: ${{ github.job }}
        save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }}

    # Build the checked-out tree, then a shallow clone of the base branch in
    # ./duckdb. The branch name is read from the BASE_BRANCH environment
    # variable instead of being spliced into the script text.
    - name: Build
      shell: bash
      run: |
        make
        git clone --branch "$BASE_BRANCH" https://github.com/duckdb/duckdb.git --depth=1
        cd duckdb
        make
        cd ..

    # Copy this revision's benchmark directory into the base-branch clone.
    - name: Set up benchmarks
      shell: bash
      run: |
        cp -r benchmark duckdb/

    # `!cancelled()`: still runs when an earlier step failed, but stops once
    # the run has been cancelled (this workflow cancels superseded runs via
    # its concurrency settings).
    - name: Regression Test IMDB
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/plan_cost_runner.py --old=duckdb/build/release/duckdb --new=build/release/duckdb --dir=benchmark/imdb_plan_cost

    - name: Regression Test TPCH
      if: ${{ !cancelled() }}
      shell: bash
      run: |
        python scripts/plan_cost_runner.py --old=duckdb/build/release/duckdb --new=build/release/duckdb --dir=benchmark/tpch_plan_cost