Add support for parquet_writer_version
session property
#40154
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow-level settings: display name, triggers, token permissions,
# run de-duplication, and the default shell for every `run` step.
name: "Ubuntu Benchmark"

on:
  # Pull requests trigger the workflow only when they touch one of these
  # paths; the leading `!` pattern excludes changes under velox/docs.
  pull_request:
    paths:
      - "velox/**"
      - "!velox/docs/**"
      - "third_party/**"
      - "pyvelox/**"
      - ".github/workflows/benchmark.yml"
      - "scripts/benchmark-requirements.txt"

  # Every push to main also triggers the workflow.
  push:
    branches:
      - main

# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Runs are grouped by workflow, repository, and PR head branch (falling
# back to the commit SHA when head_ref is empty); a newer run in the same
# group cancels the one still in progress.
concurrency:
  group: "${{ github.workflow }}-${{ github.repository }}-${{ github.head_ref || github.sha }}"
  cancel-in-progress: true

defaults:
  run:
    shell: bash
jobs:
  # Builds the basic benchmarks and runs them. On pull requests a baseline
  # (merge base with upstream) and a contender (PR head) are both built and
  # compared; on pushes only the contender is built and run. Results and the
  # PR number are uploaded as artifacts.
  benchmark:
    if: github.repository == 'facebookincubator/velox'
    runs-on: 8-core-ubuntu-22.04
    env:
      CCACHE_DIR: "${{ github.workspace }}/ccache/"
      CCACHE_BASEDIR: "${{ github.workspace }}"
      BINARY_DIR: "${{ github.workspace }}/benchmarks/"
      LINUX_DISTRO: "ubuntu"
      RESULTS_ROOT: "${{ github.workspace }}/benchmark-results"
      BASELINE_OUTPUT_PATH: "${{ github.workspace }}/benchmark-results/baseline/"
      CONTENDER_OUTPUT_PATH: "${{ github.workspace }}/benchmark-results/contender/"
    steps:
      # Restore the most recent ccache saved under the `ccache-benchmark-`
      # prefix when there is no exact match for this commit.
      - name: "Restore ccache"
        uses: actions/cache/restore@v3
        id: restore-cache
        with:
          path: "ccache"
          key: ccache-benchmark-${{ github.sha }}
          restore-keys: |
            ccache-benchmark-

      # ---- Baseline (pull requests only) ----
      - name: "Checkout Repo"
        if: ${{ github.event_name == 'pull_request' }}
        uses: actions/checkout@v3
        with:
          path: 'velox'
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}
          fetch-depth: 0
          submodules: 'recursive'

      - name: "Checkout Merge Base"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        env:
          # Branch names on pull requests are contributor-controlled. Pass
          # them through the environment instead of expanding them into the
          # script text, so they can never be parsed as shell syntax.
          BASE_REF: ${{ github.base_ref }}
          HEAD_REF: ${{ github.head_ref }}
        run: |
          # Choose merge base from upstream main to avoid issues with
          # outdated fork branches
          git fetch origin
          git remote add upstream https://github.com/facebookincubator/velox
          git fetch upstream
          git status
          merge_base=$(git merge-base "upstream/${BASE_REF}" "origin/${HEAD_REF}") || \
            { echo "::error::Failed to find merge base"; exit 1; }
          echo "Merge Base: $merge_base"
          git checkout "$merge_base"
          git submodule update --init --recursive
          echo $(git log -n 1)

      - name: "Install dependencies"
        if: ${{ github.event_name == 'pull_request' }}
        run: source velox/scripts/setup-ubuntu.sh && install_apt_deps && install_duckdb

      - name: Build Baseline Benchmarks
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        run: |
          n_cores=$(nproc)
          make benchmarks-basic-build NUM_THREADS=$n_cores MAX_HIGH_MEM_JOBS=$n_cores MAX_LINK_JOBS=$n_cores
          ccache -s
          mkdir -p "${BINARY_DIR}/baseline/"
          cp -r --verbose _build/release/velox/benchmarks/basic/* "${BINARY_DIR}/baseline/"

      # ---- Contender ----
      # Pull requests reuse the clone made above and switch it to the PR head
      # commit; pushes do a fresh checkout of the pushed commit.
      - name: "Checkout Contender PR"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        run: |
          git checkout '${{ github.event.pull_request.head.sha }}'

      - name: "Checkout Contender"
        if: ${{ github.event_name == 'push' }}
        uses: actions/checkout@v3
        with:
          path: 'velox'
          ref: ${{ github.sha }}
          submodules: 'recursive'

      # Re-run the setup script from the contender checkout.
      # NOTE(review): unlike the baseline step this does not call
      # install_duckdb, and it is the only install step that runs on push
      # events - confirm that is intended.
      - name: "Install dependencies"
        run: source velox/scripts/setup-ubuntu.sh && install_apt_deps

      - name: Build Contender Benchmarks
        working-directory: velox
        run: |
          n_cores=$(nproc)
          make benchmarks-basic-build NUM_THREADS=$n_cores MAX_HIGH_MEM_JOBS=$n_cores MAX_LINK_JOBS=$n_cores
          ccache -s
          mkdir -p "${BINARY_DIR}/contender/"
          cp -r --verbose _build/release/velox/benchmarks/basic/* "${BINARY_DIR}/contender/"

      - name: "Save ccache"
        uses: actions/cache/save@v3
        id: cache
        with:
          path: "ccache"
          key: ccache-benchmark-${{ github.sha }}

      # ---- Run and compare ----
      - name: "Install benchmark dependencies"
        run: |
          python3 -m pip install -r velox/scripts/benchmark-requirements.txt

      - name: "Run Benchmarks - Baseline"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: 'velox'
        run: |
          make benchmarks-basic-run \
            EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/baseline/ --output_path ${BASELINE_OUTPUT_PATH}"

      - name: "Run Benchmarks - Contender"
        working-directory: 'velox'
        run: |
          make benchmarks-basic-run \
            EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/contender/ --output_path ${CONTENDER_OUTPUT_PATH}"

      # Writes rerun_json_output_0.json, which seeds the rerun loop below.
      # The path goes through RESULTS_ROOT so that both steps agree on the
      # location regardless of their working directory.
      - name: "Compare initial results"
        id: compare
        if: ${{ github.event_name == 'pull_request' }}
        run: |
          ./velox/scripts/benchmark-runner.py compare \
            --baseline_path "${BASELINE_OUTPUT_PATH}" \
            --contender_path "${CONTENDER_OUTPUT_PATH}" \
            --rerun_json_output "${RESULTS_ROOT}/rerun_json_output_0.json" \
            --do_not_fail

      # Up to five rerun rounds. Each round feeds the previous round's rerun
      # JSON to both binaries and writes a new one; the loop stops early as
      # soon as the previous round's file is missing or empty.
      - name: "Rerun Benchmarks"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: 'velox'
        run: |
          for i in 1 2 3 4 5; do
            CURRENT_JSON_RERUN="${RESULTS_ROOT}/rerun_json_output_$((${i} - 1)).json"
            NEXT_JSON_RERUN="${RESULTS_ROOT}/rerun_json_output_${i}.json"

            if [ ! -s "${CURRENT_JSON_RERUN}" ]; then
              echo "::notice::Rerun iteration ${i} found empty file. Finalizing."
              break
            fi

            echo "::group::Rerun iteration: ${i}"
            make benchmarks-basic-run \
              EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/baseline/ --output_path ${BASELINE_OUTPUT_PATH}/rerun-${i}/ --rerun_json_input ${CURRENT_JSON_RERUN}"

            make benchmarks-basic-run \
              EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/contender/ --output_path ${CONTENDER_OUTPUT_PATH}/rerun-${i}/ --rerun_json_input ${CURRENT_JSON_RERUN}"

            ./scripts/benchmark-runner.py compare \
              --baseline_path "${BASELINE_OUTPUT_PATH}/rerun-${i}/" \
              --contender_path "${CONTENDER_OUTPUT_PATH}/rerun-${i}/" \
              --rerun_json_output "${NEXT_JSON_RERUN}" \
              --do_not_fail
            echo "::endgroup::"
          done

          echo "::group::Compare final results"
          ./scripts/benchmark-runner.py compare \
            --baseline_path "${BASELINE_OUTPUT_PATH}" \
            --contender_path "${CONTENDER_OUTPUT_PATH}" \
            --recursive \
            --do_not_fail
          echo "::endgroup::"

      # ---- Artifacts ----
      # The PR number is written as 0 when the run has no pull request
      # (push events).
      - name: "Save PR number"
        run: echo "${{ github.event.pull_request.number || 0 }}" > pr_number.txt

      - name: "Upload PR number"
        uses: actions/upload-artifact@v4
        with:
          path: "pr_number.txt"
          name: "pr_number"

      - name: "Upload result artifact"
        uses: actions/upload-artifact@v4
        with:
          path: "benchmark-results"
          name: "benchmark-results"