Cardinality estimation on top of HLL #14145

Workflow file for this run

.github/workflows/ci-workflow.yml at accf461

	name: CI Workflow

	# Triggers: pull requests and pushes targeting master, plus manual dispatch.
	on:
	  pull_request:
	    branches:
	      - master
	  push:
	    branches:
	      - master
	  workflow_dispatch:
	    # Optional database-config inputs for manual runs; the workflow-level
	    # `env` block reads them through `github.event.inputs.*`.
	    inputs:
	      auto_checkpoint:
	        description: 'Database config - auto checkpoint'
	        required: false
	        default: true
	        type: boolean

	      buffer_pool_size:
	        description: 'Database config - buffer pool size'
	        required: false
	        default: 67108864
	        type: number

	      max_num_threads:
	        description: 'Database config - max number of threads'
	        required: false
	        default: 2
	        type: number

	      enable_compression:
	        description: 'Database config - enable compression'
	        required: false
	        default: true
	        type: boolean

	      checkpoint_threshold:
	        description: 'Database config - checkpoint threshold'
	        required: false
	        default: 16777216
	        type: number
	# Environment shared by every job; individual jobs and steps may override entries.
	env:
	  RUNTIME_CHECKS: 1
	  USE_EXISTING_BINARY_DATASET: 1
	  # The next five values are taken from the workflow_dispatch inputs.
	  # NOTE(review): `github.event.inputs` is only populated for manual runs, so
	  # these are presumably empty on pull_request/push events - confirm the
	  # consumers fall back to their own defaults.
	  AUTO_CHECKPOINT: ${{ github.event.inputs.auto_checkpoint }}
	  BUFFER_POOL_SIZE: ${{ github.event.inputs.buffer_pool_size }}
	  MAX_NUM_THREADS: ${{ github.event.inputs.max_num_threads }}
	  ENABLE_COMPRESSION: ${{ github.event.inputs.enable_compression }}
	  CHECKPOINT_THRESHOLD: ${{ github.event.inputs.checkpoint_threshold }}
	  WERROR: 1
	  RUSTFLAGS: --deny warnings
	  PIP_BREAK_SYSTEM_PACKAGES: 1

	# Only allow one run in this group to run at a time, and cancel any runs in progress in this group.
	# We use the workflow name and then add the pull request number, or (if it's a push to master), we use the name of the branch.
	# See github's docs[1] and a relevant stack overflow answer[2]
	# [1]: https://docs.github.com/en/actions/using-jobs/using-concurrency
	# [2]: https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
	concurrency:
	  # `||` falls back to the git ref when the event carries no pull request number.
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true

	jobs:
	# Builds the release binary, regenerates the binary datasets, refreshes the
	# S3 copies of tinysnb and ldbc-sf01 when their version changed, and
	# publishes dataset/binary-demo as the `binary-demo` artifact.
	generate-binary-datasets:
	  name: generate binary dataset
	  needs: [sanity-checks, python-lint-check]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    GEN: Ninja
	    CC: gcc
	    CXX: g++
	  steps:
	    - uses: actions/checkout@v4

	    - name: Build
	      run: make release

	    - name: Generate datasets
	      run: bash scripts/generate_binary_demo.sh

	    - name: Generate and upload tinysnb
	      run: |
	        bash scripts/generate_binary_tinysnb.sh
	        # Fetch the published version; if the download fails, use "0" as the remote version.
	        s3cmd get s3://kuzu-test/tinysnb/version.txt || echo "0" > version.txt
	        if [ "$(cat tinysnb/version.txt)" == "$(cat version.txt)" ]; then
	          echo "TinySNB dataset is up to date, skipping upload"
	          rm -rf tinysnb version.txt
	          exit 0
	        fi
	        echo "TinySNB dataset is outdated, uploading..."
	        s3cmd del -r s3://kuzu-test/tinysnb/
	        s3cmd sync ./tinysnb s3://kuzu-test/
	        rm -rf tinysnb version.txt

	    - name: Generate and upload ldbc-sf01
	      run: |
	        bash scripts/generate_binary_ldbc-sf01.sh
	        # Same version check as the tinysnb step above.
	        s3cmd get s3://kuzu-test/ldbc01/version.txt || echo "0" > version.txt
	        if [ "$(cat ldbc01/version.txt)" == "$(cat version.txt)" ]; then
	          echo "LDBC-SF01 dataset is up to date, skipping upload"
	          rm -rf ldbc01 version.txt
	          exit 0
	        fi
	        echo "LDBC-SF01 dataset is outdated, uploading..."
	        s3cmd del -r s3://kuzu-test/ldbc01/
	        s3cmd sync ./ldbc01 s3://kuzu-test/
	        rm -rf ldbc01 version.txt

	    - name: Upload binary-demo
	      uses: actions/upload-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	# GCC build on the self-hosted Linux runner: core tests (on-disk, in-memory,
	# checkpoint threshold 0) followed by the Python, Node.js, Java and Rust tests.
	gcc-build-test:
	  name: gcc build & test
	  needs: [sanity-checks, python-lint-check, generate-binary-datasets]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    CMAKE_BUILD_PARALLEL_LEVEL: 32
	    TEST_JOBS: 16
	    CLANGD_DIAGNOSTIC_JOBS: 32
	    CLANGD_DIAGNOSTIC_INSTANCES: 6
	    GEN: Ninja
	    CC: gcc
	    CXX: g++
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    # NOTE(review): workflow `env` values are not shell-expanded, so this is
	    # presumably passed as literal text and evaluated by a consumer - confirm.
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE

	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: ${{ github.workspace }}/dataset/binary-demo

	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Test
	      run: |
	        # Link the LDBC CSV data kept on the runner into the checkout.
	        ln -s /home/runner/ldbc-1-csv dataset/ldbc-1/csv
	        make test

	    - name: Test in mem
	      env:
	        IN_MEM_MODE: true
	      run: |
	        make test

	    - name: Test checkpoint at threshold 0
	      env:
	        AUTO_CHECKPOINT: true
	        CHECKPOINT_THRESHOLD: 0
	      run: |
	        make test

	    - name: Python test
	      run: make pytest

	    - name: Node.js test
	      run: make nodejstest

	    - name: Java test
	      run: make javatest

	    - name: Rust test with pre-built library
	      env:
	        KUZU_SHARED: 1
	        KUZU_INCLUDE_DIR: ${{ github.workspace }}/build/release/src
	        KUZU_LIBRARY_DIR: ${{ github.workspace }}/build/release/src
	      run: make rusttest

	# In-memory test run with VECTOR_CAPACITY_LOG2=10; starts after gcc-build-test.
	gcc-build-test-in-mem-vector-size:
	  name: gcc build & test in-mem only with various vector size
	  needs: [gcc-build-test]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    CMAKE_BUILD_PARALLEL_LEVEL: 32
	    TEST_JOBS: 16
	    CLANGD_DIAGNOSTIC_JOBS: 32
	    CLANGD_DIAGNOSTIC_INSTANCES: 6
	    GEN: Ninja
	    CC: gcc
	    CXX: g++
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE

	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: ${{ github.workspace }}/dataset/binary-demo

	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Test vector 1024
	      env:
	        IN_MEM_MODE: true
	      run: |
	        ln -s /home/runner/ldbc-1-csv dataset/ldbc-1/csv
	        # 2^10 = 1024, matching the step name.
	        make test VECTOR_CAPACITY_LOG2=10

	# Runs the lcov-instrumented tests (on-disk and in-memory) on a hosted
	# Ubuntu runner and uploads the resulting report to Codecov.
	code-coverage:
	  name: code coverage
	  runs-on: ubuntu-22.04
	  needs: [sanity-checks, python-lint-check, generate-binary-datasets]
	  env:
	    TEST_JOBS: 10
	    CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    - name: Install lcov
	      run: sudo apt-get install -y lcov

	    - name: Test with coverage
	      run: make lcov NUM_THREADS=$(nproc)

	    - name: Test with coverage in mem
	      env:
	        IN_MEM_MODE: true
	      run: make lcov NUM_THREADS=$(nproc)

	    - name: Generate coverage report
	      run: |
	        # Capture coverage, then drop the paths listed in lcov_exclude.
	        lcov --config-file .lcovrc -c -d ./ --no-external -o cover.info &&\
	        lcov --remove cover.info $(< .github/workflows/lcov_exclude) -o cover.info

	    - name: Upload coverage report
	      uses: codecov/codecov-action@v4.5.0
	      with:
	        file: cover.info

	# WebAssembly build and test, first in the default mode and then with
	# SINGLE_THREADED set. WERROR is turned off for this job.
	webassembly-build-test:
	  name: webassembly build & test
	  needs: [sanity-checks, generate-binary-datasets]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    WERROR: 0
	    TEST_JOBS: 8
	    NUM_THREADS: 32

	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    - name: Build & test
	      run: |
	        ln -s /home/runner/ldbc-1-csv dataset/ldbc-1/csv
	        # Load the Emscripten SDK environment installed on the runner.
	        source /home/runner/emsdk/emsdk_env.sh
	        make wasmtest

	    - name: Build & test (single-thread mode)
	      env:
	        SINGLE_THREADED: true
	      run: |
	        make clean
	        source /home/runner/emsdk/emsdk_env.sh
	        make wasmtest

	# Runs the Rust tests and builds the Rust example with the `arrow` feature.
	rust-build-test:
	  name: rust build & test
	  needs: [rustfmt-check]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    CARGO_BUILD_JOBS: 32
	    CMAKE_BUILD_PARALLEL_LEVEL: 32
	    CC: gcc
	    CXX: g++
	  steps:
	    - uses: actions/checkout@v4

	    - name: Rust test
	      run: make rusttest

	    - name: Rust example
	      working-directory: examples/rust
	      run: cargo build --locked --features arrow

	# GCC test runs under AddressSanitizer, with compression enabled and disabled.
	# WERROR is turned off for this job.
	gcc-build-test-with-asan:
	  name: gcc build & test with asan
	  needs: [gcc-build-test, generate-binary-datasets]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    TEST_JOBS: 16
	    GEN: Ninja
	    CC: gcc
	    CXX: g++
	    WERROR: 0
	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Test with ASAN
	      run: make test ASAN=1
	      env:
	        ASAN_OPTIONS: "detect_leaks=1"

	    # This step previously ran plain `make test`, so the run named
	    # "with ASAN" did not pass ASAN=1 or set ASAN_OPTIONS.
	    - name: Test compression disabled with ASAN
	      env:
	        ENABLE_COMPRESSION: false
	        ASAN_OPTIONS: "detect_leaks=1"
	      run: |
	        make test ASAN=1

	# Clang build on the self-hosted Linux runner: core tests (default and
	# compression disabled) followed by the Python, Node.js, Java and Rust tests.
	clang-build-test:
	  name: clang build & test
	  needs: [sanity-checks, python-lint-check, generate-binary-datasets]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    CMAKE_BUILD_PARALLEL_LEVEL: 32
	    TEST_JOBS: 16
	    CC: clang
	    CXX: clang++
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE
	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Test
	      run: |
	        ln -s /home/runner/ldbc-1-csv dataset/ldbc-1/csv
	        make test

	    - name: Test compression disabled
	      env:
	        ENABLE_COMPRESSION: false
	      run: |
	        make test

	    - name: Python test
	      run: make pytest

	    - name: Node.js test
	      run: make nodejstest

	    - name: Java test
	      run: make javatest

	    - name: Rust test with pre-built library
	      env:
	        KUZU_SHARED: 1
	        KUZU_INCLUDE_DIR: ${{ github.workspace }}/build/release/src
	        KUZU_LIBRARY_DIR: ${{ github.workspace }}/build/release/src
	      run: make rusttest

	# Matrix job: builds and tests with clang once per PAGE_SIZE_LOG2 value.
	# It generates the binary demo dataset itself rather than downloading the artifact.
	clang-build-test-various-page-sizes:
	  strategy:
	    matrix:
	      page_size_log2: [16, 18]
	  name: clang build & test with page_size_log2=${{ matrix.page_size_log2 }}
	  needs: [clang-build-test]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    CMAKE_BUILD_PARALLEL_LEVEL: 32
	    TEST_JOBS: 16
	    CC: clang
	    CXX: clang++
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE
	  steps:
	    - uses: actions/checkout@v4

	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Build
	      run: |
	        ln -s /home/runner/ldbc-1-csv dataset/ldbc-1/csv
	        make release PAGE_SIZE_LOG2=${{ matrix.page_size_log2 }}

	    - name: Generate binary demo
	      run: |
	        bash scripts/generate_binary_demo.sh

	    - name: Test
	      run: |
	        make test PAGE_SIZE_LOG2=${{ matrix.page_size_log2 }}

	# MSVC build and test on the self-hosted Windows runner. Every cmd step
	# first calls vcvars64.bat to set up the Visual Studio build environment.
	msvc-build-test:
	  name: msvc build & test
	  needs: [sanity-checks, python-lint-check, generate-binary-datasets]
	  runs-on: self-hosted-windows
	  env:
	    # Shorten build path as much as possible
	    CARGO_TARGET_DIR: ${{ github.workspace }}/rs
	    CARGO_BUILD_JOBS: 18
	    NUM_THREADS: 18
	    CMAKE_BUILD_PARALLEL_LEVEL: 18
	    TEST_JOBS: 9
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    PG_HOST: ${{ secrets.PG_HOST }}
	    RUN_ID: "$(hostname)-$([Math]::Floor((Get-Date).TimeOfDay.TotalSeconds))"
	    HTTP_CACHE_FILE: TRUE
	    WERROR: 0
	  steps:
	    - uses: actions/checkout@v4

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    # NOTE(review): this job pins torch 2.0.0 while the Linux and macOS jobs
	    # use 2.2.0 - confirm the difference is intentional.
	    - name: Ensure Python dependencies
	      run: |
	        pip install torch~=2.0.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    # Builds with the Visual Studio generator, then cleans before the tests.
	    - name: Visual Studio Generator Build
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make all GEN="Visual Studio 17 2022"
	        make clean

	    - name: Test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make test

	    - name: Test compression disabled
	      shell: cmd
	      env:
	        ENABLE_COMPRESSION: false
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make test

	    - name: Test in mem
	      shell: cmd
	      env:
	        IN_MEM_MODE: true
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make test

	    - name: Python test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make pytest

	    - name: Node.js test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make nodejstest

	    - name: Rust test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make rusttest

	    - name: Java test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make javatest

	    - name: Rust test with pre-built library
	      env:
	        KUZU_SHARED: 1
	        KUZU_INCLUDE_DIR: ${{ github.workspace }}/build/release/src
	        KUZU_LIBRARY_DIR: ${{ github.workspace }}/build/release/src
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        rem Append the pre-built library directory to PATH for the test run.
	        set PATH=%PATH%;${{ github.workspace }}/build/release/src
	        make rusttest

	# Fast repository checks with no upstream dependencies: include guards,
	# std::assert usage, and the generated-grammar hash.
	sanity-checks:
	  name: sanity checks
	  runs-on: ubuntu-22.04
	  steps:
	    - uses: actions/checkout@v4

	    - name: Check headers for include guards
	      run: ./scripts/check-include-guards.sh src/include

	    - name: Checks files for std::assert
	      run: ./scripts/check-no-std-assert.sh src

	    - name: Ensure generated grammar files are up to date
	      run: |
	        # Recompute the grammar hash and compare it with the committed one.
	        python3 scripts/antlr4/hash.py src/antlr4/keywords.txt src/antlr4/Cypher.g4 > tmphashfile
	        cmp tmphashfile scripts/antlr4/hash.md5
	        rm tmphashfile

	# Runs clang-format-18 in place over src/, test/, tools/ and extension/.
	# On master any resulting diff fails the job; on other refs the changes are
	# committed back.
	clang-format:
	  name: clang format
	  runs-on: ubuntu-24.04
	  steps:
	    - name: Install clang-format
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y clang-format-18

	    # Check out the pull request's head repository and branch.
	    - uses: actions/checkout@v4
	      with:
	        repository: ${{ github.event.pull_request.head.repo.full_name }}
	        ref: ${{ github.event.pull_request.head.ref }}

	    - name: Check and fix source format
	      run: python3 scripts/run-clang-format.py --in-place --clang-format-executable /usr/bin/clang-format-18 -r src/

	    - name: Check and fix test format
	      run: python3 scripts/run-clang-format.py --in-place --clang-format-executable /usr/bin/clang-format-18 -r test/

	    - name: Check and fix tools format
	      run: python3 scripts/run-clang-format.py --in-place --clang-format-executable /usr/bin/clang-format-18 -r tools/

	    - name: Check and fix extension format
	      run: python3 scripts/run-clang-format.py --in-place --clang-format-executable /usr/bin/clang-format-18 -r extension/

	    - name: Fail if any change is made (master branch)
	      if: github.ref == 'refs/heads/master'
	      run: git diff --exit-code

	    - name: Commit changes (non-master branch)
	      uses: EndBug/add-and-commit@v9
	      if: github.ref != 'refs/heads/master'
	      with:
	        author_name: "CI Bot"
	        message: "Run clang-format"

	# Lints the Python API sources and tests with ruff on a hosted macOS runner.
	python-lint-check:
	  name: python lint check
	  runs-on: macos-14
	  steps:
	    - uses: actions/checkout@v4

	    - name: Setup Python
	      uses: actions/setup-python@v5
	      with:
	        python-version: "3.11"

	    - name: Run Python lint
	      working-directory: tools/python_api
	      run: |
	        make requirements
	        ./.venv/bin/ruff check src_py test --verbose

	# Verifies that the Rust API is formatted according to `cargo fmt`.
	rustfmt-check:
	  name: rustfmt check
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Update Rust
	      run: rustup update

	    - uses: actions/checkout@v4

	    - name: Check Rust API format
	      working-directory: tools/rust_api
	      run: cargo fmt --check

	# Calls the reusable benchmark workflow once both compiler test jobs pass.
	benchmark:
	  name: benchmark
	  needs: [gcc-build-test, clang-build-test]
	  uses: ./.github/workflows/benchmark-workflow.yml

	# Pull-request runs only: downloads the `comparison-results` artifact and
	# posts compare_result.md as a pull request comment.
	report-benchmark-result:
	  name: report benchmark result
	  needs: [benchmark]
	  runs-on: ubuntu-22.04
	  if: github.event_name == 'pull_request'
	  steps:
	    - name: Download comparison results
	      uses: actions/download-artifact@v4
	      with:
	        name: comparison-results

	    - name: Report benchmark result
	      uses: thollander/actions-comment-pull-request@v2
	      with:
	        filePath: compare_result.md

	# Static analysis: clangd diagnostics, clang-tidy, and the clang-tidy analyzer.
	clang-tidy:
	  name: clang tidy & clangd diagnostics check
	  needs: [sanity-checks]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    GEN: Ninja
	    NUM_THREADS: 32
	    CC: clang
	    CXX: clang++

	  steps:
	    - uses: actions/checkout@v4

	    # For `napi.h` header.
	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Check for clangd diagnostics
	      run: make clangd-diagnostics

	    - name: Run clang-tidy
	      run: make tidy

	    - name: Run clang-tidy analyzer
	      run: make tidy-analyzer

	# Apple clang build and test on the self-hosted macOS runner. Most run steps
	# raise the open-file limit with `ulimit -n 10240` before invoking make.
	macos-build-test:
	  name: apple clang build & test
	  needs: [sanity-checks, rustfmt-check, python-lint-check, generate-binary-datasets]
	  runs-on: [self-hosted, macOS]
	  env:
	    TEST_JOBS: 16
	    GEN: Ninja
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    PG_HOST: ${{ secrets.PG_HOST }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE
	  steps:
	    - uses: actions/checkout@v4

	    # Uses two thirds of the logical CPUs, but never fewer than 12, and
	    # exports the value to later steps through GITHUB_ENV.
	    - name: Determine NUM_THREADS
	      run: |
	        export NUM_THREADS=$(($(sysctl -n hw.logicalcpu) * 2 / 3))
	        if [ $NUM_THREADS -lt 12 ]; then
	          export NUM_THREADS=12
	        fi
	        echo "NUM_THREADS=$NUM_THREADS" >> $GITHUB_ENV
	        echo "CMAKE_BUILD_PARALLEL_LEVEL=$NUM_THREADS" >> $GITHUB_ENV
	        echo "NUM_THREADS=$NUM_THREADS"

	    - name: Download binary-demo
	      uses: actions/download-artifact@v4
	      with:
	        name: binary-demo
	        path: dataset/binary-demo

	    - name: Ensure Python dependencies
	      run: |
	        pip3 install torch~=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu
	        pip3 install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

	    - name: Ensure Node.js dependencies
	      run: npm install --include=dev
	      working-directory: tools/nodejs_api

	    - name: Test
	      run: |
	        ulimit -n 10240
	        ln -s /Users/runner/ldbc-1-csv dataset/ldbc-1/csv
	        make test

	    - name: Test in mem
	      env:
	        IN_MEM_MODE: true
	      run: |
	        make test

	    - name: Test checkpoint at threshold 0
	      env:
	        AUTO_CHECKPOINT: true
	        CHECKPOINT_THRESHOLD: 0
	      run: |
	        make test

	    - name: Python test
	      env:
	        # Quoted so the version is a string and can never be read as a float.
	        PYBIND11_PYTHON_VERSION: "3.12"
	      run: |
	        ulimit -n 10240
	        make pytest

	    - name: C and C++ Examples
	      run: |
	        ulimit -n 10240
	        make example

	    - name: Node.js test
	      run: |
	        ulimit -n 10240
	        make nodejstest

	    - name: Java test
	      run: |
	        ulimit -n 10240
	        export JAVA_HOME=`/usr/libexec/java_home`
	        make javatest

	    - name: Rust test
	      run: |
	        ulimit -n 10240
	        source /Users/runner/.cargo/env
	        make rusttest

	    - name: Rust example
	      working-directory: examples/rust
	      run: |
	        ulimit -n 10240
	        source /Users/runner/.cargo/env
	        cargo build --locked --features arrow

	    # TODO(bmwinger): rust pre-built library test for macos
	    # When last tried it had issues finding libkuzu.dylib when running the tests
	    # Using LD_LIBRARY_PATH (and similar variables) did not work for some reason

	# Builds the release binary on a hosted Ubuntu runner and runs the shell
	# tests with pytest.
	shell-test:
	  name: shell test
	  runs-on: ubuntu-latest
	  needs: [sanity-checks]
	  steps:
	    - uses: actions/checkout@v4

	    - name: Build
	      run: make release NUM_THREADS=$(nproc)

	    - name: Test
	      working-directory: tools/shell/test
	      run: |
	        pip3 install pytest pexpect
	        python3 -m pytest -v .

	# Extension tests on the self-hosted Linux runner: on-disk, in-memory, and
	# an ASAN run that excludes the postgres tests.
	linux-extension-test:
	  name: linux extension test
	  needs: [gcc-build-test, clang-build-test]
	  runs-on: kuzu-self-hosted-testing
	  env:
	    NUM_THREADS: 32
	    TEST_JOBS: 16
	    CLANGD_DIAGNOSTIC_JOBS: 32
	    CLANGD_DIAGNOSTIC_INSTANCES: 6
	    GEN: Ninja
	    CC: gcc
	    CXX: g++
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE
	    ASAN_OPTIONS: detect_leaks=1

	  steps:
	    - uses: actions/checkout@v4

	    # Rewrites every "localhost" in postgres.test to the value of PG_HOST.
	    # NOTE(review): unlike the macOS and Windows extension jobs, this job does
	    # not set PG_HOST in `env`; presumably the runner provides it - confirm,
	    # otherwise the replacement text would be "undefined".
	    - name: Update PostgreSQL host
	      working-directory: extension/postgres/test/test_files
	      env:
	        FNAME: postgres.test
	        FIND: "localhost"
	      run: |
	        node -e 'fs=require("fs");fs.readFile(process.env.FNAME,"utf8",(err,data)=>{if(err!=null)throw err;fs.writeFile(process.env.FNAME,data.replaceAll(process.env.FIND,process.env.PG_HOST),"utf8",e=>{if(e!=null)throw e;});});'
	        cat postgres.test

	    - name: Install dependencies
	      run: pip install rangehttpserver

	    # shell needs to be built first to generate the dataset provided by the server
	    - name: Extension test build
	      run: make extension-test-build

	    - name: Extension test
	      run: |
	        python3 scripts/generate-tinysnb.py
	        # Start the HTTP server in the background before running the tests.
	        cd scripts/ && python3 http-server.py &
	        make extension-test

	    - name: Extension test in mem
	      env:
	        IN_MEM_MODE: true
	        HTTP_CACHE_FILE: false
	      run: |
	        make extension-test && make clean

	    # TODO(Royi) there is currently a known memory leak issue in the DuckDB postgres extension so we avoid running the postgres extension tests with Leak Sanitizer
	    - name: Extension test with asan
	      run: |
	        make clean && make extension-test ASAN=1 WERROR=0 EXTENSION_TEST_EXCLUDE_FILTER="postgres" && make clean

	# Extension tests on the self-hosted macOS runner: on-disk and in-memory.
	macos-extension-test:
	  name: macos extension test
	  needs: [macos-build-test]
	  runs-on: [self-hosted, macOS]
	  env:
	    TEST_JOBS: 16
	    GEN: Ninja
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    PG_HOST: ${{ secrets.PG_HOST }}
	    RUN_ID: "$(hostname)-$(date +%s)"
	    HTTP_CACHE_FILE: TRUE
	  steps:
	    - uses: actions/checkout@v4

	    # Uses two thirds of the logical CPUs, but never fewer than 12, and
	    # exports the value to later steps through GITHUB_ENV.
	    - name: Determine NUM_THREADS
	      run: |
	        export NUM_THREADS=$(($(sysctl -n hw.logicalcpu) * 2 / 3))
	        if [ $NUM_THREADS -lt 12 ]; then
	          export NUM_THREADS=12
	        fi
	        echo "NUM_THREADS=$NUM_THREADS" >> $GITHUB_ENV
	        echo "NUM_THREADS=$NUM_THREADS"

	    # Rewrites every "localhost" in postgres.test to the value of PG_HOST.
	    - name: Update PostgreSQL host
	      working-directory: extension/postgres/test/test_files
	      env:
	        FNAME: postgres.test
	        FIND: "localhost"
	      run: |
	        node -e 'fs=require("fs");fs.readFile(process.env.FNAME,"utf8",(err,data)=>{if(err!=null)throw err;fs.writeFile(process.env.FNAME,data.replaceAll(process.env.FIND,process.env.PG_HOST),"utf8",e=>{if(e!=null)throw e;});});'
	        cat postgres.test

	    - name: Install dependencies
	      run: pip3 install rangehttpserver

	    # shell needs to be built first to generate the dataset provided by the server
	    - name: Extension test build
	      run: make extension-test-build

	    - name: Extension test
	      run: |
	        python3 scripts/generate-tinysnb.py
	        # Start the HTTP server in the background before running the tests.
	        cd scripts/ && python3 http-server.py &
	        make extension-test

	    - name: Extension test in mem
	      env:
	        IN_MEM_MODE: true
	        HTTP_CACHE_FILE: false
	      run: |
	        make extension-test && make clean

	# Extension tests on the self-hosted Windows runner: on-disk and in-memory,
	# followed by a clean.
	windows-extension-test:
	  name: windows extension test
	  needs: [msvc-build-test]
	  runs-on: self-hosted-windows
	  env:
	    # Shorten build path as much as possible
	    CARGO_TARGET_DIR: ${{ github.workspace }}/rs
	    CARGO_BUILD_JOBS: 18
	    NUM_THREADS: 18
	    TEST_JOBS: 9
	    UW_S3_ACCESS_KEY_ID: ${{ secrets.UW_S3_ACCESS_KEY_ID }}
	    UW_S3_SECRET_ACCESS_KEY: ${{ secrets.UW_S3_SECRET_ACCESS_KEY }}
	    AWS_S3_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_ACCESS_KEY_ID }}
	    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_SECRET_ACCESS_KEY }}
	    PG_HOST: ${{ secrets.PG_HOST }}
	    RUN_ID: "$(hostname)-$([Math]::Floor((Get-Date).TimeOfDay.TotalSeconds))"
	    HTTP_CACHE_FILE: TRUE
	    WERROR: 0
	  steps:
	    - uses: actions/checkout@v4

	    - name: Install dependencies
	      run: pip install rangehttpserver

	    # Rewrites every "localhost" in postgres.test to the value of PG_HOST.
	    - name: Update PostgreSQL host
	      working-directory: extension/postgres/test/test_files
	      env:
	        FNAME: postgres.test
	        FIND: "localhost"
	      run: |
	        $fname = $env:FNAME
	        $find = $env:FIND
	        $replace = $env:PG_HOST
	        $content = Get-Content -Path $fname
	        $content = $content -replace [regex]::Escape($find), $replace
	        Set-Content -Path $fname -Value $content
	        cat $fname

	    - name: Extension test build
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make extension-test-build

	    - name: Extension test
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        python3 scripts/generate-tinysnb.py
	        if %errorlevel% neq 0 exit /b %errorlevel%
	        cd scripts/ && start /b python http-server.py && cd ..
	        make extension-test

	    # NOTE(review): unlike the other cmd steps in this job, this one does not
	    # call vcvars64.bat first - confirm that is intentional.
	    - name: Extension test in mem
	      shell: cmd
	      env:
	        IN_MEM_MODE: true
	        HTTP_CACHE_FILE: false
	      run: |
	        cd scripts/ && start /b python http-server.py && cd ..
	        make extension-test

	    - name: Clean
	      shell: cmd
	      run: |
	        call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
	        make clean

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Cardinality estimation on top of HLL #14145

Workflow file

Cardinality estimation on top of HLL #14145

Jobs

Run details

Workflow file for this run