Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/kvikio_shared_library' into kvik…
Browse files Browse the repository at this point in the history
…io-remote-io-ci-debugging
  • Loading branch information
madsbk committed Nov 4, 2024
2 parents 98d2eb6 + 2d8eeaf commit 9e94e18
Show file tree
Hide file tree
Showing 170 changed files with 4,398 additions and 764 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: "Auto Assign PR"

on:
pull_request_target:
types:
- opened
- reopened
- synchronize

jobs:
add_assignees:
runs-on: ubuntu-latest
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
github_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
1 change: 1 addition & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: "Pull Request Labeler"

on:
- pull_request_target

Expand Down
1 change: 1 addition & 0 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
10 changes: 9 additions & 1 deletion ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ rapids-dependency-file-generator \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
| tee /tmp/requirements-build.txt


# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_cu12 rapids-get-pr-wheel-artifact kvikio 527 cpp
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
-v \
Expand All @@ -25,14 +32,15 @@ python -m pip install \
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON;-DUSE_LIBKVIKIO_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh "${package_name}" "${package_dir}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair \
--exclude libnvcomp.so.4 \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
1 change: 1 addition & 0 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
8 changes: 8 additions & 0 deletions ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,16 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_${RAPIDS_PY_CUDA_SUFFIX} rapids-get-pr-wheel-artifact kvikio 527 cpp # also python?
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
-r /tmp/requirements-build.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,pandas-tests]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
Expand Down
18 changes: 9 additions & 9 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,18 @@ else
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
# `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]`
rapids-dependency-file-generator \
--output requirements \
--file-key test_python_cudf_pandas \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi
# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints test_python_cudf_pandas ./constraints.txt

# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_${RAPIDS_PY_CUDA_SUFFIX} rapids-get-pr-wheel-artifact kvikio 527 cpp # also python?
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

python -m pip install \
-v \
-r /tmp/requirements-build.txt \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
Expand Down
18 changes: 9 additions & 9 deletions ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@ RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels

rapids-logger "Install cudf, pylibcudf, and test requirements"

# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi
# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_cudf ./constraints.txt

# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_${RAPIDS_PY_CUDA_SUFFIX} rapids-get-pr-wheel-artifact kvikio 527 cpp # also python?
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
-r /tmp/requirements-build.txt \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
Expand Down
19 changes: 10 additions & 9 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,20 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Installing cudf_polars and its dependencies"
# Constraint to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf_polars \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_cudf_polars ./constraints.txt

# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_${RAPIDS_PY_CUDA_SUFFIX} rapids-get-pr-wheel-artifact kvikio 527 cpp # also python?
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
-r /tmp/requirements-build.txt \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
Expand Down
19 changes: 10 additions & 9 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,20 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
# Constraint to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_dask_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# generate constraints (possibly pinning to oldest support versions of dependencies)
rapids-generate-pip-constraints py_test_dask_cudf ./constraints.txt

# Download wheel from <https://github.com/rapidsai/kvikio/pull/527>
LIBKVIKIO_CHANNEL=$(
RAPIDS_PY_WHEEL_NAME=libkvikio_${RAPIDS_PY_CUDA_SUFFIX} rapids-get-pr-wheel-artifact kvikio 527 cpp # also python?
)
echo ${LIBKVIKIO_CHANNEL}/libkvikio_*.whl >> /tmp/requirements-build.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
-r /tmp/requirements-build.txt \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dependencies:
- librdkafka>=2.5.0,<2.6.0a0
- librmm==24.12.*,>=0.0.0a0
- make
- mmh3
- moto>=4.0.8
- msgpack-python
- myst-nb
Expand All @@ -58,14 +59,14 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==4.0.1
- nvcomp==4.1.0.6
- nvtx>=0.2.1
- openpyxl
- packaging
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.12
- polars>=1.11,<1.13
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<18.0.0a0
Expand All @@ -76,6 +77,7 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies:
- librdkafka>=2.5.0,<2.6.0a0
- librmm==24.12.*,>=0.0.0a0
- make
- mmh3
- moto>=4.0.8
- msgpack-python
- myst-nb
Expand All @@ -56,14 +57,14 @@ dependencies:
- numba-cuda>=0.0.13
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==4.0.1
- nvcomp==4.1.0.6
- nvtx>=0.2.1
- openpyxl
- packaging
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.12
- polars>=1.11,<1.13
- pre-commit
- pyarrow>=14.0.0,<18.0.0a0
- pydata-sphinx-theme!=0.14.2
Expand All @@ -74,6 +75,7 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spdlog_version:
- ">=1.14.1,<1.15"

nvcomp_version:
- "=4.0.1"
- "=4.1.0.6"

zlib_version:
- ">=1.2.13"
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ add_library(
src/groupby/hash/compute_groupby.cu
src/groupby/hash/compute_mapping_indices.cu
src/groupby/hash/compute_mapping_indices_null.cu
src/groupby/hash/compute_shared_memory_aggs.cu
src/groupby/hash/compute_single_pass_aggs.cu
src/groupby/hash/create_sparse_results_table.cu
src/groupby/hash/flatten_single_pass_aggs.cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,6 @@ ConfigureNVBench(
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(
STRINGS_BENCH
string/combine.cpp
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
Expand All @@ -374,6 +373,7 @@ ConfigureNVBench(
STRINGS_NVBENCH
string/case.cpp
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/copy_if_else.cpp
string/copy_range.cpp
Expand Down
26 changes: 13 additions & 13 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(nvbench::state& state)
{
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
row_count{num_rows},
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

Expand Down Expand Up @@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state)
auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);
state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
Expand All @@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state)
template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
static void BM_string_compare_ast_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
Expand Down Expand Up @@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));

std::for_each(thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
Expand All @@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("tree_levels", {1, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);
Expand All @@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE(
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and,
Expand Down
Loading

0 comments on commit 9e94e18

Please sign in to comment.