diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9943b02a521..0ea4d5c54dc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -67,7 +67,27 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ inputs.sha }} + wheel-build-libcudf: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libcudf.sh + wheel-publish-libcudf: + needs: wheel-build-libcudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libcudf wheel-build-pylibcudf: + needs: [wheel-publish-libcudf] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 74bdc666c68..2e2a8b6b9bc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,6 +23,7 @@ jobs: - static-configure - conda-notebook-tests - docs-build + - wheel-build-libcudf - wheel-build-pylibcudf - wheel-build-cudf - wheel-tests-cudf @@ -121,10 +122,18 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - wheel-build-pylibcudf: + wheel-build-libcudf: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + build_type: pull-request + script: "ci/build_wheel_libcudf.sh" + wheel-build-pylibcudf: + needs: [checks, wheel-build-libcudf] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index 7c0fb1efebe..cf33703f544 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -5,16 +5,28 @@ set -euo pipefail package_dir="python/cudf" -export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" - -# Download the pylibcudf built in the previous step RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 /tmp/pylibcudf_dist -echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" > /tmp/constraints.txt +# Downloads libcudf and pylibcudf wheels from this current build, +# then ensures 'cudf' wheel builds always use the 'libcudf' and 'pylibcudf' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python /tmp/pylibcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" + ./ci/build_wheel.sh ${package_dir} -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libarrow.so.1601 \ + --exclude libnvcomp.so \ + --exclude libnvcomp_bitcomp.so \ + --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh new file mode 100755 index 00000000000..9694c3f6144 --- /dev/null +++ b/ci/build_wheel_libcudf.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/libcudf" + +./ci/build_wheel.sh ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair --exclude libarrow.so.1601 -w ${package_dir}/final_dist ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index b25d118ff81..7181a49d397 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -5,12 +5,26 @@ set -euo pipefail package_dir="python/pylibcudf" -export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -./ci/build_wheel.sh ${package_dir} +# Downloads libcudf wheel from this current build, +# then ensures 'pylibcudf' wheel builds always use the 'libcudf' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +./ci/build_wheel.sh ${package_dir} +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libarrow.so.1601 \ + --exclude libnvcomp.so \ + --exclude libnvcomp_bitcomp.so \ + --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index 97c3139080f..e5cd4436a3a 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -12,13 +12,15 @@ rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index 8215ce729b3..b222f8001e9 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -37,13 +37,15 @@ if [ "$no_cudf" = true ]; then else RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - # Download the cudf and pylibcudf built in the previous step - RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist - RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + # Download the cudf, libcudf, and pylibcudf built in the previous step + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist + RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" fi diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index e79a91510b8..be55b49870f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -49,6 +49,7 @@ DEPENDENCIES=( dask-cuda dask-cudf kvikio + libcudf libkvikio librmm pylibcudf diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index 19131952098..6861d699695 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -5,13 +5,15 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index e9c6188502c..0baf6c9e277 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -18,16 +18,18 @@ else fi RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download libcudf and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Installing cudf_polars and its dependencies" # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ "$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" rapids-logger "Run cudf_polars tests" diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index ff893a08e27..fa74b2398f7 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -4,16 +4,18 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/dependencies.yaml b/dependencies.yaml index 150d03be021..553d01735b2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -13,6 +13,7 @@ files: - cuda - cuda_version - depends_on_cupy + - depends_on_librmm - depends_on_rmm - develop - docs @@ -95,6 +96,8 @@ files: - build_base - build_python_common - depends_on_pylibcudf + - depends_on_libcudf + - depends_on_librmm - depends_on_rmm py_run_cudf: output: pyproject @@ -106,6 +109,7 @@ files: - run_cudf - pyarrow_run - depends_on_cupy + - depends_on_libcudf - depends_on_pylibcudf - depends_on_rmm py_test_cudf: @@ -117,6 +121,31 @@ files: includes: - test_python_common - test_python_cudf + py_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_cpp + - build_python_libcudf + - depends_on_librmm + py_run_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: project + includes: + - pyarrow_run py_build_pylibcudf: output: pyproject pyproject_dir: python/pylibcudf @@ -133,6 +162,8 @@ files: includes: - build_base - build_python_common + - depends_on_libcudf + - depends_on_librmm - depends_on_rmm py_run_pylibcudf: output: pyproject @@ -140,6 +171,7 @@ files: extras: table: project includes: + - depends_on_libcudf - depends_on_rmm - pyarrow_run - run_pylibcudf @@ -359,13 +391,18 @@ dependencies: - cython>=3.0.3 # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - - pyarrow==16.1.0.* + - &pyarrow_build pyarrow==16.1.0.* - output_types: pyproject packages: # Hard pin the patch version used during the build. # Sync with conda build constraint & wheel run constraint. # TODO: Change to `2.0.*` for NumPy 2 - numpy==1.23.* + build_python_libcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - *pyarrow_build libarrow_build: common: - output_types: conda @@ -759,6 +796,31 @@ dependencies: packages: - dask-cuda==24.10.*,>=0.0.0a0 - *numba + depends_on_libcudf: + common: + - output_types: conda + packages: + - &libcudf_unsuffixed libcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libcudf-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_pylibcudf: common: - output_types: conda @@ -849,6 +911,33 @@ dependencies: packages: &cupy_packages_cu11 - cupy-cuda11x>=12.0.0 - {matrix: null, packages: *cupy_packages_cu11} + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for librmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed depends_on_rmm: common: - output_types: conda diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e11d62b3bd5..72f20b30052 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -24,72 +24,15 @@ project( LANGUAGES CXX CUDA ) -option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF -) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - -# Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -# If the user requested it we attempt to find CUDF. -if(FIND_CUDF_CPP) - include(rapids-cpm) - include(rapids-export) - include(rapids-find) - rapids_cpm_init() +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. These variables have no effect because we are always searching for arrow via - # pyarrow, but they must be set as they are required arguments to the function in - # get_arrow.cmake. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() - - find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cudf_FOUND OFF) -endif() +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) - -if(NOT cudf_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - set(CUDA_STATIC_RUNTIME ON) - - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous - # components like headers from libcudacxx, but we do need the libraries. However, we want to - # control where they are installed to. Since there are multiple subpackages of cudf._lib that - # require access to libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths appropriately. - set(cython_lib_dir cudf) - include(../pylibcudf/cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. We should only install the libraries that are - # downloaded by CPM during the build, not libraries that were found on the system. However, in - # practice right this would only be a problem is if libcudf was not found but some of the - # dependencies were, and we have no real use cases where that happens. - install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${cython_lib_dir} - ) -endif() - rapids_cython_init() include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index ccc45413de4..d7da42a1708 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + # _setup_numba _must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # Minor Version Compatibility. Setting it after importing numba.cuda has no effect. diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index d6182673308..5ea378fc0e5 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -63,6 +63,7 @@ rapids_cython_create_modules( ) target_link_libraries(strings_udf PUBLIC cudf_strings_udf) +target_include_directories(interop PUBLIC "$") set(targets_using_arrow_headers avro csv orc json parquet) link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 9db52164eca..cb9fa30afab 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", + "libcudf==24.10.*,>=0.0.0a0", "numba>=0.57", "numpy>=1.23,<2.0a0", "nvtx>=0.2.1", @@ -126,6 +127,8 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", + "libcudf==24.10.*,>=0.0.0a0", + "librmm==24.10.*,>=0.0.0a0", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*", diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt new file mode 100644 index 00000000000..09c7ed2e217 --- /dev/null +++ b/python/libcudf/CMakeLists.txt @@ -0,0 +1,58 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libcudf-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +# Check if cudf is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python cudf package. +find_package(cudf "${RAPIDS_VERSION}") + +if(cudf_FOUND) + return() +endif() + +unset(cudf_FOUND) + +# For wheels, this should always be true +set(USE_LIBARROW_FROM_PYARROW ON) + +# Find Python early so that later commands can use it +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) + +set(BUILD_TESTS OFF) +set(BUILD_BENCHMARKS OFF) +set(CUDF_BUILD_TESTUTIL OFF) +set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) +set(CUDA_STATIC_RUNTIME ON) + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) + +include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) + +add_subdirectory(../../cpp cudf-cpp) + +# Ensure other libraries needed by libcudf.so get installed alongside it. +include(cmake/Modules/WheelHelpers.cmake) +install_aliased_imported_targets( + TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp + DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} +) diff --git a/python/libcudf/LICENSE b/python/libcudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/libcudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcudf/README.md b/python/libcudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/libcudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/pylibcudf/cmake/Modules/WheelHelpers.cmake b/python/libcudf/cmake/Modules/WheelHelpers.cmake similarity index 100% rename from python/pylibcudf/cmake/Modules/WheelHelpers.cmake rename to python/libcudf/cmake/Modules/WheelHelpers.cmake diff --git a/python/libcudf/libcudf/VERSION b/python/libcudf/libcudf/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/libcudf/libcudf/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcudf/libcudf/__init__.py b/python/libcudf/libcudf/__init__.py new file mode 100644 index 00000000000..10c476cbe89 --- /dev/null +++ b/python/libcudf/libcudf/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcudf._version import __git_commit__, __version__ +from libcudf.load import load_library diff --git a/python/libcudf/libcudf/_version.py b/python/libcudf/libcudf/_version.py new file mode 100644 index 00000000000..7dd732b4905 --- /dev/null +++ b/python/libcudf/libcudf/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py new file mode 100644 index 00000000000..f6ba0d51bdb --- /dev/null +++ b/python/libcudf/libcudf/load.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # This is loading the libarrow shared library in situations where it comes from the + # pyarrow package (i.e. when installed as a wheel). + import pyarrow # noqa: F401 + + # Dynamically load libcudf.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + libcudf_lib = None + try: + libcudf_lib = ctypes.CDLL("libcudf.so", ctypes.RTLD_GLOBAL) + except OSError: + # If neither of these directories contain the library, we assume we are in an + # environment where the C++ library is already installed somewhere else and the + # CMake build of the libcudf Python package was a no-op. + # + # Note that this approach won't work for real editable installs of the libcudf package. + # scikit-build-core has limited support for importlib.resources so there isn't a clean + # way to support that case yet. + for lib_dir in ("lib", "lib64"): + if os.path.isfile( + lib := os.path.join( + os.path.dirname(__file__), lib_dir, "libcudf.so" + ) + ): + libcudf_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) + break + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcudf was loaded from. + return libcudf_lib diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml new file mode 100644 index 00000000000..fd01f7f6e2f --- /dev/null +++ b/python/libcudf/pyproject.toml @@ -0,0 +1,75 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "rapids_build_backend.build" +requires = [ + "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "scikit-build-core[pyproject]>=0.10.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project] +name = "libcudf" +dynamic = ["version"] +description = "cuDF - GPU Dataframe (C++)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.10" +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: C++", + "Environment :: GPU :: NVIDIA CUDA", +] +dependencies = [ + "pyarrow>=16.1.0,<16.2.0a0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" + +[project.entry-points."cmake.prefix"] +libcudf = "libcudf" + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["libcudf"] +wheel.install-dir = "libcudf" +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libcudf/VERSION" +regex = "(?P.*)" + +[tool.rapids-build-backend] +build-backend = "scikit_build_core.build" +dependencies-file = "../../dependencies.yaml" +matrix-entry = "cuda_suffixed=true" +requires = [ + "cmake>=3.26.4,!=3.30.0", + "librmm==24.10.*,>=0.0.0a0", + "ninja", + "pyarrow==16.1.0.*", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index 424d8372280..340ad120377 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -24,72 +24,16 @@ project( LANGUAGES CXX CUDA ) -option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF -) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - -# Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -# If the user requested it we attempt to find CUDF. -if(FIND_CUDF_CPP) - include(rapids-cpm) - include(rapids-export) - include(rapids-find) - rapids_cpm_init() - - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. These variables have no effect because we are always searching for arrow via - # pyarrow, but they must be set as they are required arguments to the function in - # get_arrow.cmake. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() - - find_package(cudf "${RAPIDS_VERSION}" REQUIRED) +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cudf_FOUND OFF) -endif() +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) -if(NOT cudf_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - set(CUDA_STATIC_RUNTIME ON) - - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous - # components like headers from libcudacxx, but we do need the libraries. However, we want to - # control where they are installed to. Since there are multiple subpackages of pylibcudf that - # require access to libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths appropriately. - set(cython_lib_dir pylibcudf) - include(cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. We should only install the libraries that are - # downloaded by CPM during the build, not libraries that were found on the system. However, in - # practice right this would only be a problem is if libcudf was not found but some of the - # dependencies were, and we have no real use cases where that happens. - install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${cython_lib_dir} - ) -endif() - rapids_cython_init() include(cmake/Modules/LinkPyarrowHeaders.cmake) diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index ab21bfe97ab..f81a32e07f9 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -53,6 +53,8 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) +target_include_directories(pylibcudf_interop PUBLIC "$") + include(${rapids-cmake-dir}/export/find_package_root.cmake) include(../../../cpp/cmake/thirdparty/get_nanoarrow.cmake) target_link_libraries(pylibcudf_interop PUBLIC nanoarrow) diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 677fdaf80d0..e784c6c6dd5 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + from . import ( aggregation, binaryop, diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index b037508d03f..63d76e9fd4e 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -19,6 +19,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", + "libcudf==24.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", "pyarrow>=16.1.0,<16.2.0a0", @@ -101,6 +102,8 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", + "libcudf==24.10.*,>=0.0.0a0", + "librmm==24.10.*,>=0.0.0a0", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*",