diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f9d5976f1fe..e801d2f7f79 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,6 +23,7 @@ jobs: - static-configure - conda-notebook-tests - docs-build + - wheel-build-libcudf - wheel-build-cudf - wheel-tests-cudf - wheel-build-dask-cudf @@ -118,10 +119,18 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - wheel-build-cudf: + wheel-build-libcudf: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 + with: + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + build_type: pull-request + script: "ci/build_wheel_libcudf.sh" + wheel-build-cudf: + needs: [checks, wheel-build-libcudf] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" diff --git a/build.sh b/build.sh index e5daf2f3451..a14d1ea85c4 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cuDF build script @@ -17,12 +17,13 @@ ARGS=$* # script, and that this script resides in the repo dir! 
REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats" -HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] +VALIDARGS="clean libcudf cudf libcudfwheel cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats" +HELP="$0 [clean] [libcudf] [cudf] [libcudfwheel] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] clean - remove all existing build artifacts and configuration (start over) libcudf - build the cudf C++ code only cudf - build the cudf Python package + libcudfwheel - build the cudf C++ code packaged as a python wheel package cudfjar - build cudf JAR with static libcudf using devtoolset toolchain dask_cudf - build the dask_cudf Python package benchmarks - build benchmarks @@ -333,7 +334,14 @@ if buildAll || hasArg libcudf; then fi fi -# Build and install the cudf Python package +if buildAll || hasArg libcudfwheel; then + + cd ${REPODIR}/python/libcudf + SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS}" \ + python ${PYTHON_ARGS_FOR_INSTALL} . +fi + +# Build and install the cudf Python packages if buildAll || hasArg cudf; then cd ${REPODIR}/python/cudf diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index c4b794e81f7..56b6cdebc15 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-set -euo pipefail +set -euox pipefail package_name=$1 package_dir=$2 @@ -34,6 +34,8 @@ if ! rapids-is-release-build; then alpha_spec=',>=0.0.0a0' fi +sed -r -i "s/libcudf==(.*)\"/libcudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} + if [[ ${package_name} == "dask-cudf" ]]; then sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} sed -r -i "s/dask-cuda==(.*)\"/dask-cuda==\1${alpha_spec}\"/g" ${pyproject_file} diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index f0886a28fd9..8e144e09432 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -1,16 +1,24 @@ #!/bin/bash # Copyright (c) 2023-2024, NVIDIA CORPORATION. -set -euo pipefail +set -euox pipefail +package_name="cudf" package_dir="python/cudf" export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" -./ci/build_wheel.sh cudf ${package_dir} +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# Downloads libcudf wheel from this current build, then points pip to it in PIP_FIND_LINKS below +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +ls -la /tmp/libcudf_dist +export PIP_FIND_LINKS="/tmp/libcudf_dist" +./ci/build_wheel.sh cudf ${package_dir} -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +cd ${package_dir} +mkdir -p final_dist +python -m auditwheel repair --exclude libcudf.so --exclude libarrow.so.1400 -w final_dist dist/* + +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh new file mode 100755 index 00000000000..eb1402c8a87 --- /dev/null +++ b/ci/build_wheel_libcudf.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# 
Copyright (c) 2023-2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/libcudf" + +./ci/build_wheel.sh libcudf ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/dist diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index abde5e5d160..562c2c12635 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -11,8 +11,9 @@ rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep -python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests] +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-cudf-dep +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-cudf-dep +python -m pip install --find-links $(pwd)/local-cudf-dep $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests] RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index 78945d37f22..cb980ba9538 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -32,8 +32,9 @@ if [ "$no_cudf" = true ]; then echo "Skipping cudf install" else RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep - python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests] + 
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-cudf-dep + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-cudf-dep + python -m pip install --find-links $(pwd)/local-cudf-dep $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests] fi python -m pytest -p cudf.pandas ./python/cudf/cudf_pandas_tests/ diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index fdb61278d36..27db3767e40 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -4,10 +4,11 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cudf*.whl)[test] +python -m pip install --find-links $(pwd)/dist $(echo ./dist/cudf*.whl)[test] RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 2b20b9d9ce4..798ed8df062 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -7,8 +7,9 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # Download the cudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep -python -m pip install --no-deps ./local-cudf-dep/cudf*.whl +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-cudf-dep 
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-cudf-dep +python -m pip install --no-deps --find-links $(pwd)/local-cudf-dep ./local-cudf-dep/cudf*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/dask_cudf*.whl)[test] diff --git a/dependencies.yaml b/dependencies.yaml index 14c698000cb..1506ccef87e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,7 @@ files: - py_version - run_common - run_cudf + - run_libcudf - run_dask_cudf - run_custreamz - test_cpp @@ -75,6 +76,18 @@ files: - docs - libarrow_run - py_version + # This is the shared library, bundled as a wheel. It is meant to be consumed by the wrapper. + py_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: build-system + includes: + - build_base + - build_cpp + - build_python_common + - build_python_cudf + # This is the wrapper that gets used in Python, not the shared library wheel py_build_cudf: output: pyproject pyproject_dir: python/cudf @@ -84,6 +97,7 @@ files: - build_base - build_python_common - build_python_cudf + - run_libcudf py_run_cudf: output: pyproject pyproject_dir: python/cudf @@ -94,6 +108,7 @@ files: - run_cudf - pyarrow_run - depends_on_cupy + - run_libcudf py_test_cudf: output: pyproject pyproject_dir: python/cudf @@ -336,6 +351,13 @@ dependencies: # Allow runtime version to float up to minor version # Disallow pyarrow 14.0.0 due to a CVE - pyarrow>=14.0.1,<15.0.0a0 + run_libcudf: + common: + # TODO: Currently this is a hack for devcontainers. Need to figure out the best solution. 
+ #- output_types: [requirements, pyproject] + - output_types: [pyproject] + packages: + - libcudf==24.6.* cuda_version: specific: - output_types: conda diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index ecadbf5cbbc..6c11c4c0fe0 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -24,72 +24,15 @@ project( LANGUAGES CXX CUDA ) -option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF -) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - -# Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -# If the user requested it we attempt to find CUDF. -if(FIND_CUDF_CPP) - include(rapids-cpm) - include(rapids-export) - include(rapids-find) - rapids_cpm_init() - - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. These variables have no effect because we are always searching for arrow via - # pyarrow, but they must be set as they are required arguments to the function in - # get_arrow.cmake. 
- set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cudf_FOUND OFF) -endif() +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) - -if(NOT cudf_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - set(CUDA_STATIC_RUNTIME ON) - - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous - # components like headers from libcudacxx, but we do need the libraries. However, we want to - # control where they are installed to. Since there are multiple subpackages of cudf._lib that - # require access to libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths appropriately. - set(cython_lib_dir cudf) - include(cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. We should only install the libraries that are - # downloaded by CPM during the build, not libraries that were found on the system. However, in - # practice right this would only be a problem is if libcudf was not found but some of the - # dependencies were, and we have no real use cases where that happens. 
- install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${cython_lib_dir} - ) -endif() - rapids_cython_init() include(cmake/Modules/LinkPyarrowHeaders.cmake) @@ -99,3 +42,18 @@ add_subdirectory(udf_cpp) if(DEFINED cython_lib_dir) rapids_cython_add_rpath_entries(TARGET cudf PATHS "${cython_lib_dir}") endif() + +# libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous components +# like headers from libcudacxx, but we do need the libraries. However, we want to control where they +# are installed to. Since there are multiple subpackages of cudf._lib that require access to +# libcudf, we place the library and all its dependent artifacts in the cudf directory as a single +# source of truth and modify the other rpaths appropriately. +include(cmake/Modules/WheelHelpers.cmake) +# TODO: This install is currently overzealous. We should only install the libraries that are +# downloaded by CPM during the build, not libraries that were found on the system. However, in +# practice right now this would only be a problem if libcudf was not found but some of the +# dependencies were, and we have no real use cases where that happens. +install_aliased_imported_targets( + TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp + DESTINATION ${cython_lib_dir} +) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index e14815a1b0d..8e10c0a2890 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. 
+try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + # _setup_numba _must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # Minor Version Compatibility. Setting it after importing numba.cuda has no effect. diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 07f334fdc12..1ed6e47f99f 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -68,6 +68,8 @@ target_link_libraries(strings_udf PUBLIC cudf_strings_udf) set(targets_using_arrow_headers interop avro csv orc json parquet) link_to_pyarrow_headers("${targets_using_arrow_headers}") +target_include_directories(interop PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>") + add_subdirectory(cpp) add_subdirectory(io) add_subdirectory(nvtext) diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index c2b7cb7ca3d..cb255c66978 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -44,5 +44,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) link_to_pyarrow_headers(pylibcudf_interop) +target_include_directories(pylibcudf_interop PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>") add_subdirectory(strings) diff --git a/python/cudf/cudf/utils/metadata/orc_column_statistics_pb2.py b/python/cudf/cudf/utils/metadata/orc_column_statistics_pb2.py new file mode 100644 index 00000000000..9f5d6f586c1 --- /dev/null +++ b/python/cudf/cudf/utils/metadata/orc_column_statistics_pb2.py @@ -0,0 +1,46 @@ +# fmt: off +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: cudf/utils/metadata/orc_column_statistics.proto +# Protobuf Python Version: 4.25.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n/cudf/utils/metadata/orc_column_statistics.proto\"B\n\x11IntegerStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\x12\x12\x0f\n\x07maximum\x18\x02 \x01(\x12\x12\x0b\n\x03sum\x18\x03 \x01(\x12\"A\n\x10\x44oubleStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\x01\x12\x0f\n\x07maximum\x18\x02 \x01(\x01\x12\x0b\n\x03sum\x18\x03 \x01(\x01\"A\n\x10StringStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\t\x12\x0f\n\x07maximum\x18\x02 \x01(\t\x12\x0b\n\x03sum\x18\x03 \x01(\x12\"%\n\x10\x42ucketStatistics\x12\x11\n\x05\x63ount\x18\x01 \x03(\x04\x42\x02\x10\x01\"B\n\x11\x44\x65\x63imalStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\t\x12\x0f\n\x07maximum\x18\x02 \x01(\t\x12\x0b\n\x03sum\x18\x03 \x01(\t\"2\n\x0e\x44\x61teStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\x11\x12\x0f\n\x07maximum\x18\x02 \x01(\x11\"_\n\x13TimestampStatistics\x12\x0f\n\x07minimum\x18\x01 \x01(\x12\x12\x0f\n\x07maximum\x18\x02 \x01(\x12\x12\x12\n\nminimumUtc\x18\x03 \x01(\x12\x12\x12\n\nmaximumUtc\x18\x04 \x01(\x12\"\x1f\n\x10\x42inaryStatistics\x12\x0b\n\x03sum\x18\x01 \x01(\x12\"\xa5\x03\n\x10\x43olumnStatistics\x12\x16\n\x0enumberOfValues\x18\x01 \x01(\x04\x12)\n\rintStatistics\x18\x02 \x01(\x0b\x32\x12.IntegerStatistics\x12+\n\x10\x64oubleStatistics\x18\x03 \x01(\x0b\x32\x11.DoubleStatistics\x12+\n\x10stringStatistics\x18\x04 \x01(\x0b\x32\x11.StringStatistics\x12+\n\x10\x62ucketStatistics\x18\x05 \x01(\x0b\x32\x11.BucketStatistics\x12-\n\x11\x64\x65\x63imalStatistics\x18\x06 
\x01(\x0b\x32\x12.DecimalStatistics\x12\'\n\x0e\x64\x61teStatistics\x18\x07 \x01(\x0b\x32\x0f.DateStatistics\x12+\n\x10\x62inaryStatistics\x18\x08 \x01(\x0b\x32\x11.BinaryStatistics\x12\x31\n\x13timestampStatistics\x18\t \x01(\x0b\x32\x14.TimestampStatistics\x12\x0f\n\x07hasNull\x18\n \x01(\x08') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'cudf.utils.metadata.orc_column_statistics_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _globals['_BUCKETSTATISTICS'].fields_by_name['count']._options = None + _globals['_BUCKETSTATISTICS'].fields_by_name['count']._serialized_options = b'\020\001' + _globals['_INTEGERSTATISTICS']._serialized_start=51 + _globals['_INTEGERSTATISTICS']._serialized_end=117 + _globals['_DOUBLESTATISTICS']._serialized_start=119 + _globals['_DOUBLESTATISTICS']._serialized_end=184 + _globals['_STRINGSTATISTICS']._serialized_start=186 + _globals['_STRINGSTATISTICS']._serialized_end=251 + _globals['_BUCKETSTATISTICS']._serialized_start=253 + _globals['_BUCKETSTATISTICS']._serialized_end=290 + _globals['_DECIMALSTATISTICS']._serialized_start=292 + _globals['_DECIMALSTATISTICS']._serialized_end=358 + _globals['_DATESTATISTICS']._serialized_start=360 + _globals['_DATESTATISTICS']._serialized_end=410 + _globals['_TIMESTAMPSTATISTICS']._serialized_start=412 + _globals['_TIMESTAMPSTATISTICS']._serialized_end=507 + _globals['_BINARYSTATISTICS']._serialized_start=509 + _globals['_BINARYSTATISTICS']._serialized_end=540 + _globals['_COLUMNSTATISTICS']._serialized_start=543 + _globals['_COLUMNSTATISTICS']._serialized_end=964 +# @@protoc_insertion_point(module_scope) +# fmt: on diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index fc3a243572f..9b659e3b04f 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -5,6 +5,7 @@ build-backend = "scikit_build_core.build" requires = [ 
"cmake>=3.26.4", "cython>=3.0.3", + "libcudf==24.6.*", "ninja", "numpy==1.23.*", "pyarrow==14.0.2.*", @@ -28,6 +29,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", + "libcudf==24.6.*", "numba>=0.57", "numpy>=1.23,<2.0a0", "nvtx>=0.2.1", diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt new file mode 100644 index 00000000000..5ccfd8f4181 --- /dev/null +++ b/python/libcudf/CMakeLists.txt @@ -0,0 +1,37 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libcudf-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +# For wheels, this should always be true +set(USE_LIBARROW_FROM_PYARROW ON) + +# Find Python early so that later commands can use it +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) + +set(BUILD_TESTS OFF) +set(BUILD_BENCHMARKS OFF) +set(CUDF_BUILD_TESTUTIL OFF) +set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) +set(CUDA_STATIC_RUNTIME ON) + +add_subdirectory(../../cpp cudf-cpp) diff --git a/python/libcudf/LICENSE b/python/libcudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/libcudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcudf/README.md b/python/libcudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/libcudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/libcudf/libcudf/VERSION b/python/libcudf/libcudf/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/libcudf/libcudf/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcudf/libcudf/__init__.py b/python/libcudf/libcudf/__init__.py new file mode 100644 index 00000000000..10c476cbe89 --- /dev/null +++ b/python/libcudf/libcudf/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcudf._version import __git_commit__, __version__ +from libcudf.load import load_library diff --git a/python/libcudf/libcudf/_version.py b/python/libcudf/libcudf/_version.py new file mode 100644 index 00000000000..da6b96c163e --- /dev/null +++ b/python/libcudf/libcudf/_version.py @@ -0,0 +1,23 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = ( + importlib.resources.files("libcudf") + .joinpath("VERSION") + .read_text() + .strip() +) +__git_commit__ = "" diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py new file mode 100644 index 00000000000..11bb1522942 --- /dev/null +++ b/python/libcudf/libcudf/load.py @@ -0,0 +1,53 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # Import pyarrow first: this is what loads the libarrow shared library + import pyarrow # noqa: F401 + + # Dynamically load libcudf.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + try: + libcudf_lib = ctypes.CDLL("libcudf.so", ctypes.RTLD_GLOBAL) + except OSError: + # Load the nvcomp libraries from lib first, so that libcudf has them when we load it from lib64 + # TODO: if we put both of these in the same folder, I don't think we'd need this. + for nvcomp_lib in ( + "libnvcomp.so", + "libnvcomp_gdeflate.so", + "libnvcomp_bitcomp.so", + ): + _loaded_lib = ctypes.CDLL( + # TODO: Do we always know nvcomp will be in lib? Should we consider + # finding a way for CMake to export the path for us to find here? + os.path.join(os.path.dirname(__file__), "lib", nvcomp_lib), + ctypes.RTLD_GLOBAL, + ) + libcudf_lib = ctypes.CDLL( + # TODO: Do we always know it will be lib64? Should we consider + # finding a way for CMake to export the path for us to find here? + os.path.join(os.path.dirname(__file__), "lib64", "libcudf.so"), + ctypes.RTLD_GLOBAL, + ) + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcudf was loaded from. 
+ return libcudf_lib diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml new file mode 100644 index 00000000000..b9b7ae40c08 --- /dev/null +++ b/python/libcudf/pyproject.toml @@ -0,0 +1,66 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "scikit_build_core.build" +requires = [ + "cmake>=3.26.4", + "cython>=3.0.3", + "ninja", + "numpy==1.23.*", + "protoc-wheel", + "pyarrow==14.0.2.*", + "rmm==24.6.*", + "scikit-build-core[pyproject]>=0.7.0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[project] +name = "libcudf" +dynamic = ["version"] +description = "cuDF - GPU Dataframe" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: C++", + "Environment :: GPU :: NVIDIA CUDA", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cudf" + +[project.entry-points."cmake.prefix"] +libcudf = "libcudf" + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.minimum-version = "3.26.4" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["libcudf"] +wheel.install-dir = "libcudf" +wheel.py-api = "py3" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "libcudf/VERSION" +regex = "(?P<value>.*)"