Skip to content

Commit

Permalink
Implement exposed null mask APIs in pylibcudf (#15908)
Browse files Browse the repository at this point in the history
Contributes to #15162

Authors:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15908
  • Loading branch information
charlesbluca authored Aug 30, 2024
1 parent 2d6758f commit c6c720f
Show file tree
Hide file tree
Showing 11 changed files with 252 additions and 86 deletions.
2 changes: 2 additions & 0 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def clean_all_xml_files(path):
"cudf.Series": ("cudf.core.series.Series", "cudf.Series"),
"cudf.Index": ("cudf.core.index.Index", "cudf.Index"),
"cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"),
"DeviceBuffer": ("rmm._lib.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"),
}


Expand Down Expand Up @@ -383,6 +384,7 @@ def _generate_namespaces(namespaces):
# Cython types that don't alias cleanly because of
# https://github.com/cython/cython/issues/5609
"size_type",
"size_t",
"type_id",
# Unknown base types
"int32_t",
Expand Down
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ This page provides API documentation for pylibcudf.
join
lists
merge
null_mask
quantiles
reduce
replace
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
null_mask
=========

.. automodule:: pylibcudf.null_mask
    :members:
103 changes: 19 additions & 84 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -1,39 +1,11 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from enum import Enum

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
import pylibcudf
from pylibcudf.null_mask import MaskState

from cudf.core.buffer import acquire_spill_lock, as_buffer

from libcpp.memory cimport make_unique, unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.null_mask cimport (
bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
bitmask_and as cpp_bitmask_and,
bitmask_or as cpp_bitmask_or,
copy_bitmask as cpp_copy_bitmask,
create_null_mask as cpp_create_null_mask,
underlying_type_t_mask_state,
)
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport mask_state, size_type

from cudf._lib.column cimport Column
from cudf._lib.utils cimport table_view_from_columns


class MaskState(Enum):
"""
Enum for null mask creation state
"""
UNALLOCATED = <underlying_type_t_mask_state> mask_state.UNALLOCATED
UNINITIALIZED = <underlying_type_t_mask_state> mask_state.UNINITIALIZED
ALL_VALID = <underlying_type_t_mask_state> mask_state.ALL_VALID
ALL_NULL = <underlying_type_t_mask_state> mask_state.ALL_NULL


@acquire_spill_lock()
Expand All @@ -45,33 +17,20 @@ def copy_bitmask(Column col):
if col.base_mask is None:
return None

cdef column_view col_view = col.view()
cdef device_buffer db
cdef unique_ptr[device_buffer] up_db

with nogil:
db = move(cpp_copy_bitmask(col_view))
up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
rmm_db = pylibcudf.null_mask.copy_bitmask(col.to_pylibcudf(mode="read"))
buf = as_buffer(rmm_db)
return buf


def bitmask_allocation_size_bytes(size_type num_bits):
def bitmask_allocation_size_bytes(num_bits):
"""
Given a size, calculates the number of bytes that should be allocated for a
column validity mask
"""
cdef size_t output_size

with nogil:
output_size = cpp_bitmask_allocation_size_bytes(num_bits)
return pylibcudf.null_mask.bitmask_allocation_size_bytes(num_bits)

return output_size


def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
def create_null_mask(size, state=MaskState.UNINITIALIZED):
"""
Given a size and a mask state, allocate a mask that can properly represent
the given size with the given mask state
Expand All @@ -83,48 +42,24 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
state : ``MaskState``, default ``MaskState.UNINITIALIZED``
State the null mask should be created in
"""
if not isinstance(state, MaskState):
raise TypeError(
"`state` is required to be of type `MaskState`, got "
+ (type(state).__name__)
)

cdef device_buffer db
cdef unique_ptr[device_buffer] up_db
cdef mask_state c_mask_state = <mask_state>(
<underlying_type_t_mask_state>(state.value)
)

with nogil:
db = move(cpp_create_null_mask(size, c_mask_state))
up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
rmm_db = pylibcudf.null_mask.create_null_mask(size, state)
buf = as_buffer(rmm_db)
return buf


@acquire_spill_lock()
def bitmask_and(columns: list):
cdef table_view c_view = table_view_from_columns(columns)
cdef pair[device_buffer, size_type] c_result
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_and(c_view))
up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
def bitmask_and(list columns):
rmm_db, other = pylibcudf.null_mask.bitmask_and(
[col.to_pylibcudf(mode="read") for col in columns]
)
buf = as_buffer(rmm_db)
return buf, other


@acquire_spill_lock()
def bitmask_or(columns: list):
cdef table_view c_view = table_view_from_columns(columns)
cdef pair[device_buffer, size_type] c_result
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_or(c_view))
up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
def bitmask_or(list columns):
rmm_db, other = pylibcudf.null_mask.bitmask_or(
[col.to_pylibcudf(mode="read") for col in columns]
)
buf = as_buffer(rmm_db)
return buf, other
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(cython_sources
join.pyx
lists.pyx
merge.pyx
null_mask.pyx
quantiles.pyx
reduce.pyx
replace.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ from . cimport (
join,
lists,
merge,
null_mask,
quantiles,
reduce,
replace,
Expand Down Expand Up @@ -57,6 +58,7 @@ __all__ = [
"join",
"lists",
"merge",
"null_mask",
"quantiles",
"reduce",
"replace",
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
join,
lists,
merge,
null_mask,
quantiles,
reduce,
replace,
Expand Down Expand Up @@ -69,6 +70,7 @@
"join",
"lists",
"merge",
"null_mask",
"quantiles",
"reduce",
"replace",
Expand Down
2 changes: 0 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type

from rmm._lib.device_buffer cimport device_buffer

ctypedef int32_t underlying_type_t_mask_state


cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
cdef device_buffer copy_bitmask "cudf::copy_bitmask" (
Expand Down
18 changes: 18 additions & 0 deletions python/pylibcudf/pylibcudf/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.libcudf.types cimport mask_state, size_type

from rmm._lib.device_buffer cimport DeviceBuffer

from .column cimport Column


# Copy ``col``'s bitmask into a new ``DeviceBuffer``.
cpdef DeviceBuffer copy_bitmask(Column col)

# Number of bytes required to represent ``number_of_bits`` bits.
cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits)

# ``state = *`` declares an optional argument; the default value itself
# (``mask_state.UNINITIALIZED``) is given in the implementation in
# null_mask.pyx.
cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *)

# Bitwise AND of the bitmasks of ``columns``; returns a tuple of the
# resulting mask and the count of unset bits.
cpdef tuple bitmask_and(list columns)

# Bitwise OR of the bitmasks of ``columns``; returns a tuple of the
# resulting mask and the count of unset bits.
cpdef tuple bitmask_or(list columns)
142 changes: 142 additions & 0 deletions python/pylibcudf/pylibcudf/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport make_unique
from libcpp.pair cimport pair
from libcpp.utility cimport move
from pylibcudf.libcudf cimport null_mask as cpp_null_mask
from pylibcudf.libcudf.types cimport mask_state, size_type

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint

from .column cimport Column
from .table cimport Table


cdef DeviceBuffer buffer_to_python(device_buffer buf):
    # Wrap a C++ ``device_buffer`` in a Python-visible ``DeviceBuffer``.
    # ``buf`` is moved into a heap allocation owned by a ``unique_ptr``,
    # and that pointer is handed to ``DeviceBuffer.c_from_unique_ptr``.
    return DeviceBuffer.c_from_unique_ptr(
        make_unique[device_buffer](move(buf))
    )


cpdef DeviceBuffer copy_bitmask(Column col):
    """Copy the bitmask of ``col`` into a ``DeviceBuffer``.

    For details, see :cpp:func:`copy_bitmask`.

    Parameters
    ----------
    col : Column
        The column whose bitmask is to be copied.

    Returns
    -------
    rmm.DeviceBuffer
        A ``DeviceBuffer`` holding ``col``'s bitmask, or an empty
        ``DeviceBuffer`` if ``col`` is not nullable.
    """
    cdef device_buffer mask_copy

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        mask_copy = move(cpp_null_mask.copy_bitmask(col.view()))

    return buffer_to_python(move(mask_copy))

cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits):
    """Return the byte count needed to hold ``number_of_bits`` bits.

    The result is the number of bytes necessary to represent the specified
    number of bits with a 64B padding boundary.

    For details, see :cpp:func:`bitmask_allocation_size_bytes`.

    Parameters
    ----------
    number_of_bits : size_type
        How many bits must be representable.

    Returns
    -------
    size_t
        The number of bytes required.
    """
    cdef size_t num_bytes

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        num_bytes = cpp_null_mask.bitmask_allocation_size_bytes(number_of_bits)

    return num_bytes


cpdef DeviceBuffer create_null_mask(
    size_type size,
    mask_state state = mask_state.UNINITIALIZED
):
    """Allocate a ``DeviceBuffer`` usable as the null mask of a ``Column``.

    For details, see :cpp:func:`create_null_mask`.

    Parameters
    ----------
    size : size_type
        Number of elements the mask must be able to represent.
    state : mask_state, optional
        Desired state of the mask; one of ``MaskState.UNALLOCATED``,
        ``MaskState.UNINITIALIZED``, ``MaskState.ALL_VALID`` or
        ``MaskState.ALL_NULL``. Defaults to ``MaskState.UNINITIALIZED``.

    Returns
    -------
    rmm.DeviceBuffer
        A ``DeviceBuffer`` for use as a null bitmask that satisfies the
        requested size and state.
    """
    cdef device_buffer new_mask

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        new_mask = move(cpp_null_mask.create_null_mask(size, state))

    return buffer_to_python(move(new_mask))


cpdef tuple bitmask_and(list columns):
    """Compute the bitwise AND of the bitmasks of several columns.

    For details, see :cpp:func:`bitmask_and`.

    Parameters
    ----------
    columns : list
        The columns whose bitmasks are combined.

    Returns
    -------
    tuple[DeviceBuffer, size_type]
        The resulting mask together with the count of unset bits.
    """
    cdef pair[device_buffer, size_type] and_result
    cdef Table tbl = Table(columns)

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        and_result = move(cpp_null_mask.bitmask_and(tbl.view()))

    # Wrap the mask first, then pair it with the count from the same result.
    mask = buffer_to_python(move(and_result.first))
    return mask, and_result.second


cpdef tuple bitmask_or(list columns):
    """Compute the bitwise OR of the bitmasks of several columns.

    For details, see :cpp:func:`bitmask_or`.

    Parameters
    ----------
    columns : list
        The columns whose bitmasks are combined.

    Returns
    -------
    tuple[DeviceBuffer, size_type]
        The resulting mask together with the count of unset bits.
    """
    cdef pair[device_buffer, size_type] or_result
    cdef Table tbl = Table(columns)

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        or_result = move(cpp_null_mask.bitmask_or(tbl.view()))

    # Wrap the mask first, then pair it with the count from the same result.
    mask = buffer_to_python(move(or_result.first))
    return mask, or_result.second
Loading

0 comments on commit c6c720f

Please sign in to comment.