From 002305bfcbde9816ddf3eb5fdde910e065fa8d02 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 17 Apr 2024 14:36:24 -0400 Subject: [PATCH 01/18] Add cudf extension --- src/awkward_pandas/ak_from_cudf.py | 681 +++++++++++++++++++++++++++++ src/awkward_pandas/cudf.py | 92 ++++ 2 files changed, 773 insertions(+) create mode 100644 src/awkward_pandas/ak_from_cudf.py create mode 100644 src/awkward_pandas/cudf.py diff --git a/src/awkward_pandas/ak_from_cudf.py b/src/awkward_pandas/ak_from_cudf.py new file mode 100644 index 0000000..a139a6c --- /dev/null +++ b/src/awkward_pandas/ak_from_cudf.py @@ -0,0 +1,681 @@ +import cudf +import pyarrow +import cupy +import numpy + +import awkward as ak +from awkward._backends.numpy import NumpyBackend +from awkward._backends.cupy import CupyBackend + + +# COPIED from awkward/studies/cudf-to-awkward.py + +######################### stripped-down copy of src/awkward/_connect/pyarrow.py + + +_string_like = ( + pyarrow.string(), + pyarrow.large_string(), + pyarrow.binary(), + pyarrow.large_binary(), +) + +_pyarrow_to_numpy_dtype = { + pyarrow.date32(): (True, numpy.dtype("M8[D]")), + pyarrow.date64(): (False, numpy.dtype("M8[ms]")), + pyarrow.time32("s"): (True, numpy.dtype("M8[s]")), + pyarrow.time32("ms"): (True, numpy.dtype("M8[ms]")), + pyarrow.time64("us"): (False, numpy.dtype("M8[us]")), + pyarrow.time64("ns"): (False, numpy.dtype("M8[ns]")), + pyarrow.timestamp("s"): (False, numpy.dtype("M8[s]")), + pyarrow.timestamp("ms"): (False, numpy.dtype("M8[ms]")), + pyarrow.timestamp("us"): (False, numpy.dtype("M8[us]")), + pyarrow.timestamp("ns"): (False, numpy.dtype("M8[ns]")), + pyarrow.duration("s"): (False, numpy.dtype("m8[s]")), + pyarrow.duration("ms"): (False, numpy.dtype("m8[ms]")), + pyarrow.duration("us"): (False, numpy.dtype("m8[us]")), + pyarrow.duration("ns"): (False, numpy.dtype("m8[ns]")), +} + + +def revertable(modified, original): + modified.__pyarrow_original = original + return modified + + +def remove_optiontype(akarray): + return akarray.__pyarrow_original + + +def popbuffers_finalize(out, array, validbits, generate_bitmasks, fix_offsets=True): + # Every buffer from Arrow must be offsets-corrected. + if fix_offsets and (array.offset != 0 or len(array) != len(out)): + out = out[array.offset : array.offset + len(array)] + + # Everything must leave popbuffers as option-type; the mask_node will be + # removed by the next level up in popbuffers recursion if appropriate. + + if validbits is None and generate_bitmasks: + # ceildiv(len(out), 8) = -(len(out) // -8) + validbits = numpy.full(-(len(out) // -8), numpy.uint8(0xFF), dtype=numpy.uint8) + + if validbits is None: + return revertable(ak.contents.UnmaskedArray.simplified(out), out) + else: + return revertable( + ak.contents.BitMaskedArray.simplified( + ak.index.IndexU8(numpy.frombuffer(validbits, dtype=numpy.uint8)), + out, + valid_when=True, + length=len(out), + lsb_order=True, + ), + out, + ) + + +def popbuffers(paarray, arrow_type, buffers, generate_bitmasks): + ### Beginning of the big if-elif-elif chain! + + if isinstance(arrow_type, pyarrow.lib.DictionaryType): + masked_index = popbuffers( + paarray.indices, + arrow_type.index_type, + buffers, + generate_bitmasks, + ) + index = masked_index.content.data + + if not isinstance(masked_index, ak.contents.UnmaskedArray): + mask = masked_index.mask_as_bool(valid_when=False) + if mask.any(): + index = numpy.asarray(index, copy=True) + index[mask] = -1 + + content = handle_arrow(paarray.dictionary, generate_bitmasks) + + parameters = {"__array__": "categorical"} + + return revertable( + ak.contents.IndexedOptionArray.simplified( + ak.index.Index(index), + content, + parameters=parameters, + ), + ak.contents.IndexedArray( + ak.index.Index(index), + remove_optiontype(content) if content.is_option else content, + parameters=parameters, + ), + ) + + elif isinstance(arrow_type, pyarrow.lib.FixedSizeListType): + assert arrow_type.num_buffers == 1 + validbits = buffers.pop(0) + + akcontent = popbuffers( + paarray.values, arrow_type.value_type, buffers, generate_bitmasks + ) + + if not arrow_type.value_field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + + out = ak.contents.RegularArray(akcontent, arrow_type.list_size, parameters=None) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + elif isinstance(arrow_type, (pyarrow.lib.LargeListType, pyarrow.lib.ListType)): + assert arrow_type.num_buffers == 2 + validbits = buffers.pop(0) + paoffsets = buffers.pop(0) + + if isinstance(arrow_type, pyarrow.lib.LargeListType): + akoffsets = ak.index.Index64(numpy.frombuffer(paoffsets, dtype=numpy.int64)) + else: + akoffsets = ak.index.Index32(numpy.frombuffer(paoffsets, dtype=numpy.int32)) + + akcontent = popbuffers( + paarray.values, arrow_type.value_type, buffers, generate_bitmasks + ) + + if not arrow_type.value_field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + + out = ak.contents.ListOffsetArray(akoffsets, akcontent, parameters=None) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.MapType): + # FIXME: make a ListOffsetArray of 2-tuples with __array__ == "sorted_map". + # (Make sure the keys are sorted). + raise NotImplementedError + + elif isinstance( + arrow_type, (pyarrow.lib.Decimal128Type, pyarrow.lib.Decimal256Type) + ): + # Note: Decimal128Type and Decimal256Type are subtypes of FixedSizeBinaryType. + # NumPy doesn't support decimal: https://github.com/numpy/numpy/issues/9789 + raise ValueError( + "Arrow arrays containing pyarrow.decimal128 or pyarrow.decimal256 types can't be converted into Awkward Arrays" + ) + + elif isinstance(arrow_type, pyarrow.lib.FixedSizeBinaryType): + assert arrow_type.num_buffers == 2 + validbits = buffers.pop(0) + pacontent = buffers.pop(0) + + parameters = {"__array__": "bytestring"} + sub_parameters = {"__array__": "byte"} + + out = ak.contents.RegularArray( + ak.contents.NumpyArray( + numpy.frombuffer(pacontent, dtype=numpy.uint8), + parameters=sub_parameters, + backend=NumpyBackend.instance(), + ), + arrow_type.byte_width, + parameters=parameters, + ) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + elif arrow_type in _string_like: + assert arrow_type.num_buffers == 3 + validbits = buffers.pop(0) + paoffsets = buffers.pop(0) + pacontent = buffers.pop(0) + + if arrow_type in _string_like[::2]: + akoffsets = ak.index.Index32(numpy.frombuffer(paoffsets, dtype=numpy.int32)) + else: + akoffsets = ak.index.Index64(numpy.frombuffer(paoffsets, dtype=numpy.int64)) + + if arrow_type in _string_like[:2]: + parameters = {"__array__": "string"} + sub_parameters = {"__array__": "char"} + else: + parameters = {"__array__": "bytestring"} + sub_parameters = {"__array__": "byte"} + + out = ak.contents.ListOffsetArray( + akoffsets, + ak.contents.NumpyArray( + numpy.frombuffer(pacontent, dtype=numpy.uint8), + parameters=sub_parameters, + backend=NumpyBackend.instance(), + ), + parameters=parameters, + ) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.StructType): + assert arrow_type.num_buffers == 1 + validbits = buffers.pop(0) + + keys = [] + contents = [] + for i in range(arrow_type.num_fields): + field = arrow_type[i] + field_name = field.name + keys.append(field_name) + + akcontent = popbuffers( + paarray.field(field_name), field.type, buffers, generate_bitmasks + ) + if not field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + contents.append(akcontent) + + out = ak.contents.RecordArray( + contents, keys, length=len(paarray), parameters=None + ) + return popbuffers_finalize( + out, paarray, validbits, generate_bitmasks, fix_offsets=False + ) + + elif isinstance(arrow_type, pyarrow.lib.UnionType): + if isinstance(arrow_type, pyarrow.lib.SparseUnionType): + assert arrow_type.num_buffers == 2 + validbits = buffers.pop(0) + nptags = numpy.frombuffer(buffers.pop(0), dtype=numpy.int8) + npindex = numpy.arange(len(nptags), dtype=numpy.int32) + else: + assert arrow_type.num_buffers == 3 + validbits = buffers.pop(0) + nptags = numpy.frombuffer(buffers.pop(0), dtype=numpy.int8) + npindex = numpy.frombuffer(buffers.pop(0), dtype=numpy.int32) + + akcontents = [] + for i in range(arrow_type.num_fields): + field = arrow_type[i] + akcontent = popbuffers( + paarray.field(i), field.type, buffers, generate_bitmasks + ) + + if not field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + akcontents.append(akcontent) + + out = ak.contents.UnionArray.simplified( + ak.index.Index8(nptags), + ak.index.Index32(npindex), + akcontents, + parameters=None, + ) + return popbuffers_finalize(out, paarray, None, generate_bitmasks) + + elif arrow_type == pyarrow.null(): + validbits = buffers.pop(0) + assert arrow_type.num_fields == 0 + + # This is already an option-type and offsets-corrected, so no popbuffers_finalize. + return ak.contents.IndexedOptionArray( + ak.index.Index64(numpy.full(len(paarray), -1, dtype=numpy.int64)), + ak.contents.EmptyArray(parameters=None), + parameters=None, + ) + + elif arrow_type == pyarrow.bool_(): + assert arrow_type.num_buffers == 2 + validbits = buffers.pop(0) + bitdata = buffers.pop(0) + + bytedata = numpy.unpackbits( + numpy.frombuffer(bitdata, dtype=numpy.uint8), bitorder="little" + ) + + out = ak.contents.NumpyArray( + bytedata.view(numpy.bool_), + parameters=None, + backend=NumpyBackend.instance(), + ) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.DataType): + assert arrow_type.num_buffers == 2 + validbits = buffers.pop(0) + data = buffers.pop(0) + + to64, dt = _pyarrow_to_numpy_dtype.get(str(arrow_type), (False, None)) + if to64: + data = numpy.astype( + numpy.frombuffer(data, dtype=numpy.int32), dtype=numpy.int64 + ) + if dt is None: + dt = arrow_type.to_pandas_dtype() + + out = ak.contents.NumpyArray( + numpy.frombuffer(data, dtype=dt), + parameters=None, + backend=NumpyBackend.instance(), + ) + return popbuffers_finalize(out, paarray, validbits, generate_bitmasks) + + else: + raise TypeError(f"unrecognized Arrow array type: {arrow_type!r}") + + +def handle_arrow(obj, generate_bitmasks): + buffers = obj.buffers() + out = popbuffers(obj, obj.type, buffers, generate_bitmasks) + assert len(buffers) == 0 + return out + + +def pyarrow_to_awkward( + pyarrow_array: pyarrow.lib.Array, + generate_bitmasks=False, + highlevel=True, + behavior=None, + attrs=None, +): + ctx = ak._layout.HighLevelContext(behavior=behavior, attrs=attrs).finalize() + + out = handle_arrow(pyarrow_array, generate_bitmasks) + if isinstance(out, ak.contents.UnmaskedArray): + out = remove_optiontype(out) + + def remove_revertable(layout, **kwargs): + if hasattr(layout, "__pyarrow_original"): + del layout.__pyarrow_original + + ak._do.recursively_apply(out, remove_revertable) + + return ctx.wrap(out, highlevel=highlevel) + + +######################### equivalent for CuDF + + +def recurse_finalize( + out: ak.contents.Content, + column: cudf.core.column.column.ColumnBase, + validbits: None | cudf.core.buffer.buffer.Buffer, + generate_bitmasks: bool, + fix_offsets: bool = True, +): + # Every buffer from Arrow must be offsets-corrected. + if fix_offsets and (column.offset != 0 or len(column) != len(out)): + out = out[column.offset : column.offset + len(column)] + + if validbits is None: + return revertable(ak.contents.UnmaskedArray.simplified(out), out) + else: + return revertable( + ak.contents.BitMaskedArray.simplified( + ak.index.IndexU8(cupy.asarray(validbits)), + out, + valid_when=True, + length=len(out), + lsb_order=True, + ), + out, + ) + + +def recurse( + column: cudf.core.column.column.ColumnBase, + arrow_type: pyarrow.lib.DataType, + generate_bitmasks: bool, +): + if isinstance(column, cudf.core.column.CategoricalColumn): + validbits = column.base_mask + + paindex = column.base_children[-1] + masked_index = recurse(paindex, arrow_type_of(paindex), generate_bitmasks) + index = masked_index.content.data + + if not isinstance(masked_index, ak.contents.UnmaskedArray): + mask = masked_index.mask_as_bool(valid_when=False) + if mask.any(): + index = cupy.asarray(index, copy=True) + index[mask] = -1 + + pacats = column.categories + content = recurse(pacats, arrow_type_of(pacats), generate_bitmasks) + + if index.dtype == cupy.dtype(cupy.int64): + akindex1 = ak.index.Index64(index) + akindex2 = akindex1 + elif index.dtype == cupy.dtype(cupy.uint32): + akindex1 = ak.index.Index64(index.astype(cupy.int64)) + akindex2 = ak.index.IndexU32(index) + elif index.dtype == cupy.dtype(cupy.int32): + akindex1 = ak.index.Index32(index) + akindex2 = akindex1 + else: + akindex1 = ak.index.Index64(index.astype(cupy.int64)) + akindex2 = akindex1 + + return revertable( + ak.contents.IndexedOptionArray.simplified( + akindex1, + content, + parameters={"__array__": "categorical"}, + ), + ak.contents.IndexedArray( + akindex2, + remove_optiontype(content) if content.is_option else content, + parameters={"__array__": "categorical"}, + ), + ) + + elif isinstance(arrow_type, pyarrow.lib.FixedSizeListType): + validbits = column.base_mask + + akcontent = recurse( + column.base_children[-1], arrow_type.value_type, generate_bitmasks + ) + + if not arrow_type.value_field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + + out = ak.contents.RegularArray(akcontent, arrow_type.list_size, parameters=None) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif isinstance(arrow_type, (pyarrow.lib.LargeListType, pyarrow.lib.ListType)): + validbits = column.base_mask + paoffsets = column.offsets.base_data + + if isinstance(arrow_type, pyarrow.lib.LargeListType): + akoffsets = ak.index.Index64(cupy.asarray(paoffsets).view(cupy.int64)) + else: + akoffsets = ak.index.Index32(cupy.asarray(paoffsets).view(cupy.int32)) + + akcontent = recurse( + column.base_children[-1], arrow_type.value_type, generate_bitmasks + ) + + if not arrow_type.value_field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + + out = ak.contents.ListOffsetArray(akoffsets, akcontent, parameters=None) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.MapType): + # FIXME: make a ListOffsetArray of 2-tuples with __array__ == "sorted_map". + # (Make sure the keys are sorted). + raise NotImplementedError + + elif isinstance( + arrow_type, (pyarrow.lib.Decimal128Type, pyarrow.lib.Decimal256Type) + ): + # Note: Decimal128Type and Decimal256Type are subtypes of FixedSizeBinaryType. + # NumPy doesn't support decimal: https://github.com/numpy/numpy/issues/9789 + raise ValueError( + "Arrow arrays containing pyarrow.decimal128 or pyarrow.decimal256 types can't be converted into Awkward Arrays" + ) + + elif isinstance(arrow_type, pyarrow.lib.FixedSizeBinaryType): + validbits = column.base_mask + pacontent = column.base_data + + parameters = {"__array__": "bytestring"} + sub_parameters = {"__array__": "byte"} + + out = ak.contents.RegularArray( + ak.contents.NumpyArray( + cupy.asarray(pacontent), + parameters=sub_parameters, + backend=CupyBackend.instance(), + ), + arrow_type.byte_width, + parameters=parameters, + ) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif arrow_type in _string_like: + validbits = column.base_mask + + paoffsets = column.base_children[-1] + pacontent = column.base_data + + if arrow_type in _string_like[::2]: + akoffsets = ak.index.Index32(cupy.asarray(paoffsets).view(cupy.int32)) + else: + akoffsets = ak.index.Index64(cupy.asarray(paoffsets).view(cupy.int64)) + + if arrow_type in _string_like[:2]: + parameters = {"__array__": "string"} + sub_parameters = {"__array__": "char"} + else: + parameters = {"__array__": "bytestring"} + sub_parameters = {"__array__": "byte"} + + out = ak.contents.ListOffsetArray( + akoffsets, + ak.contents.NumpyArray( + cupy.asarray(pacontent), + parameters=sub_parameters, + backend=CupyBackend.instance(), + ), + parameters=parameters, + ) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.StructType): + validbits = column.base_mask + + keys = [] + contents = [] + for i in range(arrow_type.num_fields): + field = arrow_type[i] + field_name = field.name + keys.append(field_name) + + akcontent = recurse(column.base_children[i], field.type, generate_bitmasks) + if not field.nullable: + # strip the dummy option-type node + akcontent = remove_optiontype(akcontent) + contents.append(akcontent) + + out = ak.contents.RecordArray( + contents, keys, length=len(column), parameters=None + ) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.UnionType): + raise NotImplementedError + + elif arrow_type == pyarrow.null(): + validbits = column.base_mask + + # This is already an option-type and offsets-corrected, so no popbuffers_finalize. + return ak.contents.IndexedOptionArray( + ak.index.Index64(cupy.full(len(column), -1, dtype=cupy.int64)), + ak.contents.EmptyArray(parameters=None), + parameters=None, + ) + + elif arrow_type == pyarrow.bool_(): + validbits = column.base_mask + + ## boolean data from CuDF differs from Arrow: it's represented as bytes, not bits! + # bitdata = column.base_data + # bytedata = cupy.unpackbits(cupy.asarray(bitdata), bitorder="little") + bytedata = cupy.asarray(column.base_data) + + out = ak.contents.NumpyArray( + cupy.asarray(bytedata).view(cupy.bool_), + parameters=None, + backend=CupyBackend.instance(), + ) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + elif isinstance(arrow_type, pyarrow.lib.DataType): + validbits = column.base_mask + + to64, dt = _pyarrow_to_numpy_dtype.get(str(arrow_type), (False, None)) + if to64: + data = cupy.asarray(data).view(cupy.int32).astype(cupy.int64) + if dt is None: + dt = arrow_type.to_pandas_dtype() + + out = ak.contents.NumpyArray( + cupy.asarray(column.base_data).view(dt), + parameters=None, + backend=CupyBackend.instance(), + ) + return recurse_finalize(out, column, validbits, generate_bitmasks) + + else: + raise TypeError(f"unrecognized Arrow array type: {arrow_type!r}") + + +def arrow_type_of(column): + dtype = column.dtype + + if isinstance(column, cudf.core.column.StringColumn): + return pyarrow.string() + + elif isinstance(column, cudf.core.column.CategoricalColumn): + return None # deal with it in `recurse` for nesting-generality + + elif isinstance(dtype, numpy.dtype): + if dtype == numpy.dtype(object): + raise TypeError("Python object type encountered in CuDF Series") + else: + return pyarrow.from_numpy_dtype(dtype) + + else: + return dtype.to_arrow() + + +def handle_cudf(cudf_series: cudf.core.series.Series, generate_bitmasks): + column = cudf_series._data[cudf_series.name] + return recurse(column, arrow_type_of(column), generate_bitmasks) + + +def cudf_to_awkward( + cudf_series: cudf.core.series.Series, + generate_bitmasks=False, + highlevel=True, + behavior=None, + attrs=None, +): + ctx = ak._layout.HighLevelContext(behavior=behavior, attrs=attrs).finalize() + + out = handle_cudf(cudf_series, generate_bitmasks) + if isinstance(out, ak.contents.UnmaskedArray): + out = remove_optiontype(out) + + def remove_revertable(layout, **kwargs): + if hasattr(layout, "__pyarrow_original"): + del layout.__pyarrow_original + + ak._do.recursively_apply(out, remove_revertable) + + return ctx.wrap(out, highlevel=highlevel) + + +######################### testing + + +if __name__ == "__main__": + # tests numerics, lists, records, and option-type, but not union-type + examples = [ + [False, True, True], # booleans are special (1-bit) + [1.1, 2.2, 3.3], + [[False, True, True], [], [True, False]], + [[1, 2, 3], [], [4, 5]], + [[[1, 2], [3]], [], [[]], [[4], [], [5, 6, 7]], [[8, 9]]], + [{"x": 1}, {"x": 2}, {"x": 3}], + [{"x": 1.1, "y": []}, {"x": 2.2, "y": [1]}, {"x": 3.3, "y": [1, 2]}], + [[{"x": 1}, {"x": 2}, {"x": 3}], [], [{"x": 4}, {"x": 5}]], + ["This", "is", "a", "string", "array", ".", ""], + [["This", "is", "a"], ["nested"], ["string", "array", ".", ""]], + [None, None, None, None, None], + [False, True, None, True], + [1.1, 2.2, None, 3.3], + [[False, True, None, True], [], [True, False]], + [[False, True, True], None, [], [True, False]], + [[1, 2, None, 3], [], [4, 5]], + [[1, 2, 3], None, [], [4, 5]], + [[[1, 2, None], [3]], [], [[]], [[4], [], [5, 6, 7]], [[8, 9]]], + [[[1, 2], None, [3]], [], [[]], [[4], [], [5, 6, 7]], [[8, 9]]], + [[[1, 2], [3]], None, [], [[]], [[4], [], [5, 6, 7]], [[8, 9]]], + [{"x": 1}, {"x": None}, {"x": 3}], + [{"x": 1}, {"x": 2}, None, {"x": 3}], + [{"x": 1.1, "y": []}, {"x": None, "y": [1]}, {"x": 3.3, "y": [1, 2]}], + [{"x": 1.1, "y": []}, {"x": 2.2, "y": [1, None]}, {"x": 3.3, "y": [1, 2]}], + [{"x": 1.1, "y": []}, {"x": 2.2, "y": [1]}, None, {"x": 3.3, "y": [1, 2]}], + [[{"x": 1}, {"x": None}, {"x": 3}], [], [{"x": 4}, {"x": 5}]], + [[{"x": 1}, {"x": 2}, None, {"x": 3}], [], [{"x": 4}, {"x": 5}]], + [[{"x": 1}, {"x": 2}, {"x": 3}], None, [], [{"x": 4}, {"x": 5}]], + ["This", "is", "a", None, "string", "array", ".", ""], + [["This", "is", "a", None], ["nested"], ["string", "array", ".", ""]], + [["This", "is", "a"], None, ["nested"], ["string", "array", ".", ""]], + numpy.array(["2024-01-01", "2024-01-02"], dtype="datetime64[s]"), + numpy.array([1, 2, 3], dtype="timedelta64[s]"), + ] + + for example in examples: + print(f"---- {example}") + df = cudf.DataFrame({"column": example}) + + awkward_array = cudf_to_awkward(df["column"]) + assert ak.backend(awkward_array) == "cuda" + assert awkward_array.tolist() == list(example), awkward_array.show(type=True) diff --git a/src/awkward_pandas/cudf.py b/src/awkward_pandas/cudf.py new file mode 100644 index 0000000..48f067b --- /dev/null +++ b/src/awkward_pandas/cudf.py @@ -0,0 +1,92 @@ +import functools + +from cudf.core.series import Series +from cudf import DataFrame +import awkward as ak + +from awkward_pandas.mixin import ArithmeticMixin +from awkward_pandas.ak_from_cudf import cudf_to_awkward as from_cudf +from typing import Callable, Iterable + + +class AwkwardAccessor(ArithmeticMixin): + + def __init__(self, series: Series): + self.array = from_cudf(series) + + def __array_function__(self, *args, **kwargs): + return self.array.__array_function__(*args, **kwargs) + + def __array_ufunc__(self, *args, **kwargs): + if args[1] == "__call__": + return args[0](self.array, *args[3:], **kwargs) + raise NotImplementedError + + def __dir__(self) -> Iterable[str]: + return [ + _ + for _ in (dir(ak)) + if not _.startswith(("_", "ak_")) and not _[0].isupper() + ] + ["apply", "array"] + + def apply(self, fn: Callable) -> Series: + """Perform function on all the values of the series""" + out = fn(self.array) + return ak.to_cudf(out) + + def __getitem__(self, item): + # scalars? + out = self.array.__getitem__(item) + result = ak.to_cudf(out) + return result + + def __getattr__(self, item): + if item not in dir(self): + raise AttributeError + func = getattr(ak, item, None) + + if func: + + @functools.wraps(func) + def f(*others, **kwargs): + others = [ + other.ak.array + if isinstance(other, (DataFrame, Series)) + else other + for other in others + ] + kwargs = { + k: v.ak.array if isinstance(v, (DataFrame, Series)) else v + for k, v in kwargs.items() + } + + ak_arr = func(self.array, *others, **kwargs) + if isinstance(ak_arr, ak.Array): + return ak.to_cudf(ak_arr) + return ak_arr + + else: + raise AttributeError(item) + return f + + @classmethod + def _create_op(cls, op): + def run(self, *args, **kwargs): + return ak.to_cudf(op(self.array, *args, **kwargs)) + + return run + + _create_arithmetic_method = _create_op + _create_comparison_method = _create_op + _create_logical_method = _create_op + + +AwkwardAccessor._add_all() + + +@property # type:ignore +def ak_property(self): + return AwkwardAccessor(self) + + +Series.ak = ak_property # no official register function? From 63934e768bc65cd2b235f71a2911aa7ccf1b684b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 17 Apr 2024 15:17:59 -0400 Subject: [PATCH 02/18] maybe convert to cudf --- src/awkward_pandas/cudf.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/awkward_pandas/cudf.py b/src/awkward_pandas/cudf.py index 48f067b..68b733d 100644 --- a/src/awkward_pandas/cudf.py +++ b/src/awkward_pandas/cudf.py @@ -32,13 +32,12 @@ def __dir__(self) -> Iterable[str]: def apply(self, fn: Callable) -> Series: """Perform function on all the values of the series""" out = fn(self.array) - return ak.to_cudf(out) + return maybe_to_cudf(out) def __getitem__(self, item): # scalars? out = self.array.__getitem__(item) - result = ak.to_cudf(out) - return result + return maybe_to_cudf(out) def __getattr__(self, item): if item not in dir(self): @@ -61,9 +60,7 @@ def f(*others, **kwargs): } ak_arr = func(self.array, *others, **kwargs) - if isinstance(ak_arr, ak.Array): - return ak.to_cudf(ak_arr) - return ak_arr + return maybe_to_cudf(ak_arr) else: raise AttributeError(item) @@ -72,7 +69,7 @@ def f(*others, **kwargs): @classmethod def _create_op(cls, op): def run(self, *args, **kwargs): - return ak.to_cudf(op(self.array, *args, **kwargs)) + return maybe_to_cudf(op(self.array, *args, **kwargs)) return run @@ -81,6 +78,13 @@ def run(self, *args, **kwargs): _create_logical_method = _create_op +def maybe_to_cudf(x): + if isinstance(x, ak.Array): + return ak.to_cudf(x) + return x + + + AwkwardAccessor._add_all() From c6af32366aa1ca7f18fb8c2ab50bdc9626d7ff6b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 26 Apr 2024 13:16:53 -0400 Subject: [PATCH 03/18] Add example --- .gitignore | 1 + example/cudf-ak.ipynb | 506 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 507 insertions(+) create mode 100644 example/cudf-ak.ipynb diff --git a/.gitignore b/.gitignore index c4c2742..1f7574d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .dir-locals.el +.idea # setuptools_scm src/awkward_pandas/version.py diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb new file mode 100644 index 0000000..df19bd9 --- /dev/null +++ b/example/cudf-ak.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cefd8e53-a56f-4b0c-88d2-d662d59849a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('2.6.3', '2023.8.1.dev25+g5dd12c5.d20240417')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import awkward as ak\n", + "ak.numba.register_and_check()\n", + "import cupy as cp\n", + "import cudf\n", + "import numpy as np\n", + "import awkward_pandas.cudf\n", + "import subprocess\n", + "\n", + "def gpu_mem():\n", + " print(subprocess.check_output(\"nvidia-smi | grep py\", shell=True).split()[-2].decode())\n", + "\n", + "ak.__version__, awkward_pandas.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "188MiB\n" + ] + } + ], + "source": [ + "df = cudf.read_parquet(\"/floppy/code/awkward/s.parquet\")\n", + "gpu_mem()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e29ff9a4-60e4-4260-9a44-c135ad6d7d6b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a list\n", + "dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "58d16a80-041e-4260-8c56-9de932dde557", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a [[1, 2, 3], [], [4, 5]]\n", + "Name: 0, dtype: list" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0] # each element is list-of-lists" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "abfe0ab6-5a89-4885-b654-c84804a4aea4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# series accessor\n", + "df.a.ak" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['all',\n", + " 'almost_equal',\n", + " 'angle',\n", + " 'annotations',\n", + " 'any',\n", + " 'apply',\n", + " 'argcartesian',\n", + " 'argcombinations',\n", + " 'argmax',\n", + " 'argmin',\n", + " 'argsort',\n", + " 'array',\n", + " 'awkward',\n", + " 'backend',\n", + " 'behavior',\n", + " 'behaviors',\n", + " 'broadcast_arrays',\n", + " 'broadcast_fields',\n", + " 'builder',\n", + " 'cartesian',\n", + " 'categories',\n", + " 'combinations',\n", + " 'concatenate',\n", + " 'contents',\n", + " 'copy',\n", + " 'corr',\n", + " 'count',\n", + " 'count_nonzero',\n", + " 'covar',\n", + " 'cppyy',\n", + " 'drop_none',\n", + " 'enforce_type',\n", + " 'errors',\n", + " 'fields',\n", + " 'fill_none',\n", + " 'firsts',\n", + " 'flatten',\n", + " 'forms',\n", + " 'forth',\n", + " 'from_arrow',\n", + " 'from_arrow_schema',\n", + " 'from_avro_file',\n", + " 'from_buffers',\n", + " 'from_categorical',\n", + " 'from_cupy',\n", + " 'from_dlpack',\n", + " 'from_feather',\n", + " 'from_iter',\n", + " 'from_jax',\n", + " 'from_json',\n", + " 'from_numpy',\n", + " 'from_parquet',\n", + " 'from_rdataframe',\n", + " 'from_regular',\n", + " 'full_like',\n", + " 'highlevel',\n", + " 'imag',\n", + " 'index',\n", + " 'is_categorical',\n", + " 'is_none',\n", + " 'is_tuple',\n", + " 'is_valid',\n", + " 'isclose',\n", + " 'jax',\n", + " 'linear_fit',\n", + " 'local_index',\n", + " 'mask',\n", + " 'max',\n", + " 'mean',\n", + " 'merge_option_of_records',\n", + " 'merge_union_of_records',\n", + " 'metadata_from_parquet',\n", + " 'min',\n", + " 'mixin_class',\n", + " 'mixin_class_method',\n", + " 'moment',\n", + " 'nan_to_none',\n", + " 'nan_to_num',\n", + " 'nanargmax',\n", + " 'nanargmin',\n", + " 'nanmax',\n", + " 'nanmean',\n", + " 'nanmin',\n", + " 'nanprod',\n", + " 'nanstd',\n", + " 'nansum',\n", + " 'nanvar',\n", + " 'num',\n", + " 'numba',\n", + " 'ones_like',\n", + " 'operations',\n", + " 'pad_none',\n", + " 'parameters',\n", + " 'prod',\n", + " 'ptp',\n", + " 'ravel',\n", + " 'real',\n", + " 'record',\n", + " 'round',\n", + " 'run_lengths',\n", + " 'singletons',\n", + " 'softmax',\n", + " 'sort',\n", + " 'std',\n", + " 'str',\n", + " 'strings_astype',\n", + " 'sum',\n", + " 'to_arrow',\n", + " 'to_arrow_table',\n", + " 'to_backend',\n", + " 'to_buffers',\n", + " 'to_cudf',\n", + " 'to_cupy',\n", + " 'to_dataframe',\n", + " 'to_feather',\n", + " 'to_jax',\n", + " 'to_json',\n", + " 'to_layout',\n", + " 'to_list',\n", + " 'to_numpy',\n", + " 'to_packed',\n", + " 'to_parquet',\n", + " 'to_parquet_dataset',\n", + " 'to_parquet_row_groups',\n", + " 'to_rdataframe',\n", + " 'to_regular',\n", + " 'transform',\n", + " 'type',\n", + " 'types',\n", + " 'typetracer',\n", + " 'unflatten',\n", + " 'unzip',\n", + " 'validity_error',\n", + " 'values_astype',\n", + " 'var',\n", + " 'where',\n", + " 'with_field',\n", + " 'with_name',\n", + " 'with_parameter',\n", + " 'without_field',\n", + " 'without_parameters',\n", + " 'zeros_like',\n", + " 'zip']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# allows all ak.* namespace, many identical to numpy equivalents\n", + "dir(df.a.ak)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8ff11e13-8503-4d79-a64c-993028709ca4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(28000000)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.a.ak.sum(axis=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e2714a2e-6d78-4a99-88d0-e76851938d7c", + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )\n\nSee if this has been reported at https://github.com/scikit-hep/awkward/issues", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_dispatch.py:64\u001b[0m, in \u001b[0;36mnamed_high_level_function..dispatch\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mgen_or_result\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/operations/ak_sum.py:210\u001b[0m, in \u001b[0;36msum\u001b[0;34m(array, axis, keepdims, mask_identity, highlevel, behavior, attrs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;66;03m# Implementation\u001b[39;00m\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask_identity\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhighlevel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/operations/ak_sum.py:277\u001b[0m, in \u001b[0;36m_impl\u001b[0;34m(array, axis, keepdims, mask_identity, highlevel, behavior, attrs)\u001b[0m\n\u001b[1;32m 275\u001b[0m reducer \u001b[38;5;241m=\u001b[39m ak\u001b[38;5;241m.\u001b[39m_reducers\u001b[38;5;241m.\u001b[39mSum()\n\u001b[0;32m--> 277\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_identity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ctx\u001b[38;5;241m.\u001b[39mwrap(out, highlevel\u001b[38;5;241m=\u001b[39mhighlevel, allow_other\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_do.py:333\u001b[0m, in \u001b[0;36mreduce\u001b[0;34m(layout, reducer, axis, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 332\u001b[0m shifts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 333\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mlayout\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 336\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 338\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 339\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m[\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1476\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_ListOffsetArray64(\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m-> 1476\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1481\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1483\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1484\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1485\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1486\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1488\u001b[0m branch, depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_depth\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1615\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1613\u001b[0m nextstarts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moffsets[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m-> 1615\u001b[0m outcontent \u001b[38;5;241m=\u001b[39m \u001b[43mtrimmed\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1616\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1617\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1618\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1619\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1620\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1621\u001b[0m \u001b[43m \u001b[49m\u001b[43mglobalstarts_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1622\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1623\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1624\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1625\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1627\u001b[0m outoffsets \u001b[38;5;241m=\u001b[39m Index64\u001b[38;5;241m.\u001b[39mempty(outlength \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, index_nplike)\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/unmaskedarray.py:455\u001b[0m, in \u001b[0;36mUnmaskedArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_reduce_next\u001b[39m(\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 445\u001b[0m reducer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 453\u001b[0m behavior,\n\u001b[1;32m 454\u001b[0m ):\n\u001b[0;32m--> 455\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_content\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 462\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 463\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 464\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 465\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1476\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_ListOffsetArray64(\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m-> 1476\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1481\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1483\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1484\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1485\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1486\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1488\u001b[0m branch, depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_depth\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1499\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m branch \u001b[38;5;129;01mand\u001b[39;00m negaxis \u001b[38;5;241m==\u001b[39m depth:\n\u001b[1;32m 1492\u001b[0m (\n\u001b[1;32m 1493\u001b[0m distincts,\n\u001b[1;32m 1494\u001b[0m maxcount,\n\u001b[1;32m 1495\u001b[0m maxnextparents,\n\u001b[1;32m 1496\u001b[0m nextcarry,\n\u001b[1;32m 1497\u001b[0m nextparents,\n\u001b[1;32m 1498\u001b[0m nextstarts,\n\u001b[0;32m-> 1499\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_rearrange_prepare_next\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1501\u001b[0m outstarts \u001b[38;5;241m=\u001b[39m Index64\u001b[38;5;241m.\u001b[39mempty(outlength, index_nplike)\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1700\u001b[0m, in \u001b[0;36mListOffsetArray._rearrange_prepare_next\u001b[0;34m(self, outlength, parents)\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m (\n\u001b[1;32m 1693\u001b[0m _maxnextparents\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[1;32m 1694\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m distincts\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1697\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m parents\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[1;32m 1698\u001b[0m )\n\u001b[1;32m 1699\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mmaybe_kernel_error(\n\u001b[0;32m-> 1700\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_backend\u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 1701\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mawkward_ListOffsetArray_reduce_nonlocal_preparenext_64\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1702\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextcarry\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1703\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1704\u001b[0m \u001b[43m \u001b[49m\u001b[43m_maxnextparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1705\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistincts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1706\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_offsets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1707\u001b[0m \u001b[43m \u001b[49m\u001b[43moffsetscopy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1708\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1709\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m(\n\u001b[1;32m 1710\u001b[0m nextcarry\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1711\u001b[0m nextparents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1712\u001b[0m nextlen,\n\u001b[1;32m 1713\u001b[0m _maxnextparents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1714\u001b[0m distincts\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1715\u001b[0m distincts\u001b[38;5;241m.\u001b[39mlength,\n\u001b[1;32m 1716\u001b[0m offsetscopy\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1717\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_offsets\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1718\u001b[0m lenstarts,\n\u001b[1;32m 1719\u001b[0m parents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1720\u001b[0m maxcount,\n\u001b[1;32m 1721\u001b[0m )\n\u001b[1;32m 1722\u001b[0m )\n\u001b[1;32m 1724\u001b[0m maxnextparents \u001b[38;5;241m=\u001b[39m index_nplike\u001b[38;5;241m.\u001b[39mindex_as_shape_item(_maxnextparents[\u001b[38;5;241m0\u001b[39m])\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_backends/cupy.py:43\u001b[0m, in \u001b[0;36mCupyBackend.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCuPyKernel not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mindex\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mAssertionError\u001b[0m: CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/floppy/code/awkward-pandas/src/awkward_pandas/cudf.py:62\u001b[0m, in \u001b[0;36mAwkwardAccessor.__getattr__..f\u001b[0;34m(*others, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m others \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 52\u001b[0m other\u001b[38;5;241m.\u001b[39mak\u001b[38;5;241m.\u001b[39marray\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(other, (DataFrame, Series))\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m other\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m other \u001b[38;5;129;01min\u001b[39;00m others\n\u001b[1;32m 56\u001b[0m ]\n\u001b[1;32m 57\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 58\u001b[0m k: v\u001b[38;5;241m.\u001b[39mak\u001b[38;5;241m.\u001b[39marray \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(v, (DataFrame, Series)) \u001b[38;5;28;01melse\u001b[39;00m v\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 60\u001b[0m }\n\u001b[0;32m---> 62\u001b[0m ak_arr \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mothers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m maybe_to_cudf(ak_arr)\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_dispatch.py:38\u001b[0m, in \u001b[0;36mnamed_high_level_function..dispatch\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdispatch\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 37\u001b[0m \u001b[38;5;66;03m# NOTE: this decorator assumes that the operation is exposed under `ak.`\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m OperationErrorContext(name, args, kwargs):\n\u001b[1;32m 39\u001b[0m gen_or_result \u001b[38;5;241m=\u001b[39m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isgenerator(gen_or_result):\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_errors.py:85\u001b[0m, in \u001b[0;36mErrorContext.__exit__\u001b[0;34m(self, exception_type, exception_value, traceback)\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# Handle caught exception\u001b[39;00m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 81\u001b[0m exception_type \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(exception_type, \u001b[38;5;167;01mException\u001b[39;00m)\n\u001b[1;32m 83\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprimary() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 84\u001b[0m ):\n\u001b[0;32m---> 85\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexception_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexception_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;66;03m# Step out of the way so that another ErrorContext can become primary.\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprimary() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n", + "File \u001b[0;32m/floppy/code/awkward/src/awkward/_errors.py:95\u001b[0m, in \u001b[0;36mErrorContext.handle_exception\u001b[0;34m(self, cls, exception)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorate_exception(\u001b[38;5;28mcls\u001b[39m, exception)\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorate_exception(\u001b[38;5;28mcls\u001b[39m, exception)\n", + "\u001b[0;31mAssertionError\u001b[0m: CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )\n\nSee if this has been reported at https://github.com/scikit-hep/awkward/issues" + ] + } + ], + "source": [ + "df.a.ak.sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2dd99fe5-0523-46c9-87ec-1392070f5139", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cupy.ndarray" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# if output was array-like, it stays on the GPU\n", + "type(_)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.09 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "# fast reduction across three levels of nesting\n", + "%timeit df.a.ak.sum(axis=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[[[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " [[-1, -2, -3], [], [-4, -5]],\n",
+       " ...,\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]],\n",
+       " [[-6, -7]]]\n",
+       "------------------------------------------\n",
+       "type: 2000000 * var * option[var * ?int64]
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# ufunc maintains structure\n", + "np.negative(df.a.ak)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1b83da2c-5e15-42f6-b594-f2ebaece5ac8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "362MiB\n" + ] + } + ], + "source": [ + "gpu_mem() # created new arrays on GPU, made new cuDF series" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 [[2, 3, 4], [], [5, 6]]\n", + "1 [[2, 3, 4], [], [5, 6]]\n", + "2 [[2, 3, 4], [], [5, 6]]\n", + "3 [[2, 3, 4], [], [5, 6]]\n", + "4 [[2, 3, 4], [], [5, 6]]\n", + "dtype: list\n" + ] + } + ], + "source": [ + "# operator overload\n", + "print((df.a.ak + 1).head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:cuda]", + "language": "python", + "name": "conda-env-cuda-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6a54f0417aae513208dd607f3e6f33cc903713e7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 1 May 2024 15:37:20 -0400 Subject: [PATCH 04/18] refactor for accessor superclass --- example/cudf-ak.ipynb | 243 ++++++++++++++++++++++++--------- src/awkward_pandas/__init__.py | 1 - src/awkward_pandas/cudf.py | 93 ++----------- src/awkward_pandas/io.py | 1 + src/awkward_pandas/mixin.py | 4 +- 5 files changed, 197 insertions(+), 145 deletions(-) diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index df19bd9..e1f329e 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -19,7 +19,6 @@ ], "source": [ "import awkward as ak\n", - "ak.numba.register_and_check()\n", "import cupy as cp\n", "import cudf\n", "import numpy as np\n", @@ -34,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10", "metadata": {}, "outputs": [ @@ -53,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "e29ff9a4-60e4-4260-9a44-c135ad6d7d6b", "metadata": {}, "outputs": [ @@ -64,7 +63,7 @@ "dtype: object" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -75,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "58d16a80-041e-4260-8c56-9de932dde557", "metadata": {}, "outputs": [ @@ -86,7 +85,7 @@ "Name: 0, dtype: list" ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -97,17 +96,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "abfe0ab6-5a89-4885-b654-c84804a4aea4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -119,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18", "metadata": { "scrolled": true @@ -159,8 +158,10 @@ " 'covar',\n", " 'cppyy',\n", " 'drop_none',\n", + " 'dt',\n", " 'enforce_type',\n", " 'errors',\n", + " 'explode',\n", " 'fields',\n", " 'fill_none',\n", " 'firsts',\n", @@ -233,6 +234,7 @@ " 'sort',\n", " 'std',\n", " 'str',\n", + " 'str',\n", " 'strings_astype',\n", " 'sum',\n", " 'to_arrow',\n", @@ -259,6 +261,7 @@ " 'types',\n", " 'typetracer',\n", " 'unflatten',\n", + " 'unmerge',\n", " 'unzip',\n", " 'validity_error',\n", " 'values_astype',\n", @@ -273,7 +276,7 @@ " 'zip']" ] }, - "execution_count": 10, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -285,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "id": "8ff11e13-8503-4d79-a64c-993028709ca4", "metadata": {}, "outputs": [ @@ -295,7 +298,7 @@ "array(28000000)" ] }, - "execution_count": 11, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -306,47 +309,7 @@ }, { "cell_type": "code", - "execution_count": 27, - "id": "e2714a2e-6d78-4a99-88d0-e76851938d7c", - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )\n\nSee if this has been reported at https://github.com/scikit-hep/awkward/issues", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_dispatch.py:64\u001b[0m, in \u001b[0;36mnamed_high_level_function..dispatch\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mgen_or_result\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/operations/ak_sum.py:210\u001b[0m, in \u001b[0;36msum\u001b[0;34m(array, axis, keepdims, mask_identity, highlevel, behavior, attrs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;66;03m# Implementation\u001b[39;00m\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask_identity\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhighlevel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/operations/ak_sum.py:277\u001b[0m, in \u001b[0;36m_impl\u001b[0;34m(array, axis, keepdims, mask_identity, highlevel, behavior, attrs)\u001b[0m\n\u001b[1;32m 275\u001b[0m reducer \u001b[38;5;241m=\u001b[39m ak\u001b[38;5;241m.\u001b[39m_reducers\u001b[38;5;241m.\u001b[39mSum()\n\u001b[0;32m--> 277\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask_identity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ctx\u001b[38;5;241m.\u001b[39mwrap(out, highlevel\u001b[38;5;241m=\u001b[39mhighlevel, allow_other\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_do.py:333\u001b[0m, in \u001b[0;36mreduce\u001b[0;34m(layout, reducer, axis, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 332\u001b[0m shifts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 333\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mlayout\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 336\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 338\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 339\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m[\u001b[38;5;241m0\u001b[39m]\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1476\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_ListOffsetArray64(\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m-> 1476\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1481\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1483\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1484\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1485\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1486\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1488\u001b[0m branch, depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_depth\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1615\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1613\u001b[0m nextstarts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moffsets[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m-> 1615\u001b[0m outcontent \u001b[38;5;241m=\u001b[39m \u001b[43mtrimmed\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1616\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1617\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1618\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1619\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1620\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1621\u001b[0m \u001b[43m \u001b[49m\u001b[43mglobalstarts_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1622\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1623\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1624\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1625\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1627\u001b[0m outoffsets \u001b[38;5;241m=\u001b[39m Index64\u001b[38;5;241m.\u001b[39mempty(outlength \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, index_nplike)\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/unmaskedarray.py:455\u001b[0m, in \u001b[0;36mUnmaskedArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_reduce_next\u001b[39m(\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 445\u001b[0m reducer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 453\u001b[0m behavior,\n\u001b[1;32m 454\u001b[0m ):\n\u001b[0;32m--> 455\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_content\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 462\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 463\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 464\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 465\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1476\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[38;5;28mnext\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_ListOffsetArray64(\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m-> 1476\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_next\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mreducer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mnegaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[43m \u001b[49m\u001b[43mshifts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1481\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[43m \u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1483\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1484\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1485\u001b[0m \u001b[43m \u001b[49m\u001b[43mbehavior\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1486\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1488\u001b[0m branch, depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_depth\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1499\u001b[0m, in \u001b[0;36mListOffsetArray._reduce_next\u001b[0;34m(self, reducer, negaxis, starts, shifts, parents, outlength, mask, keepdims, behavior)\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m branch \u001b[38;5;129;01mand\u001b[39;00m negaxis \u001b[38;5;241m==\u001b[39m depth:\n\u001b[1;32m 1492\u001b[0m (\n\u001b[1;32m 1493\u001b[0m distincts,\n\u001b[1;32m 1494\u001b[0m maxcount,\n\u001b[1;32m 1495\u001b[0m maxnextparents,\n\u001b[1;32m 1496\u001b[0m nextcarry,\n\u001b[1;32m 1497\u001b[0m nextparents,\n\u001b[1;32m 1498\u001b[0m nextstarts,\n\u001b[0;32m-> 1499\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_rearrange_prepare_next\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutlength\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1501\u001b[0m outstarts \u001b[38;5;241m=\u001b[39m Index64\u001b[38;5;241m.\u001b[39mempty(outlength, index_nplike)\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/contents/listoffsetarray.py:1700\u001b[0m, in \u001b[0;36mListOffsetArray._rearrange_prepare_next\u001b[0;34m(self, outlength, parents)\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m (\n\u001b[1;32m 1693\u001b[0m _maxnextparents\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[1;32m 1694\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m distincts\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1697\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m parents\u001b[38;5;241m.\u001b[39mnplike \u001b[38;5;129;01mis\u001b[39;00m index_nplike\n\u001b[1;32m 1698\u001b[0m )\n\u001b[1;32m 1699\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mmaybe_kernel_error(\n\u001b[0;32m-> 1700\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_backend\u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 1701\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mawkward_ListOffsetArray_reduce_nonlocal_preparenext_64\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1702\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextcarry\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1703\u001b[0m \u001b[43m \u001b[49m\u001b[43mnextparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1704\u001b[0m \u001b[43m \u001b[49m\u001b[43m_maxnextparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1705\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistincts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1706\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_offsets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1707\u001b[0m \u001b[43m \u001b[49m\u001b[43moffsetscopy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1708\u001b[0m \u001b[43m \u001b[49m\u001b[43mparents\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1709\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m(\n\u001b[1;32m 1710\u001b[0m nextcarry\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1711\u001b[0m nextparents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1712\u001b[0m nextlen,\n\u001b[1;32m 1713\u001b[0m _maxnextparents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1714\u001b[0m distincts\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1715\u001b[0m distincts\u001b[38;5;241m.\u001b[39mlength,\n\u001b[1;32m 1716\u001b[0m offsetscopy\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1717\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_offsets\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1718\u001b[0m lenstarts,\n\u001b[1;32m 1719\u001b[0m parents\u001b[38;5;241m.\u001b[39mdata,\n\u001b[1;32m 1720\u001b[0m maxcount,\n\u001b[1;32m 1721\u001b[0m )\n\u001b[1;32m 1722\u001b[0m )\n\u001b[1;32m 1724\u001b[0m maxnextparents \u001b[38;5;241m=\u001b[39m index_nplike\u001b[38;5;241m.\u001b[39mindex_as_shape_item(_maxnextparents[\u001b[38;5;241m0\u001b[39m])\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_backends/cupy.py:43\u001b[0m, in \u001b[0;36mCupyBackend.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCuPyKernel not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mindex\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mAssertionError\u001b[0m: CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/floppy/code/awkward-pandas/src/awkward_pandas/cudf.py:62\u001b[0m, in \u001b[0;36mAwkwardAccessor.__getattr__..f\u001b[0;34m(*others, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m others \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 52\u001b[0m other\u001b[38;5;241m.\u001b[39mak\u001b[38;5;241m.\u001b[39marray\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(other, (DataFrame, Series))\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m other\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m other \u001b[38;5;129;01min\u001b[39;00m others\n\u001b[1;32m 56\u001b[0m ]\n\u001b[1;32m 57\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 58\u001b[0m k: v\u001b[38;5;241m.\u001b[39mak\u001b[38;5;241m.\u001b[39marray \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(v, (DataFrame, Series)) \u001b[38;5;28;01melse\u001b[39;00m v\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 60\u001b[0m }\n\u001b[0;32m---> 62\u001b[0m ak_arr \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mothers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m maybe_to_cudf(ak_arr)\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_dispatch.py:38\u001b[0m, in \u001b[0;36mnamed_high_level_function..dispatch\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdispatch\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 37\u001b[0m \u001b[38;5;66;03m# NOTE: this decorator assumes that the operation is exposed under `ak.`\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m OperationErrorContext(name, args, kwargs):\n\u001b[1;32m 39\u001b[0m gen_or_result \u001b[38;5;241m=\u001b[39m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isgenerator(gen_or_result):\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_errors.py:85\u001b[0m, in \u001b[0;36mErrorContext.__exit__\u001b[0;34m(self, exception_type, exception_value, traceback)\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# Handle caught exception\u001b[39;00m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 81\u001b[0m exception_type \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(exception_type, \u001b[38;5;167;01mException\u001b[39;00m)\n\u001b[1;32m 83\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprimary() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 84\u001b[0m ):\n\u001b[0;32m---> 85\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexception_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexception_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;66;03m# Step out of the way so that another ErrorContext can become primary.\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprimary() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n", - "File \u001b[0;32m/floppy/code/awkward/src/awkward/_errors.py:95\u001b[0m, in \u001b[0;36mErrorContext.handle_exception\u001b[0;34m(self, cls, exception)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorate_exception(\u001b[38;5;28mcls\u001b[39m, exception)\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorate_exception(\u001b[38;5;28mcls\u001b[39m, exception)\n", - "\u001b[0;31mAssertionError\u001b[0m: CuPyKernel not found: ('awkward_ListOffsetArray_reduce_nonlocal_preparenext_64', , , , , , , )\n\nSee if this has been reported at https://github.com/scikit-hep/awkward/issues" - ] - } - ], - "source": [ - "df.a.ak.sum(axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "id": "2dd99fe5-0523-46c9-87ec-1392070f5139", "metadata": {}, "outputs": [ @@ -356,7 +319,7 @@ "cupy.ndarray" ] }, - "execution_count": 12, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d", "metadata": {}, "outputs": [ @@ -376,7 +339,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "7.09 ms ± 118 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "6.91 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -387,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98", "metadata": {}, "outputs": [ @@ -421,7 +384,7 @@ "" ] }, - "execution_count": 14, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "id": "1b83da2c-5e15-42f6-b594-f2ebaece5ac8", "metadata": {}, "outputs": [ @@ -451,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 12, "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795", "metadata": {}, "outputs": [ @@ -473,10 +436,166 @@ "print((df.a.ak + 1).head())" ] }, + { + "cell_type": "markdown", + "id": "bb51c8c3-42cf-4999-b688-67703f7311d2", + "metadata": {}, + "source": [ + "#### numba" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "TypingError", + "evalue": "Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 15\u001b[0m\n\u001b[1;32m 13\u001b[0m blocksize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m256\u001b[39m\n\u001b[1;32m 14\u001b[0m numblocks \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mlen\u001b[39m(df\u001b[38;5;241m.\u001b[39ma) \u001b[38;5;241m+\u001b[39m blocksize \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m blocksize\n\u001b[0;32m---> 15\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43minner_sum\u001b[49m\u001b[43m[\u001b[49m\u001b[43mnumblocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocksize\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m out\n", + "File \u001b[0;32m/floppy/code/awkward-pandas/src/awkward_pandas/mixin.py:158\u001b[0m, in \u001b[0;36mAccessor.apply\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m, fn: Callable, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Perform arbitrary function on all the values of the series\"\"\"\u001b[39;00m\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_output(\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:539\u001b[0m, in \u001b[0;36m_LaunchConfiguration.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m--> 539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgriddim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblockdim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 540\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msharedmem\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:681\u001b[0m, in \u001b[0;36mCUDADispatcher.call\u001b[0;34m(self, args, griddim, blockdim, stream, sharedmem)\u001b[0m\n\u001b[1;32m 679\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverloads\u001b[38;5;241m.\u001b[39mvalues()))\n\u001b[1;32m 680\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 681\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_dispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cuda_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 683\u001b[0m kernel\u001b[38;5;241m.\u001b[39mlaunch(args, griddim, blockdim, stream, sharedmem)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:689\u001b[0m, in \u001b[0;36mCUDADispatcher._compile_for_args\u001b[0;34m(self, *args, **kws)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kws\n\u001b[1;32m 688\u001b[0m argtypes \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtypeof_pyval(a) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m--> 689\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:932\u001b[0m, in \u001b[0;36mCUDADispatcher.compile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_can_compile:\n\u001b[1;32m 930\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilation disabled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 932\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_Kernel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetoptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[38;5;66;03m# We call bind to force codegen, so that there is a cubin to cache\u001b[39;00m\n\u001b[1;32m 934\u001b[0m kernel\u001b[38;5;241m.\u001b[39mbind()\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:83\u001b[0m, in \u001b[0;36m_Kernel.__init__\u001b[0;34m(self, py_func, argtypes, link, debug, lineinfo, inline, fastmath, extensions, max_registers, opt, device)\u001b[0m\n\u001b[1;32m 77\u001b[0m nvvm_options \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfastmath\u001b[39m\u001b[38;5;124m'\u001b[39m: fastmath,\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mopt\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m3\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m opt \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 80\u001b[0m }\n\u001b[1;32m 82\u001b[0m cc \u001b[38;5;241m=\u001b[39m get_current_device()\u001b[38;5;241m.\u001b[39mcompute_capability\n\u001b[0;32m---> 83\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompile_cuda\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvoid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mdebug\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdebug\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mlineinfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlineinfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43minline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastmath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastmath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[43m \u001b[49m\u001b[43mnvvm_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnvvm_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 89\u001b[0m \u001b[43m \u001b[49m\u001b[43mcc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m tgt_ctx \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mtarget_context\n\u001b[1;32m 91\u001b[0m code \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__code__\u001b[39m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/compiler.py:196\u001b[0m, in \u001b[0;36mcompile_cuda\u001b[0;34m(pyfunc, return_type, args, debug, lineinfo, inline, fastmath, nvvm_options, cc)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumba\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtarget_extension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m target_override\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m target_override(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 196\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtypingctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mtargetctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpyfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mflags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlocals\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mpipeline_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCUDACompiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 205\u001b[0m library \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mlibrary\n\u001b[1;32m 206\u001b[0m library\u001b[38;5;241m.\u001b[39mfinalize()\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:751\u001b[0m, in \u001b[0;36mcompile_extra\u001b[0;34m(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Compiler entry point\u001b[39;00m\n\u001b[1;32m 728\u001b[0m \n\u001b[1;32m 729\u001b[0m \u001b[38;5;124;03mParameter\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 747\u001b[0m \u001b[38;5;124;03m compiler pipeline\u001b[39;00m\n\u001b[1;32m 748\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 749\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m pipeline_class(typingctx, targetctx, library,\n\u001b[1;32m 750\u001b[0m args, return_type, flags, \u001b[38;5;28mlocals\u001b[39m)\n\u001b[0;32m--> 751\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:445\u001b[0m, in \u001b[0;36mCompilerBase.compile_extra\u001b[0;34m(self, func)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted \u001b[38;5;241m=\u001b[39m ()\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted_from \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_bytecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:513\u001b[0m, in \u001b[0;36mCompilerBase._compile_bytecode\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124;03mPopulate and run pipeline for bytecode input\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfunc_ir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 513\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_core\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:492\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus\u001b[38;5;241m.\u001b[39mfail_reason \u001b[38;5;241m=\u001b[39m e\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_final_pipeline:\n\u001b[0;32m--> 492\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CompilerError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll available pipelines exhausted\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:479\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 477\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 479\u001b[0m \u001b[43mpm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mcr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:368\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 365\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m mode pipeline (step: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m \\\n\u001b[1;32m 366\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpipeline_name, pass_desc)\n\u001b[1;32m 367\u001b[0m patched_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_patch_error(msg, e)\n\u001b[0;32m--> 368\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m patched_exception\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:356\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 354\u001b[0m pass_inst \u001b[38;5;241m=\u001b[39m _pass_registry\u001b[38;5;241m.\u001b[39mget(pss)\u001b[38;5;241m.\u001b[39mpass_inst\n\u001b[1;32m 355\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pass_inst, CompilerPass):\n\u001b[0;32m--> 356\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_runPass\u001b[49m\u001b[43m(\u001b[49m\u001b[43midx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpass_inst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLegacy pass in use\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:311\u001b[0m, in \u001b[0;36mPassManager._runPass\u001b[0;34m(self, index, pss, internal_state)\u001b[0m\n\u001b[1;32m 309\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_initialization, internal_state)\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m pass_time:\n\u001b[0;32m--> 311\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[43mcheck\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_pass\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minternal_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m finalize_time:\n\u001b[1;32m 313\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_finalizer, internal_state)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:273\u001b[0m, in \u001b[0;36mPassManager._runPass..check\u001b[0;34m(func, compiler_state)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcheck\u001b[39m(func, compiler_state):\n\u001b[0;32m--> 273\u001b[0m mangled \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcompiler_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mangled \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 275\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass implementations should return True/False. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass with name \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m did not.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:112\u001b[0m, in \u001b[0;36mBaseTypeInference.run_pass\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;124;03mType inference and legalization\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fallback_context(state, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFunction \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m failed type inference\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;241m%\u001b[39m (state\u001b[38;5;241m.\u001b[39mfunc_id\u001b[38;5;241m.\u001b[39mfunc_name,)):\n\u001b[1;32m 111\u001b[0m \u001b[38;5;66;03m# Type inference\u001b[39;00m\n\u001b[0;32m--> 112\u001b[0m typemap, return_type, calltypes, errs \u001b[38;5;241m=\u001b[39m \u001b[43mtype_inference_stage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 114\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc_ir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocals\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 120\u001b[0m state\u001b[38;5;241m.\u001b[39mtypemap \u001b[38;5;241m=\u001b[39m typemap\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# save errors in case of partial typing\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:93\u001b[0m, in \u001b[0;36mtype_inference_stage\u001b[0;34m(typingctx, targetctx, interp, args, return_type, locals, raise_errors)\u001b[0m\n\u001b[1;32m 91\u001b[0m infer\u001b[38;5;241m.\u001b[39mbuild_constraint()\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# return errors in case of partial typing\u001b[39;00m\n\u001b[0;32m---> 93\u001b[0m errs \u001b[38;5;241m=\u001b[39m \u001b[43minfer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpropagate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 94\u001b[0m typemap, restype, calltypes \u001b[38;5;241m=\u001b[39m infer\u001b[38;5;241m.\u001b[39munify(raise_errors\u001b[38;5;241m=\u001b[39mraise_errors)\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _TypingResults(typemap, restype, calltypes, errs)\n", + "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typeinfer.py:1091\u001b[0m, in \u001b[0;36mTypeInferer.propagate\u001b[0;34m(self, raise_errors)\u001b[0m\n\u001b[1;32m 1088\u001b[0m force_lit_args \u001b[38;5;241m=\u001b[39m [e \u001b[38;5;28;01mfor\u001b[39;00m e \u001b[38;5;129;01min\u001b[39;00m errors\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e, ForceLiteralArg)]\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m force_lit_args:\n\u001b[0;32m-> 1091\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m errors[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m reduce(operator\u001b[38;5;241m.\u001b[39mor_, force_lit_args)\n", + "\u001b[0;31mTypingError\u001b[0m: Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n" + ] + } + ], + "source": [ + "import numba.cuda\n", + "ak.numba.register_and_check()\n", + "\n", + "@numba.cuda.jit(extensions=[ak.numba.cuda])\n", + "def inner_sum(array, out):\n", + " tid = numba.cuda.grid(1)\n", + " if tid < len(array):\n", + " out[tid] = 0\n", + " for i, x in enumerate(array[tid]):\n", + " out[tid] += x\n", + "\n", + "out = cp.empty(len(df.a), dtype=\"int32\")\n", + "blocksize = 256\n", + "numblocks = (len(df.a) + blocksize - 1) // blocksize\n", + "df.a.ak.apply(inner_sum[numblocks, blocksize], out)\n", + "out\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f9826104-a4e4-4afe-b98f-2d0ef941e88c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[[[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " ...,\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]]]\n",
+       "------------------------------------------\n",
+       "type: 2000000 * var * option[var * ?int64]
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.a.ak.array" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "891dfe23-6534-4395-9dd1-5098b8e34aed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numba.cuda.dispatcher._LaunchConfiguration" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(inner_sum[numblocks, blocksize])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b5760e04-8bb2-42ee-93be-94b455efe326", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numba.cuda.dispatcher.CUDADispatcher" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(inner_sum)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", "metadata": {}, "outputs": [], "source": [] diff --git a/src/awkward_pandas/__init__.py b/src/awkward_pandas/__init__.py index 8613a25..e8c20a5 100644 --- a/src/awkward_pandas/__init__.py +++ b/src/awkward_pandas/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import awkward_pandas.dask import awkward_pandas.pandas # noqa from awkward_pandas.io import read_json, read_parquet from awkward_pandas.version import version as __version__ # noqa diff --git a/src/awkward_pandas/cudf.py b/src/awkward_pandas/cudf.py index 68b733d..6a4c3fb 100644 --- a/src/awkward_pandas/cudf.py +++ b/src/awkward_pandas/cudf.py @@ -1,96 +1,29 @@ -import functools - -from cudf.core.series import Series -from cudf import DataFrame +from cudf import DataFrame, Series import awkward as ak -from awkward_pandas.mixin import ArithmeticMixin +from awkward_pandas.mixin import Accessor from awkward_pandas.ak_from_cudf import cudf_to_awkward as from_cudf -from typing import Callable, Iterable - - -class AwkwardAccessor(ArithmeticMixin): - - def __init__(self, series: Series): - self.array = from_cudf(series) - - def __array_function__(self, *args, **kwargs): - return self.array.__array_function__(*args, **kwargs) - - def __array_ufunc__(self, *args, **kwargs): - if args[1] == "__call__": - return args[0](self.array, *args[3:], **kwargs) - raise NotImplementedError - - def __dir__(self) -> Iterable[str]: - return [ - _ - for _ in (dir(ak)) - if not _.startswith(("_", "ak_")) and not _[0].isupper() - ] + ["apply", "array"] - - def apply(self, fn: Callable) -> Series: - """Perform function on all the values of the series""" - out = fn(self.array) - return maybe_to_cudf(out) - - def __getitem__(self, item): - # scalars? - out = self.array.__getitem__(item) - return maybe_to_cudf(out) - def __getattr__(self, item): - if item not in dir(self): - raise AttributeError - func = getattr(ak, item, None) - if func: +class CudfAwkwardAccessor(Accessor): - @functools.wraps(func) - def f(*others, **kwargs): - others = [ - other.ak.array - if isinstance(other, (DataFrame, Series)) - else other - for other in others - ] - kwargs = { - k: v.ak.array if isinstance(v, (DataFrame, Series)) else v - for k, v in kwargs.items() - } - - ak_arr = func(self.array, *others, **kwargs) - return maybe_to_cudf(ak_arr) - - else: - raise AttributeError(item) - return f + series_type = Series + dataframe_type = DataFrame @classmethod - def _create_op(cls, op): - def run(self, *args, **kwargs): - return maybe_to_cudf(op(self.array, *args, **kwargs)) - - return run - - _create_arithmetic_method = _create_op - _create_comparison_method = _create_op - _create_logical_method = _create_op - - -def maybe_to_cudf(x): - if isinstance(x, ak.Array): - return ak.to_cudf(x) - return x - - + def _to_output(cls, arr): + if isinstance(arr, ak.Array): + return ak.to_cudf(arr) + return arr -AwkwardAccessor._add_all() + @property + def array(self) -> ak.Array: + return from_cudf(self._obj) @property # type:ignore def ak_property(self): - return AwkwardAccessor(self) + return CudfAwkwardAccessor(self) Series.ak = ak_property # no official register function? diff --git a/src/awkward_pandas/io.py b/src/awkward_pandas/io.py index 305e0da..62bcf7e 100644 --- a/src/awkward_pandas/io.py +++ b/src/awkward_pandas/io.py @@ -21,6 +21,7 @@ def read_parquet( extract: whether to turn top-level records into a dataframe. If False, will return a series. """ + # TODO: dispatch to backends, don't assume pandas as default ds = ak.from_parquet(url, storage_options=storage_options, **kwargs) s = awkward_pandas.pandas.PandasAwkwardAccessor._to_output(ds) if extract: diff --git a/src/awkward_pandas/mixin.py b/src/awkward_pandas/mixin.py index d84434f..918006a 100644 --- a/src/awkward_pandas/mixin.py +++ b/src/awkward_pandas/mixin.py @@ -153,9 +153,9 @@ def _to_output(cls, data): def to_output(self, data): return self._to_output(data) - def apply(self, fn: Callable): + def apply(self, fn: Callable, *args, **kwargs): """Perform arbitrary function on all the values of the series""" - return self.to_output(fn(self.array)) + return self.to_output(fn(self.array, *args, **kwargs)) def __getitem__(self, item): out = self.array.__getitem__(item) From ea2610daec397bef0944ca8f79e60f32a0ae5733 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 1 May 2024 16:06:04 -0400 Subject: [PATCH 05/18] Add numba example to notebook --- example/cudf-ak.ipynb | 144 ++++++++++-------------------------------- 1 file changed, 32 insertions(+), 112 deletions(-) diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index e1f329e..95a7aba 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -103,7 +103,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -339,7 +339,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "6.91 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "6.92 ms ± 93 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -446,44 +446,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", "metadata": { "scrolled": true }, "outputs": [ { - "ename": "TypingError", - "evalue": "Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 15\u001b[0m\n\u001b[1;32m 13\u001b[0m blocksize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m256\u001b[39m\n\u001b[1;32m 14\u001b[0m numblocks \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mlen\u001b[39m(df\u001b[38;5;241m.\u001b[39ma) \u001b[38;5;241m+\u001b[39m blocksize \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m blocksize\n\u001b[0;32m---> 15\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43minner_sum\u001b[49m\u001b[43m[\u001b[49m\u001b[43mnumblocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocksize\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m out\n", - "File \u001b[0;32m/floppy/code/awkward-pandas/src/awkward_pandas/mixin.py:158\u001b[0m, in \u001b[0;36mAccessor.apply\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m, fn: Callable, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Perform arbitrary function on all the values of the series\"\"\"\u001b[39;00m\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_output(\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:539\u001b[0m, in \u001b[0;36m_LaunchConfiguration.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m--> 539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgriddim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblockdim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 540\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msharedmem\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:681\u001b[0m, in \u001b[0;36mCUDADispatcher.call\u001b[0;34m(self, args, griddim, blockdim, stream, sharedmem)\u001b[0m\n\u001b[1;32m 679\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverloads\u001b[38;5;241m.\u001b[39mvalues()))\n\u001b[1;32m 680\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 681\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_dispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cuda_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 683\u001b[0m kernel\u001b[38;5;241m.\u001b[39mlaunch(args, griddim, blockdim, stream, sharedmem)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:689\u001b[0m, in \u001b[0;36mCUDADispatcher._compile_for_args\u001b[0;34m(self, *args, **kws)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kws\n\u001b[1;32m 688\u001b[0m argtypes \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtypeof_pyval(a) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m--> 689\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:932\u001b[0m, in \u001b[0;36mCUDADispatcher.compile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_can_compile:\n\u001b[1;32m 930\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilation disabled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 932\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_Kernel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetoptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[38;5;66;03m# We call bind to force codegen, so that there is a cubin to cache\u001b[39;00m\n\u001b[1;32m 934\u001b[0m kernel\u001b[38;5;241m.\u001b[39mbind()\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:83\u001b[0m, in \u001b[0;36m_Kernel.__init__\u001b[0;34m(self, py_func, argtypes, link, debug, lineinfo, inline, fastmath, extensions, max_registers, opt, device)\u001b[0m\n\u001b[1;32m 77\u001b[0m nvvm_options \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfastmath\u001b[39m\u001b[38;5;124m'\u001b[39m: fastmath,\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mopt\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m3\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m opt \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 80\u001b[0m }\n\u001b[1;32m 82\u001b[0m cc \u001b[38;5;241m=\u001b[39m get_current_device()\u001b[38;5;241m.\u001b[39mcompute_capability\n\u001b[0;32m---> 83\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompile_cuda\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvoid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mdebug\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdebug\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mlineinfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlineinfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43minline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastmath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastmath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[43m \u001b[49m\u001b[43mnvvm_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnvvm_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 89\u001b[0m \u001b[43m \u001b[49m\u001b[43mcc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m tgt_ctx \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mtarget_context\n\u001b[1;32m 91\u001b[0m code \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__code__\u001b[39m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/compiler.py:196\u001b[0m, in \u001b[0;36mcompile_cuda\u001b[0;34m(pyfunc, return_type, args, debug, lineinfo, inline, fastmath, nvvm_options, cc)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumba\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtarget_extension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m target_override\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m target_override(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 196\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtypingctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mtargetctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpyfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mflags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlocals\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mpipeline_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCUDACompiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 205\u001b[0m library \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mlibrary\n\u001b[1;32m 206\u001b[0m library\u001b[38;5;241m.\u001b[39mfinalize()\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:751\u001b[0m, in \u001b[0;36mcompile_extra\u001b[0;34m(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Compiler entry point\u001b[39;00m\n\u001b[1;32m 728\u001b[0m \n\u001b[1;32m 729\u001b[0m \u001b[38;5;124;03mParameter\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 747\u001b[0m \u001b[38;5;124;03m compiler pipeline\u001b[39;00m\n\u001b[1;32m 748\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 749\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m pipeline_class(typingctx, targetctx, library,\n\u001b[1;32m 750\u001b[0m args, return_type, flags, \u001b[38;5;28mlocals\u001b[39m)\n\u001b[0;32m--> 751\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:445\u001b[0m, in \u001b[0;36mCompilerBase.compile_extra\u001b[0;34m(self, func)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted \u001b[38;5;241m=\u001b[39m ()\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted_from \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_bytecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:513\u001b[0m, in \u001b[0;36mCompilerBase._compile_bytecode\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124;03mPopulate and run pipeline for bytecode input\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfunc_ir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 513\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_core\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:492\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus\u001b[38;5;241m.\u001b[39mfail_reason \u001b[38;5;241m=\u001b[39m e\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_final_pipeline:\n\u001b[0;32m--> 492\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CompilerError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll available pipelines exhausted\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:479\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 477\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 479\u001b[0m \u001b[43mpm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mcr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:368\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 365\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m mode pipeline (step: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m \\\n\u001b[1;32m 366\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpipeline_name, pass_desc)\n\u001b[1;32m 367\u001b[0m patched_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_patch_error(msg, e)\n\u001b[0;32m--> 368\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m patched_exception\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:356\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 354\u001b[0m pass_inst \u001b[38;5;241m=\u001b[39m _pass_registry\u001b[38;5;241m.\u001b[39mget(pss)\u001b[38;5;241m.\u001b[39mpass_inst\n\u001b[1;32m 355\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pass_inst, CompilerPass):\n\u001b[0;32m--> 356\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_runPass\u001b[49m\u001b[43m(\u001b[49m\u001b[43midx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpass_inst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLegacy pass in use\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:311\u001b[0m, in \u001b[0;36mPassManager._runPass\u001b[0;34m(self, index, pss, internal_state)\u001b[0m\n\u001b[1;32m 309\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_initialization, internal_state)\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m pass_time:\n\u001b[0;32m--> 311\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[43mcheck\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_pass\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minternal_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m finalize_time:\n\u001b[1;32m 313\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_finalizer, internal_state)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:273\u001b[0m, in \u001b[0;36mPassManager._runPass..check\u001b[0;34m(func, compiler_state)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcheck\u001b[39m(func, compiler_state):\n\u001b[0;32m--> 273\u001b[0m mangled \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcompiler_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mangled \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 275\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass implementations should return True/False. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass with name \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m did not.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:112\u001b[0m, in \u001b[0;36mBaseTypeInference.run_pass\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;124;03mType inference and legalization\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fallback_context(state, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFunction \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m failed type inference\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;241m%\u001b[39m (state\u001b[38;5;241m.\u001b[39mfunc_id\u001b[38;5;241m.\u001b[39mfunc_name,)):\n\u001b[1;32m 111\u001b[0m \u001b[38;5;66;03m# Type inference\u001b[39;00m\n\u001b[0;32m--> 112\u001b[0m typemap, return_type, calltypes, errs \u001b[38;5;241m=\u001b[39m \u001b[43mtype_inference_stage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 114\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc_ir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocals\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 120\u001b[0m state\u001b[38;5;241m.\u001b[39mtypemap \u001b[38;5;241m=\u001b[39m typemap\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# save errors in case of partial typing\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:93\u001b[0m, in \u001b[0;36mtype_inference_stage\u001b[0;34m(typingctx, targetctx, interp, args, return_type, locals, raise_errors)\u001b[0m\n\u001b[1;32m 91\u001b[0m infer\u001b[38;5;241m.\u001b[39mbuild_constraint()\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# return errors in case of partial typing\u001b[39;00m\n\u001b[0;32m---> 93\u001b[0m errs \u001b[38;5;241m=\u001b[39m \u001b[43minfer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpropagate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 94\u001b[0m typemap, restype, calltypes \u001b[38;5;241m=\u001b[39m infer\u001b[38;5;241m.\u001b[39munify(raise_errors\u001b[38;5;241m=\u001b[39mraise_errors)\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _TypingResults(typemap, restype, calltypes, errs)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typeinfer.py:1091\u001b[0m, in \u001b[0;36mTypeInferer.propagate\u001b[0;34m(self, raise_errors)\u001b[0m\n\u001b[1;32m 1088\u001b[0m force_lit_args \u001b[38;5;241m=\u001b[39m [e \u001b[38;5;28;01mfor\u001b[39;00m e \u001b[38;5;129;01min\u001b[39;00m errors\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e, ForceLiteralArg)]\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m force_lit_args:\n\u001b[0;32m-> 1091\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m errors[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m reduce(operator\u001b[38;5;241m.\u001b[39mor_, force_lit_args)\n", - "\u001b[0;31mTypingError\u001b[0m: Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n" - ] + "data": { + "text/plain": [ + "array([15, 15, 15, ..., 13, 13, 13], dtype=int32)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -495,110 +472,53 @@ " tid = numba.cuda.grid(1)\n", " if tid < len(array):\n", " out[tid] = 0\n", - " for i, x in enumerate(array[tid]):\n", - " out[tid] += x\n", + " for x in array[tid]:\n", + " for y in x:\n", + " out[tid] += y\n", "\n", "out = cp.empty(len(df.a), dtype=\"int32\")\n", "blocksize = 256\n", "numblocks = (len(df.a) + blocksize - 1) // blocksize\n", - "df.a.ak.apply(inner_sum[numblocks, blocksize], out)\n", + "\n", + "df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))\n", "out\n" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "f9826104-a4e4-4afe-b98f-2d0ef941e88c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[[[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " ...,\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]]]\n",
-       "------------------------------------------\n",
-       "type: 2000000 * var * option[var * ?int64]
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.a.ak.array" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "891dfe23-6534-4395-9dd1-5098b8e34aed", + "execution_count": 14, + "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "numba.cuda.dispatcher._LaunchConfiguration" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "3.32 ms ± 85.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] } ], "source": [ - "type(inner_sum[numblocks, blocksize])" + "%timeit df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "b5760e04-8bb2-42ee-93be-94b455efe326", + "execution_count": 15, + "id": "bb781ca6-bdbd-4659-9885-8c634f490fca", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "numba.cuda.dispatcher.CUDADispatcher" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "370MiB\n" + ] } ], "source": [ - "type(inner_sum)" + "gpu_mem() " ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From c047fdc77b78c2182cb74b752b532bd13d795e12 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 17 Jun 2024 14:04:25 -0400 Subject: [PATCH 06/18] placeholders --- src/awkward_pandas/cudf.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/awkward_pandas/cudf.py b/src/awkward_pandas/cudf.py index 6a4c3fb..31ba616 100644 --- a/src/awkward_pandas/cudf.py +++ b/src/awkward_pandas/cudf.py @@ -1,12 +1,13 @@ -from cudf import DataFrame, Series +from typing import Callable + import awkward as ak +from cudf import DataFrame, Series -from awkward_pandas.mixin import Accessor from awkward_pandas.ak_from_cudf import cudf_to_awkward as from_cudf +from awkward_pandas.mixin import Accessor class CudfAwkwardAccessor(Accessor): - series_type = Series dataframe_type = DataFrame @@ -20,6 +21,24 @@ def _to_output(cls, arr): def array(self) -> ak.Array: return from_cudf(self._obj) + @property + def str(self): + """Nested string operations""" + # need to find string ops within cudf + raise NotImplementedError + + @property + def dt(self): + """Nested datetime operations""" + # need to find datetime ops within cudf + raise NotImplementedError + + def apply(self, fn: Callable, *args, **kwargs): + if "CPUDispatcher" in str(fn): + # auto wrap original function for GPU + raise NotImplementedError + super().apply(fn, *args, **kwargs) + @property # type:ignore def ak_property(self): From 20dc77f5960e1b00f6481481bb6dc150e23f88d3 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 9 Aug 2024 13:25:41 -0400 Subject: [PATCH 07/18] Start cuDF strings --- src/awkward_pandas/cudf.py | 29 +++++++++++++++++++++++++++-- src/awkward_pandas/io.py | 6 ++++++ src/awkward_pandas/mixin.py | 4 ++++ tests/test_cudf.py | 17 +++++++++++++++++ tests/test_dask.py | 4 ++-- 5 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 tests/test_cudf.py diff --git a/src/awkward_pandas/cudf.py b/src/awkward_pandas/cudf.py index 31ba616..95b2876 100644 --- a/src/awkward_pandas/cudf.py +++ b/src/awkward_pandas/cudf.py @@ -1,10 +1,31 @@ +import functools from typing import Callable import awkward as ak from cudf import DataFrame, Series +from cudf.core.column.string import StringMethods from awkward_pandas.ak_from_cudf import cudf_to_awkward as from_cudf from awkward_pandas.mixin import Accessor +from awkward_pandas.strings import StringAccessor + + +class CudfStringAccessor(StringAccessor): + def decode(self, encoding: str = "utf-8"): + raise NotImplementedError("cudf does not support bytearray type, so we can't automatically identify them") + + def __getattr__(self, attr: str) -> Callable: + attr = StringAccessor.method_name(attr) + fn = getattr(StringMethods(self.accessor._obj), attr) + + @functools.wraps(fn) + def f(*args, **kwargs): + arr = fn(self.accessor._obj, *args, **kwargs) + if isinstance(arr, ak.Array): + return self.accessor.to_output(arr) + return arr + + return f class CudfAwkwardAccessor(Accessor): @@ -17,15 +38,19 @@ def _to_output(cls, arr): return ak.to_cudf(arr) return arr + @classmethod + def to_array(cls, data) -> ak.Array: + return from_cudf(data) + @property def array(self) -> ak.Array: - return from_cudf(self._obj) + return self.to_array(self._obj) @property def str(self): """Nested string operations""" # need to find string ops within cudf - raise NotImplementedError + return CudfStringAccessor(self) @property def dt(self): diff --git a/src/awkward_pandas/io.py b/src/awkward_pandas/io.py index 6d3222b..a6111a0 100644 --- a/src/awkward_pandas/io.py +++ b/src/awkward_pandas/io.py @@ -53,6 +53,7 @@ def read_json( extract: whether to turn top-level records into a dataframe. If False, will return a series. """ + # TODO: take JSONschema input explicitly with fsspec.open(url, **storage_options) as f: ds = ak.from_json( f, @@ -63,3 +64,8 @@ def read_json( if extract: return s.ak.unmerge() return s + + +# TODO: read_avro + +# TODO: to_parquet, to_json diff --git a/src/awkward_pandas/mixin.py b/src/awkward_pandas/mixin.py index 821b6ec..7ccc431 100644 --- a/src/awkward_pandas/mixin.py +++ b/src/awkward_pandas/mixin.py @@ -134,10 +134,14 @@ class Accessor(ArithmeticMixin): aggregations = True # False means data is partitioned series_type = () dataframe_type = () + behavior = None def __init__(self, obj): self._obj = obj + def __call__(self, *args, behavior=None, **kwargs): + self.behavior = behavior + @classmethod def is_series(cls, data): return isinstance(data, cls.series_type) diff --git a/tests/test_cudf.py b/tests/test_cudf.py new file mode 100644 index 0000000..185ceea --- /dev/null +++ b/tests/test_cudf.py @@ -0,0 +1,17 @@ +import pytest + +import awkward as ak + +pytest.importorskip("awkward_pandas.cudf") + + +def test_operator_overload(): + import cudf + s = [[1, 2, 3], [], [4, 5]] + series = cudf.Series(s) + assert ak.backend(series.ak.array) == "cuda" + s2 = series.ak + 1 + assert ak.backend(s2.ak.array) == "cuda" + assert isinstance(s2, cudf.Series) + assert s2.ak.to_list() == [[2, 3, 4], [], [5, 6]] + diff --git a/tests/test_dask.py b/tests/test_dask.py index c4d8571..0778725 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -2,10 +2,10 @@ import pyarrow as pa import pytest -import awkward_pandas.dask # noqa - dd = pytest.importorskip("dask.dataframe") +import awkward_pandas.dask # noqa + def test_simple_map(): data = pd.arrays.ArrowExtensionArray(pa.array([[0], [0, 1]] * 2)) From 34f9f349bdf7d10662a655d8a93d2a7cfafa042b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 9 Aug 2024 14:23:53 -0400 Subject: [PATCH 08/18] Update tests/test_dask.py --- tests/test_dask.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_dask.py b/tests/test_dask.py index 18d4630..006dd5e 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -4,11 +4,7 @@ dd = pytest.importorskip("dask.dataframe") -<<<<<<< HEAD -import awkward_pandas.dask # noqa -======= import akimbo.dask # noqa ->>>>>>> main def test_simple_map(): From 45a4bbe9f0254947b2409c0d3336e8a20c8f5a1e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 9 Aug 2024 14:24:17 -0400 Subject: [PATCH 09/18] Update tests/test_cudf.py --- tests/test_cudf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cudf.py b/tests/test_cudf.py index 185ceea..d2fdad4 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -2,7 +2,7 @@ import awkward as ak -pytest.importorskip("awkward_pandas.cudf") +pytest.importorskip("akimbo.cudf") def test_operator_overload(): From 0f8b2f8baa037e728621269cccede4edf3ddb680 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 9 Aug 2024 14:34:24 -0400 Subject: [PATCH 10/18] fix --- example/cudf-ak.ipynb | 6 +++--- src/akimbo/cudf.py | 6 +++--- tests/test_cudf.py | 2 +- tests/test_dask.py | 4 ---- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index 95a7aba..e6f97d0 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -22,13 +22,13 @@ "import cupy as cp\n", "import cudf\n", "import numpy as np\n", - "import awkward_pandas.cudf\n", + "import akimbo.cudf\n", "import subprocess\n", "\n", "def gpu_mem():\n", " print(subprocess.check_output(\"nvidia-smi | grep py\", shell=True).split()[-2].decode())\n", "\n", - "ak.__version__, awkward_pandas.__version__" + "ak.__version__, akimbo.__version__" ] }, { @@ -103,7 +103,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py index 95b2876..d4ccef1 100644 --- a/src/akimbo/cudf.py +++ b/src/akimbo/cudf.py @@ -5,9 +5,9 @@ from cudf import DataFrame, Series from cudf.core.column.string import StringMethods -from awkward_pandas.ak_from_cudf import cudf_to_awkward as from_cudf -from awkward_pandas.mixin import Accessor -from awkward_pandas.strings import StringAccessor +from akimbo.ak_from_cudf import cudf_to_awkward as from_cudf +from akimbo.mixin import Accessor +from akimbo.strings import StringAccessor class CudfStringAccessor(StringAccessor): diff --git a/tests/test_cudf.py b/tests/test_cudf.py index 185ceea..d2fdad4 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -2,7 +2,7 @@ import awkward as ak -pytest.importorskip("awkward_pandas.cudf") +pytest.importorskip("akimbo.cudf") def test_operator_overload(): diff --git a/tests/test_dask.py b/tests/test_dask.py index 18d4630..006dd5e 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -4,11 +4,7 @@ dd = pytest.importorskip("dask.dataframe") -<<<<<<< HEAD -import awkward_pandas.dask # noqa -======= import akimbo.dask # noqa ->>>>>>> main def test_simple_map(): From 9b9e248e9121e00ee6f8f23bdb33385ccef02828 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 14 Aug 2024 21:39:21 -0400 Subject: [PATCH 11/18] str --- example/cuda_env.yaml | 22 ++++++++++ example/cudf-ak.ipynb | 99 ++++++++++++++++++++++++++++--------------- src/akimbo/cudf.py | 30 ++++++++----- tests/test_cudf.py | 13 +++++- 4 files changed, 117 insertions(+), 47 deletions(-) create mode 100644 example/cuda_env.yaml diff --git a/example/cuda_env.yaml b/example/cuda_env.yaml new file mode 100644 index 0000000..c6b12b2 --- /dev/null +++ b/example/cuda_env.yaml @@ -0,0 +1,22 @@ +name: cuda +channels: + - conda-forge +dependencies: + - python=3.10 + - cuda-cudart + - cuda-version=12.2 + - pycuda + - cupy + - numba + - awkward + - rapidsai::cudf + - ipython + - numba + - pyarrow + - pandas + - polars + - pytest + - distributed + - dask-awkward + - pytest + - rox diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index e6f97d0..bf4662e 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -9,7 +9,7 @@ { "data": { "text/plain": [ - "('2.6.3', '2023.8.1.dev25+g5dd12c5.d20240417')" + "('2.6.7', '2024.8.1.dev17+gff760f4.d20240812')" ] }, "execution_count": 1, @@ -41,7 +41,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "188MiB\n" + "170MiB\n" ] } ], @@ -102,8 +102,32 @@ "outputs": [ { "data": { + "text/html": [ + "
[[[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " [[1, 2, 3], [], [4, 5]],\n",
+       " ...,\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]],\n",
+       " [[6, 7]]]\n",
+       "------------------------------------------\n",
+       "type: 2000000 * var * option[var * ?int64]
" + ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -127,7 +151,8 @@ { "data": { "text/plain": [ - "['all',\n", + "['Mask',\n", + " 'all',\n", " 'almost_equal',\n", " 'angle',\n", " 'annotations',\n", @@ -139,6 +164,7 @@ " 'argmin',\n", " 'argsort',\n", " 'array',\n", + " 'attrs',\n", " 'awkward',\n", " 'backend',\n", " 'behavior',\n", @@ -156,6 +182,7 @@ " 'count',\n", " 'count_nonzero',\n", " 'covar',\n", + " 'cpp_type',\n", " 'cppyy',\n", " 'drop_none',\n", " 'dt',\n", @@ -193,6 +220,7 @@ " 'is_valid',\n", " 'isclose',\n", " 'jax',\n", + " 'layout',\n", " 'linear_fit',\n", " 'local_index',\n", " 'mask',\n", @@ -216,12 +244,16 @@ " 'nanstd',\n", " 'nansum',\n", " 'nanvar',\n", + " 'nbytes',\n", + " 'ndim',\n", " 'num',\n", " 'numba',\n", + " 'numba_type',\n", " 'ones_like',\n", " 'operations',\n", " 'pad_none',\n", " 'parameters',\n", + " 'prettyprint',\n", " 'prod',\n", " 'ptp',\n", " 'ravel',\n", @@ -229,12 +261,12 @@ " 'record',\n", " 'round',\n", " 'run_lengths',\n", + " 'show',\n", " 'singletons',\n", " 'softmax',\n", " 'sort',\n", " 'std',\n", " 'str',\n", - " 'str',\n", " 'strings_astype',\n", " 'sum',\n", " 'to_arrow',\n", @@ -256,9 +288,11 @@ " 'to_parquet_row_groups',\n", " 'to_rdataframe',\n", " 'to_regular',\n", + " 'tolist',\n", " 'transform',\n", " 'type',\n", " 'types',\n", + " 'typestr',\n", " 'typetracer',\n", " 'unflatten',\n", " 'unmerge',\n", @@ -339,7 +373,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "6.92 ms ± 93 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "4.15 ms ± 75.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -356,32 +390,19 @@ "outputs": [ { "data": { - "text/html": [ - "
[[[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " [[-1, -2, -3], [], [-4, -5]],\n",
-       " ...,\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]],\n",
-       " [[-6, -7]]]\n",
-       "------------------------------------------\n",
-       "type: 2000000 * var * option[var * ?int64]
" - ], "text/plain": [ - "" + "0 [[-1, -2, -3], [], [-4, -5]]\n", + "1 [[-1, -2, -3], [], [-4, -5]]\n", + "2 [[-1, -2, -3], [], [-4, -5]]\n", + "3 [[-1, -2, -3], [], [-4, -5]]\n", + "4 [[-1, -2, -3], [], [-4, -5]]\n", + " ... \n", + "1999995 [[-6, -7]]\n", + "1999996 [[-6, -7]]\n", + "1999997 [[-6, -7]]\n", + "1999998 [[-6, -7]]\n", + "1999999 [[-6, -7]]\n", + "Length: 2000000, dtype: list" ] }, "execution_count": 10, @@ -404,7 +425,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "362MiB\n" + "252MiB\n" ] } ], @@ -494,7 +515,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3.32 ms ± 85.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "4.32 ms ± 119 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -512,18 +533,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "370MiB\n" + "260MiB\n" ] } ], "source": [ "gpu_mem() " ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "764cb543-8e23-4d4c-87bb-27acc8399b2e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:cuda]", + "display_name": "Python [conda env:cuda] *", "language": "python", "name": "conda-env-cuda-py" }, diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py index d4ccef1..1d7d311 100644 --- a/src/akimbo/cudf.py +++ b/src/akimbo/cudf.py @@ -2,30 +2,40 @@ from typing import Callable import awkward as ak +import cudf from cudf import DataFrame, Series from cudf.core.column.string import StringMethods from akimbo.ak_from_cudf import cudf_to_awkward as from_cudf from akimbo.mixin import Accessor from akimbo.strings import StringAccessor +from akimbo.apply_tree import dec + + +def match_string(arr): + return arr.parameters.get("__array__", "") == "string" class CudfStringAccessor(StringAccessor): def decode(self, encoding: str = "utf-8"): raise NotImplementedError("cudf does not support bytearray type, so we can't automatically identify them") - def __getattr__(self, attr: str) -> Callable: - attr = StringAccessor.method_name(attr) - fn = getattr(StringMethods(self.accessor._obj), attr) + def encode(self, encoding: str = "utf-8"): + raise NotImplementedError("cudf does not support bytearray type") + + +for meth in dir(StringMethods): + if meth.startswith("_"): + continue + + def f(lay, *args, method=meth, **kwargs): + if not match_string(lay): + return - @functools.wraps(fn) - def f(*args, **kwargs): - arr = fn(self.accessor._obj, *args, **kwargs) - if isinstance(arr, ak.Array): - return self.accessor.to_output(arr) - return arr + col = getattr(StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method)(*args, **kwargs) + return from_cudf(col).layout - return f + setattr(CudfStringAccessor, meth, dec(func=f, match=match_string, inmode="ak")) class CudfAwkwardAccessor(Accessor): diff --git a/tests/test_cudf.py b/tests/test_cudf.py index d2fdad4..3512a9d 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -1,13 +1,15 @@ import pytest +import pyarrow as pa import awkward as ak pytest.importorskip("akimbo.cudf") +import cudf + def test_operator_overload(): - import cudf - s = [[1, 2, 3], [], [4, 5]] + s = pa.array([[1, 2, 3], [], [4, 5]], type=pa.list_(pa.int32())) series = cudf.Series(s) assert ak.backend(series.ak.array) == "cuda" s2 = series.ak + 1 @@ -15,3 +17,10 @@ def test_operator_overload(): assert isinstance(s2, cudf.Series) assert s2.ak.to_list() == [[2, 3, 4], [], [5, 6]] + +def test_string_methods(): + s = pa.array([{"s": ["hey", "Ho"], "i": [0]}, {"s": ["Gar", "go"], "i": [2]}], + type=pa.struct([("s", pa.list_(pa.string())), ("i", pa.list_(pa.int32()))])) + series = cudf.Series(s) + s2 = series.ak.str.upper() + assert s2.ak.to_list() == [{"s": ["HEY", "HO"], "i": [0]}, {"s": ["GAR", "GO"], "i": [2]}] From 4b375eaa45cc0ec1620ff0b68b620c54f172c2f2 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 15 Aug 2024 13:21:54 -0400 Subject: [PATCH 12/18] Str docs and args for cudf --- src/akimbo/cudf.py | 5 +++++ src/akimbo/strings.py | 2 ++ tests/test_cudf.py | 13 +++++++++++++ 3 files changed, 20 insertions(+) diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py index 1d7d311..633ae4d 100644 --- a/src/akimbo/cudf.py +++ b/src/akimbo/cudf.py @@ -17,6 +17,8 @@ def match_string(arr): class CudfStringAccessor(StringAccessor): + """String operations on nested/var-length data""" + def decode(self, encoding: str = "utf-8"): raise NotImplementedError("cudf does not support bytearray type, so we can't automatically identify them") @@ -28,10 +30,13 @@ def encode(self, encoding: str = "utf-8"): if meth.startswith("_"): continue + @functools.wraps(getattr(StringMethods, meth)) def f(lay, *args, method=meth, **kwargs): if not match_string(lay): return + # unnecessary round-tripping, and repeating logic from `dec`? + args = args or kwargs.pop("args", ()) col = getattr(StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method)(*args, **kwargs) return from_cudf(col).layout diff --git a/src/akimbo/strings.py b/src/akimbo/strings.py index 70f49a7..a763b4d 100644 --- a/src/akimbo/strings.py +++ b/src/akimbo/strings.py @@ -52,6 +52,8 @@ def _decode(layout): class StringAccessor: + """String operations on nested/var-length data""" + def __init__(self, accessor): self.accessor = accessor diff --git a/tests/test_cudf.py b/tests/test_cudf.py index 3512a9d..6f65d28 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -24,3 +24,16 @@ def test_string_methods(): series = cudf.Series(s) s2 = series.ak.str.upper() assert s2.ak.to_list() == [{"s": ["HEY", "HO"], "i": [0]}, {"s": ["GAR", "GO"], "i": [2]}] + + assert series.ak.str.upper.__doc__ + # kwargs + s2 = series.ak.str.replace(pat="h", repl="B") + assert s2.ak.to_list() == [{"s": ["Bey", "Ho"], "i": [0]}, {"s": ["Gar", "go"], "i": [2]}] + + # positional args + s2 = series.ak.str.replace("h", "B") + assert s2.ak.to_list() == [{"s": ["Bey", "Ho"], "i": [0]}, {"s": ["Gar", "go"], "i": [2]}] + + # non-str output + s2 = series.ak.str.len() + assert s2.ak.to_list() == [{"s": [3, 2], "i": [0]}, {"s": [3, 2], "i": [2]}] From 9b9f27fd524c67ad803a491cdf9f354a107938bd Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 16 Aug 2024 14:33:21 -0400 Subject: [PATCH 13/18] Add dt methods for cudf --- src/akimbo/cudf.py | 61 ++++++++++++++++++++++++++++++++++++++-------- src/akimbo/io.py | 6 ++++- tests/test_cudf.py | 28 +++++++++++++++++++++ 3 files changed, 84 insertions(+), 11 deletions(-) diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py index 633ae4d..6c2bb8a 100644 --- a/src/akimbo/cudf.py +++ b/src/akimbo/cudf.py @@ -3,13 +3,15 @@ import awkward as ak import cudf -from cudf import DataFrame, Series +from cudf import DataFrame, Series, _lib as libcudf from cudf.core.column.string import StringMethods +from cudf.core.column.datetime import DatetimeColumn from akimbo.ak_from_cudf import cudf_to_awkward as from_cudf from akimbo.mixin import Accessor +from akimbo.datetimes import DatetimeAccessor, match as match_t from akimbo.strings import StringAccessor -from akimbo.apply_tree import dec +from akimbo.apply_tree import dec, leaf def match_string(arr): @@ -26,23 +28,58 @@ def encode(self, encoding: str = "utf-8"): raise NotImplementedError("cudf does not support bytearray type") +def dec_cu(op, match=match_string): + + @functools.wraps(op) + def f(lay, **kwargs): + # op(column, ...)->column + col = op(lay._to_cudf(cudf, None, len(lay)), **kwargs) + return from_cudf(cudf.Series(col)).layout + + return dec(func=f, match=match, inmode="ak") + + for meth in dir(StringMethods): if meth.startswith("_"): continue @functools.wraps(getattr(StringMethods, meth)) - def f(lay, *args, method=meth, **kwargs): - if not match_string(lay): - return - - # unnecessary round-tripping, and repeating logic from `dec`? - args = args or kwargs.pop("args", ()) - col = getattr(StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method)(*args, **kwargs) + def f(lay, method=meth, **kwargs): + # this is different from dec_cu, because we need to instantiate StringMethods + # before getting the method from it + col = getattr(StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method)(**kwargs) return from_cudf(col).layout setattr(CudfStringAccessor, meth, dec(func=f, match=match_string, inmode="ak")) +class CudfDatetimeAccessor(DatetimeAccessor): + + ... + + +for meth in dir(DatetimeColumn): + if meth.startswith("_"): + continue + + @functools.wraps(getattr(DatetimeColumn, meth)) + def f(lay, method=meth, **kwargs): + # this is different from dec_cu, because we need to instantiate StringMethods + # before getting the method from it + m = getattr(lay._to_cudf(cudf, None, len(lay)), method) + if callable(m): + col = m(**kwargs) + else: + # attributes giving components + col = m + return from_cudf(cudf.Series(col)).layout + + if isinstance(getattr(DatetimeColumn, meth), property): + setattr(CudfDatetimeAccessor, meth, property(dec(func=f, match=match_t, inmode="ak"))) + else: + setattr(CudfDatetimeAccessor, meth, dec(func=f, match=match_t, inmode="ak")) + + class CudfAwkwardAccessor(Accessor): series_type = Series dataframe_type = DataFrame @@ -51,6 +88,8 @@ class CudfAwkwardAccessor(Accessor): def _to_output(cls, arr): if isinstance(arr, ak.Array): return ak.to_cudf(arr) + elif isinstance(arr, ak.contents.Content): + return arr._to_cudf(cudf, None, len(arr)) return arr @classmethod @@ -67,11 +106,13 @@ def str(self): # need to find string ops within cudf return CudfStringAccessor(self) + cast = dec_cu(libcudf.unary.cast, match=leaf) + @property def dt(self): """Nested datetime operations""" # need to find datetime ops within cudf - raise NotImplementedError + return CudfDatetimeAccessor(self) def apply(self, fn: Callable, *args, **kwargs): if "CPUDispatcher" in str(fn): diff --git a/src/akimbo/io.py b/src/akimbo/io.py index 8fa36a3..a7745ca 100644 --- a/src/akimbo/io.py +++ b/src/akimbo/io.py @@ -18,9 +18,13 @@ def ak_to_series(ds, backend="pandas", extract=True): # TODO: actually don't use this, use dask-awkward, or dask.dataframe s = akimbo.polars.PolarsAwkwardAccessor._to_output(ds) + elif backend == "cudf": + import akimbo.cudf + + s = akimbo.cudf.CudfAwkwardAccessor._to_output(ds) else: raise ValueError("Backend must be in {'pandas', 'polars', 'dask'}") - if extract: + if extract and ds.fields: return s.ak.unmerge() return s diff --git a/tests/test_cudf.py b/tests/test_cudf.py index 6f65d28..b15b5c8 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -1,3 +1,5 @@ +import datetime + import pytest import pyarrow as pa @@ -5,6 +7,7 @@ pytest.importorskip("akimbo.cudf") +import akimbo.io import cudf @@ -37,3 +40,28 @@ def test_string_methods(): # non-str output s2 = series.ak.str.len() assert s2.ak.to_list() == [{"s": [3, 2], "i": [0]}, {"s": [3, 2], "i": [2]}] + + +def test_cast(): + s = cudf.Series([0, 1, 2]) + # shows that cast to timestamp needs to be two-step in cudf + s2 = s.ak.cast('m8[s]').ak.cast('M8[s]') + out = s2.ak.to_list() + assert out == [ + datetime.datetime(1970, 1, 1, 0, 0), + datetime.datetime(1970, 1, 1, 0, 0, 1), + datetime.datetime(1970, 1, 1, 0, 0, 2) + ] + + +def test_times(): + data = [ + datetime.datetime(1970, 1, 1, 0, 0), + datetime.datetime(1970, 1, 1, 0, 0, 1), + None, + datetime.datetime(1970, 1, 1, 0, 0, 2) + ] + arr = ak.Array([[data], [], [data]]) + s = akimbo.io.ak_to_series(arr, "cudf") + s2 = s.ak.dt.second + assert s2.ak.to_list() == [[[0, 1, None, 2]], [], [[0, 1, None, 2]]] From 6d6059622f24b7a82d250d28719e2bcd46ba879b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Oct 2024 15:24:33 -0400 Subject: [PATCH 14/18] Examples --- example/cudf-ak.ipynb | 202 ++++++++++++++++++++++++++---------------- src/akimbo/cudf.py | 3 +- tests/test_cudf.py | 18 ++++ 3 files changed, 147 insertions(+), 76 deletions(-) diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index bf4662e..db5008d 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -1,5 +1,22 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "58d18a3a-45b1-425a-b822-e8be0a6c0bc0", + "metadata": {}, + "source": [ + "\n", + "```python\n", + "import awkward as ak\n", + "\n", + "def make_data(fn):\n", + " part = [[[1, 2, 3], [], [4, 5]],\n", + " [[6, 7]]] * 1000000\n", + " arr = ak.Array({\"a\": part})\n", + " ak.to_parquet(arr, fn, extensionarray=False)\n", + "```" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -9,7 +26,7 @@ { "data": { "text/plain": [ - "('2.6.7', '2024.8.1.dev17+gff760f4.d20240812')" + "('2.6.9', '2024.8.1.dev29+g9b9f27f.d20240927')" ] }, "execution_count": 1, @@ -35,13 +52,15 @@ "cell_type": "code", "execution_count": 2, "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "170MiB\n" + "160MiB\n" ] } ], @@ -97,52 +116,6 @@ { "cell_type": "code", "execution_count": 5, - "id": "abfe0ab6-5a89-4885-b654-c84804a4aea4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[[[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " ...,\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]]]\n",
-       "------------------------------------------\n",
-       "type: 2000000 * var * option[var * ?int64]
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# series accessor\n", - "df.a.ak" - ] - }, - { - "cell_type": "code", - "execution_count": 6, "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18", "metadata": { "scrolled": true @@ -164,6 +137,7 @@ " 'argmin',\n", " 'argsort',\n", " 'array',\n", + " 'array_equal',\n", " 'attrs',\n", " 'awkward',\n", " 'backend',\n", @@ -208,8 +182,10 @@ " 'from_json',\n", " 'from_numpy',\n", " 'from_parquet',\n", + " 'from_raggedtensor',\n", " 'from_rdataframe',\n", " 'from_regular',\n", + " 'from_torch',\n", " 'full_like',\n", " 'highlevel',\n", " 'imag',\n", @@ -286,8 +262,10 @@ " 'to_parquet',\n", " 'to_parquet_dataset',\n", " 'to_parquet_row_groups',\n", + " 'to_raggedtensor',\n", " 'to_rdataframe',\n", " 'to_regular',\n", + " 'to_torch',\n", " 'tolist',\n", " 'transform',\n", " 'type',\n", @@ -310,7 +288,7 @@ " 'zip']" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -322,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "8ff11e13-8503-4d79-a64c-993028709ca4", "metadata": {}, "outputs": [ @@ -332,7 +310,7 @@ "array(28000000)" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -343,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "2dd99fe5-0523-46c9-87ec-1392070f5139", "metadata": {}, "outputs": [ @@ -353,7 +331,7 @@ "cupy.ndarray" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -365,7 +343,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d", "metadata": {}, "outputs": [ @@ -373,7 +351,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4.15 ms ± 75.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "4.83 ms ± 16 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -384,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98", "metadata": {}, "outputs": [ @@ -392,20 +370,20 @@ "data": { "text/plain": [ "0 [[-1, -2, -3], [], [-4, -5]]\n", - "1 [[-1, -2, -3], [], [-4, -5]]\n", + "1 [[-6, -7]]\n", "2 [[-1, -2, -3], [], [-4, -5]]\n", - "3 [[-1, -2, -3], [], [-4, -5]]\n", + "3 [[-6, -7]]\n", "4 [[-1, -2, -3], [], [-4, -5]]\n", " ... \n", "1999995 [[-6, -7]]\n", - "1999996 [[-6, -7]]\n", + "1999996 [[-1, -2, -3], [], [-4, -5]]\n", "1999997 [[-6, -7]]\n", - "1999998 [[-6, -7]]\n", + "1999998 [[-1, -2, -3], [], [-4, -5]]\n", "1999999 [[-6, -7]]\n", "Length: 2000000, dtype: list" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -417,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "1b83da2c-5e15-42f6-b594-f2ebaece5ac8", "metadata": {}, "outputs": [ @@ -425,7 +403,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "252MiB\n" + "256MiB\n" ] } ], @@ -435,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795", "metadata": {}, "outputs": [ @@ -444,9 +422,9 @@ "output_type": "stream", "text": [ "0 [[2, 3, 4], [], [5, 6]]\n", - "1 [[2, 3, 4], [], [5, 6]]\n", + "1 [[7, 8]]\n", "2 [[2, 3, 4], [], [5, 6]]\n", - "3 [[2, 3, 4], [], [5, 6]]\n", + "3 [[7, 8]]\n", "4 [[2, 3, 4], [], [5, 6]]\n", "dtype: list\n" ] @@ -467,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", "metadata": { "scrolled": true @@ -476,10 +454,10 @@ { "data": { "text/plain": [ - "array([15, 15, 15, ..., 13, 13, 13], dtype=int32)" + "array([15, 13, 15, ..., 13, 15, 13], dtype=int32)" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +485,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", "metadata": {}, "outputs": [ @@ -515,7 +493,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4.32 ms ± 119 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "6.17 ms ± 118 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -525,7 +503,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "bb781ca6-bdbd-4659-9885-8c634f490fca", "metadata": {}, "outputs": [ @@ -533,7 +511,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "260MiB\n" + "264MiB\n" ] } ], @@ -541,10 +519,84 @@ "gpu_mem() " ] }, + { + "cell_type": "markdown", + "id": "6d1ffd1a-b53b-4657-bab6-9c9223c28808", + "metadata": {}, + "source": [ + "**slice**" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d039a508-e77c-4e23-a583-ec7997a88bb1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [[1], [], [4]]\n", + "1 [[6]]\n", + "2 [[1], [], [4]]\n", + "3 [[6]]\n", + "4 [[1], [], [4]]\n", + " ... \n", + "1999995 [[6]]\n", + "1999996 [[1], [], [4]]\n", + "1999997 [[6]]\n", + "1999998 [[1], [], [4]]\n", + "1999999 [[6]]\n", + "Length: 2000000, dtype: list" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pick the first number of the innermost lists, if there is one\n", + "df.a.ak[:, :, :1]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f149dfaf-c01e-4d0a-8e01-2d20623d216f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [1, 2, 3]\n", + "1 [6, 7]\n", + "2 [1, 2, 3]\n", + "3 [6, 7]\n", + "4 [1, 2, 3]\n", + " ... \n", + "1999995 [6, 7]\n", + "1999996 [1, 2, 3]\n", + "1999997 [6, 7]\n", + "1999998 [1, 2, 3]\n", + "1999999 [6, 7]\n", + "Length: 2000000, dtype: list" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pick the first inner list of each row\n", + "df.a.ak[:, 0, :]" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "764cb543-8e23-4d4c-87bb-27acc8399b2e", + "id": "5aaf1903-6a6a-456f-89a7-3dedb01520ad", "metadata": {}, "outputs": [], "source": [] diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py index 6c2bb8a..abcbeef 100644 --- a/src/akimbo/cudf.py +++ b/src/akimbo/cudf.py @@ -59,7 +59,8 @@ class CudfDatetimeAccessor(DatetimeAccessor): for meth in dir(DatetimeColumn): - if meth.startswith("_"): + if meth.startswith("_") or meth == "strptime": + # strptime belongs in .str, not here! continue @functools.wraps(getattr(DatetimeColumn, meth)) diff --git a/tests/test_cudf.py b/tests/test_cudf.py index b15b5c8..e573df4 100644 --- a/tests/test_cudf.py +++ b/tests/test_cudf.py @@ -21,6 +21,24 @@ def test_operator_overload(): assert s2.ak.to_list() == [[2, 3, 4], [], [5, 6]] +def test_inner_slicing(): + s = pa.array([[1, 2, 3], [0], [4, 5]], type=pa.list_(pa.int32())) + series = cudf.Series(s) + assert ak.backend(series.ak.array) == "cuda" + s2 = series.ak[:, 0] + assert ak.backend(s2.ak.array) == "cuda" + assert isinstance(s2, cudf.Series) + assert s2.ak.to_list() == [1, 0, 4] + s2 = series.ak[:, :2] + assert ak.backend(s2.ak.array) == "cuda" + assert isinstance(s2, cudf.Series) + assert s2.ak.to_list() == [[1, 2], [0], [4, 5]] + s2 = series.ak[:, ::2] + assert ak.backend(s2.ak.array) == "cuda" + assert isinstance(s2, cudf.Series) + assert s2.ak.to_list() == [[1, 3], [0], [4]] + + def test_string_methods(): s = pa.array([{"s": ["hey", "Ho"], "i": [0]}, {"s": ["Gar", "go"], "i": [2]}], type=pa.struct([("s", pa.list_(pa.string())), ("i", pa.list_(pa.int32()))])) From 2cc9bd37ce9bc9213583775d94c713b9e4bc75e5 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Oct 2024 15:34:56 -0400 Subject: [PATCH 15/18] coverage and env --- .coveragerc | 2 ++ .github/workflows/pypi.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..fb1036a --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit = akimbo/cudf.py \ No newline at end of file diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 0a1296c..717053d 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -34,4 +34,4 @@ jobs: pip list - name: test run: | - python -m pytest -v --cov akimbo + python -m pytest -v --cov --cov-config=.coveragerc akimbo diff --git a/pyproject.toml b/pyproject.toml index 7d9bda7..eba4bf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ docs = [ "pandas", "polars", "dask", "pyarrow", "pandoc", "nbsphinx" ] test = [ - "pandas", "polars", "dask", "pyarrow", "pytest", "pytest-cov", "numba", "dask-awkward" + "pandas", "polars", "dask", "pyarrow", "pytest", "pytest-cov", "numba", "dask-awkward", "distributed" ] [project.urls] From b2bfe93a1fb8589bfa6d362f537d33ec8400d4d2 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Oct 2024 15:56:39 -0400 Subject: [PATCH 16/18] path --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index fb1036a..c29b638 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = akimbo/cudf.py \ No newline at end of file +omit = src/akimbo/cudf.py \ No newline at end of file From 8142a1c07672effec755ddbc57cab132e385a883 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Oct 2024 15:59:21 -0400 Subject: [PATCH 17/18] simpler --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index c29b638..244a196 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = src/akimbo/cudf.py \ No newline at end of file +omit = **cudf.py \ No newline at end of file From 680906c6572ddcbe3b0e3bd55cc5f42bed737286 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Oct 2024 16:00:12 -0400 Subject: [PATCH 18/18] path again --- .coveragerc | 2 +- .github/workflows/pypi.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 244a196..c29b638 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = **cudf.py \ No newline at end of file +omit = src/akimbo/cudf.py \ No newline at end of file diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 717053d..914f094 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -34,4 +34,4 @@ jobs: pip list - name: test run: | - python -m pytest -v --cov --cov-config=.coveragerc akimbo + python -m pytest -v --cov-config=.coveragerc --cov akimbo