diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb index e1f329e..95a7aba 100644 --- a/example/cudf-ak.ipynb +++ b/example/cudf-ak.ipynb @@ -103,7 +103,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -339,7 +339,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "6.91 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "6.92 ms ± 93 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -446,44 +446,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", "metadata": { "scrolled": true }, "outputs": [ { - "ename": "TypingError", - "evalue": "Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypingError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 15\u001b[0m\n\u001b[1;32m 13\u001b[0m blocksize \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m256\u001b[39m\n\u001b[1;32m 14\u001b[0m numblocks \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mlen\u001b[39m(df\u001b[38;5;241m.\u001b[39ma) \u001b[38;5;241m+\u001b[39m blocksize \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m blocksize\n\u001b[0;32m---> 15\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mak\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43minner_sum\u001b[49m\u001b[43m[\u001b[49m\u001b[43mnumblocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocksize\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m out\n", - "File \u001b[0;32m/floppy/code/awkward-pandas/src/awkward_pandas/mixin.py:158\u001b[0m, in \u001b[0;36mAccessor.apply\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m, fn: Callable, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Perform arbitrary function on all the values of the series\"\"\"\u001b[39;00m\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_output(\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:539\u001b[0m, in \u001b[0;36m_LaunchConfiguration.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m--> 539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgriddim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblockdim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 540\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msharedmem\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:681\u001b[0m, in \u001b[0;36mCUDADispatcher.call\u001b[0;34m(self, args, griddim, blockdim, stream, sharedmem)\u001b[0m\n\u001b[1;32m 679\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverloads\u001b[38;5;241m.\u001b[39mvalues()))\n\u001b[1;32m 680\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 681\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_dispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cuda_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 683\u001b[0m kernel\u001b[38;5;241m.\u001b[39mlaunch(args, griddim, blockdim, stream, sharedmem)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:689\u001b[0m, in \u001b[0;36mCUDADispatcher._compile_for_args\u001b[0;34m(self, *args, **kws)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kws\n\u001b[1;32m 688\u001b[0m argtypes \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtypeof_pyval(a) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m--> 689\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:932\u001b[0m, in \u001b[0;36mCUDADispatcher.compile\u001b[0;34m(self, sig)\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_can_compile:\n\u001b[1;32m 930\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilation disabled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 932\u001b[0m kernel \u001b[38;5;241m=\u001b[39m \u001b[43m_Kernel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetoptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[38;5;66;03m# We call bind to force codegen, so that there is a cubin to cache\u001b[39;00m\n\u001b[1;32m 934\u001b[0m kernel\u001b[38;5;241m.\u001b[39mbind()\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/dispatcher.py:83\u001b[0m, in \u001b[0;36m_Kernel.__init__\u001b[0;34m(self, py_func, argtypes, link, debug, lineinfo, inline, fastmath, extensions, max_registers, opt, device)\u001b[0m\n\u001b[1;32m 77\u001b[0m nvvm_options \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 78\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfastmath\u001b[39m\u001b[38;5;124m'\u001b[39m: fastmath,\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mopt\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m3\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m opt \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 80\u001b[0m }\n\u001b[1;32m 82\u001b[0m cc \u001b[38;5;241m=\u001b[39m get_current_device()\u001b[38;5;241m.\u001b[39mcompute_capability\n\u001b[0;32m---> 83\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompile_cuda\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpy_func\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtypes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvoid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margtypes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mdebug\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdebug\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mlineinfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlineinfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43minline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastmath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastmath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[43m \u001b[49m\u001b[43mnvvm_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnvvm_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 89\u001b[0m \u001b[43m \u001b[49m\u001b[43mcc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m tgt_ctx \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mtarget_context\n\u001b[1;32m 91\u001b[0m code \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpy_func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__code__\u001b[39m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/cuda/compiler.py:196\u001b[0m, in \u001b[0;36mcompile_cuda\u001b[0;34m(pyfunc, return_type, args, debug, lineinfo, inline, fastmath, nvvm_options, cc)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumba\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtarget_extension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m target_override\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m target_override(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 196\u001b[0m cres \u001b[38;5;241m=\u001b[39m \u001b[43mcompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtypingctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mtargetctx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpyfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mflags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlocals\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mpipeline_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCUDACompiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 205\u001b[0m library \u001b[38;5;241m=\u001b[39m cres\u001b[38;5;241m.\u001b[39mlibrary\n\u001b[1;32m 206\u001b[0m library\u001b[38;5;241m.\u001b[39mfinalize()\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:751\u001b[0m, in \u001b[0;36mcompile_extra\u001b[0;34m(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Compiler entry point\u001b[39;00m\n\u001b[1;32m 728\u001b[0m \n\u001b[1;32m 729\u001b[0m \u001b[38;5;124;03mParameter\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 747\u001b[0m \u001b[38;5;124;03m compiler pipeline\u001b[39;00m\n\u001b[1;32m 748\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 749\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m pipeline_class(typingctx, targetctx, library,\n\u001b[1;32m 750\u001b[0m args, return_type, flags, \u001b[38;5;28mlocals\u001b[39m)\n\u001b[0;32m--> 751\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile_extra\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:445\u001b[0m, in \u001b[0;36mCompilerBase.compile_extra\u001b[0;34m(self, func)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted \u001b[38;5;241m=\u001b[39m ()\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mlifted_from \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_bytecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:513\u001b[0m, in \u001b[0;36mCompilerBase._compile_bytecode\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124;03mPopulate and run pipeline for bytecode input\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfunc_ir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 513\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compile_core\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:492\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus\u001b[38;5;241m.\u001b[39mfail_reason \u001b[38;5;241m=\u001b[39m e\n\u001b[1;32m 491\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_final_pipeline:\n\u001b[0;32m--> 492\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CompilerError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll available pipelines exhausted\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler.py:479\u001b[0m, in \u001b[0;36mCompilerBase._compile_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 477\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 479\u001b[0m \u001b[43mpm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mcr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:368\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 365\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m mode pipeline (step: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m \\\n\u001b[1;32m 366\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpipeline_name, pass_desc)\n\u001b[1;32m 367\u001b[0m patched_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_patch_error(msg, e)\n\u001b[0;32m--> 368\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m patched_exception\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:356\u001b[0m, in \u001b[0;36mPassManager.run\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 354\u001b[0m pass_inst \u001b[38;5;241m=\u001b[39m _pass_registry\u001b[38;5;241m.\u001b[39mget(pss)\u001b[38;5;241m.\u001b[39mpass_inst\n\u001b[1;32m 355\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pass_inst, CompilerPass):\n\u001b[0;32m--> 356\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_runPass\u001b[49m\u001b[43m(\u001b[49m\u001b[43midx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpass_inst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLegacy pass in use\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_lock.py:35\u001b[0m, in \u001b[0;36m_CompilerLock.__call__.._acquire_compile_lock\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_acquire_compile_lock\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:311\u001b[0m, in \u001b[0;36mPassManager._runPass\u001b[0;34m(self, index, pss, internal_state)\u001b[0m\n\u001b[1;32m 309\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_initialization, internal_state)\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m pass_time:\n\u001b[0;32m--> 311\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[43mcheck\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_pass\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minternal_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SimpleTimer() \u001b[38;5;28;01mas\u001b[39;00m finalize_time:\n\u001b[1;32m 313\u001b[0m mutated \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m check(pss\u001b[38;5;241m.\u001b[39mrun_finalizer, internal_state)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/compiler_machinery.py:273\u001b[0m, in \u001b[0;36mPassManager._runPass..check\u001b[0;34m(func, compiler_state)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcheck\u001b[39m(func, compiler_state):\n\u001b[0;32m--> 273\u001b[0m mangled \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcompiler_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mangled \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 275\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass implementations should return True/False. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompilerPass with name \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m did not.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:112\u001b[0m, in \u001b[0;36mBaseTypeInference.run_pass\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;124;03mType inference and legalization\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fallback_context(state, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFunction \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m failed type inference\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;241m%\u001b[39m (state\u001b[38;5;241m.\u001b[39mfunc_id\u001b[38;5;241m.\u001b[39mfunc_name,)):\n\u001b[1;32m 111\u001b[0m \u001b[38;5;66;03m# Type inference\u001b[39;00m\n\u001b[0;32m--> 112\u001b[0m typemap, return_type, calltypes, errs \u001b[38;5;241m=\u001b[39m \u001b[43mtype_inference_stage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtypingctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 114\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtargetctx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc_ir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocals\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 120\u001b[0m state\u001b[38;5;241m.\u001b[39mtypemap \u001b[38;5;241m=\u001b[39m typemap\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# save errors in case of partial typing\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typed_passes.py:93\u001b[0m, in \u001b[0;36mtype_inference_stage\u001b[0;34m(typingctx, targetctx, interp, args, return_type, locals, raise_errors)\u001b[0m\n\u001b[1;32m 91\u001b[0m infer\u001b[38;5;241m.\u001b[39mbuild_constraint()\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# return errors in case of partial typing\u001b[39;00m\n\u001b[0;32m---> 93\u001b[0m errs \u001b[38;5;241m=\u001b[39m \u001b[43minfer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpropagate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraise_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraise_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 94\u001b[0m typemap, restype, calltypes \u001b[38;5;241m=\u001b[39m infer\u001b[38;5;241m.\u001b[39munify(raise_errors\u001b[38;5;241m=\u001b[39mraise_errors)\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _TypingResults(typemap, restype, calltypes, errs)\n", - "File \u001b[0;32m~/miniconda3/envs/cuda/lib/python3.10/site-packages/numba/core/typeinfer.py:1091\u001b[0m, in \u001b[0;36mTypeInferer.propagate\u001b[0;34m(self, raise_errors)\u001b[0m\n\u001b[1;32m 1088\u001b[0m force_lit_args \u001b[38;5;241m=\u001b[39m [e \u001b[38;5;28;01mfor\u001b[39;00m e \u001b[38;5;129;01min\u001b[39;00m errors\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e, ForceLiteralArg)]\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m force_lit_args:\n\u001b[0;32m-> 1091\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m errors[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m reduce(operator\u001b[38;5;241m.\u001b[39mor_, force_lit_args)\n", - "\u001b[0;31mTypingError\u001b[0m: Failed in cuda mode pipeline (step: nopython frontend)\nNo implementation of function Function() found for signature:\n \n >>> iadd(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))\n \nThere are 22 candidate implementations:\n - Of which 20 did not match due to:\n Overload of function 'iadd': File: : Line N/A.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match.\n - Of which 2 did not match due to:\n Operator Overload in function 'iadd': File: unknown: Line unknown.\n With argument(s): '(int32, OptionalType(ak.ArrayView(ak.UnmaskedArrayType(ak.NumpyArrayType(array(int64, 1d, C), {}), {}), None, ())))':\n No match for registered cases:\n * (int64, int64) -> int64\n * (int64, uint64) -> int64\n * (uint64, int64) -> int64\n * (uint64, uint64) -> uint64\n * (float32, float32) -> float32\n * (float64, float64) -> float64\n * (complex64, complex64) -> complex64\n * (complex128, complex128) -> complex128\n\nDuring: typing of intrinsic-call at /tmp/ipykernel_7665/2322563490.py (10)\n\nFile \"../../../../tmp/ipykernel_7665/2322563490.py\", line 10:\n\n" - ] + "data": { + "text/plain": [ + "array([15, 15, 15, ..., 13, 13, 13], dtype=int32)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -495,110 +472,53 @@ " tid = numba.cuda.grid(1)\n", " if tid < len(array):\n", " out[tid] = 0\n", - " for i, x in enumerate(array[tid]):\n", - " out[tid] += x\n", + " for x in array[tid]:\n", + " for y in x:\n", + " out[tid] += y\n", "\n", "out = cp.empty(len(df.a), dtype=\"int32\")\n", "blocksize = 256\n", "numblocks = (len(df.a) + blocksize - 1) // blocksize\n", - "df.a.ak.apply(inner_sum[numblocks, blocksize], out)\n", + "\n", + "df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))\n", "out\n" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "f9826104-a4e4-4afe-b98f-2d0ef941e88c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[[[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " [[1, 2, 3], [], [4, 5]],\n",
-       " ...,\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]],\n",
-       " [[6, 7]]]\n",
-       "------------------------------------------\n",
-       "type: 2000000 * var * option[var * ?int64]
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.a.ak.array" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "891dfe23-6534-4395-9dd1-5098b8e34aed", + "execution_count": 14, + "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "numba.cuda.dispatcher._LaunchConfiguration" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "3.32 ms ± 85.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] } ], "source": [ - "type(inner_sum[numblocks, blocksize])" + "%timeit df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "b5760e04-8bb2-42ee-93be-94b455efe326", + "execution_count": 15, + "id": "bb781ca6-bdbd-4659-9885-8c634f490fca", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "numba.cuda.dispatcher.CUDADispatcher" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "370MiB\n" + ] } ], "source": [ - "type(inner_sum)" + "gpu_mem() " ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {