diff --git a/python/kvikio/nvcomp_codec.py b/python/kvikio/nvcomp_codec.py
index a43edff1df..8f3b73dd79 100644
--- a/python/kvikio/nvcomp_codec.py
+++ b/python/kvikio/nvcomp_codec.py
@@ -104,16 +104,21 @@ def encode_batch(self, bufs: List[Any]) -> List[Any]:
         temp_buf = cp.empty(temp_size, dtype=cp.uint8)
 
         # Includes header with the original buffer size,
-        # same as in numcodecs codec.
+        # same as in numcodecs codec. This enables data compatibility between
+        # numcodecs default codecs and this nvCOMP batch codec.
         # TODO(akamenev): probably should use contiguous buffer which stores all chunks?
         comp_chunks_header = [
             cp.empty(self.HEADER_SIZE_BYTES + comp_chunk_size, dtype=cp.uint8)
             for _ in range(num_chunks)
         ]
+        # comp_chunks is used as a container that stores pointers to actual chunks.
+        # nvCOMP requires this container to be in GPU memory.
         comp_chunks = cp.array(
             [c.data.ptr + self.HEADER_SIZE_BYTES for c in comp_chunks_header],
             dtype=cp.uint64,
         )
+        # Similar to comp_chunks, comp_chunk_sizes is an array that contains
+        # chunk sizes and is required by nvCOMP to be in GPU memory.
         comp_chunk_sizes = cp.empty(num_chunks, dtype=cp.uint64)
 
         self._algo.compress(
@@ -213,12 +218,16 @@ def decode_batch(
         temp_buf = cp.empty(temp_size, dtype=cp.uint8)
 
         # Prepare uncompressed chunks buffers.
+        # First, allocate chunks of appropriate sizes and then
+        # copy the pointers to a pointer array in GPU memory as required by nvCOMP.
+        # TODO(akamenev): probably can allocate single contiguous buffer.
         uncomp_chunks = [cp.empty(size, dtype=cp.uint8) for size in uncomp_chunk_sizes]
         uncomp_chunk_ptrs = cp.array(
             [c.data.ptr for c in uncomp_chunks], dtype=cp.uint64
         )
-
+        # Sizes array must be in GPU memory.
         uncomp_chunk_sizes = cp.array(uncomp_chunk_sizes, dtype=cp.uint64)
+
         # TODO(akamenev): currently we provide the following 2 buffers to decompress()
         # but do not check/use them afterwards since some of the algos
         # (e.g. LZ4 and Gdeflate) do not require it and run faster
diff --git a/python/tests/test_nvcomp_codec.py b/python/tests/test_nvcomp_codec.py
index a951b7e302..99a90834c0 100644
--- a/python/tests/test_nvcomp_codec.py
+++ b/python/tests/test_nvcomp_codec.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 # See file LICENSE for terms.
 
+import itertools as it
 import json
 
 import numcodecs
@@ -26,6 +27,23 @@ def _get_codec(algo: str):
     return numcodecs.registry.get_codec(codec_args)
 
 
+@pytest.fixture(params=[(16,), (8, 16), (16, 16)])
+def shape(request):
+    return request.param
+
+
+# Separate fixture for combinations of shapes and chunks, since
+# chunks array must have the same rank as data array.
+@pytest.fixture(
+    params=it.chain(
+        it.product([(32,)], [(16,), (32,), (40,)]),
+        it.product([(16, 8), (16, 16)], [(8, 16), (16, 16), (40, 12)]),
+    )
+)
+def shape_chunks(request):
+    return request.param
+
+
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
 def test_codec_registry(algo: str):
     codec = _get_codec(algo)
@@ -33,8 +51,7 @@ def test_codec_registry(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_basic(algo: str):
-    shape = (16, 16)
+def test_basic(algo: str, shape):
     codec = NvCompBatchCodec(algo)
 
     # Create data.
@@ -49,9 +66,8 @@ def test_basic(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_basic_zarr(algo: str):
-    shape = (16, 16)
-    chunks = (8, 8)
+def test_basic_zarr(algo: str, shape_chunks):
+    shape, chunks = shape_chunks
 
     codec = NvCompBatchCodec(algo)
 
@@ -86,14 +102,13 @@ def test_batch_comp_decomp(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_comp_decomp(algo: str):
+def test_comp_decomp(algo: str, shape_chunks):
+    shape, chunks = shape_chunks
+
     codec = _get_codec(algo)
 
     np.random.seed(1)
 
-    shape = (16, 16)
-    chunks = (8, 8)
-
     data = np.random.randn(*shape).astype(np.float32)
 
     z1 = zarr.array(data, chunks=chunks, compressor=codec)