diff --git a/python/kvikio/nvcomp_codec.py b/python/kvikio/nvcomp_codec.py
index a43edff1df..8f3b73dd79 100644
--- a/python/kvikio/nvcomp_codec.py
+++ b/python/kvikio/nvcomp_codec.py
@@ -104,16 +104,21 @@ def encode_batch(self, bufs: List[Any]) -> List[Any]:
         temp_buf = cp.empty(temp_size, dtype=cp.uint8)
 
         # Includes header with the original buffer size,
-        # same as in numcodecs codec.
+        # same as in numcodecs codec. This enables data compatibility between
+        # numcodecs default codecs and this nvCOMP batch codec.
         # TODO(akamenev): probably should use contiguous buffer which stores all chunks?
         comp_chunks_header = [
             cp.empty(self.HEADER_SIZE_BYTES + comp_chunk_size, dtype=cp.uint8)
             for _ in range(num_chunks)
         ]
+        # comp_chunks is used as a container that stores pointers to actual chunks.
+        # nvCOMP requires this container to be in GPU memory.
         comp_chunks = cp.array(
             [c.data.ptr + self.HEADER_SIZE_BYTES for c in comp_chunks_header],
             dtype=cp.uint64,
         )
+        # Similar to comp_chunks, comp_chunk_sizes is an array that contains
+        # chunk sizes and is required by nvCOMP to be in GPU memory.
         comp_chunk_sizes = cp.empty(num_chunks, dtype=cp.uint64)
 
         self._algo.compress(
@@ -213,12 +218,16 @@ def decode_batch(
         temp_buf = cp.empty(temp_size, dtype=cp.uint8)
 
         # Prepare uncompressed chunks buffers.
+        # First, allocate chunks of appropriate sizes and then
+        # copy the pointers to a pointer array in GPU memory as required by nvCOMP.
+        # TODO(akamenev): probably can allocate single contiguous buffer.
         uncomp_chunks = [cp.empty(size, dtype=cp.uint8) for size in uncomp_chunk_sizes]
         uncomp_chunk_ptrs = cp.array(
             [c.data.ptr for c in uncomp_chunks], dtype=cp.uint64
         )
-
+        # Sizes array must be in GPU memory.
         uncomp_chunk_sizes = cp.array(uncomp_chunk_sizes, dtype=cp.uint64)
+
         # TODO(akamenev): currently we provide the following 2 buffers to decompress()
         # but do not check/use them afterwards since some of the algos
         # (e.g. LZ4 and Gdeflate) do not require it and run faster
diff --git a/python/tests/test_nvcomp_codec.py b/python/tests/test_nvcomp_codec.py
index a951b7e302..99a90834c0 100644
--- a/python/tests/test_nvcomp_codec.py
+++ b/python/tests/test_nvcomp_codec.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 # See file LICENSE for terms.
 
+import itertools as it
 import json
 
 import numcodecs
@@ -26,6 +27,23 @@ def _get_codec(algo: str):
     return numcodecs.registry.get_codec(codec_args)
 
 
+@pytest.fixture(params=[(16,), (8, 16), (16, 16)])
+def shape(request):
+    return request.param
+
+
+# Separate fixture for combinations of shapes and chunks, since
+# chunks array must have the same rank as data array.
+@pytest.fixture(
+    params=it.chain(
+        it.product([(32,)], [(16,), (32,), (40,)]),
+        it.product([(16, 8), (16, 16)], [(8, 16), (16, 16), (40, 12)]),
+    )
+)
+def shape_chunks(request):
+    return request.param
+
+
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
 def test_codec_registry(algo: str):
     codec = _get_codec(algo)
@@ -33,8 +51,7 @@ def test_codec_registry(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_basic(algo: str):
-    shape = (16, 16)
+def test_basic(algo: str, shape):
     codec = NvCompBatchCodec(algo)
 
     # Create data.
@@ -49,9 +66,8 @@ def test_basic(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_basic_zarr(algo: str):
-    shape = (16, 16)
-    chunks = (8, 8)
+def test_basic_zarr(algo: str, shape_chunks):
+    shape, chunks = shape_chunks
 
     codec = NvCompBatchCodec(algo)
 
@@ -86,14 +102,13 @@ def test_batch_comp_decomp(algo: str):
 
 
 @pytest.mark.parametrize("algo", SUPPORTED_CODECS)
-def test_comp_decomp(algo: str):
+def test_comp_decomp(algo: str, shape_chunks):
+    shape, chunks = shape_chunks
+
     codec = _get_codec(algo)
 
     np.random.seed(1)
 
-    shape = (16, 16)
-    chunks = (8, 8)
-
     data = np.random.randn(*shape).astype(np.float32)
 
     z1 = zarr.array(data, chunks=chunks, compressor=codec)