Skip to content

Commit

Permalink
Add support for nvCOMP batch API (#249)
Browse files Browse the repository at this point in the history
See #248 for more details.

Authors:
  - Alexey Kamenev (https://github.com/Alexey-Kamenev)

Approvers:
  - Mads R. B. Kristensen (https://github.com/madsbk)

URL: #249
  • Loading branch information
Alexey-Kamenev authored Jul 3, 2023
1 parent 1ffe8b1 commit 9e004ce
Show file tree
Hide file tree
Showing 7 changed files with 1,842 additions and 1 deletion.
357 changes: 357 additions & 0 deletions notebooks/nvcomp_batch_codec.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,357 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b543ae63",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import numcodecs\n",
"\n",
"import numpy as np\n",
"\n",
"import zarr\n",
"\n",
"from IPython.display import display\n",
"\n",
"np.set_printoptions(precision=4, suppress=True)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1a8e55d5",
"metadata": {},
"source": [
"### Basic usage\n",
"\n",
"Get nvCOMP codec from numcodecs registry:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "75524650",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"NvCompBatchCodec(algorithm='lz4', options={})"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"NVCOMP_CODEC_ID = \"nvcomp_batch\"\n",
"\n",
"# Currently supported algorithms.\n",
"LZ4_ALGO = \"LZ4\"\n",
"GDEFLATE_ALGO = \"Gdeflate\"\n",
"SNAPPY_ALGO = \"snappy\"\n",
"ZSTD_ALGO = \"zstd\"\n",
"\n",
"codec = numcodecs.registry.get_codec(dict(id=NVCOMP_CODEC_ID, algorithm=LZ4_ALGO))\n",
"# To pass algorithm-specific options, use options parameter:\n",
"# codec = numcodecs.registry.get_codec(dict(id=NVCOMP_CODEC_ID, algo=LZ4_ALGO, options={\"data_type\": 1}))\n",
"\n",
"display(codec)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "46641ccb",
"metadata": {},
"source": [
"Create data:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "12a4fffd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.6243, -0.6118, -0.5282, ..., 0.0436, -0.62 , 0.698 ],\n",
" [-0.4471, 1.2245, 0.4035, ..., 0.4203, 0.811 , 1.0444],\n",
" [-0.4009, 0.824 , -0.5623, ..., 0.7848, -0.9554, 0.5859],\n",
" ...,\n",
" [ 1.3797, 0.1387, 1.2255, ..., 1.8051, 0.3722, 0.1253],\n",
" [ 0.7348, -0.7115, -0.1248, ..., -1.9533, -0.7684, -0.5345],\n",
" [ 0.2183, -0.8654, 0.8886, ..., -1.0141, -0.0627, -1.4379]],\n",
" dtype=float32)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table class=\"zarr-info\"><tbody><tr><th style=\"text-align: left\">Type</th><td style=\"text-align: left\">zarr.core.Array</td></tr><tr><th style=\"text-align: left\">Data type</th><td style=\"text-align: left\">float32</td></tr><tr><th style=\"text-align: left\">Shape</th><td style=\"text-align: left\">(100, 100)</td></tr><tr><th style=\"text-align: left\">Chunk shape</th><td style=\"text-align: left\">(10, 10)</td></tr><tr><th style=\"text-align: left\">Order</th><td style=\"text-align: left\">C</td></tr><tr><th style=\"text-align: left\">Read-only</th><td style=\"text-align: left\">False</td></tr><tr><th style=\"text-align: left\">Compressor</th><td style=\"text-align: left\">NvCompBatchCodec(algorithm='lz4', options={})</td></tr><tr><th style=\"text-align: left\">Store type</th><td style=\"text-align: left\">zarr.storage.KVStore</td></tr><tr><th style=\"text-align: left\">No. bytes</th><td style=\"text-align: left\">40000 (39.1K)</td></tr><tr><th style=\"text-align: left\">No. bytes stored</th><td style=\"text-align: left\">41006 (40.0K)</td></tr><tr><th style=\"text-align: left\">Storage ratio</th><td style=\"text-align: left\">1.0</td></tr><tr><th style=\"text-align: left\">Chunks initialized</th><td style=\"text-align: left\">100/100</td></tr></tbody></table>"
],
"text/plain": [
"Type : zarr.core.Array\n",
"Data type : float32\n",
"Shape : (100, 100)\n",
"Chunk shape : (10, 10)\n",
"Order : C\n",
"Read-only : False\n",
"Compressor : NvCompBatchCodec(algorithm='lz4', options={})\n",
"Store type : zarr.storage.KVStore\n",
"No. bytes : 40000 (39.1K)\n",
"No. bytes stored : 41006 (40.0K)\n",
"Storage ratio : 1.0\n",
"Chunks initialized : 100/100"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"shape = (100, 100)\n",
"chunks = (10, 10)\n",
"\n",
"np.random.seed(1)\n",
"\n",
"x = zarr.array(np.random.randn(*shape).astype(np.float32), chunks=chunks, compressor=codec)\n",
"display(x[:])\n",
"display(x.info)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c15cbdff",
"metadata": {},
"source": [
"Store and load back the data:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "730cde85",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'chunks': [10, 10],\n",
" 'compressor': {'algorithm': 'lz4', 'id': 'nvcomp_batch', 'options': {}},\n",
" 'dtype': '<f4',\n",
" 'fill_value': 0.0,\n",
" 'filters': None,\n",
" 'order': 'C',\n",
" 'shape': [100, 100],\n",
" 'zarr_format': 2}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Use simple dictionary store, see zarr documentation for other options.\n",
"zarr_store = {}\n",
"\n",
"zarr.save_array(zarr_store, x, compressor=codec)\n",
"\n",
"# Check stored metadata.\n",
"meta_info = json.loads(zarr_store[\".zarray\"])\n",
"display(meta_info)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "11338a99",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"zarr-info\"><tbody><tr><th style=\"text-align: left\">Type</th><td style=\"text-align: left\">zarr.core.Array</td></tr><tr><th style=\"text-align: left\">Data type</th><td style=\"text-align: left\">float32</td></tr><tr><th style=\"text-align: left\">Shape</th><td style=\"text-align: left\">(100, 100)</td></tr><tr><th style=\"text-align: left\">Chunk shape</th><td style=\"text-align: left\">(10, 10)</td></tr><tr><th style=\"text-align: left\">Order</th><td style=\"text-align: left\">C</td></tr><tr><th style=\"text-align: left\">Read-only</th><td style=\"text-align: left\">False</td></tr><tr><th style=\"text-align: left\">Compressor</th><td style=\"text-align: left\">NvCompBatchCodec(algorithm='lz4', options={})</td></tr><tr><th style=\"text-align: left\">Store type</th><td style=\"text-align: left\">zarr.storage.KVStore</td></tr><tr><th style=\"text-align: left\">No. bytes</th><td style=\"text-align: left\">40000 (39.1K)</td></tr><tr><th style=\"text-align: left\">No. bytes stored</th><td style=\"text-align: left\">41006 (40.0K)</td></tr><tr><th style=\"text-align: left\">Storage ratio</th><td style=\"text-align: left\">1.0</td></tr><tr><th style=\"text-align: left\">Chunks initialized</th><td style=\"text-align: left\">100/100</td></tr></tbody></table>"
],
"text/plain": [
"Type : zarr.core.Array\n",
"Data type : float32\n",
"Shape : (100, 100)\n",
"Chunk shape : (10, 10)\n",
"Order : C\n",
"Read-only : False\n",
"Compressor : NvCompBatchCodec(algorithm='lz4', options={})\n",
"Store type : zarr.storage.KVStore\n",
"No. bytes : 40000 (39.1K)\n",
"No. bytes stored : 41006 (40.0K)\n",
"Storage ratio : 1.0\n",
"Chunks initialized : 100/100"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y = zarr.open_array(zarr_store)\n",
"display(y.info)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5b6cc2ca",
"metadata": {},
"outputs": [],
"source": [
"# Test the roundtrip.\n",
"np.testing.assert_equal(y[:], x[:])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1a8eea79",
"metadata": {},
"source": [
"### CPU compression / GPU decompression\n",
"\n",
"Some algorithms, such as LZ4, can be used interchangeably on CPU and GPU. For example, the data might be created using CPU LZ4 codec and then decompressed using GPU version of LZ4 codec."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "87d25b76",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'chunks': [10, 10],\n",
" 'compressor': {'acceleration': 1, 'id': 'lz4'},\n",
" 'dtype': '<f4',\n",
" 'fill_value': 0.0,\n",
" 'filters': None,\n",
" 'order': 'C',\n",
" 'shape': [100, 100],\n",
" 'zarr_format': 2}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table class=\"zarr-info\"><tbody><tr><th style=\"text-align: left\">Type</th><td style=\"text-align: left\">zarr.core.Array</td></tr><tr><th style=\"text-align: left\">Data type</th><td style=\"text-align: left\">float32</td></tr><tr><th style=\"text-align: left\">Shape</th><td style=\"text-align: left\">(100, 100)</td></tr><tr><th style=\"text-align: left\">Chunk shape</th><td style=\"text-align: left\">(10, 10)</td></tr><tr><th style=\"text-align: left\">Order</th><td style=\"text-align: left\">C</td></tr><tr><th style=\"text-align: left\">Read-only</th><td style=\"text-align: left\">False</td></tr><tr><th style=\"text-align: left\">Compressor</th><td style=\"text-align: left\">LZ4(acceleration=1)</td></tr><tr><th style=\"text-align: left\">Store type</th><td style=\"text-align: left\">zarr.storage.KVStore</td></tr><tr><th style=\"text-align: left\">No. bytes</th><td style=\"text-align: left\">40000 (39.1K)</td></tr><tr><th style=\"text-align: left\">No. bytes stored</th><td style=\"text-align: left\">40973 (40.0K)</td></tr><tr><th style=\"text-align: left\">Storage ratio</th><td style=\"text-align: left\">1.0</td></tr><tr><th style=\"text-align: left\">Chunks initialized</th><td style=\"text-align: left\">100/100</td></tr></tbody></table>"
],
"text/plain": [
"Type : zarr.core.Array\n",
"Data type : float32\n",
"Shape : (100, 100)\n",
"Chunk shape : (10, 10)\n",
"Order : C\n",
"Read-only : False\n",
"Compressor : LZ4(acceleration=1)\n",
"Store type : zarr.storage.KVStore\n",
"No. bytes : 40000 (39.1K)\n",
"No. bytes stored : 40973 (40.0K)\n",
"Storage ratio : 1.0\n",
"Chunks initialized : 100/100"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table class=\"zarr-info\"><tbody><tr><th style=\"text-align: left\">Type</th><td style=\"text-align: left\">zarr.core.Array</td></tr><tr><th style=\"text-align: left\">Data type</th><td style=\"text-align: left\">float32</td></tr><tr><th style=\"text-align: left\">Shape</th><td style=\"text-align: left\">(100, 100)</td></tr><tr><th style=\"text-align: left\">Chunk shape</th><td style=\"text-align: left\">(10, 10)</td></tr><tr><th style=\"text-align: left\">Order</th><td style=\"text-align: left\">C</td></tr><tr><th style=\"text-align: left\">Read-only</th><td style=\"text-align: left\">False</td></tr><tr><th style=\"text-align: left\">Compressor</th><td style=\"text-align: left\">NvCompBatchCodec(algorithm='lz4', options={})</td></tr><tr><th style=\"text-align: left\">Store type</th><td style=\"text-align: left\">zarr.storage.KVStore</td></tr><tr><th style=\"text-align: left\">No. bytes</th><td style=\"text-align: left\">40000 (39.1K)</td></tr><tr><th style=\"text-align: left\">No. bytes stored</th><td style=\"text-align: left\">40883 (39.9K)</td></tr><tr><th style=\"text-align: left\">Storage ratio</th><td style=\"text-align: left\">1.0</td></tr><tr><th style=\"text-align: left\">Chunks initialized</th><td style=\"text-align: left\">100/100</td></tr></tbody></table>"
],
"text/plain": [
"Type : zarr.core.Array\n",
"Data type : float32\n",
"Shape : (100, 100)\n",
"Chunk shape : (10, 10)\n",
"Order : C\n",
"Read-only : False\n",
"Compressor : NvCompBatchCodec(algorithm='lz4', options={})\n",
"Store type : zarr.storage.KVStore\n",
"No. bytes : 40000 (39.1K)\n",
"No. bytes stored : 40883 (39.9K)\n",
"Storage ratio : 1.0\n",
"Chunks initialized : 100/100"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Get default (CPU) implementation of LZ4 codec.\n",
"cpu_codec = numcodecs.registry.get_codec({\"id\": \"lz4\"})\n",
"\n",
"x = zarr.array(np.random.randn(*shape).astype(np.float32), chunks=chunks, compressor=cpu_codec)\n",
"# Define a simple, dictionary-based store. In real scenarios this can be a filesystem or some other persistent store.\n",
"store = {}\n",
"zarr.save_array(store, x, compressor=cpu_codec)\n",
"\n",
"# Check that the data was written by the expected codec.\n",
"meta = json.loads(store[\".zarray\"])\n",
"display(meta)\n",
"assert meta[\"compressor\"][\"id\"] == \"lz4\"\n",
"\n",
"# Change codec to GPU/nvCOMP-based.\n",
"meta[\"compressor\"] = {\"id\": NVCOMP_CODEC_ID, \"algorithm\": LZ4_ALGO}\n",
"store[\".zarray\"] = json.dumps(meta).encode()\n",
"\n",
"y = zarr.open_array(store, compressor=codec)\n",
"\n",
"display(x.info)\n",
"display(y.info)\n",
"\n",
"np.testing.assert_equal(x[:], y[:])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9294992",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 2 additions & 1 deletion python/kvikio/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
# =============================================================================

# Set the list of Cython files to build
set(cython_modules arr libnvcomp libkvikio)
set(cython_modules arr libnvcomp libnvcomp_ll libkvikio)

# Build all of the Cython targets
add_cython_modules("${cython_modules}")

target_link_libraries(libnvcomp nvcomp::nvcomp)
target_link_libraries(libnvcomp_ll nvcomp::nvcomp)
Loading

0 comments on commit 9e004ce

Please sign in to comment.