diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100755
index 0000000000..b917dfd3aa
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,19 @@
+#cpp code owners
+cpp/               @rapidsai/kvikio-cpp-codeowners
+
+#python code owners
+python/            @rapidsai/kvikio-python-codeowners
+
+#legate code owners
+legate/            @rapidsai/kvikio-cpp-codeowners @rapidsai/kvikio-python-codeowners
+
+#cmake code owners
+**/CMakeLists.txt  @rapidsai/kvikio-cmake-codeowners
+**/cmake/          @rapidsai/kvikio-cmake-codeowners
+build.sh           @rapidsai/kvikio-cmake-codeowners
+python/setup.py    @rapidsai/kvikio-cmake-codeowners @rapidsai/kvikio-python-codeowners
+
+#build/ops code owners
+.github/           @rapidsai/ops-codeowners
+ci/                @rapidsai/ops-codeowners
+conda/             @rapidsai/ops-codeowners
diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 0000000000..7907548a1a
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,21 @@
+# Documentation for config - https://github.com/actions/labeler#common-examples
+
+KvikIO (Python):
+  - 'python/**'
+  - 'notebooks/**'
+
+libkvikio:
+  - 'cpp/**'
+
+legate:
+  - 'legate/**'
+
+CMake:
+  - '**/CMakeLists.txt'
+  - '**/cmake/**'
+
+ci:
+  - 'ci/**'
+
+conda:
+  - 'conda/**'
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 576759d7cb..405aa3766a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@cuda-120
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -56,7 +56,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 7610b96449..e327502669 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -19,38 +19,38 @@ jobs:
       - conda-python-tests
       - docs-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-120
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@cuda-120
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120
     with:
       build_type: pull-request
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 390e1ac263..e9c623ae16 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index ca75fa0439..c9845112bc 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -67,9 +67,6 @@ for DEP in "${DEPENDENCIES[@]}"; do
   for FILE in dependencies.yaml conda/environments/*.yaml; do
     sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" ${FILE}
   done
-  for FILE in python/pyproject.toml legate/pyproject.toml; do
-    sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE}
-  done
 done
 
 # CI files
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index ccce762b8d..2d5a2e6b4d 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -8,7 +8,7 @@ channels:
 dependencies:
 - c-compiler
 - cmake>=3.26.4
-- cuda-python>=11.7.1,<12.0
+- cuda-python>=11.7.1,<12.0a0
 - cudatoolkit=11.8
 - cudf==23.8.*
 - cupy>=12.0.0
@@ -22,7 +22,9 @@ dependencies:
 - libcufile=1.4.0.31
 - ninja
 - numpy>=1.21
+- nvcc_linux-64=11.8
 - nvcomp==2.6.1
+- packaging
 - pre-commit
 - pydata-sphinx-theme
 - pytest
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
new file mode 100644
index 0000000000..08982b7666
--- /dev/null
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -0,0 +1,36 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- conda-forge
+- nvidia
+dependencies:
+- c-compiler
+- cmake>=3.26.4
+- cuda-nvcc
+- cuda-python>=12.0,<13.0a0
+- cuda-version=12.0
+- cudf==23.8.*
+- cupy>=12.0.0
+- cxx-compiler
+- cython>=0.29,<0.30
+- dask>=2022.05.2
+- distributed>=2022.05.2
+- doxygen=1.8.20
+- gcc_linux-64=11.*
+- libcufile
+- libcufile-dev
+- ninja
+- numpy>=1.21
+- packaging
+- pre-commit
+- pydata-sphinx-theme
+- pytest
+- pytest-cov
+- python>=3.9,<3.11
+- scikit-build>=0.13.1
+- sphinx<6
+- sysroot_linux-64=2.17
+- zarr
+name: all_cuda-120_arch-x86_64
diff --git a/conda/recipes/kvikio/conda_build_config.yaml b/conda/recipes/kvikio/conda_build_config.yaml
index 24b3e99dc0..7bce9d8853 100644
--- a/conda/recipes/kvikio/conda_build_config.yaml
+++ b/conda/recipes/kvikio/conda_build_config.yaml
@@ -5,6 +5,9 @@ cxx_compiler_version:
   - 11
 
 cuda_compiler:
+  - cuda-nvcc
+
+cuda11_compiler:
   - nvcc
 
 sysroot_version:
diff --git a/conda/recipes/kvikio/meta.yaml b/conda/recipes/kvikio/meta.yaml
index 93094e3d2b..0dc83e35f5 100644
--- a/conda/recipes/kvikio/meta.yaml
+++ b/conda/recipes/kvikio/meta.yaml
@@ -36,7 +36,9 @@ build:
     - cd python
     - python -m pip install . -vv
   ignore_run_exports_from:
-    - {{ compiler('cuda') }}
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }}
+    {% endif %}
 
 requirements:
   build:
@@ -44,23 +46,35 @@ requirements:
     - ninja
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
-    - {{ compiler('cuda') }} {{ cuda_version }}
+    - cuda-version ={{ cuda_version }}
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }} ={{ cuda_version }}
+    {% else %}
+    - {{ compiler('cuda') }}
+    {% endif %}
     - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - python
     - setuptools
     - pip
     - cython >=0.29,<0.30
+    {% if cuda_major == "11" %}
     - cudatoolkit ={{ cuda_version }}
+    {% endif %}
+    - cuda-version ={{ cuda_version }}
     - nvcomp {{ nvcomp_version }}
     - scikit-build >=0.13.1
-    - libkvikio {{ version }}
+    - libkvikio ={{ version }}
   run:
     - python
     - numpy >=1.20
     - cupy >=12.0.0
     - zarr
+    - packaging
+    - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+    {% if cuda_major == "11" %}
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
+    {% endif %}
 
 test:
   requires:
diff --git a/conda/recipes/libkvikio/conda_build_config.yaml b/conda/recipes/libkvikio/conda_build_config.yaml
index 710ee57cb2..141285ba4d 100644
--- a/conda/recipes/libkvikio/conda_build_config.yaml
+++ b/conda/recipes/libkvikio/conda_build_config.yaml
@@ -8,17 +8,20 @@ cmake_version:
   - ">=3.26.4"
 
 cuda_compiler:
+  - cuda-nvcc
+
+cuda11_compiler:
   - nvcc
 
 sysroot_version:
   - "2.17"
 
-# The CTK libraries below are missing from the conda-forge::cudatoolkit
-# package. The "*_host_*" version specifiers correspond to `11.8` packages and the
-# "*_run_*" version specifiers correspond to `11.x` packages.
+# The CTK libraries below are missing from the conda-forge::cudatoolkit package
+# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
+# and the "*_run_*" version specifiers correspond to `11.x` packages.
 
-libcufile_host_version:
+cuda11_libcufile_host_version:
   - "1.4.0.31"
 
-libcufile_run_version:
+cuda11_libcufile_run_version:
   - ">=1.0.0.82,<=1.4.0.31"
diff --git a/conda/recipes/libkvikio/meta.yaml b/conda/recipes/libkvikio/meta.yaml
index b8f453cfb0..0e2f32d539 100644
--- a/conda/recipes/libkvikio/meta.yaml
+++ b/conda/recipes/libkvikio/meta.yaml
@@ -36,13 +36,23 @@ requirements:
     - cmake {{ cmake_version }}
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
-    - {{ compiler('cuda') }} {{ cuda_version }}
+    - cuda-version ={{ cuda_version }}
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }} ={{ cuda_version }}
+    {% else %}
+    - {{ compiler('cuda') }}
+    {% endif %}
     - ninja
     - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
-    - cudatoolkit {{ cuda_version }}.*
-    - libcufile {{ libcufile_host_version }} # [linux64]
-    - libcufile-dev {{ libcufile_host_version }} # [linux64]
+    - cuda-version ={{ cuda_version }}
+    {% if cuda_major == "11" %}
+    - cudatoolkit ={{ cuda_version }}
+    - libcufile {{ cuda11_libcufile_host_version }}      # [linux64]
+    - libcufile-dev {{ cuda11_libcufile_host_version }}  # [linux64]
+    {% else %}
+    - libcufile-dev  # [linux64]
+    {% endif %}
 
 outputs:
   - name: libkvikio
@@ -53,13 +63,22 @@ outputs:
       string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       run_exports:
         - {{ pin_subpackage("libkvikio", max_pin="x.x") }}
+      ignore_run_exports_from:
+        {% if cuda_major == "11" %}
+        - {{ compiler('cuda11') }}
+        {% endif %}
     requirements:
       build:
         - cmake {{ cmake_version }}
       run:
+        - cuda-version {{ cuda_spec }}
+        {% if cuda_major == "11" %}
         - cudatoolkit {{ cuda_spec }}
-        - libcufile {{ libcufile_run_version }} # [linux64]
-        - libcufile-dev {{ libcufile_run_version }} # [linux64]
+        - libcufile {{ cuda11_libcufile_run_version }}      # [linux64]
+        - libcufile-dev {{ cuda11_libcufile_run_version }}  # [linux64]
+        {% else %}
+        - libcufile-dev  # [linux64]
+        {% endif %}
     test:
         commands:
           - test -f $PREFIX/include/kvikio/file_handle.hpp
@@ -75,13 +94,28 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        {% if cuda_major == "11" %}
+        - {{ compiler('cuda11') }}
+        {% endif %}
     requirements:
       build:
         - cmake {{ cmake_version }}
+      host:
+        - cuda-version ={{ cuda_version }}
+        {% if cuda_major == "11" %}
+        - cudatoolkit ={{ cuda_version }}
+        - libcufile {{ cuda11_libcufile_run_version }}  # [linux64]
+        {% else %}
+        - cuda-cudart-dev
+        - libcufile-dev  # [linux64]
+        {% endif %}
       run:
+        - cuda-version {{ cuda_spec }}
+        {% if cuda_major == "11" %}
         - cudatoolkit {{ cuda_spec }}
-        - libcufile {{ libcufile_run_version }} # [linux64]
-        - libcufile-dev {{ libcufile_run_version }} # [linux64]
+        - libcufile {{ cuda11_libcufile_run_version }}  # [linux64]
+        {% endif %}
     about:
       home: https://rapids.ai
       license: Apache-2.0
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c71b9c0af9..9e288fc1a4 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -38,6 +38,8 @@ rapids_cmake_build_type(Release)
 # build options
 option(KvikIO_BUILD_EXAMPLES "Configure CMake to build examples" ON)
 
+rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
+
 # find packages we depend on
 rapids_cpm_init()
 
@@ -84,8 +86,10 @@ target_link_libraries(kvikio INTERFACE Threads::Threads)
 target_link_libraries(kvikio INTERFACE CUDA::toolkit)
 if(cuFile_FOUND)
   target_link_libraries(kvikio INTERFACE cufile::cuFile_interface)
+  target_compile_definitions(kvikio INTERFACE KVIKIO_CUFILE_FOUND)
+
   if(cuFile_BATCH_API_FOUND)
-    target_compile_definitions(kvikio INTERFACE CUFILE_BATCH_API_FOUND)
+    target_compile_definitions(kvikio INTERFACE KVIKIO_CUFILE_BATCH_API_FOUND)
   endif()
 endif()
 target_link_libraries(kvikio INTERFACE ${CMAKE_DL_LIBS})
diff --git a/cpp/examples/CMakeLists.txt b/cpp/examples/CMakeLists.txt
index 677348ef16..bb653ed8fb 100644
--- a/cpp/examples/CMakeLists.txt
+++ b/cpp/examples/CMakeLists.txt
@@ -22,9 +22,6 @@ target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart)
 
 if(CMAKE_COMPILER_IS_GNUCXX)
   set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
-  if(NOT cuFile_FOUND)
-    set(KVIKIO_CXX_FLAGS "${KVIKIO_CXX_FLAGS};-DKVIKIO_DISABLE_CUFILE")
-  endif()
   target_compile_options(BASIC_IO_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
 endif()
 
diff --git a/cpp/include/kvikio/batch.hpp b/cpp/include/kvikio/batch.hpp
index 25c86f71d9..9c58a50b1d 100644
--- a/cpp/include/kvikio/batch.hpp
+++ b/cpp/include/kvikio/batch.hpp
@@ -44,7 +44,7 @@ struct BatchOp {
   CUfileOpcode_t opcode;
 };
 
-#ifdef CUFILE_BATCH_API_FOUND
+#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
 
 /**
  * @brief Handle of an cuFile batch using  semantic.
diff --git a/cpp/include/kvikio/buffer.hpp b/cpp/include/kvikio/buffer.hpp
index f092198ff2..26e8ed5b60 100644
--- a/cpp/include/kvikio/buffer.hpp
+++ b/cpp/include/kvikio/buffer.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@ inline void buffer_register(const void* devPtr_base,
                             const std::vector<int>& errors_to_ignore = std::vector<int>())
 {
   if (defaults::compat_mode()) { return; }
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
   CUfileError_t status = cuFileAPI::instance().BufRegister(devPtr_base, size, flags);
   if (status.err != CU_FILE_SUCCESS) {
     // Check if `status.err` is in `errors_to_ignore`
@@ -70,7 +70,7 @@ inline void buffer_register(const void* devPtr_base,
 inline void buffer_deregister(const void* devPtr_base)
 {
   if (defaults::compat_mode()) { return; }
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
   CUFILE_TRY(cuFileAPI::instance().BufDeregister(devPtr_base));
 #endif
 }
diff --git a/cpp/include/kvikio/driver.hpp b/cpp/include/kvikio/driver.hpp
index 24526565c5..7d73f465aa 100644
--- a/cpp/include/kvikio/driver.hpp
+++ b/cpp/include/kvikio/driver.hpp
@@ -40,7 +40,7 @@ inline void set_driver_flag(unsigned int& prop, unsigned int flag, bool val) noe
 }
 }  // namespace detail
 
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
 
 class DriverInitializer {
   // Optional, if not used cuFiles opens the driver automatically
@@ -178,7 +178,7 @@ class DriverProperties {
 
   [[nodiscard]] std::size_t get_max_batch_io_size()
   {
-#ifdef CUFILE_BATCH_API_FOUND
+#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
     lazy_init();
     return _props.max_batch_io_size;
 #else
diff --git a/cpp/include/kvikio/error.hpp b/cpp/include/kvikio/error.hpp
index e8e0e00641..ca809c63b5 100644
--- a/cpp/include/kvikio/error.hpp
+++ b/cpp/include/kvikio/error.hpp
@@ -56,7 +56,7 @@ struct CUfileException : public std::runtime_error {
 #define CUDA_DRIVER_TRY_1(_call) CUDA_DRIVER_TRY_2(_call, CUfileException)
 #endif
 
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
 #ifndef CUFILE_TRY
 #define CUFILE_TRY(...)                                         \
   GET_CUFILE_TRY_MACRO(__VA_ARGS__, CUFILE_TRY_2, CUFILE_TRY_1) \
diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp
index 834d44d48d..b63d12c5e7 100644
--- a/cpp/include/kvikio/file_handle.hpp
+++ b/cpp/include/kvikio/file_handle.hpp
@@ -168,7 +168,7 @@ class FileHandle {
     }
 
     if (_compat_mode) { return; }
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
     CUfileDescr_t desc{};  // It is important to set to zero!
     desc.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD;
     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access)
@@ -213,7 +213,7 @@ class FileHandle {
     if (closed()) { return; }
 
     if (!_compat_mode) {
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
       cuFileAPI::instance().HandleDeregister(_handle);
 #endif
     }
@@ -308,7 +308,7 @@ class FileHandle {
     if (_compat_mode) {
       return posix_device_read(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
     }
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
     ssize_t ret = cuFileAPI::instance().Read(
       _handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
     if (ret == -1) {
@@ -358,7 +358,7 @@ class FileHandle {
     if (_compat_mode) {
       return posix_device_write(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
     }
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
     ssize_t ret = cuFileAPI::instance().Write(
       _handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
     if (ret == -1) {
diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp
index f0ccdbfe83..4791b39cd4 100644
--- a/cpp/include/kvikio/shim/cufile.hpp
+++ b/cpp/include/kvikio/shim/cufile.hpp
@@ -23,7 +23,7 @@
 
 namespace kvikio {
 
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
 
 /**
  * @brief Shim layer of the cuFile C-API
@@ -47,7 +47,7 @@ class cuFileAPI {
   decltype(cuFileDriverSetMaxCacheSize)* DriverSetMaxCacheSize{nullptr};
   decltype(cuFileDriverSetMaxPinnedMemSize)* DriverSetMaxPinnedMemSize{nullptr};
 
-#ifdef CUFILE_BATCH_API_FOUND
+#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
   decltype(cuFileBatchIOSetUp)* BatchIOSetUp{nullptr};
   decltype(cuFileBatchIOSubmit)* BatchIOSubmit{nullptr};
   decltype(cuFileBatchIOGetStatus)* BatchIOGetStatus{nullptr};
@@ -83,7 +83,7 @@ class cuFileAPI {
     get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize));
     get_symbol(DriverSetMaxPinnedMemSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxPinnedMemSize));
 
-#ifdef CUFILE_BATCH_API_FOUND
+#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
     get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp));
     get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit));
     get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus));
@@ -141,7 +141,7 @@ class cuFileAPI {
  *
  * @return The boolean answer
  */
-#ifdef KVIKIO_CUFILE_EXIST
+#ifdef KVIKIO_CUFILE_FOUND
 inline bool is_cufile_library_available()
 {
   try {
@@ -173,7 +173,7 @@ inline bool is_cufile_available()
  *
  * @return The boolean answer
  */
-#ifdef CUFILE_BATCH_API_FOUND
+#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
 inline bool is_batch_available()
 {
   try {
diff --git a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
index 9f1a28cf33..71bc0f3c90 100644
--- a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
+++ b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp
@@ -17,19 +17,14 @@
 
 /**
  * In order to support compilation when `cufile.h` isn't available, we
- * wrap all use of cufile in a `#ifdef KVIKIO_CUFILE_EXIST` guard.
+ * wrap all use of cufile in a `#ifdef KVIKIO_CUFILE_FOUND` guard.
  *
  * The motivation here is to make KvikIO work in all circumstances so
  * that libraries doesn't have to implement there own fallback solutions.
  */
-#ifndef KVIKIO_DISABLE_CUFILE
-#if __has_include(<cufile.h>)
+#ifdef KVIKIO_CUFILE_FOUND
 #include <cufile.h>
-#define KVIKIO_CUFILE_EXIST
-#endif
-#endif
-
-#ifndef KVIKIO_CUFILE_EXIST
+#else
 using CUfileDriverControlFlags_t = enum CUfileDriverControlFlags {
   CU_FILE_USE_POLL_MODE     = 0, /*!< use POLL mode. properties.use_poll_mode*/
   CU_FILE_ALLOW_COMPAT_MODE = 1  /*!< allow COMPATIBILITY mode. properties.allow_compat_mode*/
@@ -39,7 +34,7 @@ using CUfileHandle_t = void*;
 
 // If the Batch API isn't defined, we define some of the data types here.
 // Notice, this doesn't need to be ABI compatible with the cufile definitions.
-#ifndef CUFILE_BATCH_API_FOUND
+#ifndef KVIKIO_CUFILE_BATCH_API_FOUND
 typedef enum CUfileOpcode { CUFILE_READ = 0, CUFILE_WRITE } CUfileOpcode_t;
 
 typedef enum CUFILEStatus_enum {
diff --git a/dependencies.yaml b/dependencies.yaml
index 9285ea6d32..4bbf5cc4a1 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,16 +3,18 @@ files:
   all:
     output: conda
     matrix:
-      cuda: ["11.8"]
+      cuda: ["11.8", "12.0"]
       arch: [x86_64]
     includes:
       - build
       - checks
       - cudatoolkit
       - docs
+      - notebooks
       - py_version
       - run
       - test_python
+      - test_python_legate
   test_cpp:
     output: none
     includes:
@@ -78,6 +80,7 @@ files:
       key: test
     includes:
       - test_python
+      - test_python_legate
 channels:
   - rapidsai
   - rapidsai-nightly
@@ -113,6 +116,23 @@ dependencies:
             packages:
               - gcc_linux-aarch64=11.*
               - sysroot_linux-aarch64=2.17
+      - output_types: conda
+        matrices:
+          - matrix:
+              arch: x86_64
+              cuda: "11.8"
+            packages:
+              - nvcc_linux-64=11.8
+          - matrix:
+              arch: aarch64
+              cuda: "11.8"
+            packages:
+              - nvcc_linux-aarch64=11.8
+          - matrix:
+              cuda: "12.0"
+            packages:
+              - cuda-version=12.0
+              - cuda-nvcc
   checks:
     common:
       - output_types: [conda, requirements]
@@ -122,6 +142,10 @@ dependencies:
     specific:
       - output_types: conda
         matrices:
+          - matrix:
+              cuda: "12.0"
+            packages:
+              - cuda-version=12.0
           - matrix:
               cuda: "11.8"
             packages:
@@ -140,6 +164,12 @@ dependencies:
               - cudatoolkit=11.2
       - output_types: conda
         matrices:
+          - matrix:
+              cuda: "12.0"
+              arch: x86_64
+            packages:
+              - libcufile
+              - libcufile-dev
           - matrix:
               cuda: "11.8"
               arch: x86_64
@@ -213,6 +243,7 @@ dependencies:
         packages:
           - numpy>=1.21
           - zarr
+          - packaging
       - output_types: conda
         packages:
           - cupy>=12.0.0
@@ -231,9 +262,27 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - cuda-python>=11.7.1,<12.0
-          - cudf==23.8.*
-          - dask>=2022.05.2
-          - distributed>=2022.05.2
+          - &dask dask>=2022.05.2
           - pytest
           - pytest-cov
+    specific:
+      - output_types: [conda, requirements, pyproject]
+        matrices:
+          - matrix:
+              cuda: "12.0"
+            packages:
+              - cuda-python>=12.0,<13.0a0
+          - matrix: # All CUDA 11 versions
+            packages:
+              - cuda-python>=11.7.1,<12.0a0
+  test_python_legate:
+    common:
+      - output_types: [conda, requirements, pyproject]
+        packages:
+          - *dask
+          - distributed>=2022.05.2
+  notebooks:
+    common:
+      - output_types: conda
+        packages:
+          - cudf==23.8.*
diff --git a/legate/pyproject.toml b/legate/pyproject.toml
index 93e3e7d60a..6f6b440b2b 100644
--- a/legate/pyproject.toml
+++ b/legate/pyproject.toml
@@ -25,6 +25,7 @@ requires-python = ">=3.9"
 dependencies = [
     "cupy-cuda11x>=12.0.0",
     "numpy>=1.21",
+    "packaging",
     "zarr",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
@@ -39,8 +40,7 @@ classifiers = [
 
 [project.optional-dependencies]
 test = [
-    "cuda-python>=11.7.1,<12.0",
-    "cudf==23.8.*",
+    "cuda-python>=11.7.1,<12.0a0",
     "dask>=2022.05.2",
     "distributed>=2022.05.2",
     "pytest",
diff --git a/python/benchmarks/single-node-io.py b/python/benchmarks/single-node-io.py
index cceeb62415..72b57300cc 100644
--- a/python/benchmarks/single-node-io.py
+++ b/python/benchmarks/single-node-io.py
@@ -214,9 +214,8 @@ def run_zarr(args):
     import kvikio.zarr
 
     dir_path = args.dir / "zarr"
-
-    if not hasattr(zarr.Array, "meta_array"):
-        raise RuntimeError("requires Zarr v2.13+")
+    if not kvikio.zarr.supported:
+        raise RuntimeError(f"requires Zarr >={kvikio.zarr.MINIMUM_ZARR_VERSION}")
 
     compressor = None
     if args.zarr_compressor is not None:
diff --git a/python/kvikio/zarr.py b/python/kvikio/zarr.py
index ba5d0769f0..50a6756db8 100644
--- a/python/kvikio/zarr.py
+++ b/python/kvikio/zarr.py
@@ -1,63 +1,114 @@
 # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
 # See file LICENSE for terms.
 
+import contextlib
 import os
 import os.path
 from abc import abstractmethod
+from typing import Any, Mapping, Sequence
 
 import cupy
+import numpy
 import numpy as np
+import zarr
 import zarr.creation
 import zarr.storage
 from numcodecs.abc import Codec
 from numcodecs.compat import ensure_contiguous_ndarray_like
 from numcodecs.registry import register_codec
+from packaging.version import parse
 
 import kvikio
 import kvikio.nvcomp
-from kvikio._lib.arr import asarray
+
+MINIMUM_ZARR_VERSION = "2.15"
+
+# Is this version of zarr supported? We depend on the `Context`
+# argument introduced in https://github.com/zarr-developers/zarr-python/pull/1131
+# in zarr v2.15.
+supported = parse(zarr.__version__) >= parse(MINIMUM_ZARR_VERSION)
 
 
 class GDSStore(zarr.storage.DirectoryStore):
     """GPUDirect Storage (GDS) class using directories and files.
 
-    This class works like `zarr.storage.DirectoryStore` but use GPU
-    buffers and will use GDS when applicable.
-    The store supports both CPU and GPU buffers but when reading, GPU
-    buffers are returned always.
+    This class works like `zarr.storage.DirectoryStore` but implements
+    getitems() in order to support direct reading into device memory.
+    It uses KvikIO for reads and writes, which in turn will use GDS
+    when applicable.
 
-    TODO: Write metadata to disk in order to preserve the item types such that
-    GPU items are read as GPU device buffers and CPU items are read as bytes.
+    Notes
+    -----
+    GDSStore doesn't implement `_fromfile()`, so non-array data such as
+    metadata is always read into host memory.
+    This is because only zarr.Array uses getitems() to retrieve data.
     """
 
+    # The default output array type used by getitems().
+    default_meta_array = numpy.empty(())
+
+    def __init__(self, *args, **kwargs) -> None:
+        if not kvikio.zarr.supported:
+            raise RuntimeError(
+                f"GDSStore requires Zarr >={kvikio.zarr.MINIMUM_ZARR_VERSION}"
+            )
+        super().__init__(*args, **kwargs)
+
     def __eq__(self, other):
         return isinstance(other, GDSStore) and self.path == other.path
 
-    def _fromfile(self, fn):
-        """Read `fn` into device memory _unless_ `fn` refers to Zarr metadata"""
-        if os.path.basename(fn) in [
-            zarr.storage.array_meta_key,
-            zarr.storage.group_meta_key,
-            zarr.storage.attrs_key,
-        ]:
-            return super()._fromfile(fn)
-        else:
-            nbytes = os.path.getsize(fn)
-            with kvikio.CuFile(fn, "r") as f:
-                ret = cupy.empty(nbytes, dtype="u1")
-                read = f.read(ret)
-                assert read == nbytes
-                return ret
-
     def _tofile(self, a, fn):
-        a = asarray(a)
-        assert a.contiguous
-        if a.cuda:
-            with kvikio.CuFile(fn, "w") as f:
-                written = f.write(a)
-                assert written == a.nbytes
-        else:
-            super()._tofile(a.obj, fn)
+        with kvikio.CuFile(fn, "w") as f:
+            written = f.write(a)
+            assert written == a.nbytes
+
+    def getitems(
+        self,
+        keys: Sequence[str],
+        *,
+        contexts: Mapping[str, Mapping] = {},
+    ) -> Mapping[str, Any]:
+        """Retrieve data from multiple keys.
+
+        Parameters
+        ----------
+        keys : Iterable[str]
+            The keys to retrieve
+        contexts: Mapping[str, Context]
+            A mapping of keys to their context. Each context is a mapping of
+            store-specific information. If the "meta_array" key exists, its value is
+            used as the output array; otherwise GDSStore.default_meta_array is used.
+
+        Returns
+        -------
+        Mapping
+            A collection mapping the input keys to their results.
+        """
+        ret = {}
+        io_results = []
+
+        with contextlib.ExitStack() as stack:
+            for key in keys:
+                filepath = os.path.join(self.path, key)
+                if not os.path.isfile(filepath):
+                    continue
+                try:
+                    meta_array = contexts[key]["meta_array"]
+                except KeyError:
+                    meta_array = self.default_meta_array
+
+                nbytes = os.path.getsize(filepath)
+                f = stack.enter_context(kvikio.CuFile(filepath, "r"))
+                ret[key] = numpy.empty_like(meta_array, shape=(nbytes,), dtype="u1")
+                io_results.append((f.pread(ret[key]), nbytes))
+
+            for future, nbytes in io_results:
+                nbytes_read = future.get()
+                if nbytes_read != nbytes:
+                    raise RuntimeError(
+                        f"Incomplete read ({nbytes_read}) expected {nbytes}"
+                    )
+        return ret
 
 
 class NVCompCompressor(Codec):
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 500849337a..5f484d8ee6 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -25,6 +25,7 @@ requires-python = ">=3.9"
 dependencies = [
     "cupy-cuda11x>=12.0.0",
     "numpy>=1.21",
+    "packaging",
     "zarr",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
@@ -39,10 +40,8 @@ classifiers = [
 
 [project.optional-dependencies]
 test = [
-    "cuda-python>=11.7.1,<12.0",
-    "cudf==23.8.*",
+    "cuda-python>=11.7.1,<12.0a0",
     "dask>=2022.05.2",
-    "distributed>=2022.05.2",
     "pytest",
     "pytest-cov",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/tests/test_benchmarks.py b/python/tests/test_benchmarks.py
index ec2ec2b395..ee0321d40a 100644
--- a/python/tests/test_benchmarks.py
+++ b/python/tests/test_benchmarks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
 # See file LICENSE for terms.
 
 import os
@@ -29,9 +29,9 @@ def test_single_node_io(run_cmd, tmp_path, api):
     """Test benchmarks/single-node-io.py"""
 
     if "zarr" in api:
-        zarr = pytest.importorskip("zarr")
-        if not hasattr(zarr.Array, "meta_array"):
-            pytest.skip("requires Zarr v2.13+")
+        kz = pytest.importorskip("kvikio.zarr")
+        if not kz.supported:
+            pytest.skip(f"requires Zarr >={kz.MINIMUM_ZARR_VERSION}")
 
     retcode = run_cmd(
         cmd=[
diff --git a/python/tests/test_zarr.py b/python/tests/test_zarr.py
index 99d47c9df1..296c5f1ee6 100644
--- a/python/tests/test_zarr.py
+++ b/python/tests/test_zarr.py
@@ -10,10 +10,12 @@
 zarr = pytest.importorskip("zarr")
 kvikio_zarr = pytest.importorskip("kvikio.zarr")
 
-# To support CuPy arrays, we need the `meta_array` argument introduced in
-# Zarr v2.13, see <https://github.com/zarr-developers/zarr-python/pull/934>
-if not hasattr(zarr.Array, "meta_array"):
-    pytest.skip("requires Zarr v2.13+", allow_module_level=True)
+
+if not kvikio_zarr.supported:
+    pytest.skip(
+        f"requires Zarr >={kvikio_zarr.MINIMUM_ZARR_VERSION}",
+        allow_module_level=True,
+    )
 
 
 @pytest.fixture
@@ -22,46 +24,117 @@ def store(tmp_path):
     return kvikio_zarr.GDSStore(tmp_path / "test-file.zarr")
 
 
-@pytest.mark.parametrize("array_type", ["numpy", "cupy"])
-def test_direct_store_access(store, array_type):
+def test_direct_store_access(store, xp):
     """Test accessing the GDS Store directly"""
 
-    module = pytest.importorskip(array_type)
-    a = module.arange(5, dtype="u1")
+    a = xp.arange(5, dtype="u1")
     store["a"] = a
     b = store["a"]
 
-    # Notice, GDSStore always returns a cupy array
-    assert type(b) is cupy.ndarray
-    cupy.testing.assert_array_equal(a, b)
+    # Notice, unless using getitems(), GDSStore always returns bytes
+    assert isinstance(b, bytes)
+    assert (xp.frombuffer(b, dtype="u1") == a).all()
 
 
-def test_array(store):
-    """Test Zarr array"""
+@pytest.mark.parametrize("xp_write", ["numpy", "cupy"])
+@pytest.mark.parametrize("xp_read_a", ["numpy", "cupy"])
+@pytest.mark.parametrize("xp_read_b", ["numpy", "cupy"])
+def test_direct_store_access_getitems(store, xp_write, xp_read_a, xp_read_b):
+    """Test accessing the GDS Store directly using getitems()"""
 
-    a = cupy.arange(100)
-    z = zarr.array(
-        a, chunks=10, compressor=None, store=store, meta_array=cupy.empty(())
+    xp_read_a = pytest.importorskip(xp_read_a)
+    xp_read_b = pytest.importorskip(xp_read_b)
+    xp_write = pytest.importorskip(xp_write)
+    a = xp_write.arange(5, dtype="u1")
+    b = a * 2
+    store["a"] = a
+    store["b"] = b
+
+    res = store.getitems(
+        keys=["a", "b"],
+        contexts={
+            "a": {"meta_array": xp_read_a.empty(())},
+            "b": {"meta_array": xp_read_b.empty(())},
+        },
     )
+    assert isinstance(res["a"], xp_read_a.ndarray)
+    assert isinstance(res["b"], xp_read_b.ndarray)
+    cupy.testing.assert_array_equal(res["a"], a)
+    cupy.testing.assert_array_equal(res["b"], b)
+
+
+def test_array(store, xp):
+    """Test Zarr array"""
+
+    a = xp.arange(100)
+    z = zarr.array(a, chunks=10, compressor=None, store=store, meta_array=xp.empty(()))
+    assert isinstance(z.meta_array, type(a))
     assert a.shape == z.shape
     assert a.dtype == z.dtype
     assert isinstance(a, type(z[:]))
-    cupy.testing.assert_array_equal(a, z[:])
+    xp.testing.assert_array_equal(a, z[:])
 
 
-def test_group(store):
+def test_group(store, xp):
     """Test Zarr group"""
 
-    g = zarr.open_group(store, meta_array=cupy.empty(()))
+    g = zarr.open_group(store, meta_array=xp.empty(()))
     g.ones("data", shape=(10, 11), dtype=int, compressor=None)
     a = g["data"]
     assert a.shape == (10, 11)
     assert a.dtype == int
     assert isinstance(a, zarr.Array)
-    assert isinstance(a[:], cupy.ndarray)
+    assert isinstance(a.meta_array, xp.ndarray)
+    assert isinstance(a[:], xp.ndarray)
     assert (a[:] == 1).all()
 
 
+def test_open_array(store, xp):
+    """Test write and read-back through zarr.open_array() on the store"""
+
+    a = xp.arange(10)
+    z = zarr.open_array(
+        store,
+        shape=a.shape,
+        dtype=a.dtype,
+        chunks=(10,),
+        compressor=None,
+        meta_array=xp.empty(()),
+    )
+    z[:] = a
+    assert a.shape == z.shape
+    assert a.dtype == z.dtype
+    assert isinstance(a, type(z[:]))
+    xp.testing.assert_array_equal(a, z[:])
+
+
+@pytest.mark.parametrize("inline_array", [True, False])
+def test_dask_read(store, xp, inline_array):
+    """Test reading a Zarr array into Dask using da.from_zarr()"""
+
+    da = pytest.importorskip("dask.array")
+    a = xp.arange(100)
+    z = zarr.array(a, chunks=10, compressor=None, store=store, meta_array=xp.empty(()))
+    d = da.from_zarr(z, inline_array=inline_array)
+    d += 1
+    xp.testing.assert_array_equal(a + 1, d.compute())
+
+
+def test_dask_write(store, xp):
+    """Test writing a Dask array to the store using da.to_zarr()"""
+
+    da = pytest.importorskip("dask.array")
+
+    # Write dask array to disk using Zarr
+    a = xp.arange(100)
+    d = da.from_array(a, chunks=10)
+    da.to_zarr(d, store, compressor=None, meta_array=xp.empty(()))
+
+    # Validate the written Zarr array
+    z = zarr.open_array(store)
+    xp.testing.assert_array_equal(a, z[:])
+
+
 @pytest.mark.parametrize("xp_read", ["numpy", "cupy"])
 @pytest.mark.parametrize("xp_write", ["numpy", "cupy"])
 @pytest.mark.parametrize("compressor", kvikio_zarr.nvcomp_compressors)