diff --git a/CHANGELOG.md b/CHANGELOG.md
index b90befa05..fdff50b7b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# cuCIM 21.10.00 (Date TBD)
+
+Please see https://github.com/rapidsai/cucim/releases/tag/v21.10.00a for the latest changes to this development branch.
+
 # cuCIM 21.08.00 (4 Aug 2021)
 
 ## 🐛 Bug Fixes
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 861e6cd37..ee7701cbf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,7 +103,7 @@ include(ExternalProject)
 ################################################################################
 # Options
 ################################################################################
-option(CUCIM_SUPPORT_GDS "Support cufile library" OFF)
+option(CUCIM_SUPPORT_GDS "Support cufile library" ON)
 option(CUCIM_STATIC_GDS "Use static cufile library" OFF)
 option(CUCIM_SUPPORT_CUDA "Support CUDA" ON)
 
diff --git a/README.md b/README.md
index 14c8e7e85..884a81f6c 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,6 @@
 
 `<CUDA version>` should be 11.0+ (e.g., `11.0`, `11.2`, etc.)
 
-**NOTE:** The first cuCIM conda package (v0.19.0) would be available on 4/19/2021.
-
 #### Conda (nightlies)
 
 > conda create -n cucim -c rapidsai-nightly -c conda-forge cucim cudatoolkit=`<CUDA version>`
@@ -53,6 +51,7 @@ docker rm -v ${tmp_id}
 ```
 
 ## Build/Install from Source
+
 See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment).
 
 ## Contributing Guide
diff --git a/VERSION b/VERSION
index a9997c68e..208121388 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-21.08.01
+21.10.00
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 3766d3774..3da03c2e8 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -52,7 +52,7 @@ gpuci_logger "Activate conda env"
 conda activate rapids
 
 gpuci_logger "Install dependencies"
-gpuci_conda_retry install -y -c rapidsai-nightly \
+gpuci_mamba_retry install -y -c rapidsai-nightly \
     "cudatoolkit=${CUDA_VER}.*" \
     "rapids-build-env=$MINOR_VERSION.*"
 
@@ -69,7 +69,7 @@ CUCIM_BLD_PATH=/opt/conda/envs/rapids/conda-bld
 mkdir -p ${CUCIM_BLD_PATH}
 
 
-gpuci_conda_retry build -c ${LIBCUCIM_BLD_PATH} -c conda-forge -c rapidsai-nightly \
+gpuci_mamba_retry build -c ${LIBCUCIM_BLD_PATH} -c conda-forge -c rapidsai-nightly \
     --dirty \
     --no-remove-work-dir \
     --croot ${CUCIM_BLD_PATH} \
@@ -82,7 +82,7 @@ gpuci_conda_retry build -c ${LIBCUCIM_BLD_PATH} -c conda-forge -c rapidsai-night
 
 # Install cuCIM and its dependencies
 gpuci_logger "Installing cuCIM and its dependencies"
-gpuci_conda_retry install -y -c ${LIBCUCIM_BLD_PATH} -c ${CUCIM_BLD_PATH} -c rapidsai-nightly \
+gpuci_mamba_retry install -y -c ${LIBCUCIM_BLD_PATH} -c ${CUCIM_BLD_PATH} -c rapidsai-nightly \
     "rapids-build-env=$MINOR_VERSION.*" \
     libcucim \
     cucim
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7bef1f7e3..804ee3368 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(${CUCIM_PACKAGE_NAME}
         include/cucim/memory/memory_manager.h
         include/cucim/util/cuda.h
         include/cucim/util/file.h
+        include/cucim/util/platform.h
         include/cucim/3rdparty/dlpack/dlpack.h
         include/cucim/3rdparty/dlpack/dlpackcpp.h
         src/cuimage.cpp
@@ -95,7 +96,8 @@ add_library(${CUCIM_PACKAGE_NAME}
         src/logger/logger.cpp
         src/logger/timer.cpp
         src/memory/memory_manager.cu
-        src/util/file.cpp)
+        src/util/file.cpp
+        src/util/platform.cpp)
 
 # Compile options
 set_target_properties(${CUCIM_PACKAGE_NAME}
diff --git a/cpp/include/cucim/cuimage.h b/cpp/include/cucim/cuimage.h
index 9e4f0dcc1..18efc028b 100644
--- a/cpp/include/cucim/cuimage.h
+++ b/cpp/include/cucim/cuimage.h
@@ -164,6 +164,8 @@ class EXPORT_VISIBLE CuImage : public std::enable_shared_from_this<CuImage>
 
     void save(std::string file_path) const;
 
+    void close();
+
 private:
     using Mutex = std::mutex;
     using ScopedLock = std::scoped_lock<Mutex>;
diff --git a/cpp/include/cucim/util/platform.h b/cpp/include/cucim/util/platform.h
new file mode 100644
index 000000000..699762756
--- /dev/null
+++ b/cpp/include/cucim/util/platform.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+#ifndef CUCIM_UTIL_PLATFORM_H
+#define CUCIM_UTIL_PLATFORM_H
+
+#include "cucim/macros/api_header.h"
+
+/**
+ * @brief Platform-specific macros and functions.
+ */
+namespace cucim::util
+{
+
+EXPORT_VISIBLE bool is_in_wsl();
+
+} // namespace cucim::util
+
+#endif // CUCIM_UTIL_PLATFORM_H
diff --git a/cpp/plugins/cucim.kit.cuslide/CMakeLists.txt b/cpp/plugins/cucim.kit.cuslide/CMakeLists.txt
index 75ee847f0..3014b9479 100644
--- a/cpp/plugins/cucim.kit.cuslide/CMakeLists.txt
+++ b/cpp/plugins/cucim.kit.cuslide/CMakeLists.txt
@@ -172,6 +172,8 @@ add_library(${CUCIM_PLUGIN_NAME}
     src/cuslide/deflate/deflate.h
     src/cuslide/jpeg/libjpeg_turbo.cpp
     src/cuslide/jpeg/libjpeg_turbo.h
+    src/cuslide/raw/raw.cpp
+    src/cuslide/raw/raw.h
     src/cuslide/tiff/ifd.cpp
     src/cuslide/tiff/ifd.h
     src/cuslide/tiff/tiff.cpp
diff --git a/cpp/plugins/cucim.kit.cuslide/VERSION b/cpp/plugins/cucim.kit.cuslide/VERSION
index a9997c68e..208121388 100644
--- a/cpp/plugins/cucim.kit.cuslide/VERSION
+++ b/cpp/plugins/cucim.kit.cuslide/VERSION
@@ -1 +1 @@
-21.08.01
+21.10.00
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/deflate/deflate.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/deflate/deflate.cpp
index f97ab5965..03db0b895 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/deflate/deflate.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/deflate/deflate.cpp
@@ -40,6 +40,20 @@ bool decode_deflate(int fd,
     (void)out_device;
     struct libdeflate_decompressor* d;
 
+    if (dest == nullptr)
+    {
+        throw std::runtime_error("'dest' shouldn't be nullptr in decode_deflate()");
+    }
+
+    // Allocate memory only when dest is not null
+    if (*dest == nullptr)
+    {
+        if ((*dest = (unsigned char*)malloc(dest_nbytes)) == nullptr)
+        {
+            throw std::runtime_error("Unable to allocate uncompressed image buffer");
+        }
+    }
+
     d = libdeflate_alloc_decompressor();
 
     if (d == nullptr)
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/jpeg/libjpeg_turbo.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/jpeg/libjpeg_turbo.cpp
index 887cdd7d0..891c216cb 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/jpeg/libjpeg_turbo.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/jpeg/libjpeg_turbo.cpp
@@ -108,6 +108,11 @@ bool decode_libjpeg(int fd,
     if (size == 0)
         THROW("determining input file size", "Input file contains no data");
 
+    if (dest == nullptr)
+    {
+        THROW("checking dest ptr", "'dest' shouldn't be nullptr in decode_libjpeg()");
+    }
+
     if (jpeg_buf == nullptr)
     {
         if ((jpeg_buf = (unsigned char*)tjAlloc(size)) == nullptr)
@@ -145,7 +150,7 @@ bool decode_libjpeg(int fd,
     if (*dest == nullptr)
     {
         if ((*dest = (unsigned char*)tjAlloc(width * height * tjPixelSize[pixelFormat])) == nullptr)
-            THROW_UNIX("allocating uncompressed image buffer");
+            THROW_UNIX("Unable to allocate uncompressed image buffer");
     }
 
     if (tjDecompress2(tjInstance, jpeg_buf, size, (unsigned char*)*dest, width, 0, height, pixelFormat, flags) < 0)
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.cpp
new file mode 100644
index 000000000..dee3926ef
--- /dev/null
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.cpp
@@ -0,0 +1,85 @@
+/*
+ * Apache License, Version 2.0
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Code below is using libdeflate library which is under MIT license
+ * Please see LICENSE-3rdparty.md for the detail.
+ */
+
+#include "raw.h"
+
+#include <cstring>
+#include <stdexcept>
+#include <unistd.h>
+
+
+namespace cuslide::raw
+{
+
+bool decode_raw(int fd,
+                unsigned char* raw_buf,
+                uint64_t offset,
+                uint64_t size,
+                uint8_t** dest,
+                uint64_t dest_nbytes,
+                const cucim::io::Device& out_device)
+{
+    (void)out_device;
+
+    if (dest == nullptr)
+    {
+        throw std::runtime_error("'dest' shouldn't be nullptr in decode_raw()");
+    }
+
+    // Allocate memory only when dest is not null
+    if (*dest == nullptr)
+    {
+        if ((*dest = (unsigned char*)malloc(dest_nbytes)) == nullptr)
+        {
+            throw std::runtime_error("Unable to allocate uncompressed image buffer");
+        }
+    }
+
+    if (raw_buf == nullptr)
+    {
+        if ((raw_buf = (unsigned char*)malloc(size)) == nullptr)
+        {
+            throw std::runtime_error("Unable to allocate buffer for raw data!");
+        }
+
+        if (pread(fd, raw_buf, size, offset) < 1)
+        {
+            throw std::runtime_error("Unable to read file for raw data!");
+        }
+    }
+    else
+    {
+        fd = -1;
+        raw_buf += offset;
+    }
+
+    memcpy(*dest, raw_buf, dest_nbytes);
+
+    if (fd != -1)
+    {
+        free(raw_buf);
+    }
+
+    return true;
+}
+
+} // namespace cuslide::raw
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.h b/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.h
new file mode 100644
index 000000000..8c2453aa8
--- /dev/null
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/raw/raw.h
@@ -0,0 +1,33 @@
+/*
+ * Apache License, Version 2.0
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef CUSLIDE_RAW_H
+#define CUSLIDE_RAW_H
+
+#include <cucim/io/device.h>
+
+namespace cuslide::raw
+{
+
+bool decode_raw(int fd,
+                unsigned char* raw_buf,
+                uint64_t offset,
+                uint64_t size,
+                uint8_t** dest,
+                uint64_t dest_nbytes,
+                const cucim::io::Device& out_device);
+}
+#endif // CUSLIDE_RAW_H
diff --git a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
index f7d29ac5a..dc1c055c5 100644
--- a/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
+++ b/cpp/plugins/cucim.kit.cuslide/src/cuslide/tiff/ifd.cpp
@@ -34,6 +34,7 @@
 
 #include "cuslide/jpeg/libjpeg_turbo.h"
 #include "cuslide/deflate/deflate.h"
+#include "cuslide/raw/raw.h"
 #include "tiff.h"
 
 
@@ -151,7 +152,7 @@ bool IFD::read(const TIFF* tiff,
     int64_t sy = request->location[1];
     int64_t w = request->size[0];
     int64_t h = request->size[1];
-    int32_t n_ch = 3; // number of channels
+    int32_t n_ch = samples_per_pixel_; // number of channels
 
     size_t raster_size = w * h * samples_per_pixel_;
     void* raster = nullptr;
@@ -365,13 +366,22 @@ const std::vector<uint64_t>& IFD::image_piece_bytecounts() const
 
 bool IFD::is_compression_supported() const
 {
-    return (compression_ == COMPRESSION_ADOBE_DEFLATE || compression_ == COMPRESSION_JPEG ||
-            compression_ == COMPRESSION_DEFLATE);
+    switch (compression_)
+    {
+    case COMPRESSION_NONE:
+    case COMPRESSION_JPEG:
+    case COMPRESSION_ADOBE_DEFLATE:
+    case COMPRESSION_DEFLATE:
+        return true;
+    default:
+        return false;
+    }
 }
+
 bool IFD::is_read_optimizable() const
 {
     return is_compression_supported() && bits_per_sample_ == 8 && samples_per_pixel_ == 3 &&
-           planar_config_ == PLANARCONFIG_CONTIG &&
+           (tile_width_ != 0 && tile_height_ != 0) && planar_config_ == PLANARCONFIG_CONTIG &&
            (photometric_ == PHOTOMETRIC_RGB || photometric_ == PHOTOMETRIC_YCBCR) &&
            !tiff_->is_in_read_config(TIFF::kUseLibTiff);
 }
@@ -501,7 +511,12 @@ bool IFD::read_region_tiles(const TIFF* tiff,
                         tile_data = static_cast<uint8_t*>(image_cache.allocate(tile_raster_nbytes));
                     }
 
-                    if (compression_method == COMPRESSION_JPEG)
+                    if (compression_method == COMPRESSION_NONE)
+                    {
+                        cuslide::raw::decode_raw(tiff_file, nullptr, tiledata_offset, tiledata_size, &tile_data,
+                                                 tile_raster_nbytes, out_device);
+                    }
+                    else if (compression_method == COMPRESSION_JPEG)
                     {
                         cuslide::jpeg::decode_libjpeg(tiff_file, nullptr, tiledata_offset, tiledata_size,
                                                       jpegtable_data, jpegtable_count, &tile_data, out_device);
@@ -745,7 +760,13 @@ bool IFD::read_region_tiles_boundary(const TIFF* tiff,
                     {
                         tile_data = static_cast<uint8_t*>(image_cache.allocate(tile_raster_nbytes));
                     }
-                    if (compression_method == COMPRESSION_JPEG)
+
+                    if (compression_method == COMPRESSION_NONE)
+                    {
+                        cuslide::raw::decode_raw(tiff_file, nullptr, tiledata_offset, tiledata_size, &tile_data,
+                                                 tile_raster_nbytes, out_device);
+                    }
+                    else if (compression_method == COMPRESSION_JPEG)
                     {
                         cuslide::jpeg::decode_libjpeg(tiff_file, nullptr, tiledata_offset, tiledata_size,
                                                       jpegtable_data, jpegtable_count, &tile_data, out_device);
diff --git a/cpp/src/cuimage.cpp b/cpp/src/cuimage.cpp
index 6f126deb5..8942c557e 100644
--- a/cpp/src/cuimage.cpp
+++ b/cpp/src/cuimage.cpp
@@ -229,10 +229,7 @@ CuImage::CuImage() : std::enable_shared_from_this<CuImage>()
 CuImage::~CuImage()
 {
     //    printf("[cuCIM] CuImage::~CuImage()\n");
-    if (file_handle_.client_data)
-    {
-        image_formats_->formats[0].image_parser.close(&file_handle_);
-    }
+    close();
     image_formats_ = nullptr; // memory release is handled by the framework
     if (image_metadata_)
     {
@@ -615,6 +612,10 @@ CuImage CuImage::read_region(std::vector<int64_t>&& location,
         // Read region from internal file if image_data_ is nullptr
         if (image_data_ == nullptr)
         {
+            if (file_handle_.fd < 0) // file_handle_ is not opened
+            {
+                throw std::runtime_error("[Error] The image file is closed!");
+            }
             if (!image_formats_->formats[0].image_reader.read(
                     &file_handle_, image_metadata_, &request, image_data, nullptr /*out_metadata*/))
             {
@@ -778,6 +779,10 @@ std::set<std::string> CuImage::associated_images() const
 
 CuImage CuImage::associated_image(const std::string& name, const io::Device& device) const
 {
+    if (file_handle_.fd < 0) // file_handle_ is not opened
+    {
+        throw std::runtime_error("[Error] The image file is closed!");
+    }
     auto it = associated_images_.find(name);
     if (it != associated_images_.end())
     {
@@ -855,6 +860,18 @@ void CuImage::save(std::string file_path) const
         fs.close();
     }
 }
+
+void CuImage::close()
+{
+    if (file_handle_.client_data)
+    {
+        image_formats_->formats[0].image_parser.close(&file_handle_);
+    }
+    file_handle_.cufile = nullptr;
+    file_handle_.path = nullptr;
+    file_handle_.fd = -1;
+}
+
 void CuImage::ensure_init()
 {
     ScopedLock g(mutex_);
diff --git a/cpp/src/filesystem/cufile_driver.cpp b/cpp/src/filesystem/cufile_driver.cpp
index 23555f2ce..7f6993d3b 100644
--- a/cpp/src/filesystem/cufile_driver.cpp
+++ b/cpp/src/filesystem/cufile_driver.cpp
@@ -30,6 +30,7 @@
 #include <fmt/format.h>
 
 #include "cucim/util/cuda.h"
+#include "cucim/util/platform.h"
 #include "cufile_stub.h"
 
 #define ALIGN_UP(x, align_to) (((uint64_t)(x) + ((uint64_t)(align_to)-1)) & ~((uint64_t)(align_to)-1))
@@ -100,7 +101,7 @@ static int get_file_flags(const char* flags)
 
 bool is_gds_available()
 {
-    return static_cast<bool>(s_cufile_initializer);
+    return static_cast<bool>(s_cufile_initializer) && !cucim::util::is_in_wsl();
 }
 
 std::shared_ptr<CuFileDriver> open(const char* file_path, const char* flags, mode_t mode)
@@ -529,7 +530,8 @@ ssize_t CuFileDriver::pread(void* buf, size_t count, off_t file_offset, off_t bu
             {
                 if (memory_type == cudaMemoryTypeUnregistered)
                 {
-                    read_cnt = ::pread(handle_.fd, reinterpret_cast<char*>(buf) + buf_offset, block_read_size, file_offset);
+                    read_cnt =
+                        ::pread(handle_.fd, reinterpret_cast<char*>(buf) + buf_offset, block_read_size, file_offset);
                     total_read_cnt += read_cnt;
                 }
                 else
@@ -838,7 +840,8 @@ ssize_t CuFileDriver::pwrite(const void* buf, size_t count, off_t file_offset, o
             {
                 if (memory_type == cudaMemoryTypeUnregistered)
                 {
-                    write_cnt = ::pwrite(handle_.fd, reinterpret_cast<const char*>(buf) + buf_offset, block_write_size, file_offset);
+                    write_cnt = ::pwrite(
+                        handle_.fd, reinterpret_cast<const char*>(buf) + buf_offset, block_write_size, file_offset);
                     total_write_cnt += write_cnt;
                 }
                 else
@@ -1100,7 +1103,8 @@ ssize_t CuFileDriver::pwrite(const void* buf, size_t count, off_t file_offset, o
     {
         (void*)s_cufile_cache.device_cache(); // Lazy initialization
 
-        ssize_t write_cnt = cuFileWrite(handle_.cufile, reinterpret_cast<const char*>(buf) + buf_offset, count, file_offset, 0);
+        ssize_t write_cnt =
+            cuFileWrite(handle_.cufile, reinterpret_cast<const char*>(buf) + buf_offset, count, file_offset, 0);
         if (write_cnt < 0)
         {
             fmt::print(stderr, "[cuFile Error] {}\n", CUFILE_ERRSTR(write_cnt));
diff --git a/cpp/src/filesystem/file_handle.cpp b/cpp/src/filesystem/file_handle.cpp
index 1b3036d94..dfb55c8de 100644
--- a/cpp/src/filesystem/file_handle.cpp
+++ b/cpp/src/filesystem/file_handle.cpp
@@ -21,7 +21,7 @@
 #include "cucim/codec/hash_function.h"
 
 CuCIMFileHandle::CuCIMFileHandle()
-    : fd(0),
+    : fd(-1),
       cufile(nullptr),
       type(FileHandleType::kUnknown),
       path(nullptr),
diff --git a/cpp/src/util/file.cpp b/cpp/src/util/file.cpp
index 7009ae792..08d659372 100644
--- a/cpp/src/util/file.cpp
+++ b/cpp/src/util/file.cpp
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "cucim/util/file.h"
 
 #include <sys/stat.h>
diff --git a/cpp/src/util/platform.cpp b/cpp/src/util/platform.cpp
new file mode 100644
index 000000000..de0732633
--- /dev/null
+++ b/cpp/src/util/platform.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cucim/util/platform.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/utsname.h>
+
+
+namespace cucim::util
+{
+
+bool is_in_wsl()
+{
+    struct utsname buf;
+    int err = uname(&buf);
+    if (err == 0)
+    {
+        char* pos = strstr(buf.release, "icrosoft");
+        if (pos)
+        {
+            // 'Microsoft' for WSL1 and 'microsoft' for WSL2
+            if (buf.release < pos && (pos[-1] == 'm' || pos[-1] == 'M'))
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+} // namespace cucim::util
diff --git a/cucim.code-workspace b/cucim.code-workspace
index 48a3d159b..1c12af801 100644
--- a/cucim.code-workspace
+++ b/cucim.code-workspace
@@ -31,7 +31,7 @@
 					"CUCIM_TESTDATA_FOLDER": "${workspaceDirectory}/test_data",
 					// Add cuslide plugin's library path to LD_LIBRARY_PATH
 					"LD_LIBRARY_PATH": "${workspaceDirectory}/build-debug/lib:${workspaceDirectory}/cpp/plugins/cucim.kit.cuslide/build-debug/lib:${workspaceDirectory}/temp/cuda/lib64:${os_env:LD_LIBRARY_PATH}",
-					"CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@21.08.01.so"
+					"CUCIM_TEST_PLUGIN_PATH": "cucim.kit.cuslide@21.10.00.so"
 				},
 				"cwd": "${workspaceDirectory}",
 				"catch2": {
@@ -214,7 +214,7 @@
 					},
 					{
 						"name": "CUCIM_TEST_PLUGIN_PATH",
-						"value": "cucim.kit.cuslide@21.08.01.so"
+						"value": "cucim.kit.cuslide@21.10.00.so"
 					}
 				],
 				"console": "externalTerminal",
@@ -242,7 +242,7 @@
 					},
 					{
 						"name": "CUCIM_TEST_PLUGIN_PATH",
-						"value": "cucim.kit.cuslide@21.08.01.so"
+						"value": "cucim.kit.cuslide@21.10.00.so"
 					}
 				],
 				"console": "externalTerminal",
@@ -273,7 +273,7 @@
 					},
 					{
 						"name": "CUCIM_TEST_PLUGIN_PATH",
-						"value": "cucim.kit.cuslide@21.08.01.so"
+						"value": "cucim.kit.cuslide@21.10.00.so"
 					}
 				],
 				"console": "externalTerminal",
@@ -316,4 +316,4 @@
 			},
 		]
 	}
-}
\ No newline at end of file
+}
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5cd51b8af..66315b461 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -71,9 +71,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '21.08'
+version = '21.10'
 # The full version, including alpha/beta/rc tags.
-release = '21.08.01'
+release = '21.10.00'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/gds/src/cufile_stub.cpp b/gds/src/cufile_stub.cpp
index 1cc4020a1..bf52e758b 100644
--- a/gds/src/cufile_stub.cpp
+++ b/gds/src/cufile_stub.cpp
@@ -16,6 +16,7 @@
 
 #include "cufile_stub.h"
 #include "cucim/dynlib/helper.h"
+#include "cucim/util/platform.h"
 
 #define IMPORT_FUNCTION(handle, name) impl_##name = cucim::dynlib::get_library_symbol<t_##name>(handle, #name);
 
@@ -196,9 +197,16 @@ extern "C"
 
     CUfileError_t cuFileDriverOpen(void)
     {
+        // GDS v1.0.0 does not support WSL and executing this can cause the following error:
+        //    Assertion failure, file index :cufio-udev  line :143
+        // So we do not call impl_cuFileDriverOpen() here if the current platform is WSL.
         if (impl_cuFileDriverOpen)
         {
-            return impl_cuFileDriverOpen();
+            // If not in WSL, call impl_cuFileDriverOpen()
+            if (!cucim::util::is_in_wsl())
+            {
+                return impl_cuFileDriverOpen();
+            }
         }
         return CUfileError_t{ CU_FILE_DRIVER_NOT_INITIALIZED, CUDA_SUCCESS };
     }
diff --git a/python/cucim/CHANGELOG.md b/python/cucim/CHANGELOG.md
index 185797ff2..4fab12030 100644
--- a/python/cucim/CHANGELOG.md
+++ b/python/cucim/CHANGELOG.md
@@ -1,6 +1,14 @@
 
 # Changelog
 
+## [21.08.01](https://github.com/rapidsai/cucim/wiki/release_notes_v21.08.01)
+
+- [New] Add skimage.morphology.thin ([#27](https://github.com/rapidsai/cucim/pull/27))
+- [Bug] Fix missing `__array_interface__` for associated_image(): ([#48](https://github.com/rapidsai/cucim/pull/48), [#65](https://github.com/rapidsai/cucim/pull/65))
+- [Testing] Added unit and performance tests for TIFF loaders ([#62](https://github.com/rapidsai/cucim/pull/62))
+- [Bug] Fix Windows int-type Bug: ([#72](https://github.com/rapidsai/cucim/pull/72))
+- [Update] Use more descriptive ElementwiseKernel names in cucim.skimage: ([#75](https://github.com/rapidsai/cucim/pull/75))
+
 ## [21.06.00](https://github.com/rapidsai/cucim/wiki/release_notes_v21.06.00)
 
 - Implement cache mechanism
diff --git a/python/cucim/README.md b/python/cucim/README.md
index c33e62787..304888251 100644
--- a/python/cucim/README.md
+++ b/python/cucim/README.md
@@ -8,6 +8,8 @@
 
 - [GTC 2021 cuCIM: A GPU Image I/O and Processing Toolkit [S32194]](https://www.nvidia.com/en-us/gtc/catalog/?search=cuCIM#/)
   - [video](https://gtc21.event.nvidia.com/media/cuCIM%3A%20A%20GPU%20Image%20I_O%20and%20Processing%20Toolkit%20%5BS32194%5D/1_fwfxd0iu)
+- [SciPy 2021 cuCIM - A GPU image I/O and processing library](https://www.scipy2021.scipy.org/)
+  - [video](https://youtu.be/G46kOOM9xbQ)
 
 ## Quick Start
 
@@ -21,7 +23,7 @@ pip install scipy scikit-image cupy-cuda110==9.0.0b3
 ```
 ### Jupyter Notebooks
 
-Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-0.20/notebooks/Welcome.ipynb) notebook.
+Please check out our [Welcome](https://github.com/rapidsai/cucim/blob/branch-21.08/notebooks/Welcome.ipynb) notebook.
 
 ### Open Image
 
@@ -90,6 +92,10 @@ visualize(region)
 #Image.fromarray(np.asarray(region))
 ```
 
+### Using Cache
+
+Please look at this [notebook](https://nbviewer.jupyter.org/github/rapidsai/cucim/blob/v21.08.01/notebooks/Using_Cache.ipynb).
+
 ### Using scikit-image API
 
 Import `cucim.skimage` instead of `skimage`.
diff --git a/python/cucim/VERSION b/python/cucim/VERSION
index a9997c68e..208121388 100644
--- a/python/cucim/VERSION
+++ b/python/cucim/VERSION
@@ -1 +1 @@
-21.08.01
+21.10.00
diff --git a/python/cucim/docs/getting_started/index.md b/python/cucim/docs/getting_started/index.md
index 33b08ae88..5a304ab57 100644
--- a/python/cucim/docs/getting_started/index.md
+++ b/python/cucim/docs/getting_started/index.md
@@ -14,15 +14,15 @@
 
 ## Installation
 
-Please download the latest SDK package (`cuCIM-v21.08.01-linux.tar.gz`).
+Please download the latest SDK package (`cuCIM-v21.10.00-linux.tar.gz`).
 
 Untar the downloaded file.
 
 ```bash
-mkdir -p cuCIM-v21.08.01
-tar -xzvf cuCIM-v21.08.01-linux.tar.gz -C cuCIM-v21.08.01
+mkdir -p cuCIM-v21.10.00
+tar -xzvf cuCIM-v21.10.00-linux.tar.gz -C cuCIM-v21.10.00
 
-cd cuCIM-v21.08.01
+cd cuCIM-v21.10.00
 ```
 
 ## Run command
@@ -147,7 +147,7 @@ Its execution would show some metadata information and create two files -- `outp
 ```
 $ ./bin/tiff_image notebooks/input/image.tif .
 [Plugin: cucim.kit.cuslide] Loading...
-[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@21.08.01.so
+[Plugin: cucim.kit.cuslide] Loading the dynamic library from: cucim.kit.cuslide@21.10.00.so
 [Plugin: cucim.kit.cuslide] loaded successfully. Version: 0
 Initializing plugin: cucim.kit.cuslide (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cuslide)
 is_loaded: true
diff --git a/python/cucim/docs/index.md b/python/cucim/docs/index.md
index 9c170ef08..302dd0105 100644
--- a/python/cucim/docs/index.md
+++ b/python/cucim/docs/index.md
@@ -18,7 +18,7 @@ development/index -->
 
 # cuCIM Documentation
 
-Current latest version is [Version 21.08.01](release_notes/v21.08.01.md).
+Current latest version is [Version 21.10.00](release_notes/v21.10.00.md).
 
 **cuCIM** a toolkit to provide GPU accelerated I/O, image  processing & computer vision primitives for N-Dimensional images with a focus on biomedical imaging.
 
diff --git a/python/cucim/setup.cfg b/python/cucim/setup.cfg
index ec22e3913..448ec9b61 100644
--- a/python/cucim/setup.cfg
+++ b/python/cucim/setup.cfg
@@ -31,6 +31,7 @@ per-file-ignores =
     src/cucim/skimage/measure/tests/test_block.py:E201,E202,E241
     src/cucim/skimage/transform/tests/test_warps.py:E201,E202,E241,W605
     src/cucim/skimage/transform/_geometric.py:E201,E202,E241
+    src/cucim/core/operations/expose/transform.py:F401
 
 [tool:pytest]
 # If a pytest section is found in one of the possible config files
diff --git a/python/cucim/src/cucim/__init__.py b/python/cucim/src/cucim/__init__.py
index 52a1ebb20..69c912e50 100644
--- a/python/cucim/src/cucim/__init__.py
+++ b/python/cucim/src/cucim/__init__.py
@@ -30,6 +30,8 @@
     Functions from scikit-image.
 
 """
+_is_cupy_available = False
+_is_clara_available = False
 
 # Try to import cupy first.
 # If cucim.clara package is imported first, you may see the following error when running on CUDA 10.x (#44)
@@ -37,13 +39,40 @@
 #   Segmentation fault
 try:
     import cupy
+    _is_cupy_available = True
 except ImportError:
     pass
 
 try:
     from .clara import CuImage, __version__, cli
+    _is_clara_available = True
 except ImportError:
     from ._version import get_versions
     __version__ = get_versions()['version']
     del get_versions
     del _version
+
+
+def is_available(module_name: str = "") -> bool:
+    """Check if a specific module is available.
+
+    If module_name is not specified, returns True if all of the modules are
+    available.
+
+    Parameters
+    ----------
+    module_name : str
+        Name of the module to check. (e.g. "skimage", "core", and "clara")
+
+    Returns
+    -------
+    bool
+        True if the module is available, False otherwise.
+
+    """
+    if module_name in ("skimage", "core"):
+        return _is_cupy_available
+    elif module_name == 'clara':
+        return _is_clara_available
+    else:
+        return _is_cupy_available and _is_clara_available
diff --git a/python/cucim/src/cucim/core/operations/__init__.py b/python/cucim/src/cucim/core/operations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cucim/src/cucim/core/operations/color/__init__.py b/python/cucim/src/cucim/core/operations/color/__init__.py
new file mode 100644
index 000000000..4b9332fc1
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/color/__init__.py
@@ -0,0 +1,5 @@
+from .jitter import color_jitter
+
+__all__ = [
+    "color_jitter",
+]
diff --git a/python/cucim/src/cucim/core/operations/color/jitter.py b/python/cucim/src/cucim/core/operations/color/jitter.py
new file mode 100755
index 000000000..6525547f8
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/color/jitter.py
@@ -0,0 +1,300 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import numbers
+from typing import Any, List, Optional, Tuple
+
+import cupy
+import numpy as np
+
+from .kernel.cuda_kernel_source import cuda_kernel_code
+
+_logger = logging.getLogger("colorjitter_cucim")
+CUDA_KERNELS = cupy.RawModule(code=cuda_kernel_code)
+
+
+def color_jitter(
+    img: Any,
+    brightness=0,
+    contrast=0,
+    saturation=0,
+    hue=0
+):
+    """Applies color jitter by random sequential application of
+    4 operations (brightness, contrast, saturation, hue).
+
+    Parameters
+    ----------
+    img : channel first, cupy.ndarray or numpy.ndarray
+        Input data of shape (C, H, W). Can also batch process input of shape
+        (N, C, H, W). Can be a numpy.ndarray or cupy.ndarray.
+    brightness : float or 2-tuple of float, optional
+        Non-negative factor to jitter the brightness by. When `brightness` is a
+        scalar, scaling will be by a random value in range
+        ``[max(0, 1 - brightness), (1 + brightness)]``. `brightness` can
+        also be a 2-tuple specifying the range for the random scaling factor.
+        A value of 0 or (1, 1) will result in no change.
+    contrast : float or 2-tuple of float, optional
+        Non-negative factor to jitter the contrast by. When `contrast` is a
+        scalar, scaling will be by a random value between
+        ``[max(0, 1 - contrast), (1 + contrast)]``. `contrast` can
+        also be a 2-tuple specifying the range for the random scaling factor.
+        A value of 0 or (1, 1) will result in no change.
+    saturation : float or 2-tuple of float, optional
+        Non-negative factor to jitter the saturation by. When `saturation` is a
+        scalar, scaling will be by a random value between
+        ``[max(0, 1 - saturation), (1 + saturation)]``. `saturation` can
+        also be a 2-tuple specifying the range for the random scaling factor.
+        A value of 0 or (1, 1) will result in no change.
+    hue : float or 2-tuple of float, optional
+        Factor between [-0.5, 0.5] to jitter hue by. When `hue` is a
+        scalar, scaling will be by a random value between in the range
+        ``[-hue, hue]``. `hue` can also be a 2-tuple specifying the range.
+        A value of 0 or (0, 0) will result in no change.
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    ValueError
+        If 'brightness','contrast','saturation' or 'hue' is outside
+        of allowed range
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.color as ccl
+    >>> # input is channel first 3d array
+    >>> output_array = ccl.color_jitter(input_arr,.25,.75,.25,.04)
+    """
+    try:
+        # should be a class stateful implementation to caches values
+        # once instead of checking every time
+        def check_input(value, name, center=1, bound=(0, float('inf')),
+                        clip_first_on_zero=True):
+            if isinstance(value, numbers.Number):
+                if value < 0:
+                    raise ValueError("If {} is a single number, \
+                                     it must be non negative.".format(name))
+                value = [center - float(value), center + float(value)]
+                if clip_first_on_zero:
+                    value[0] = max(value[0], 0.0)
+            elif isinstance(value, (tuple, list)) and len(value) == 2:
+                if not bound[0] <= value[0] <= value[1] <= bound[1]:
+                    raise ValueError("{} values should be between {}"
+                                     .format(name, bound))
+            else:
+                raise TypeError("{} should be a single number or a \
+                                list/tuple with length 2.".format(name))
+            # if value is 0 or (1., 1.) for brightness/contrast/saturation
+            # or (0., 0.) for hue, do nothing
+            if value[0] == value[1] == center:
+                value = None
+            return value
+
+        def get_params(brightness: Optional[List[float]],
+                       contrast: Optional[List[float]],
+                       saturation: Optional[List[float]],
+                       hue: Optional[List[float]]
+                       ) -> Tuple[np.ndarray, Optional[float],
+                                  Optional[float], Optional[float],
+                                  Optional[float]]:
+
+            fn_idx = np.random.permutation(4)
+
+            b = None
+            if brightness is not None:
+                b = float(np.random.uniform(brightness[0], brightness[1]))
+            c = None
+            if contrast is not None:
+                c = float(np.random.uniform(contrast[0], contrast[1]))
+            s = None
+            if saturation is not None:
+                s = float(np.random.uniform(saturation[0], saturation[1]))
+            h = None
+            if hue is not None:
+                h = float(np.random.uniform(hue[0], hue[1]))
+
+            return fn_idx, b, c, s, h
+
+        # brightness jitter
+        def adjust_brightness(input_arr, brightness):
+            if len(input_arr.shape) == 4:
+                N, C, H, W = input_arr.shape
+            elif len(input_arr.shape) == 3:
+                C, H, W = input_arr.shape
+                N = 1
+
+            block = (128, 1, 1)
+            length = N * C * H * W
+            length = (length + 1) >> 2
+            grid = (int((length - 1) / block[0] + 1) , 1, 1)
+
+            result = cupy.ndarray(shape=input_arr.shape,
+                                  dtype=input_arr.dtype)
+            kernel = CUDA_KERNELS.get_function("brightnessjitter_kernel")
+            kernel(grid, block, args=(input_arr,
+                                      result,
+                                      np.int32(N * C * H * W),
+                                      np.float32(brightness)))
+            return result
+
+        # contrast jitter
+        def adjust_contrast(input_arr, contrast):
+            # contrast: 0.0 grey image, 1.0 original image
+            # out RGB -> Grey
+            # new image with mean L, convert to RGB
+            # L -> RGB is just replicating L values across all channels
+            # blend again as LHS
+
+            if len(input_arr.shape) == 4:
+                N, C, H, W = input_arr.shape
+            elif len(input_arr.shape) == 3:
+                C, H, W = input_arr.shape
+                N = 1
+            block = (128, 1, 1)
+            pitch = W * H
+            grid = (int((pitch - 1) / block[0] + 1) , N, 1)
+
+            output_L32 = cupy.empty((N, H, W), dtype=cupy.uint32)
+            kernel_rgb2l = CUDA_KERNELS.get_function("rgb2l_kernel")
+            kernel_rgb2l(grid, block, args=(input_arr,
+                                            output_L32,
+                                            np.int32(pitch)))
+
+            L32_mean = output_L32.mean(axis=[1, 2], dtype=cupy.float32)
+
+            if len(input_arr.shape) == 3:
+                output_rgb = cupy.empty((C, H, W), dtype=cupy.uint8)
+            else:
+                output_rgb = cupy.empty((N, C, H, W), dtype=cupy.uint8)
+            kernel_blendconstant = \
+                CUDA_KERNELS.get_function("blendconstant_kernel")
+            kernel_blendconstant(grid, block, args=(input_arr,
+                                                    output_rgb,
+                                                    np.int32(pitch),
+                                                    L32_mean,
+                                                    np.float32(contrast)))
+
+            return output_rgb
+
+        # saturation jitter
+        def adjust_saturation(input_arr, saturation):
+            # saturation (color enhance) 0.0 b/w image
+            if len(input_arr.shape) == 4:
+                N, C, H, W = input_arr.shape
+            elif len(input_arr.shape) == 3:
+                C, H, W = input_arr.shape
+                N = 1
+
+            pitch = W * H
+            block = (128, 1, 1)
+            grid = (int((pitch - 1) / block[0] + 1), N, 1)
+
+            output_rgb = cupy.empty(input_arr.shape, dtype=cupy.uint8)
+            kernel_satjitter = \
+                CUDA_KERNELS.get_function("saturationjitter_kernel")
+            kernel_satjitter(grid, block, args=(input_arr,
+                                                output_rgb,
+                                                np.int32(pitch),
+                                                np.float32(saturation)))
+
+            return output_rgb
+
+        # hue jitter
+        def adjust_hue(input_arr, hue):
+            if not(-0.5 <= hue <= 0.5):
+                raise ValueError('hue factor({}) is not in [-0.5, 0.5].'.
+                                 format(hue))
+
+            if len(input_arr.shape) == 4:
+                N, C, H, W = input_arr.shape
+            elif len(input_arr.shape) == 3:
+                C, H, W = input_arr.shape
+                N = 1
+
+            pitch = W * H
+            block = (128, 1, 1)
+            grid = (int((pitch - 1) / block[0] + 1), N, 1)
+            output_rgb = cupy.empty(input_arr.shape, dtype=cupy.uint8)
+            kernel_huejitter = CUDA_KERNELS.get_function("huejitter_kernel")
+            kernel_huejitter(grid, block, args=(input_arr,
+                                                output_rgb,
+                                                np.int32(pitch),
+                                                np.float32(hue)))
+
+            return output_rgb
+
+        # execution
+        f_brightness = check_input(brightness, 'brightness')
+        f_contrast = check_input(contrast, 'contrast')
+        f_saturation = check_input(saturation, 'saturation')
+        f_hue = check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
+                            clip_first_on_zero=False)
+
+        to_cupy = False
+
+        if isinstance(img, np.ndarray):
+            to_cupy = True
+            cupy_img = cupy.asarray(img, dtype=cupy.uint8, order="C")
+        elif not isinstance(img, cupy.ndarray):
+            raise TypeError("img must be a cupy.ndarray or numpy.ndarray")
+        else:
+            cupy_img = cupy.ascontiguousarray(img)
+
+        if cupy_img.dtype != cupy.uint8:
+            if cupy.can_cast(cupy_img.dtype, cupy.uint8, 'unsafe') is False:
+                raise ValueError(
+                    "Cannot cast type {cupy_img.dtype.name} to 'uint8'"
+                )
+            else:
+                cupy_img = cupy_img.astype(cupy.uint8)
+
+        if img.ndim not in (3, 4):
+            raise ValueError(
+                f"Unsupported img.ndim={img.ndim}. Expected `img` with "
+                "dimensions (C, H, W) or (N, C, H, W)."
+            )
+
+        fn_idx, brightness_factor, contrast_factor, saturation_factor, \
+            hue_factor = get_params(f_brightness, f_contrast,
+                                    f_saturation, f_hue)
+
+        for fn_id in fn_idx:
+            if fn_id == 0 and brightness_factor is not None:
+                cupy_img = adjust_brightness(cupy_img, brightness_factor)
+            elif fn_id == 1 and contrast_factor is not None:
+                cupy_img = adjust_contrast(cupy_img, contrast_factor)
+            elif fn_id == 2 and saturation_factor is not None:
+                cupy_img = adjust_saturation(cupy_img, saturation_factor)
+            elif fn_id == 3 and hue_factor is not None:
+                cupy_img = adjust_hue(cupy_img, hue_factor)
+
+        if img.dtype != np.uint8:
+            cupy_img = cupy_img.astype(cupy.float32)
+
+        result = cupy_img
+        if to_cupy is True:
+            result = cupy.asnumpy(cupy_img)
+
+        return result
+    except Exception as e:
+        _logger.error("[cucim] " + str(e), exc_info=True)
+        _logger.info("Error executing color jitter on GPU")
+        raise
diff --git a/python/cucim/src/cucim/core/operations/color/kernel/cuda_kernel_source.py b/python/cucim/src/cucim/core/operations/color/kernel/cuda_kernel_source.py
new file mode 100644
index 000000000..563b53e4c
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/color/kernel/cuda_kernel_source.py
@@ -0,0 +1,263 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cuda_kernel_code = r'''
+extern "C" {
+__global__ void brightnessjitter_kernel(unsigned char *input_rgb, \
+                                        unsigned char *output_rgb, \
+                                        int total_pixels, \
+                                        float brightness_factor) {
+  // pitch is only WxH - not channels included
+  int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  int unvectorized_length = total_pixels;
+  int vectorized_length = (unvectorized_length >> 2);
+
+  if (idx < vectorized_length) {
+    // 32-bit vectorized loads
+    unsigned int input_val = reinterpret_cast<unsigned int*>(input_rgb)[idx];
+    unsigned int out_vec = 0x0;
+
+    #pragma unroll(4)
+    for (unsigned int ik = 0; ik < 4; ik++) {
+      unsigned char val = ((0xff << (ik*8)) & input_val) >> (ik*8);
+      float pixel = __uint2float_rn((unsigned int)val);
+      pixel *= brightness_factor;
+      // clip
+      pixel = (pixel <= 0.0f) ? 0.0f : ((pixel >= 255.0f) ? 255.0f : pixel);
+      unsigned int tmp = __float2uint_rz(pixel);//rz helps bitwise match but..
+      out_vec = out_vec | (tmp << (ik*8));
+    }
+    reinterpret_cast<unsigned int*>(output_rgb)[idx] = out_vec;
+  } else if (idx == vectorized_length) {
+    // one 8-bit element at a time unroll
+
+    int start_idx = idx << 2;
+    while (start_idx < unvectorized_length) {
+      float pixel = __uint2float_rn((unsigned int)input_rgb[start_idx]);
+      pixel *= brightness_factor;
+      // clip
+      pixel = (pixel <= 0.0f) ? 0.0f : ((pixel >= 255.0f) ? 255.0f : pixel);
+      output_rgb[start_idx] = (unsigned char)__float2uint_rz(pixel);
+      start_idx++;
+    }
+  }
+}
+
+__global__ void rgb2l_kernel(unsigned char *input_rgb, \
+                             unsigned int *output_L, \
+                             int pitch) {
+  // 1D grid, access RGB values with pitch'd access
+  // pitch is WxH not the image array pitch used for storing surface data
+  int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx < pitch) {
+    int lookup_idx = idx + (blockIdx.y * (pitch * 3));
+    unsigned int ui_r = (unsigned int)input_rgb[lookup_idx];
+    unsigned int ui_g = (unsigned int)input_rgb[lookup_idx+pitch];
+    unsigned int ui_b = (unsigned int)input_rgb[lookup_idx+pitch*2];
+    unsigned int L = ((ui_r * 19595 + ui_g * 38470 + ui_b * 7471) + \
+                       0x8000) >> 16;
+    int out_idx = (blockIdx.y * pitch) + idx;
+    output_L[out_idx] = L;
+  }
+}
+
+__global__ void blendconstant_kernel(unsigned char *input_rgb, \
+                                     unsigned char *output_rgb, \
+                                     int pitch, \
+                                     float* blend_constant, \
+                                     float blend_factor) {
+  // 1D grid, access RGB values with pitch'd access
+  // pitch is WxH not the image array pitch used for storing surface data
+  int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  int blend_constant_i = int(blend_constant[blockIdx.y] + 0.5);
+  float blend_constant_f = (float)blend_constant_i;
+
+  if (idx < pitch) {
+    idx += (blockIdx.y * (pitch * 3));
+    float r = __uint2float_rn((unsigned int)input_rgb[idx]);
+    float g = __uint2float_rn((unsigned int)input_rgb[idx+pitch]);
+    float b = __uint2float_rn((unsigned int)input_rgb[idx+pitch*2]);
+
+    // jit_contrast = float(L_round) + contrast *
+    //                (input_arr.astype(cp.float32) -  float(L_round))
+
+    r = blend_constant_f + blend_factor * (r - blend_constant_f);
+    g = blend_constant_f + blend_factor * (g - blend_constant_f);
+    b = blend_constant_f + blend_factor * (b - blend_constant_f);
+
+    r = (r <= 0.0f) ? 0.0f : ((r >= 255.0f) ? 255.0f : r);
+    g = (g <= 0.0f) ? 0.0f : ((g >= 255.0f) ? 255.0f : g);
+    b = (b <= 0.0f) ? 0.0f : ((b >= 255.0f) ? 255.0f : b);
+
+    output_rgb[idx]         = (unsigned char)__float2uint_rz(r);
+    output_rgb[idx+pitch]   = (unsigned char)__float2uint_rz(g);
+    output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rz(b);
+  }
+}
+
+__global__ void saturationjitter_kernel(unsigned char *input_rgb, \
+                                        unsigned char *output_rgb, \
+                                        int pitch, \
+                                        float saturation_factor) {
+  // 1D grid, access RGB values with pitch'd access
+  // pitch is WxH not the image array pitch used for storing surface data
+  int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (idx < pitch) {
+    idx += (blockIdx.y * (pitch * 3));
+    unsigned int ui_r = (unsigned int)input_rgb[idx];
+    unsigned int ui_g = (unsigned int)input_rgb[idx+pitch];
+    unsigned int ui_b = (unsigned int)input_rgb[idx+pitch*2];
+
+    // output_L = ((input_arr[0,:,:] * 19595 + input_arr[1,:,:] \
+    //              * 38470 + input_arr[2,:,:] * 7471) + 0x8000) >> 16
+
+    unsigned int L = ((ui_r * 19595 + ui_g * 38470 + ui_b * 7471) \
+                       + 0x8000) >> 16;
+    // jit_saturation = L_saturation + saturation * \
+    //                  (input_arr.astype(cp.float32) - L_saturation)
+
+    float sat_L = __uint2float_rn(L);
+    float f_r = __uint2float_rn(ui_r);
+    float f_g = __uint2float_rn(ui_g);
+    float f_b = __uint2float_rn(ui_b);
+
+    float r = sat_L + saturation_factor * (f_r - sat_L);
+    float g = sat_L + saturation_factor * (f_g - sat_L);
+    float b = sat_L + saturation_factor * (f_b - sat_L);
+    r = (r <= 0.0f) ? 0.0f : ((r >= 255.0f) ? 255.0f : r);
+    g = (g <= 0.0f) ? 0.0f : ((g >= 255.0f) ? 255.0f : g);
+    b = (b <= 0.0f) ? 0.0f : ((b >= 255.0f) ? 255.0f : b);
+
+    output_rgb[idx]         = (unsigned char)__float2uint_rz(r);
+    output_rgb[idx+pitch]   = (unsigned char)__float2uint_rz(g);
+    output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rz(b);
+  }
+}
+
+__global__ void huejitter_kernel(unsigned char *input_rgb, \
+                    unsigned char *output_rgb, \
+                    int pitch, float hue_factor) {
+  // 1D grid, access RGB values with pitch'd access
+  // pitch is WxH not the image array pitch used for storing surface data
+  int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  // convert to HSV
+  // change hue value
+  // convert HSV to RGB
+  if (idx < pitch) {
+    idx += (blockIdx.y * (pitch * 3));
+    float r = __uint2float_rn((unsigned int)input_rgb[idx]);
+    float g = __uint2float_rn((unsigned int)input_rgb[idx+pitch]);
+    float b = __uint2float_rn((unsigned int)input_rgb[idx+pitch*2]);
+
+    float maxc = fmaxf(r, fmaxf(g,b));
+    float minc = fminf(r, fminf(g,b));
+
+    float uv = maxc;
+    float uh = 0;
+    float us = 0;
+    if (maxc == minc) {
+      uh = 0;
+      us = 0;
+    } else {
+      float cr = maxc - minc;
+      float s = cr / maxc;
+      float rc = (maxc - r) / cr;
+      float gc = (maxc - g) / cr;
+      float bc = (maxc - b) / cr;
+
+      float h;
+      if (r == maxc) {
+        h = bc - gc;
+      } else if (g == maxc) {
+        h = 2.0f + rc - bc;
+      } else {
+        h = 4.0f + gc - rc;
+      }
+      h = (h / 6.0f) + 1.0f;
+      h = fmodf(h, 1.0f);
+      uh = h * 255.0f;
+      uh = (uh <= 0.0f) ? 0.0f : ((uh >= 255.0f) ? 255.0f : uh);
+      us = s * 255.0f;
+      us = (us <= 0.0f) ? 0.0f : ((us >= 255.0f) ? 255.0f : us);
+    }
+
+    // jitter hue
+    unsigned char h_char = (unsigned char)__float2uint_rn(uh);
+    unsigned char s_char = (unsigned char)__float2uint_rn(us);
+    unsigned char v_char = (unsigned char)__float2uint_rn(uv);
+
+    h_char += (hue_factor * 255);
+
+    float h = __uint2float_rn((unsigned int)h_char);
+    float s = __uint2float_rn((unsigned int)s_char);
+    float v = __uint2float_rn((unsigned int)v_char);
+
+    if (s == 0) {
+      // write zero and out
+      output_rgb[idx] = 0;
+      output_rgb[idx+pitch] = 0;
+      output_rgb[idx+pitch*2] = 0;
+    } else {
+      float i = (h * 6.0f) / 255.0f;
+      float f = i - floorf(i);
+      float fs = s / 255.0f;
+      i = floorf(i);
+
+      float p = roundf(v * (1.0f - fs));
+      float q = roundf(v * (1.0f - (fs * f)));
+      float t = roundf(v * (1.0f - (fs * (1.0f - f))));
+
+      float up = (p <= 0.0f) ? 0.0f : ((p >= 255.0f) ? 255.0f : p);
+      float uq = (q <= 0.0f) ? 0.0f : ((q >= 255.0f) ? 255.0f : q);
+      float ut = (t <= 0.0f) ? 0.0f : ((t >= 255.0f) ? 255.0f : t);
+
+      // todo: make atleast 16-bit stores
+      switch ((int)i % 6) {
+        case 0:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(v);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(ut);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(up);
+          break;
+        case 1:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(uq);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(v);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(up);
+          break;
+        case 2:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(up);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(v);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(ut);
+          break;
+        case 3:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(up);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(uq);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(v);
+          break;
+        case 4:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(ut);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(up);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(v);
+          break;
+        case 5:
+          output_rgb[idx]         = (unsigned char)__float2uint_rn(v);
+          output_rgb[idx+pitch]   = (unsigned char)__float2uint_rn(up);
+          output_rgb[idx+pitch*2] = (unsigned char)__float2uint_rn(uq);
+          break;
+      }
+    }
+  }
+}
+}'''
diff --git a/python/cucim/src/cucim/core/operations/color/tests/test_color_jitter.py b/python/cucim/src/cucim/core/operations/color/tests/test_color_jitter.py
new file mode 100644
index 000000000..dcc7db66e
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/color/tests/test_color_jitter.py
@@ -0,0 +1,66 @@
+import cupy
+import numpy as np
+import pytest
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.color as ccl
+
+
+def get_image_array():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def verify_result(output, input):
+    diff_im = output - input
+    diff_total_value = np.abs(np.sum(diff_im))
+    assert diff_total_value >= 0
+
+
+def test_color_jitter_bad_params():
+    arr = get_image_array()
+    with pytest.raises(ValueError):
+        arr1 = arr.flatten()
+        ccl.color_jitter(arr1, .25, .75, .25, .04)
+    with pytest.raises(TypeError):
+        img = Image.fromarray(arr.T, 'RGB')
+        ccl.color_jitter(img, .25, .75, .25, .04)
+
+
+def test_color_jitter_numpyinput():
+    arr = get_image_array()
+    np_output = ccl.color_jitter(arr, .25, .75, .25, .04)
+    verify_result(np_output, arr)
+
+
+def test_color_jitter_cupyinput():
+    arr = get_image_array()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = ccl.color_jitter(cupy_arr, .25, .75, .25, .04)
+    np_output = cupy.asnumpy(cupy_output)
+    verify_result(np_output, arr)
+
+
+def test_color_jitter_cupy_cast():
+    arr = get_image_array()
+    cupy_arr = cupy.asarray(arr)
+    cupy_arr = cupy_arr.astype(cupy.float32)
+    cupy_output = ccl.color_jitter(cupy_arr, .25, .75, .25, .04)
+    assert cupy_output.dtype == cupy.float32
+
+
+def test_color_jitter_factor():
+    arr = get_image_array()
+    np_output = ccl.color_jitter(arr, 0, 0, 0, 0)
+    verify_result(np_output, arr)
+
+
+def test_color_jitter_batchinput():
+    arr = get_image_array()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = ccl.color_jitter(arr_batch, .25, .75, .25, .04)
+    assert np_output.shape[0] == 8
+    verify_result(np_output, arr_batch)
diff --git a/python/cucim/src/cucim/core/operations/expose/tests/test_expose.py b/python/cucim/src/cucim/core/operations/expose/tests/test_expose.py
new file mode 100644
index 000000000..350d67feb
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/expose/tests/test_expose.py
@@ -0,0 +1,17 @@
+from cucim.core.operations.expose.transform import (color_jitter, image_flip,
+                                                    image_rotate_90,
+                                                    rand_image_flip,
+                                                    rand_image_rotate_90,
+                                                    rand_zoom,
+                                                    scale_intensity_range, zoom)
+
+
+def test_exposed_transforms():
+    assert color_jitter is not None
+    assert image_flip is not None
+    assert image_rotate_90 is not None
+    assert scale_intensity_range is not None
+    assert zoom is not None
+    assert rand_zoom is not None
+    assert rand_image_flip is not None
+    assert rand_image_rotate_90 is not None
diff --git a/python/cucim/src/cucim/core/operations/expose/transform.py b/python/cucim/src/cucim/core/operations/expose/transform.py
new file mode 100644
index 000000000..8587d0a74
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/expose/transform.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cucim.core.operations.color import color_jitter
+from cucim.core.operations.intensity import (rand_zoom, scale_intensity_range,
+                                             zoom)
+from cucim.core.operations.spatial import (image_flip, image_rotate_90,
+                                           rand_image_flip,
+                                           rand_image_rotate_90)
diff --git a/python/cucim/src/cucim/core/operations/intensity/__init__.py b/python/cucim/src/cucim/core/operations/intensity/__init__.py
new file mode 100644
index 000000000..7da89caba
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/__init__.py
@@ -0,0 +1,8 @@
+from .scaling import scale_intensity_range
+from .zoom import rand_zoom, zoom
+
+__all__ = [
+    "scale_intensity_range",
+    "zoom", 
+    "rand_zoom"
+]
diff --git a/python/cucim/src/cucim/core/operations/intensity/kernel/cuda_kernel_source.py b/python/cucim/src/cucim/core/operations/intensity/kernel/cuda_kernel_source.py
new file mode 100644
index 000000000..9c854486b
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/kernel/cuda_kernel_source.py
@@ -0,0 +1,243 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cuda_kernel_code = r'''
+extern "C" {
+__global__ void scaleVolume(float* image, float* output, \
+                            float x, float y, float bmin, \
+                            float bmax, int W)
+{
+    const unsigned int j = blockIdx.x * blockDim.x + threadIdx.x;
+    if(j < W) {
+        output[j] = fmaxf(fminf(image[j] * x - y, bmax), bmin);
+    }
+}
+
+__global__ void zoom_in_kernel(float *input_tensor, float *output_tensor, \
+                 int input_h, int input_w, int output_h, int output_w, \
+                 int pitch, int out_h_start, int out_h_end, \
+                 int out_w_start, int out_w_end) {
+  extern __shared__ float staging_tile[];
+
+  // H -> block Y, row
+  // W -> block X, col
+  int out_start_h = blockIdx.y * blockDim.y;
+  int out_end_h   = (blockIdx.y + 1) * blockDim.y - 1;
+  int out_start_w = blockIdx.x * blockDim.x;
+  int out_end_w   = (blockIdx.x + 1) * blockDim.x - 1;
+
+  int image_start_offset = blockIdx.z * pitch;
+
+  // ideally should go in unified register
+  int smem_load_h_start = floorf((out_start_h * input_h) / (float)output_h);
+  int smem_load_h_end = ceilf(((out_end_h+1) * input_h) / (float)output_h);
+  int smem_h_load_stretch = smem_load_h_end - smem_load_h_start;
+
+  int smem_load_w_start = floorf((out_start_w * input_w) / (float)output_w);
+  int smem_load_w_end = ceilf(((out_end_w+1) * input_w) / (float)output_w);
+  int smem_w_load_stretch = smem_load_w_end - smem_load_w_start;
+
+  for (int i = threadIdx.y; i < smem_h_load_stretch; i+=blockDim.y) {
+    for (int j = threadIdx.x; j < smem_w_load_stretch; j+=blockDim.x) {
+
+      if (((i+smem_load_h_start) < input_h) &&
+          ((j+smem_load_w_start) < input_w)) {
+          staging_tile[i * smem_w_load_stretch + j] = \
+                      input_tensor[image_start_offset +
+                                   (smem_load_h_start + i) * input_w +
+                                   smem_load_w_start + j];
+      } else {
+        staging_tile[i * smem_w_load_stretch + j] = 0.0f;
+      }
+    }
+  }
+  __syncthreads();
+
+  int out_pixel_h = blockIdx.y * blockDim.y + threadIdx.y;
+  int out_pixel_w = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (out_pixel_h < output_h && out_pixel_w < output_w
+      && out_pixel_h >= out_h_start && out_pixel_h < out_h_end
+      && out_pixel_w >= out_w_start && out_pixel_w < out_w_end) {
+
+    // compute pixels oh, ow span
+    int start_h = floorf((out_pixel_h * input_h) / (float)output_h);
+    int end_h = ceilf(((out_pixel_h+1) * input_h) / (float)output_h);
+
+    int start_w = floorf((out_pixel_w * input_w) / (float)output_w);
+    int end_w = ceilf(((out_pixel_w+1) * input_w) / (float)output_w);
+
+    int del_h = end_h - start_h;
+    int del_w = end_w - start_w;
+
+    float sum_ = 0.0f;
+
+    for (int i = 0; i < del_h; i++) {
+      for (int j = 0; j < del_w; j++) {
+        int smem_row = (start_h + i) - smem_load_h_start;
+        int smem_col = (start_w + j) - smem_load_w_start;
+        sum_ += staging_tile[smem_row * smem_w_load_stretch + smem_col];
+      }
+    }
+    sum_ /= (float)del_h;
+    sum_ /= (float)del_w;
+
+    output_tensor[(blockIdx.z * pitch) +
+                  ((out_pixel_h - out_h_start) * input_w) +
+                  (out_pixel_w - out_w_start)] = sum_;
+  }
+}
+
+__global__ void zoom_out_kernel(float *input_tensor, float *output_tensor,
+                  int input_h, int input_w, int output_h, int output_w,
+                  int pitch, int out_h_start, int out_h_end, int out_w_start,
+                  int out_w_end) {
+  extern __shared__ float staging_tile[];
+
+  // H -> block Y, row
+  // W -> block X, col
+  int out_start_h = blockIdx.y * blockDim.y;
+  int out_end_h   = (blockIdx.y + 1) * blockDim.y - 1;
+  int out_start_w = blockIdx.x * blockDim.x;
+  int out_end_w   = (blockIdx.x + 1) * blockDim.x - 1;
+
+  int image_start_offset = blockIdx.z * pitch;
+
+  // ideally should go in unified register
+  int smem_load_h_start = floorf((out_start_h * input_h) / (float)output_h);
+  int smem_load_h_end = ceilf(((out_end_h+1) * input_h) / (float)output_h);
+  int smem_h_load_stretch = smem_load_h_end - smem_load_h_start;
+
+  int smem_load_w_start = floorf((out_start_w * input_w) / (float)output_w);
+  int smem_load_w_end = ceilf(((out_end_w+1) * input_w) / (float)output_w);
+  int smem_w_load_stretch = smem_load_w_end - smem_load_w_start;
+
+  for (int i = threadIdx.y; i < smem_h_load_stretch; i+=blockDim.y) {
+    for (int j = threadIdx.x; j < smem_w_load_stretch; j+=blockDim.x) {
+
+      if (((i+smem_load_h_start) < input_h) &&
+          ((j+smem_load_w_start) < input_w)) {
+          staging_tile[i * smem_w_load_stretch + j] = \
+                    input_tensor[image_start_offset +
+                                 (smem_load_h_start + i)*input_w +
+                                 smem_load_w_start + j];
+      } else {
+        staging_tile[i * smem_w_load_stretch + j] = 0.0f;
+      }
+    }
+  }
+  __syncthreads();
+
+  int out_pixel_h = blockIdx.y * blockDim.y + threadIdx.y;
+  int out_pixel_w = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (out_pixel_h < output_h && out_pixel_w < output_w) {
+
+    // compute pixels oh, ow span
+    int start_h = floorf((out_pixel_h * input_h) / (float)output_h);
+    int end_h = ceilf(((out_pixel_h+1) * input_h) / (float)output_h);
+
+    int start_w = floorf((out_pixel_w * input_w) / (float)output_w);
+    int end_w = ceilf(((out_pixel_w+1) * input_w) / (float)output_w);
+
+    int del_h = end_h - start_h;
+    int del_w = end_w - start_w;
+
+    float sum_ = 0.0f;
+
+    for (int i = 0; i < del_h; i++) {
+      for (int j = 0; j < del_w; j++) {
+        int smem_row = (start_h + i) - smem_load_h_start;
+        int smem_col = (start_w + j) - smem_load_w_start;
+        sum_ += staging_tile[smem_row * smem_w_load_stretch + smem_col];
+      }
+    }
+    sum_ /= (float)del_h;
+    sum_ /= (float)del_w;
+
+    output_tensor[(blockIdx.z * pitch) +
+                  ((out_pixel_h + out_h_start) * input_w) +
+                  (out_pixel_w + out_w_start)] = sum_;
+
+    // replicate along top edge
+    if (out_pixel_h == 0) {
+      for (int ik = 0; ik < out_h_start; ik++)
+        output_tensor[(blockIdx.z * pitch) +
+                      ((out_pixel_h + ik) * input_w) +
+                      (out_pixel_w + out_w_start)] = sum_;
+    }
+
+    // replicate along bottom edge
+    if (out_pixel_h == (output_h - 1)) {
+      for (int ik = 1; ik <= out_h_end; ik++)
+        output_tensor[(blockIdx.z * pitch) +
+                      ((out_h_start + out_pixel_h + ik) * input_w) +
+                      (out_pixel_w + out_w_start)] = sum_;
+    }
+
+    // replicate along left edge
+    if (out_pixel_w == 0) {
+      for (int ik = 0; ik < out_w_start; ik++)
+        output_tensor[(blockIdx.z * pitch) +
+                      ((out_pixel_h + out_h_start) * input_w) + ik] = sum_;
+    }
+
+    // replicate along right edge
+    if (out_pixel_w == (output_w - 1)) {
+      for (int ik = 1; ik <= out_w_end; ik++)
+        output_tensor[(blockIdx.z * pitch) +
+                      ((out_pixel_h + out_h_start) * input_w) +
+                      (out_pixel_w + out_w_start + ik)] = sum_;
+    }
+
+    // corner replication not very friendly if large area to patch -
+    // single thread issues stores
+    // ToDo: Consider adding another kernel for corner padding
+
+    // top left corner
+    if (out_pixel_h == 0 && out_pixel_w == 0) {
+      for (int ik = 0; ik < out_h_start; ik++) {
+        for (int il = 0; il < out_w_start; il++)
+          output_tensor[(blockIdx.z * pitch) + (ik * input_w) + il] = sum_;
+      }
+    }
+    // top right corner
+    if (out_pixel_h == 0 && out_pixel_w == (output_w - 1)) {
+      for (int ik = 0; ik < out_h_start; ik++) {
+        for (int il = 1; il <= out_w_end; il++)
+          output_tensor[(blockIdx.z * pitch) + (ik * input_w) +
+                        (out_pixel_w + out_w_start + il)] = sum_;
+      }
+    }
+    // bottom left corner
+    if (out_pixel_h == (output_h - 1) && out_pixel_w == 0) {
+      for (int ik = 1; ik <= out_h_end; ik++) {
+        for (int il = 0; il < out_w_start; il++)
+          output_tensor[(blockIdx.z * pitch) +
+                        ((out_h_start + out_pixel_h + ik) * input_w) +
+                        il] = sum_;
+      }
+    }
+    // bottom right corner
+    if (out_pixel_h == (output_h - 1) && out_pixel_w == (output_w - 1)) {
+      for (int ik = 1; ik <= out_h_end; ik++) {
+        for (int il = 1; il <= out_w_end; il++)
+          output_tensor[(blockIdx.z * pitch) +
+                        ((out_h_start + out_pixel_h + ik) * input_w) +
+                        (out_pixel_w + out_w_start + il)] = sum_;
+      }
+    }
+  }
+}
+}'''
diff --git a/python/cucim/src/cucim/core/operations/intensity/scaling.py b/python/cucim/src/cucim/core/operations/intensity/scaling.py
new file mode 100755
index 000000000..c8cac6491
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/scaling.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Any
+
+import cupy
+import numpy as np
+
+from .kernel.cuda_kernel_source import cuda_kernel_code
+
+CUDA_KERNELS = cupy.RawModule(code=cuda_kernel_code)
+_logger = logging.getLogger("scaling_cucim")
+
+
+def scale_intensity_range(
+    img: Any,
+    b_max: float,
+    b_min: float,
+    a_max: float,
+    a_min: float,
+    clip: bool = False
+) -> Any:
+    """
+    Apply intensity scaling to the input array.
+    Scaling from [a_min, a_max] to [b_min, b_max] with clip option.
+
+    Parameters
+    ----------
+    img : channel first, cupy.ndarray or numpy.ndarray
+        Input data of shape (C, H, W). Can also batch process input of shape
+        (N, C, H, W). Can be a numpy.ndarray or cupy.ndarray.
+    b_min : float
+        intensity target range min.
+    b_max : float
+        intensity target range max.
+    a_min : float
+        intensity original range min.
+    a_max : float
+        intensity original range max.
+    clip : float
+        whether to perform clip after scaling.
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+    ValueError
+        If input original intensity min and max are same
+
+    Examples
+    --------
+    >>> import cucim.core.operations.intensity as its
+    >>> # input is channel first 3d array
+    >>> output_array = its.scale_intensity_range(input_arr,
+                                                 0.0, 255.0,
+                                                 -1.0, 1.0, False)
+    """
+    try:
+        if a_max - a_min == 0.0:
+            raise ValueError("Original intensity range min and max are same")
+
+        to_cupy = False
+
+        if isinstance(img, np.ndarray):
+            to_cupy = True
+            cupy_img = cupy.asarray(img, dtype=cupy.float32, order='C')
+        elif not isinstance(img, cupy.ndarray):
+            raise TypeError("img must be a cupy.ndarray or numpy.ndarray")
+        else:
+            cupy_img = cupy.ascontiguousarray(img)
+
+        if cupy_img.dtype != cupy.float32:
+            if cupy.can_cast(img.dtype, cupy.float32) is False:
+                raise ValueError(
+                    "Cannot safely cast type {cupy_img.dtype.name} to \
+                    'float32'"
+                )
+            else:
+                cupy_img = cupy_img.astype(cupy.float32)
+
+        scale = CUDA_KERNELS.get_function("scaleVolume")
+
+        x = (b_max - b_min) / (a_max - a_min)
+        y = a_min * x - b_min
+        if clip is False:
+            b_max = float('inf')
+            b_min = float('-inf')
+
+        sh = img.shape
+        total_size = np.prod(sh)
+        blockx = 128
+        gridx = int((total_size - 1) / blockx + 1)
+
+        result = cupy.empty(img.shape, dtype=cupy_img.dtype)
+
+        scale((gridx, 1, 1), (blockx, 1, 1),
+              (cupy_img, result, np.float32(x), np.float32(y),
+              np.float32(b_min), np.float32(b_max),
+              np.int32(total_size)))
+
+        if img.dtype != cupy.float32:
+            result = result.astype(img.dtype)
+
+        if to_cupy is True:
+            result = cupy.asnumpy(result)
+
+    except Exception as e:
+        _logger.error("[cucim] " + str(e), exc_info=True)
+        raise
+
+    return result
diff --git a/python/cucim/src/cucim/core/operations/intensity/tests/scaled.png b/python/cucim/src/cucim/core/operations/intensity/tests/scaled.png
new file mode 100644
index 000000000..b7acf410b
Binary files /dev/null and b/python/cucim/src/cucim/core/operations/intensity/tests/scaled.png differ
diff --git a/python/cucim/src/cucim/core/operations/intensity/tests/test_rand_zoom.py b/python/cucim/src/cucim/core/operations/intensity/tests/test_rand_zoom.py
new file mode 100644
index 000000000..85641383b
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/tests/test_rand_zoom.py
@@ -0,0 +1,56 @@
+import os
+
+import cupy
+import numpy as np
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.intensity as its
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_zoomed_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_rand_zoom_numpy_input():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    output = its.rand_zoom(arr, prob=1.0, min_zoom=1.1, max_zoom=1.1)
+    assert np.allclose(output, zoomed_arr)
+
+
+def test_rand_zoom_zero_prob():
+    arr = get_input_arr()
+    output = its.rand_zoom(arr, prob=0.0, min_zoom=1.1, max_zoom=1.1)
+    assert np.allclose(output, arr)
+
+
+def test_rand_zoom_cupy_input():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = its.rand_zoom(cupy_arr, prob=1.0, min_zoom=1.1, max_zoom=1.1)
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, zoomed_arr)
+
+
+def test_rand_zoom_batchinput():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = its.rand_zoom(arr_batch, prob=1.0, min_zoom=1.1, max_zoom=1.1)
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], zoomed_arr)
diff --git a/python/cucim/src/cucim/core/operations/intensity/tests/test_scaling.py b/python/cucim/src/cucim/core/operations/intensity/tests/test_scaling.py
new file mode 100644
index 000000000..97b4a0da6
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/tests/test_scaling.py
@@ -0,0 +1,62 @@
+import os
+
+import cupy
+import numpy as np
+import pytest
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.intensity as its
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_scaled_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "scaled.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_scale_param():
+    arr = get_input_arr()
+    with pytest.raises(ValueError):
+        its.scale_intensity_range(arr, 0.0, 255.0, 1.0, 1.0, False)
+    with pytest.raises(TypeError):
+        img = Image.fromarray(arr.T, 'RGB')
+        its.scale_intensity_range(img, 0.0, 255.0, -1.0, 1.0, False)
+
+
+def test_scale_numpy_input():
+    arr = get_input_arr()
+    scaled_arr = get_scaled_data()
+    output = its.scale_intensity_range(arr, 0.0, 255.0, -1.0, 1.0, False)
+    assert np.allclose(output, scaled_arr)
+
+
+def test_scale_cupy_input():
+    arr = get_input_arr()
+    scaled_arr = get_scaled_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = its.scale_intensity_range(cupy_arr,
+                                            0.0, 255.0, -1.0, 1.0, False)
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, scaled_arr)
+
+
+def test_scale_batchinput():
+    arr = get_input_arr()
+    scaled_arr = get_scaled_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    output = its.scale_intensity_range(arr_batch, 0.0, 255.0, -1.0, 1.0, False)
+
+    assert output.shape[0] == 8
+
+    for i in range(output.shape[0]):
+        assert np.allclose(output[i], scaled_arr)
diff --git a/python/cucim/src/cucim/core/operations/intensity/tests/test_zoom.py b/python/cucim/src/cucim/core/operations/intensity/tests/test_zoom.py
new file mode 100644
index 000000000..1c80d6673
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/tests/test_zoom.py
@@ -0,0 +1,61 @@
+import os
+
+import cupy
+import numpy as np
+import pytest
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.intensity as its
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_zoomed_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_zoom_param():
+    arr = get_input_arr()
+    with pytest.raises(ValueError):
+        arr1 = arr.flatten()
+        its.zoom(arr1, [1.1, 1.1])
+    with pytest.raises(TypeError):
+        img = Image.fromarray(arr.T, 'RGB')
+        its.zoom(img, [1.1, 1.1])
+
+
+def test_zoom_numpy_input():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    output = its.zoom(arr, [1.1, 1.1])
+    assert np.allclose(output, zoomed_arr)
+
+
+def test_zoom_cupy_input():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = its.zoom(cupy_arr, [1.1, 1.1])
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, zoomed_arr)
+
+
+def test_zoom_batchinput():
+    arr = get_input_arr()
+    zoomed_arr = get_zoomed_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = its.zoom(arr_batch, [1.1, 1.1])
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], zoomed_arr)
diff --git a/python/cucim/src/cucim/core/operations/intensity/tests/zoomed.png b/python/cucim/src/cucim/core/operations/intensity/tests/zoomed.png
new file mode 100644
index 000000000..14b2169c5
Binary files /dev/null and b/python/cucim/src/cucim/core/operations/intensity/tests/zoomed.png differ
diff --git a/python/cucim/src/cucim/core/operations/intensity/zoom.py b/python/cucim/src/cucim/core/operations/intensity/zoom.py
new file mode 100644
index 000000000..4d197f62d
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/intensity/zoom.py
@@ -0,0 +1,244 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import math
+from typing import Any, Sequence, Union
+
+import cupy
+import numpy as np
+
+from .kernel.cuda_kernel_source import cuda_kernel_code
+
+_logger = logging.getLogger("zoom_cucim")
+CUDA_KERNELS = cupy.RawModule(code=cuda_kernel_code)
+
+
+def zoom(
+    img: Any,
+    zoom_factor: Sequence[float]
+):
+    """Zooms an ND image
+
+    Parameters
+    ----------
+    img : channel first, cupy.ndarray or numpy.ndarray
+        Input data of shape (C, H, W). Can also batch process input of shape
+        (N, C, H, W). Can be a numpy.ndarray or cupy.ndarray.
+    zoom_factor: Sequence[float]
+        The zoom factor along the spatial axes.
+        Zoom factor should contain one value for each spatial axis.
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.intensity as its
+    >>> # input is channel first 3d array
+    >>> output_array = its.zoom(input_arr,[1.1,1.1])
+    """
+    try:
+        to_cupy = False
+
+        if isinstance(img, np.ndarray):
+            to_cupy = True
+            cupy_img = cupy.asarray(img, dtype=cupy.float32, order="C")
+        elif not isinstance(img, cupy.ndarray):
+            raise TypeError("img must be a cupy.ndarray or numpy.ndarray")
+        else:
+            cupy_img = cupy.ascontiguousarray(img)
+
+        if cupy_img.dtype != cupy.float32:
+            if cupy.can_cast(img.dtype, cupy.float32) is False:
+                raise ValueError(
+                    "Cannot safely cast type {cupy_img.dtype.name} \
+                     to 'float32'"
+                )
+            else:
+                cupy_img = cupy_img.astype(cupy.float32)
+
+        if img.ndim not in (3, 4):
+            raise ValueError(
+                f"Unsupported img.ndim={img.ndim}. Expected `img` with "
+                "dimensions (C, H, W) or (N, C, H, W)."
+            )
+
+        if len(img.shape) == 4:
+            N, C, H, W = img.shape
+        elif len(img.shape) == 3:
+            C, H, W = img.shape
+            N = 1
+
+        output_size_cu = [N, C, int(math.floor(H * zoom_factor[0])),
+                          int(math.floor(W * zoom_factor[1]))]
+
+        if output_size_cu[2] == H and output_size_cu[3] == W:
+            return img
+
+        def get_block_size(output_size_cu, H, W):
+            max_smem = 48 * 1024
+            cu_block_options = [(16, 16, 1), (16, 8, 1), (8, 8, 1), (8, 4, 1)]
+            # compare for 48KB for standard CC optimal occupancy
+            # array is H, W but kernel is x--> W, y-->H
+            for param in cu_block_options:
+                h_stretch = [math.floor((0 * H) / output_size_cu[2]),
+                             math.ceil((param[1] * H) / output_size_cu[2])]
+                w_stretch = [math.floor((0 * W) / output_size_cu[3]),
+                             math.ceil((param[0] * W) / output_size_cu[3])]
+
+                smem_size = (h_stretch[1] + 1) * (w_stretch[1] + 1) * 4
+                if smem_size < max_smem:
+                    return param, smem_size
+
+            raise Exception("Random Zoom couldnt find a \
+                             shared memory configuration")
+
+        # input pitch
+        pitch = H * W
+
+        # get block size
+        block_config, smem_size = get_block_size(output_size_cu, H, W)
+        grid = (int((output_size_cu[3] - 1) / block_config[0] + 1),
+                int((output_size_cu[2] - 1) / block_config[1] + 1), C * N)
+
+        is_zoom_out = output_size_cu[2] < H and output_size_cu[3] < W
+        is_zoom_in = output_size_cu[2] > H and output_size_cu[3] > W
+
+        pad_dims = [[0, 0]] * 2  # zoom out
+        slice_dims = [[0, 0]] * 2  # zoom in
+        for idx, (orig, zoom) in enumerate(zip((H, W),
+                                           (output_size_cu[2],
+                                            output_size_cu[3]))):
+            diff = orig - zoom
+            half = abs(diff) // 2
+            if diff > 0:
+                pad_dims[idx] = [half, diff - half]
+            elif diff < 0:
+                slice_dims[idx] = [half, half + orig]
+
+        result = cupy.ndarray(cupy_img.shape, dtype=cupy.float32)
+
+        if is_zoom_in:
+            # slice
+            kernel = CUDA_KERNELS.get_function("zoom_in_kernel")
+            kernel(grid, block_config,
+                   args=(cupy_img, result, np.int32(H), np.int32(W),
+                         np.int32(output_size_cu[2]),
+                         np.int32(output_size_cu[3]),
+                         np.int32(pitch), np.int32(slice_dims[0][0]),
+                         np.int32(slice_dims[0][1]),
+                         np.int32(slice_dims[1][0]),
+                         np.int32(slice_dims[1][1])),
+                   shared_mem=smem_size)
+        elif is_zoom_out:
+            # pad
+            kernel = CUDA_KERNELS.get_function("zoom_out_kernel")
+            kernel(grid, block_config,
+                   args=(cupy_img, result, np.int32(H), np.int32(W),
+                         np.int32(output_size_cu[2]),
+                         np.int32(output_size_cu[3]),
+                         np.int32(pitch), np.int32(pad_dims[0][0]),
+                         np.int32(pad_dims[0][1]),
+                         np.int32(pad_dims[1][0]),
+                         np.int32(pad_dims[1][1])),
+                   shared_mem=smem_size)
+        else:
+            raise Exception("Can only handle simultaneous \
+                            expansion(or shrinkage) in both H,W dimension, \
+                            check zoom factors")
+
+        if img.dtype != np.float32:
+            result = result.astype(img.dtype)
+
+        if to_cupy is True:
+            result = cupy.asnumpy(result)
+
+        return result
+
+    except Exception as e:
+        _logger.error("[cucim] " + str(e), exc_info=True)
+        _logger.info("Error executing random zoom on GPU")
+        raise
+
+
+def rand_zoom(
+    img: Any,
+    min_zoom: Union[Sequence[float], float] = 0.9,
+    max_zoom: Union[Sequence[float], float] = 1.1,
+    prob: float = 0.1
+):
+    """
+    Randomly Calls zoom with random zoom factor
+
+    Parameters
+    ----------
+    img : channel first, cupy.ndarray or numpy.ndarray
+        Input data of shape (C, H, W). Can also batch process input of shape
+        (N, C, H, W). Can be a numpy.ndarray or cupy.ndarray.
+    min_zoom: Min zoom factor. Can be float or sequence same size as image.
+        If a float, select a random factor from `[min_zoom, max_zoom]` then
+        apply to all spatial dims to keep the original spatial shape ratio.
+        If a sequence, min_zoom should contain one value for each spatial axis.
+        If 2 values provided for 3D data, use the first value for both H & W
+        dims to keep the same zoom ratio.
+    max_zoom: Max zoom factor. Can be float or sequence same size as image.
+        If a float, select a random factor from `[min_zoom, max_zoom]` then
+        apply to all spatial dims to keep the original spatial shape ratio.
+        If a sequence, max_zoom should contain one value for each spatial axis.
+        If 2 values provided for 3D data, use the first value for both H & W
+        dims to keep the same zoom ratio.
+    prob: Probability of zooming.
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.intensity as its
+    >>> # input is channel first 3d array
+    >>> output_array = its.rand_zoom(input_arr)
+    """
+    R = np.random.RandomState()
+
+    rand_factor = R.rand()
+    zoom_factor = []
+
+    if rand_factor < prob:
+        try:
+            zoom_factor = [R.uniform(low, high)
+                           for low, high in zip(min_zoom, max_zoom)]
+        except Exception:
+            zoom_factor = [R.uniform(min_zoom, max_zoom)]
+
+        if len(zoom_factor) != 2:
+            zoom_factor = [zoom_factor[0] for _ in range(2)]
+
+    if rand_factor < prob:
+        return zoom(img, zoom_factor)
+    else:
+        return img
diff --git a/python/cucim/src/cucim/core/operations/spatial/__init__.py b/python/cucim/src/cucim/core/operations/spatial/__init__.py
new file mode 100644
index 000000000..19fbc08e2
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/__init__.py
@@ -0,0 +1,9 @@
+from .rotate_and_flip import (image_flip, image_rotate_90, rand_image_flip,
+                              rand_image_rotate_90)
+
+__all__ = [
+    "image_rotate_90",
+    "image_flip",
+    "rand_image_flip",
+    "rand_image_rotate_90"
+]
diff --git a/python/cucim/src/cucim/core/operations/spatial/rotate_and_flip.py b/python/cucim/src/cucim/core/operations/spatial/rotate_and_flip.py
new file mode 100755
index 000000000..196369a1b
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/rotate_and_flip.py
@@ -0,0 +1,214 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Any
+
+import cupy
+import numpy as np
+
+_logger = logging.getLogger("spatial_cucim")
+
+
+def image_flip(
+    img: Any,
+    spatial_axis: tuple()
+) -> Any:
+    """
+    Shape preserving order reversal of elements in input array
+    along the given spatial axis
+
+    Parameters
+    ----------
+    img : cupy.ndarray or numpy.ndarray
+        Input data. Can be numpy.ndarray or cupy.ndarray
+    spatial_axis : tuple
+        spatial axis along which to flip over the input array
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.spatial as spt
+    >>> # input is channel first 3d array
+    >>> output_array = spt.image_flip(input_arr, (1, 2))
+    """
+    try:
+        to_cupy = False
+
+        if isinstance(img, np.ndarray):
+            to_cupy = True
+            cupy_img = cupy.asarray(img, order="C")
+        elif not isinstance(img, cupy.ndarray):
+            raise TypeError("img must be a cupy.ndarray or numpy.ndarray")
+        else:
+            cupy_img = cupy.ascontiguousarray(img)
+
+        result = cupy.flip(cupy_img, spatial_axis)
+        if to_cupy is True:
+            result = cupy.asnumpy(result)
+        return result
+    except Exception as e:
+        _logger.error("[cucim] " + str(e), exc_info=True)
+        _logger.info("Error executing image flip on GPU")
+        raise
+
+
+def image_rotate_90(
+    img: Any,
+    k: int,
+    spatial_axis: tuple()
+) -> Any:
+    """
+    Rotate input array by 90 degress along the given axis
+
+    Parameters
+    ----------
+    img : cupy.ndarray or numpy.ndarray
+        Input data. Can be numpy.ndarray or cupy.ndarray
+    k : int
+        number of times to rotate
+    spatial_axis : tuple
+        spatial axis along which to rotate the input array by 90 degrees
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.spatial as spt
+    >>> # input is channel first 3d array
+    >>> output_array = spt.image_rotate_90(input_arr,1,(1,2))
+    """
+    try:
+        to_cupy = False
+
+        if isinstance(img, np.ndarray):
+            to_cupy = True
+            cupy_img = cupy.asarray(img, order="C")
+        elif not isinstance(img, cupy.ndarray):
+            raise TypeError("img must be a cupy.ndarray or numpy.ndarray")
+        else:
+            cupy_img = cupy.ascontiguousarray(img)
+
+        result = cupy.rot90(cupy_img, k, spatial_axis)
+        if to_cupy is True:
+            result = cupy.asnumpy(result)
+        return result
+    except Exception as e:
+        _logger.error("[cucim] " + str(e), exc_info=True)
+        _logger.info("Error executing image rotation on GPU")
+        raise
+
+
+def rand_image_flip(
+    img: Any,
+    spatial_axis: tuple(),
+    prob: float = 0.1
+) -> Any:
+    """
+    Randomly flips the image along axis.
+
+    Parameters
+    ----------
+    img : cupy.ndarray or numpy.ndarray
+        Input data. Can be numpy.ndarray or cupy.ndarray
+    prob: Probability of flipping.
+    spatial_axis : tuple
+        spatial axis along which to flip over the input array
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.spatial as spt
+    >>> # input is channel first 3d array
+    >>> output_array = spt.rand_image_flip(input_arr,spatial_axis=(1,2))
+    """
+    R = np.random.RandomState()
+
+    if R.rand() < prob:
+        return image_flip(img, spatial_axis)
+    else:
+        return img
+
+
+def rand_image_rotate_90(
+    img: Any,
+    spatial_axis: tuple(),
+    prob: float = 0.1,
+    max_k: int = 3
+) -> Any:
+    """
+    With probability `prob`, input arrays are rotated by 90 degrees
+    in the plane specified by `spatial_axis`.
+
+    Parameters
+    ----------
+    img : cupy.ndarray or numpy.ndarray
+        Input data. Can be numpy.ndarray or cupy.ndarray
+    prob: probability of rotating.
+        (Default 0.1, with 10% probability it returns a rotated array)
+    max_k: number of rotations
+        will be sampled from `np.random.randint(max_k) + 1`, (Default 3).
+    spatial_axis : tuple
+        spatial axis along which to rotate the input array by 90 degrees
+
+    Returns
+    -------
+    out : cupy.ndarray or numpy.ndarray
+        Output data. Same dimensions and type as input.
+
+    Raises
+    ------
+    TypeError
+        If input 'img' is not cupy.ndarray or numpy.ndarray
+
+    Examples
+    --------
+    >>> import cucim.core.operations.spatial as spt
+    >>> # input is channel first 3d array
+    >>> output_array = spt.rand_image_rotate_90(input_arr, spatial_axis=(1, 2))
+    """
+    R = np.random.RandomState()
+
+    _rand_k = R.randint(max_k) + 1
+
+    if R.rand() < prob:
+        return image_rotate_90(img, _rand_k, spatial_axis)
+    else:
+        return img
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/flipped.png b/python/cucim/src/cucim/core/operations/spatial/tests/flipped.png
new file mode 100644
index 000000000..ec02b4fae
Binary files /dev/null and b/python/cucim/src/cucim/core/operations/spatial/tests/flipped.png differ
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/rotated.png b/python/cucim/src/cucim/core/operations/spatial/tests/rotated.png
new file mode 100644
index 000000000..42462a375
Binary files /dev/null and b/python/cucim/src/cucim/core/operations/spatial/tests/rotated.png differ
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/test_flip.py b/python/cucim/src/cucim/core/operations/spatial/tests/test_flip.py
new file mode 100644
index 000000000..08be7829f
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/tests/test_flip.py
@@ -0,0 +1,59 @@
+import os
+
+import cupy
+import numpy as np
+import pytest
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.spatial as spt
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_flipped_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "flipped.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_flip_param():
+    arr = get_input_arr()
+    with pytest.raises(TypeError):
+        img = Image.fromarray(arr.T, 'RGB')
+        spt.image_flip(img, (1, 2))
+
+
+def test_flip_numpy_input():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    output = spt.image_flip(arr, (1, 2))
+    assert np.allclose(output, flip_arr)
+
+
+def test_flip_cupy_input():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = spt.image_flip(cupy_arr, (1, 2))
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, flip_arr)
+
+
+def test_flip_batchinput():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = spt.image_flip(arr_batch, (2, 3))
+
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], flip_arr)
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/test_random_flip.py b/python/cucim/src/cucim/core/operations/spatial/tests/test_random_flip.py
new file mode 100644
index 000000000..21471e0de
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/tests/test_random_flip.py
@@ -0,0 +1,57 @@
+import os
+
+import cupy
+import numpy as np
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.spatial as spt
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_flipped_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "flipped.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_rand_flip_numpy_input():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    output = spt.rand_image_flip(arr, prob=1.0, spatial_axis=(1, 2))
+    assert np.allclose(output, flip_arr)
+
+
+def test_rand_flip_zero_prob():
+    arr = get_input_arr()
+    output = spt.rand_image_flip(arr, prob=0.0, spatial_axis=(1, 2))
+    assert np.allclose(output, arr)
+
+
+def test_rand_flip_cupy_input():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = spt.rand_image_flip(cupy_arr, prob=1.0, spatial_axis=(1, 2))
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, flip_arr)
+
+
+def test_rand_flip_batchinput():
+    arr = get_input_arr()
+    flip_arr = get_flipped_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = spt.rand_image_flip(arr_batch, prob=1.0, spatial_axis=(2, 3))
+
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], flip_arr)
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/test_random_rotate90.py b/python/cucim/src/cucim/core/operations/spatial/tests/test_random_rotate90.py
new file mode 100644
index 000000000..9e6c60ebb
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/tests/test_random_rotate90.py
@@ -0,0 +1,68 @@
+import os
+
+import cupy
+import numpy as np
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.spatial as spt
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_rotated_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "rotated.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_rand_rotate90_numpy_input():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    output = spt.rand_image_rotate_90(arr,
+                                      max_k=1,
+                                      prob=1.0,
+                                      spatial_axis=[1, 2])
+    assert np.allclose(output, rotate90_arr)
+
+
+def test_rand_rotate90_zero_prob():
+    arr = get_input_arr()
+    output = spt.rand_image_rotate_90(arr,
+                                      max_k=1,
+                                      prob=0.0,
+                                      spatial_axis=[1, 2])
+    assert np.allclose(output, arr)
+
+
+def test_rand_rotate90_cupy_input():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = spt.rand_image_rotate_90(cupy_arr,
+                                           max_k=1,
+                                           prob=1.0,
+                                           spatial_axis=[1, 2])
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, rotate90_arr)
+
+
+def test_rand_rotate90_batchinput():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = spt.rand_image_rotate_90(arr_batch,
+                                         max_k=1,
+                                         prob=1.0,
+                                         spatial_axis=[2, 3])
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], rotate90_arr)
diff --git a/python/cucim/src/cucim/core/operations/spatial/tests/test_rotate90.py b/python/cucim/src/cucim/core/operations/spatial/tests/test_rotate90.py
new file mode 100644
index 000000000..333b2c983
--- /dev/null
+++ b/python/cucim/src/cucim/core/operations/spatial/tests/test_rotate90.py
@@ -0,0 +1,58 @@
+import os
+
+import cupy
+import numpy as np
+import pytest
+import skimage.data
+from PIL import Image
+
+import cucim.core.operations.spatial as spt
+
+
+def get_input_arr():
+    img = skimage.data.astronaut()
+    arr = np.asarray(img)
+    arr = np.transpose(arr)
+    return arr
+
+
+def get_rotated_data():
+    dirname = os.path.dirname(__file__)
+    img1 = Image.open(os.path.join(os.path.abspath(dirname), "rotated.png"))
+    arr_o = np.asarray(img1)
+    arr_o = np.transpose(arr_o)
+    return arr_o
+
+
+def test_rotate90_param():
+    arr = get_input_arr()
+    with pytest.raises(TypeError):
+        img = Image.fromarray(arr.T, 'RGB')
+        spt.image_rotate_90(img, 1, [1, 2])
+
+
+def test_rotate90_numpy_input():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    output = spt.image_rotate_90(arr, 1, [1, 2])
+    assert np.allclose(output, rotate90_arr)
+
+
+def test_rotate90_cupy_input():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    cupy_arr = cupy.asarray(arr)
+    cupy_output = spt.image_rotate_90(cupy_arr, 1, [1, 2])
+    np_output = cupy.asnumpy(cupy_output)
+    assert np.allclose(np_output, rotate90_arr)
+
+
+def test_rotate90_batchinput():
+    arr = get_input_arr()
+    rotate90_arr = get_rotated_data()
+    arr_batch = np.stack((arr,) * 8, axis=0)
+    np_output = spt.image_rotate_90(arr_batch, 1, [2, 3])
+    assert np_output.shape[0] == 8
+
+    for i in range(np_output.shape[0]):
+        assert np.allclose(np_output[i], rotate90_arr)
diff --git a/python/cucim/src/cucim/skimage/measure/_regionprops.py b/python/cucim/src/cucim/skimage/measure/_regionprops.py
index e9b0d73ab..602f939cf 100644
--- a/python/cucim/src/cucim/skimage/measure/_regionprops.py
+++ b/python/cucim/src/cucim/skimage/measure/_regionprops.py
@@ -311,9 +311,10 @@ def convex_image(self):
         # from ..morphology.convex_hull import convex_hull_image
         from skimage.morphology.convex_hull import convex_hull_image
 
-        # CuPy Backend: copy required here to avoid unexpected behavior
+        # CuPy Backend: explicitly cast to uint8 to avoid the issue see in
         #               reported in https://github.com/cupy/cupy/issues/4354
-        return cp.asarray(convex_hull_image(cp.asnumpy(self.image))).copy()
+        return cp.asarray(convex_hull_image(cp.asnumpy(self.image))).astype(
+            cp.uint8)
 
     @property
     def coords(self):
diff --git a/python/cucim/tests/fixtures/testimage.py b/python/cucim/tests/fixtures/testimage.py
index 7e3e2c234..c50b8eb6f 100644
--- a/python/cucim/tests/fixtures/testimage.py
+++ b/python/cucim/tests/fixtures/testimage.py
@@ -46,9 +46,19 @@ def testimg_tiff_stripe_32x24_16_deflate(tmpdir_factory):
     shutil.rmtree(dataset_path)
 
 
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_32x24_16_raw(tmpdir_factory):
+    dataset_path, image_path = gen_image(
+        tmpdir_factory, 'tiff::stripe:32x24:16:raw')
+    yield image_path
+    # Clean up fake dataset folder
+    shutil.rmtree(dataset_path)
+
+
 @pytest.fixture(scope='session', params=[
     lazy_fixture('testimg_tiff_stripe_32x24_16_jpeg'),
-    lazy_fixture('testimg_tiff_stripe_32x24_16_deflate')
+    lazy_fixture('testimg_tiff_stripe_32x24_16_deflate'),
+    lazy_fixture('testimg_tiff_stripe_32x24_16_raw')
 ])
 def testimg_tiff_stripe_32x24_16(request):
     return request.param
@@ -74,9 +84,19 @@ def testimg_tiff_stripe_4096x4096_256_deflate(tmpdir_factory):
     shutil.rmtree(dataset_path)
 
 
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_4096x4096_256_raw(tmpdir_factory):
+    dataset_path, image_path = gen_image(
+        tmpdir_factory, 'tiff::stripe:4096x4096:256:raw')
+    yield image_path
+    # Clean up fake dataset folder
+    shutil.rmtree(dataset_path)
+
+
 @pytest.fixture(scope='session', params=[
     lazy_fixture('testimg_tiff_stripe_4096x4096_256_jpeg'),
-    lazy_fixture('testimg_tiff_stripe_4096x4096_256_deflate')
+    lazy_fixture('testimg_tiff_stripe_4096x4096_256_deflate'),
+    lazy_fixture('testimg_tiff_stripe_4096x4096_256_raw')
 ])
 def testimg_tiff_stripe_4096x4096_256(request):
     return request.param
@@ -101,9 +121,19 @@ def testimg_tiff_stripe_100000x100000_256_deflate(tmpdir_factory):
     shutil.rmtree(dataset_path)
 
 
+@pytest.fixture(scope='session')
+def testimg_tiff_stripe_100000x100000_256_raw(tmpdir_factory):
+    dataset_path, image_path = gen_image(
+        tmpdir_factory, 'tiff::stripe:100000x100000:256:raw')
+    yield image_path
+    # Clean up fake dataset folder
+    shutil.rmtree(dataset_path)
+
+
 @pytest.fixture(scope='session', params=[
     lazy_fixture('testimg_tiff_stripe_100000x100000_256_jpeg'),
-    lazy_fixture('testimg_tiff_stripe_100000x100000_256_deflate')
+    lazy_fixture('testimg_tiff_stripe_100000x100000_256_deflate'),
+    lazy_fixture('testimg_tiff_stripe_100000x100000_256_raw')
 ])
 def testimg_tiff_stripe_100000x100000_256(request):
     return request.param
diff --git a/python/cucim/tests/unit/test_init.py b/python/cucim/tests/unit/test_init.py
new file mode 100644
index 000000000..6cd7d5fd9
--- /dev/null
+++ b/python/cucim/tests/unit/test_init.py
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from unittest.mock import patch
+
+
+def test_is_available():
+    with patch('cucim._is_cupy_available', False):
+        with patch('cucim._is_clara_available', False):
+            import cucim
+            assert cucim.is_available() is False
+            assert cucim.is_available("skimage") is False
+            assert cucim.is_available("clara") is False
+            assert cucim.is_available("unknown") is False
+        with patch('cucim._is_clara_available', True):
+            import cucim
+            assert cucim.is_available() is False
+            assert cucim.is_available("skimage") is False
+            assert cucim.is_available("clara") is True
+            assert cucim.is_available("unknown") is False
+
+    with patch('cucim._is_cupy_available', True):
+        with patch('cucim._is_clara_available', False):
+            import cucim
+            assert cucim.is_available() is False
+            assert cucim.is_available("skimage") is True
+            assert cucim.is_available("clara") is False
+            assert cucim.is_available("unknown") is False
+        with patch('cucim._is_clara_available', True):
+            import cucim
+            assert cucim.is_available() is True
+            assert cucim.is_available("skimage") is True
+            assert cucim.is_available("clara") is True
+            assert cucim.is_available("unknown") is True
diff --git a/python/cucim/tests/util/gen_tiff.py b/python/cucim/tests/util/gen_tiff.py
index 764a8ffab..713679657 100644
--- a/python/cucim/tests/util/gen_tiff.py
+++ b/python/cucim/tests/util/gen_tiff.py
@@ -21,7 +21,8 @@
 from tifffile import TiffWriter
 
 COMPRESSION_MAP = {'jpeg': ('jpeg', 95),
-                   'deflate': 'deflate'}
+                   'deflate': 'deflate',
+                   'raw': None}
 
 
 class TiffGenerator:
diff --git a/python/pybind11/cucim_py.cpp b/python/pybind11/cucim_py.cpp
index 65c140178..753cac9bc 100644
--- a/python/pybind11/cucim_py.cpp
+++ b/python/pybind11/cucim_py.cpp
@@ -153,7 +153,21 @@ PYBIND11_MODULE(_cucim, m)
              py::arg("name") = "", //
              py::arg("device") = io::Device()) //
         .def("save", &CuImage::save, doc::CuImage::doc_save, py::call_guard<py::gil_scoped_release>()) //
+        .def("close", &CuImage::close, doc::CuImage::doc_close, py::call_guard<py::gil_scoped_release>()) //
         .def("__bool__", &CuImage::operator bool, py::call_guard<py::gil_scoped_release>()) //
+        .def(
+            "__enter__",
+            [](const std::shared_ptr<CuImage>& cuimg) { //
+                return cuimg; //
+            }, //
+            py::call_guard<py::gil_scoped_release>())
+        .def(
+            "__exit__",
+            [](const std::shared_ptr<CuImage>& cuimg, const py::object& type, const py::object& value,
+               const py::object& traceback) { //
+                cuimg->close(); //
+            }, //
+            py::call_guard<py::gil_scoped_release>())
         .def(
             "__repr__", //
             [](const CuImage& cuimg) { //
diff --git a/python/pybind11/cucim_pydoc.h b/python/pybind11/cucim_pydoc.h
index 1fa4b7af6..8d985535f 100644
--- a/python/pybind11/cucim_pydoc.h
+++ b/python/pybind11/cucim_pydoc.h
@@ -227,6 +227,15 @@ Saves image data to the file path.
 Currently it supports only .ppm file format that can be viewed by `eog` command in Ubuntu.
 )doc")
 
+// void close();
+PYDOC(close, R"doc(
+Closes the file handle.
+
+Once the file handle is closed, the image object (if loaded before) still exists but cannot read additional images
+from the file.
+)doc")
+
+
 // void _set_array_interface(const CuImage& cuimg);
 PYDOC(_set_array_interface, R"doc(
 Add `__array_interface__` or `__cuda_array_interface__` depending on the memory type.
diff --git a/python/pybind11/filesystem/filesystem_py.cpp b/python/pybind11/filesystem/filesystem_py.cpp
index 40c9dc2bc..e11de2bc5 100644
--- a/python/pybind11/filesystem/filesystem_py.cpp
+++ b/python/pybind11/filesystem/filesystem_py.cpp
@@ -58,6 +58,19 @@ void init_filesystem(py::module& fs)
              py::arg("file_offset"), //
              py::arg("buf_offset") = 0) //
         .def("close", &CuFileDriver::close, doc::CuFileDriver::doc_close, py::call_guard<py::gil_scoped_release>()) //
+        .def(
+            "__enter__",
+            [](const std::shared_ptr<CuFileDriver>& fd) { //
+                return fd; //
+            }, //
+            py::call_guard<py::gil_scoped_release>())
+        .def(
+            "__exit__",
+            [](const std::shared_ptr<CuFileDriver>& fd, const py::object& type, const py::object& value,
+               const py::object& traceback) { //
+                fd->close(); //
+            }, //
+            py::call_guard<py::gil_scoped_release>())
         .def("__repr__", [](const CuFileDriver& fd) {
             return fmt::format("<cucim.clara.filesystem.CuFileDriver path:{}>", fd.path());
         });
diff --git a/run b/run
index 9ef6401e8..31bcd9553 100755
--- a/run
+++ b/run
@@ -402,6 +402,7 @@ build_local() {
 copy_gds_files_() {
     local root_folder=${1:-${TOP}}
     local cufile_search="${root_folder}/temp/cuda/include:${root_folder}/temp/cuda/lib64"  #"/usr/local/cuda/include:/usr/local/cuda/lib64 ${PREFIX:-}/include:${PREFIX:-}/lib ${CONDA_PREFIX:-}/include:${CONDA_PREFIX:-}/lib ${root_folder}/temp/cuda/include:${root_folder}/temp/cuda/lib64"
+    local gds_version=1.0.0
     local candidate
     local cufile_include
     local cufile_lib
@@ -425,10 +426,10 @@ copy_gds_files_() {
 
         local temp_tgz_dir=$(mktemp -d)
         pushd ${temp_tgz_dir}
-        run_command wget https://developer.download.nvidia.com/gds/redist/rel-0.95.0/gds-redistrib-0.95.0.tgz
-        run_command tar xzvf gds-redistrib-0.95.0.tgz
-        run_command cp -P gds-redistrib-0.95.0/targets/x86_64-linux/include/cufile.h ${root_folder}/temp/cuda/include/
-        run_command cp -P gds-redistrib-0.95.0/targets/x86_64-linux/lib/* ${root_folder}/temp/cuda/lib64/
+        run_command wget https://developer.download.nvidia.com/gds/redist/rel-${gds_version}/gds-redistrib-${gds_version}.tgz
+        run_command tar xzvf gds-redistrib-${gds_version}.tgz
+        run_command cp -P gds-redistrib-${gds_version}/targets/x86_64-linux/include/cufile.h ${root_folder}/temp/cuda/include/
+        run_command cp -P gds-redistrib-${gds_version}/targets/x86_64-linux/lib/* ${root_folder}/temp/cuda/lib64/
         popd > /dev/null
         run_command rm -r ${temp_tgz_dir}
     else
@@ -720,7 +721,7 @@ test_python() {
 
     local testsuite=""
     local testsuite_unit_skimage="src"
-    local testsuite_unit_clara="tests/unit/clara"
+    local testsuite_unit_clara="tests/unit"
     local testsuite_performance="tests/performance"
 
     install_python_test_deps_