Added source dimension order, split functionality between Zarr and OmeZarr as write-only classes, fixed ome zarr channel metadata
FrancisCrickInstitute · Nov 27, 2023 · 91d6932 · 91d6932
1 parent c962c98
commit 91d6932
Show file tree

Hide file tree

Showing 8 changed files with 150 additions and 78 deletions.
diff --git a/OmeSliCC/OmeSource.py b/OmeSliCC/OmeSource.py
@@ -14,6 +14,8 @@ class OmeSource:
     """metadata dictionary"""
     has_ome_metadata: bool
     """has ome metadata"""
+    dimension_order: str
+    """data dimension order"""
     source_pixel_size: list
     """original source pixel size"""
     target_pixel_size: list
@@ -34,6 +36,7 @@ class OmeSource:
     def __init__(self):
         self.metadata = {}
         self.has_ome_metadata = False
+        self.dimension_order = ''
         self.source_pixel_size = []
         self.target_pixel_size = []
         self.target_scale = []
@@ -133,13 +136,26 @@ def _init_sizes(self):
             self.best_level = 0
             self.best_factor = [1]
 
+        if self.dimension_order == '':
+            x, y, z, c, t = self.get_size_xyzct()
+            self.dimension_order = 'yx'
+            if c > 1:
+                self.dimension_order += 'c'
+            if z > 1:
+                self.dimension_order = 'z' + self.dimension_order
+            if t > 1:
+                self.dimension_order = 't' + self.dimension_order
+
     def get_mag(self) -> float:
+        """Return the magnification, adjusted by the target scale when one is set.
+
+        When ``self.target_scale`` is non-empty, ``self.source_mag`` is divided by
+        the mean of ``self.target_scale`` and the result is passed through
+        ``check_round_significants(..., 3)``; otherwise ``self.source_mag`` is
+        returned unchanged.
+        """
         # get effective mag at target pixel size
         if len(self.target_scale) > 0:
             return check_round_significants(self.source_mag / np.mean(self.target_scale), 3)
         else:
             return self.source_mag
 
+    def get_dimension_order(self) -> str:
+        """Return the dimension order string currently stored on this source."""
+        return self.dimension_order
+
     def get_physical_size(self) -> tuple:
         physical_size = []
         for size, pixel_size in zip(self.get_size_xyzct(), self.get_pixel_size()):
@@ -195,14 +211,35 @@ def clone_empty(self) -> np.ndarray:
     def get_thumbnail(self, target_size: tuple, precise: bool = False) -> np.ndarray:
+        """Return an image of the best-matching level, resized towards ``target_size``.
+
+        ``get_best_size`` picks a level from ``self.sizes`` and that whole level is
+        read with ``_asarray_level``. When the ratio between ``target_size`` and the
+        level size rounds (3 decimals) to 1 for its first two entries, the level is
+        returned as read; otherwise it is resized with ``precise_resize`` (if
+        ``precise`` is set) or with ``image_resize``.
+        """
         size, index = get_best_size(self.sizes, target_size)
         scale = np.divide(target_size, self.sizes[index])
-        image = self._asarray_level(index, 0, 0, size[0], size[1])
+        image = self._asarray_level(index)
         if np.round(scale, 3)[0] == 1 and np.round(scale, 3)[1] == 1:
             return image
         elif precise:
             return precise_resize(image, scale)
         else:
             return image_resize(image, target_size)
 
+    def get_min_max(self, channeli):
+        """Return ``(start, end, min, max)`` intensity values for one channel.
+
+        ``start``/``end`` are the limits of the pixel type (``np.finfo`` for float
+        types, ``np.iinfo`` otherwise). ``min``/``max`` are the 0.001 and 0.999
+        quantiles of the last entry of ``self.sizes`` as read by ``_asarray_level``;
+        when there is only one level the type limits are returned for them instead.
+
+        :param channeli: channel index, applied to the last axis when the level
+            image has more than two dimensions
+        """
+        lower_quantile, upper_quantile = 0.001, 0.999
+
+        pixel_type = self.get_pixel_type()
+        limits = np.finfo(pixel_type) if pixel_type.kind == 'f' else np.iinfo(pixel_type)
+        start, end = limits.min, limits.max
+
+        level_count = len(self.sizes)
+        if level_count <= 1:
+            # only one level available: reuse the type limits rather than sampling it
+            return start, end, start, end
+
+        image = self._asarray_level(level_count - 1)
+        if image.ndim > 2:
+            image = image[..., channeli]
+        low = get_image_quantile(image, lower_quantile)
+        high = get_image_quantile(image, upper_quantile)
+        return start, end, low, high
+
     def asarray(self, x0: float = 0, y0: float = 0, x1: float = -1, y1: float = -1) -> np.ndarray:
         # ensure fixed patch size
         if x1 < 0 or y1 < 0:

diff --git a/OmeSliCC/OmeZarr.py b/OmeSliCC/OmeZarr.py
@@ -5,6 +5,7 @@
 import zarr
 
 from OmeSliCC.image_util import *
+from OmeSliCC.ome_zarr_util import *
 from OmeSliCC.util import *
 
 
@@ -15,8 +16,7 @@ class OmeZarr:
     def __init__(self, filename):
+        """Remember the output location; ``write`` later opens it via ``parse_url(..., mode="w")``."""
         self.filename = filename
 
-    def write(self, data, source, dimension_order=DEFAULT_DIMENSION_ORDER,
-               tile_size=[1, 1, 1, 256, 256],
+    def write(self, data, source, tile_size=[],
                npyramid_add=0, pyramid_downsample=2, compression=[]):
         compressor, compression_filters = create_compression_filter(compression)
         storage_options = {'dimension_separator': '/', 'chunks': tile_size}
@@ -26,64 +26,24 @@ def write(self, data, source, dimension_order=DEFAULT_DIMENSION_ORDER,
             storage_options['filters'] = compression_filters
 
         zarr_root = zarr.group(parse_url(self.filename, mode="w").store, overwrite=True)
-        pixel_size_um = []
-        for size in source.get_pixel_size_micrometer():
-            if size == 0:
-                size = 1
-            pixel_size_um.append(size)
+        pixel_size_um = source.get_pixel_size_micrometer()
 
-        if dimension_order.index('c') == len(dimension_order) - 1:
+        dimension_order = source.get_dimension_order()
+        if 'c' in dimension_order and dimension_order.index('c') == len(dimension_order) - 1:
             # ome-zarr doesn't support channel after space dimensions (yet)
             data = np.moveaxis(data, -1, 0)
             dimension_order = dimension_order[-1] + dimension_order[:-1]
 
-        axes = []
-        for dimension in dimension_order:
-            unit1 = None
-            if dimension == 't':
-                type1 = 'time'
-                unit1 = 'millisecond'
-            elif dimension == 'c':
-                type1 = 'channel'
-            else:
-                type1 = 'space'
-                unit1 = 'micrometer'
-            axis = {'name': dimension, 'type': type1}
-            if unit1 is not None and unit1 != '':
-                axis['unit'] = unit1
-            axes.append(axis)
+        axes = create_axes_metadata(dimension_order)
 
         pixel_size_scales = []
         scale = 1
         for i in range(npyramid_add + 1):
-            pixel_size_scale = []
-            for dimension in dimension_order:
-                if dimension == 'z':
-                    pixel_size_scale1 = pixel_size_um[2]
-                elif dimension == 'y':
-                    pixel_size_scale1 = pixel_size_um[1] / scale
-                elif dimension == 'x':
-                    pixel_size_scale1 = pixel_size_um[0] / scale
-                else:
-                    pixel_size_scale1 = 1
-                pixel_size_scale.append(pixel_size_scale1)
-            pixel_size_scales.append([{'scale': pixel_size_scale, 'type': 'scale'}])
+            pixel_size_scales.append(create_transformation_metadata(dimension_order, pixel_size_um, scale))
             scale /= pyramid_downsample
 
         write_image(image=data, group=zarr_root, axes=axes, coordinate_transformations=pixel_size_scales,
                     scaler=Scaler(downscale=pyramid_downsample, max_layer=npyramid_add),
                     storage_options=storage_options)
 
-        channels = []
-        for channel0 in source.get_channels():
-            color = channel0.get('Color', '')
-            if not isinstance(color, str):
-                color = hex(color)[2:].zfill(6)
-            channel = {'label': channel0.get('Name', ''), 'color': color}
-            channels.append(channel)
-
-        omero_metadata = {
-            'version': '0.4',
-            'channels': channels,
-        }
-        zarr_root.attrs['omero'] = omero_metadata
+        zarr_root.attrs['omero'] = create_channel_metadata(source)
diff --git a/OmeSliCC/ZarrSource.py → OmeSliCC/OmeZarrSource.py b/OmeSliCC/ZarrSource.py → OmeSliCC/OmeZarrSource.py
@@ -6,7 +6,7 @@
 from OmeSliCC.XmlDict import XmlDict
 
 
-class ZarrSource(OmeSource):
+class OmeZarrSource(OmeSource):
     """Zarr-compatible image source"""
 
     filename: str

diff --git a/OmeSliCC/Zarr.py b/OmeSliCC/Zarr.py
@@ -3,6 +3,7 @@
 import zarr
 
 from OmeSliCC.image_util import *
+from OmeSliCC.ome_zarr_util import *
 from OmeSliCC.util import *
 
 
@@ -13,48 +14,48 @@ class Zarr:
     def __init__(self, filename):
+        """Set up an empty writer for ``filename``.
+
+        ``self.ome`` records whether the file name carries an ``.ome`` suffix
+        (e.g. ``image.ome.zarr``); ``create`` uses it to decide whether to write
+        the 'multiscales' and 'omero' attributes.
+
+        :param filename: output path of the zarr store
+        """
         self.filename = filename
-        self.ome = ('ome' == self.filename.split('.')[1].lower())
+        # Look only at the suffixes of the final path component: splitting the whole
+        # path on '.' raised IndexError for names without a dot and picked the wrong
+        # part when a directory or the base name itself contained dots
+        suffixes = pathlib.PurePath(str(filename)).suffixes
+        self.ome = any(suffix.lower() == '.ome' for suffix in suffixes)
-        self.metadata = {}
         self.data = []
-        self.sizes = []
-        self.shapes = []
         self.dimension_order = self.DEFAULT_DIMENSION_ORDER
 
-    def create(self, source, dimension_order=DEFAULT_DIMENSION_ORDER,
-               tile_size=[1, 1, 1, 256, 256],
+    def create(self, source, tile_size=[],
                npyramid_add=0, pyramid_downsample=2, compression=[]):
+        """Create an empty multi-level zarr dataset laid out in the source's dimension order.
+
+        Opens the root group at the file URI in write mode and creates
+        ``1 + npyramid_add`` arrays named '0', '1', ...; the arrays are appended to
+        ``self.data``. When ``self.ome`` is set, the 'multiscales' and 'omero'
+        attributes are written on the root group.
+
+        :param source: provides ``get_dimension_order()``, ``get_size_xyzct()``,
+            ``pixel_types``, ``get_pixel_size_micrometer()`` and ``source_reference``
+        :param tile_size: passed as ``chunks`` to ``create_dataset``
+        :param npyramid_add: number of levels created in addition to the first one
+        :param pyramid_downsample: factor the level scale is divided by after each level
+        :param compression: passed to ``create_compression_filter``
+        """
         # create empty dataset
+        dimension_order = source.get_dimension_order()
         self.dimension_order = dimension_order
         self.npyramid_add = npyramid_add
         self.pyramid_downsample = pyramid_downsample
         file_url = pathlib.Path(self.filename).as_uri()
         self.zarr_root = zarr.open_group(file_url, mode='w', storage_options={'dimension_separator': '/'})
         size0 = source.get_size_xyzct()
-        shape0 = list(np.flip(size0))
-        self.dtype = source.pixel_types[0]
-        pixel_size = source.get_pixel_size()
+        # reorder the x, y, z, c, t sizes into the source's dimension order
+        shape0 = [size0['xyzct'.index(dimension)] for dimension in dimension_order]
+        dtype = source.pixel_types[0]
+        pixel_size_um = source.get_pixel_size_micrometer()
         compressor, compression_filters = create_compression_filter(compression)
         scale = 1
         datasets = []
         for pathi in range(1 + npyramid_add):
-            shape = shape0[:-2] + np.round(np.multiply(shape0[-2:], scale)).astype(int).tolist()
-            self.shapes.append(shape)
-            self.sizes.append(np.flip(shape))
-            self.data.append(self.zarr_root.create_dataset(str(pathi), shape=shape, chunks=tile_size, dtype=self.dtype,
+            shape = calc_shape_scale(shape0, dimension_order, scale)
+            self.data.append(self.zarr_root.create_dataset(str(pathi), shape=shape, chunks=tile_size, dtype=dtype,
                                                            compressor=compressor, filters=compression_filters))
-            pixel_size_x = pixel_size[0][0] if len(pixel_size) >= 1 else 1
-            pixel_size_y = pixel_size[1][0] if len(pixel_size) >= 2 else 1
-            pixel_size_z = pixel_size[2][0] if len(pixel_size) >= 3 else 1
-            if pixel_size_z == 0:
-                pixel_size_z = 1
             datasets.append({
-                'path': pathi,
-                'coordinateTransformations': [{'type': 'scale', 'scale': [1, 1, pixel_size_z, pixel_size_y / scale, pixel_size_x / scale]}]
+                'path': str(pathi),
+                'coordinateTransformations': create_transformation_metadata(dimension_order, pixel_size_um, scale)
             })
+            # scale is 1 for the first level and shrinks by pyramid_downsample per level
             scale /= pyramid_downsample
 
+        if self.ome:
+            metadata = {
+                'version': '0.4',
+                'axes': create_axes_metadata(dimension_order),
+                'name': get_filetitle(source.source_reference),
+                'datasets': datasets,
+            }
+
+            self.zarr_root.attrs['multiscales'] = [metadata]
+            self.zarr_root.attrs['omero'] = create_channel_metadata(source)
+
     def get(self, level, x0=0, y0=0, x1=-1, y1=-1):
+        """Read an x/y window from one level, in the stored dimension order.
+
+        :param level: index into ``self.data``
+        :param x0: first column (inclusive)
+        :param y0: first row (inclusive)
+        :param x1: end column, used directly as the slice stop
+        :param y1: end row, used directly as the slice stop
+        :return: the selected data with length-1 axes squeezed out
+        """
-        data = self.data[level][0, :, 0, y0:y1, x0:x1].squeeze()
+        # NOTE(review): the -1 defaults are used as plain slice stops, so a call
+        # without x1/y1 leaves out the last column/row - confirm this is intended
+        # Build the index from the stored dimension order instead of assuming a
+        # fixed 5D layout: x/y take the requested window, c is kept whole and any
+        # other axis is read at index 0 (identical to the old index for 'tczyx')
+        window = {'x': slice(x0, x1), 'y': slice(y0, y1), 'c': slice(None)}
+        index = tuple(window.get(dimension, 0) for dimension in self.dimension_order)
+        data = self.data[level][index].squeeze()
-        data = np.moveaxis(data, 0, -1)
         return data
 
     def set(self, data, x0=0, y0=0, x1=0, y1=0):
@@ -70,6 +71,5 @@ def set(self, data, x0=0, y0=0, x1=0, y1=0):
                 data1 = image_resize(data, new_size)
             else:
                 data1 = data
-            data1 = np.moveaxis(data1, -1, 0)
-            self.data[pathi][0, :, 0, sy0:sy1, sx0:sx1] = data1
+            self.data[pathi][sy0:sy1, sx0:sx1, :] = data1
             scale /= self.pyramid_downsample
diff --git a/OmeSliCC/conversion.py b/OmeSliCC/conversion.py
@@ -14,7 +14,7 @@
 from OmeSliCC.PlainImageSource import PlainImageSource
 from OmeSliCC.TiffSource import TiffSource
 from OmeSliCC.Zarr import Zarr
-from OmeSliCC.ZarrSource import ZarrSource
+from OmeSliCC.OmeZarrSource import OmeZarrSource
 from OmeSliCC.image_util import *
 from OmeSliCC.util import *
 
@@ -27,7 +27,7 @@ def create_source(source_ref: str, params: dict, omero: Omero = None) -> OmeSour
         from OmeSliCC.OmeroSource import OmeroSource
         source = OmeroSource(omero, int(source_ref), source_pixel_size=source_pixel_size, target_pixel_size=target_pixel_size)
     elif 'zarr' in ext:
-        source = ZarrSource(source_ref, source_pixel_size=source_pixel_size, target_pixel_size=target_pixel_size)
+        source = OmeZarrSource(source_ref, source_pixel_size=source_pixel_size, target_pixel_size=target_pixel_size)
     elif ext.lstrip('.') in TIFF.FILE_EXTENSIONS:
         source = TiffSource(source_ref, source_pixel_size=source_pixel_size, target_pixel_size=target_pixel_size)
     elif ext in Image.registered_extensions().keys():
@@ -127,7 +127,7 @@ def combine_images(sources: list[OmeSource], params: dict):
         channels.append(channel)
     output_filename = os.path.join(output_folder, get_filetitle(source_ref, remove_all_ext=True) + '.' + output_format)
     if 'zar' in output_format:
-        new_source = ZarrSource(source_ref, source0.get_pixel_size())
+        new_source = OmeZarrSource(source_ref, source0.get_pixel_size())
         new_source.channels = channels
         size = list(new_source.sizes_xyzct[0])
         size[3] = nchannels
@@ -159,7 +159,7 @@ def save_image_as_ome_zarr(source: OmeSource, data: np.ndarray, output_filename:
     pyramid_downsample = output_params.get('pyramid_downsample')
 
     zarr = OmeZarr(output_filename)
-    zarr.write(data, source, dimension_order='yxc', tile_size=tile_size, npyramid_add=npyramid_add, pyramid_downsample=pyramid_downsample,
+    zarr.write(data, source, tile_size=tile_size, npyramid_add=npyramid_add, pyramid_downsample=pyramid_downsample,
                compression=compression)
 
 

diff --git a/OmeSliCC/image_util.py b/OmeSliCC/image_util.py
@@ -65,6 +65,11 @@ def convert_image_sign_type(image0: np.ndarray, dtype: np.dtype) -> np.ndarray:
     return image
 
 
+def get_image_quantile(image, quantile):
+    """Return the given quantile of ``image``, cast back to the image's dtype."""
+    return np.quantile(image, quantile).astype(image.dtype)
+
+
 def get_image_size_info(xyzct: tuple, pixel_nbytes: int, pixel_type: np.dtype, channels: list) -> str:
     w, h, zs, cs, ts = xyzct
     size = print_hbytes(np.int64(pixel_nbytes) * w * h * zs * cs * ts)

diff --git a/OmeSliCC/ome_zarr_util.py b/OmeSliCC/ome_zarr_util.py
@@ -0,0 +1,70 @@
+def create_axes_metadata(dimension_order):
+    """Build the list of axis dictionaries for a dimension order string.
+
+    Each character yields ``{'name': ..., 'type': ...}``: 't' is a 'time' axis in
+    'millisecond', 'c' is a 'channel' axis without unit, and every other
+    character is a 'space' axis in 'micrometer'.
+
+    :param dimension_order: iterable of single-character axis names, e.g. 'tczyx'
+    :return: list of axis dictionaries, in the given order
+    """
+    # (type, unit) of the non-spatial axes; anything else is treated as spatial
+    special_axes = {'t': ('time', 'millisecond'), 'c': ('channel', None)}
+    axes = []
+    for name in dimension_order:
+        axis_type, unit = special_axes.get(name, ('space', 'micrometer'))
+        entry = {'name': name, 'type': axis_type}
+        if unit:
+            entry['unit'] = unit
+        axes.append(entry)
+    return axes
+
+
+def create_transformation_metadata(dimension_order, pixel_size_um, scale):
+    """Build the single-entry 'scale' transformation list for one level.
+
+    :param dimension_order: iterable of single-character axis names
+    :param pixel_size_um: pixel sizes indexed as [x, y, z]
+    :param scale: level scale; the x and y pixel sizes are divided by it, z is not
+    :return: ``[{'scale': [...], 'type': 'scale'}]`` with one value per axis;
+        axes other than x/y/z get 1, and any value equal to 0 is replaced by 1
+    """
+    def axis_scale(axis):
+        if axis == 'x':
+            value = pixel_size_um[0] / scale
+        elif axis == 'y':
+            value = pixel_size_um[1] / scale
+        elif axis == 'z':
+            value = pixel_size_um[2]
+        else:
+            value = 1
+        # a zero pixel size is replaced by a unit scale
+        return 1 if value == 0 else value
+
+    scales = [axis_scale(axis) for axis in dimension_order]
+    return [{'scale': scales, 'type': 'scale'}]
+
+
+def create_channel_metadata(source):
+    """Build the 'omero' metadata dictionary (version plus one entry per channel).
+
+    Every channel gets a 'label' (its 'Name', or '' when absent), a six-digit
+    upper-case hex 'color' and a 'window'.
+
+    :param source: provides ``get_channels()`` (list of dicts) and
+        ``get_min_max(channel_index)`` returning ``(start, end, min, max)``
+    :return: ``{'version': '0.4', 'channels': [...]}``
+    :raises ValueError: if a string 'Color' is neither a decimal int nor a hex value
+    """
+    channels = []
+    for channeli, channel0 in enumerate(source.get_channels()):
+        channel = {'label': channel0.get('Name', '')}
+
+        color = channel0.get('Color')
+        if color is None:
+            color = 'FFFFFF'
+        elif isinstance(color, str):
+            try:
+                # int value as string; convert
+                color = int(color)
+            except ValueError:
+                # not a decimal int: accept a hex colour such as 'FF8000' or '#FF8000'
+                # instead of failing on it
+                color = int(color.strip().lstrip('#'), 16)
+        if not isinstance(color, str):
+            # keep the lowest 24 bits; the mask also maps negative values into range
+            color = hex(color & 0xFFFFFF)[2:].upper().zfill(6)
+        channel['color'] = color
+
+        # Reuse a window already present on the source channel; the previous check
+        # tested the freshly built dict, so it could never find one
+        window = channel0.get('window')
+        if window is None:
+            # NOTE(review): get_min_max supplies the pixel-type limits as start/end
+            # and the sampled quantiles as min/max - confirm this matches the
+            # intended meaning of the 'window' keys
+            start, end, window_min, window_max = source.get_min_max(channeli)
+            window = {'start': start, 'end': end, 'min': window_min, 'max': window_max}
+        channel['window'] = window
+        channels.append(channel)
+
+    return {'version': '0.4', 'channels': channels}
+
+
+def calc_shape_scale(shape0, dimension_order, scale):
+    """Return ``shape0`` with its x and y entries scaled by ``scale``.
+
+    ``scale`` is the relative size of the level (1 for the first level, smaller
+    values for reduced levels), so the x/y sizes are multiplied by it; dividing
+    would enlarge every reduced level. All other dimensions are left unchanged.
+
+    :param shape0: sizes of the first level, ordered like ``dimension_order``
+    :param dimension_order: iterable of single-character axis names
+    :param scale: relative size of the requested level
+    :return: a new list of sizes (``shape0`` itself is never returned or modified)
+    """
+    return [
+        int(round(size * scale)) if dimension in ('x', 'y') else size
+        for size, dimension in zip(shape0, dimension_order)
+    ]