Skip to content

Commit

Permalink
Set up the caching numpy array to match the dtype of the tiles. (#208)
Browse files Browse the repository at this point in the history
This is particularly important for passthrough pipeline stages that don't do much computation, but reading the data and immediately writing the data alters it.
  • Loading branch information
ttung authored Jun 1, 2018
1 parent a683cce commit f62c515
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 5 deletions.
5 changes: 5 additions & 0 deletions starfish/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class DataFormatWarning(Warning):
"""
Warnings given by starfish when the data is not formatted as expected, though not fatally.
"""
pass
55 changes: 50 additions & 5 deletions starfish/image/_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import os
from functools import partial
from typing import Any, Iterable, Iterator, Mapping, MutableSequence, Sequence, Tuple, Union
from warnings import warn

import numpy
from scipy.stats import scoreatpercentile
from skimage import exposure
from slicedimage import Reader, Writer

from starfish.constants import Coordinates, Indices
from starfish.errors import DataFormatWarning
from ._base import ImageBase


Expand All @@ -30,14 +32,57 @@ def __init__(self, image_partition):
self._num_zlayers = 1
self._tile_shape = tuple(image_partition.default_tile_shape)

self._data = numpy.zeros((self._num_hybs, self._num_chs, self._num_zlayers) + self._tile_shape)
self._data_needs_writeback = False

for tile in image_partition.tiles():
# Examine the tiles to figure out the right kind (int, float, etc.) and size. We require that all the tiles
# have the same kind of data type, but we do not require that they all have the same size of data type. The
# allocated array is the highest size we encounter.
kind = None
max_size = 0
for tile in self._image_partition.tiles():
dtype = tile.numpy_array.dtype
if kind is None:
kind = dtype.kind
else:
if kind != dtype.kind:
raise TypeError("All tiles should have the same kind of dtype")
if dtype.itemsize > max_size:
max_size = dtype.itemsize

# now that we know the tile data type (kind and size), we can allocate the data array.
self._data = numpy.zeros(
shape=(self._num_hybs, self._num_chs, self._num_zlayers) + self._tile_shape,
dtype=numpy.dtype(f"{kind}{max_size}")
)

# iterate through the tiles and set the data.
for tile in self._image_partition.tiles():
h = tile.indices[Indices.HYB]
c = tile.indices[Indices.CH]
zlayer = tile.indices.get(Indices.Z, 0)
self.set_slice(indices={Indices.HYB: h, Indices.CH: c, Indices.Z: zlayer}, data=tile.numpy_array)
data = tile.numpy_array
if max_size != data.dtype.itemsize:
# this source tile has a smaller data size than the other ones, though the same kind. need to scale the
# data.
if data.dtype.kind == "f":
# floating point can be done with numpy.interp.
src_finfo = numpy.finfo(data.dtype)
dst_finfo = numpy.finfo(self._data.dtype)
data = numpy.interp(
data,
(src_finfo.min, src_finfo.max),
(dst_finfo.min, dst_finfo.max))
else:
# fixed point can be done with a simple multiply.
src_max = numpy.iinfo(data.dtype).max
dst_max = numpy.iinfo(self._data.dtype).max
data = data * (dst_max / src_max)
warn(
f"Tile "
f"(H: {tile.indices[Indices.HYB]} C: {tile.indices[Indices.CH]} Z: {tile.indices[Indices.Z]}) has "
f"dtype {data.dtype}. One or more tiles is of a larger dtype {self._data.dtype}.",
DataFormatWarning)
self.set_slice(indices={Indices.HYB: h, Indices.CH: c, Indices.Z: zlayer}, data=data)
# set_slice will mark the data as needing writeback, so we need to unset that.
self._data_needs_writeback = False

@classmethod
def from_url(cls, relativeurl, baseurl):
Expand Down
112 changes: 112 additions & 0 deletions tests/test_slicedimage_dtype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import unittest
import warnings

import numpy
from slicedimage import Tile, TileSet

from starfish.constants import Coordinates, Indices
from starfish.errors import DataFormatWarning
from starfish.image import ImageStack


class TestSlicedImageDtype(unittest.TestCase):
NUM_HYB = 2
NUM_CH = 2
NUM_Z = 2
HEIGHT = 10
WIDTH = 10

def make_stack(self, dtype: numpy.number, corner_dtype: numpy.number):
"""
Makes a stack that's all of the same type, except the hyb=0,ch=0,z=0 corner, which is a different type. All the
tiles are initialized with ones.
Parameters
----------
dtype : numpy.number
The data type of all the tiles except the hyd=0,ch=0,z=0 corner.
corner_dtype
The data type of the tile in the hyd=0,ch=0,z=0 corner.
Returns
-------
ImageStack :
The image stack with the tiles initialized as described.
"""
img = TileSet(
{Coordinates.X, Coordinates.Y, Indices.HYB, Indices.CH, Indices.Z},
{
Indices.HYB: TestSlicedImageDtype.NUM_HYB,
Indices.CH: TestSlicedImageDtype.NUM_CH,
Indices.Z: TestSlicedImageDtype.NUM_Z,
},
default_tile_shape=(TestSlicedImageDtype.HEIGHT, TestSlicedImageDtype.WIDTH),
)
for hyb in range(TestSlicedImageDtype.NUM_HYB):
for ch in range(TestSlicedImageDtype.NUM_CH):
for z in range(TestSlicedImageDtype.NUM_Z):
tile = Tile(
{
Coordinates.X: (0.0, 0.001),
Coordinates.Y: (0.0, 0.001),
Coordinates.Z: (0.0, 0.001),
},
{
Indices.HYB: hyb,
Indices.CH: ch,
Indices.Z: z,
}
)
if hyb == 0 and ch == 0 and z == 0:
data = numpy.ones((TestSlicedImageDtype.HEIGHT, TestSlicedImageDtype.WIDTH), dtype=corner_dtype)
else:
data = numpy.ones((TestSlicedImageDtype.HEIGHT, TestSlicedImageDtype.WIDTH), dtype=dtype)
tile.numpy_array = data

img.add_tile(tile)

return ImageStack(img)

def test_different_kind(self):
with self.assertRaises(TypeError):
self.make_stack(numpy.uint32, numpy.float32)

def test_all_identical(self):
stack = self.make_stack(numpy.uint32, numpy.uint32)
self.assertEqual(stack.numpy_array.all(), 1)

def test_int_type_promotion(self):
with warnings.catch_warnings(record=True) as w:
stack = self.make_stack(numpy.int32, numpy.int8)
self.assertEqual(len(w), 1)
self.assertTrue(issubclass(w[0].category, DataFormatWarning))
expected = numpy.empty(
(TestSlicedImageDtype.NUM_HYB,
TestSlicedImageDtype.NUM_CH,
TestSlicedImageDtype.NUM_Z,
TestSlicedImageDtype.HEIGHT,
TestSlicedImageDtype.WIDTH), dtype=numpy.int32)
expected.fill(16777216)
corner = numpy.ones(
(TestSlicedImageDtype.HEIGHT,
TestSlicedImageDtype.WIDTH), dtype=numpy.int32)
expected[0, 0, 0] = corner
self.assertEqual(stack.numpy_array.all(), expected.all())

def test_float_type_promotion(self):
with warnings.catch_warnings(record=True) as w:
stack = self.make_stack(numpy.float64, numpy.float32)
self.assertEqual(len(w), 1)
self.assertTrue(issubclass(w[0].category, DataFormatWarning))
expected = numpy.empty(
(TestSlicedImageDtype.NUM_HYB,
TestSlicedImageDtype.NUM_CH,
TestSlicedImageDtype.NUM_Z,
TestSlicedImageDtype.HEIGHT,
TestSlicedImageDtype.WIDTH), dtype=numpy.int64)
expected.fill(2.0)
corner = numpy.ones(
(TestSlicedImageDtype.HEIGHT,
TestSlicedImageDtype.WIDTH), dtype=numpy.int64)
expected[0, 0, 0] = corner
self.assertEqual(stack.numpy_array.all(), expected.all())

0 comments on commit f62c515

Please sign in to comment.