Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix scaling small images in the multi source with bicubic smoothing #1627

Merged
merged 3 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change Log

## 1.29.8

### Improvements

- Add the option to compute additional columns for plottable data ([#1626](../../pull/1626))

### Bug Fixes

- Fix scaling small images in the multi source with bicubic smoothing ([#1627](../../pull/1627))

## 1.29.7

### Improvements
Expand Down
14 changes: 12 additions & 2 deletions girder_annotation/girder_large_image_annotation/rest/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,19 +661,29 @@ def getItemPlottableElements(self, item, annotations, adjacentItems, sources=Non
.param('sources', 'An optional comma separated list that can contain '
'folder, item, annotation, annotationelement, datafile.',
required=False)
.jsonParam(
'compute', 'A dictionary with keys "columns": a list of columns '
'to include in the computation; if unspecified or an empty list, '
'no computation is done, "function": a string with the name of '
'the function, such as umap, "params": additional parameters to '
'pass to the function. If none of the requiredKeys are '
'compute.(x|y|z), the computation will not be performed. Only '
'rows which have all selected columns present will be included in '
'the computation.',
paramType='formData', requireObject=True, required=False)
.errorResponse('ID was invalid.')
.errorResponse('Read access was denied for the item.', 403),
)
@access.public(cookie=True, scope=TokenScope.DATA_READ)
def getItemPlottableData(
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None):
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None, compute=None):
user = self.getCurrentUser()
if adjacentItems != '__all__':
adjacentItems = str(adjacentItems).lower() == 'true'
sources = sources or None
data = utils.PlottableItemData(
user, item, annotations=annotations, adjacentItems=adjacentItems,
sources=sources)
sources=sources, compute=compute)
return data.data(keys, requiredKeys)

def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
Expand Down
118 changes: 111 additions & 7 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
'application/x-xls': 'read_excel',
}
scanDatafileRecords = 50
scanAnnotationElements = 5000


@functools.lru_cache(maxsize=100)
Expand Down Expand Up @@ -393,7 +394,8 @@ class PlottableItemData:
maxDistinct = 20
allowedTypes = (str, bool, int, float)

def __init__(self, user, item, annotations=None, adjacentItems=False, sources=None):
def __init__(self, user, item, annotations=None, adjacentItems=False,
sources=None, compute=None):
"""
Get plottable data associated with an item.

Expand All @@ -408,15 +410,32 @@ def __init__(self, user, item, annotations=None, adjacentItems=False, sources=No
:param sources: None for all, or a string with a comma-separated list
or a list of strings; when a list, the options are folder, item,
annotation, annotationelement, datafile.
:param compute: None for none, or a dictionary with keys "columns": a
list of columns to include in the computation; if unspecified or an
empty list, no computation is done, "function": a string with the
name of the function, such as umap, "params": additional parameters
to pass to the function. If none of the requiredKeys are
compute.(x|y|z), the computation will not be performed. Only rows
which have all selected columns present will be included in the
computation.
"""
self.user = user
self._columns = None
self._datacolumns = None
self._data = None
self._compute = None
try:
if len(compute['columns']):
self._compute = {'function': 'umap', 'params': {
'random_state': 1, 'n_jobs': 1}}
self._compute.update(compute)
except Exception:
pass
if sources and not isinstance(sources, (list, tuple)):
sources = sources.split(',')
self._sources = tuple(sources) if sources else None
if self._sources and 'annotation' not in self._sources:
if (self._sources and 'annotation' not in self._sources and
'annotationelement' not in self._sources):
annotations = None
self._fullScan = adjacentItems == '__all__'
self._findItems(item, adjacentItems)
Expand Down Expand Up @@ -559,7 +578,11 @@ def _findDataFiles(self): # noqa
'bbox.y0': 'Bounding Box Low Y',
'bbox.x1': 'Bounding Box High X',
'bbox.y1': 'Bounding Box High Y',
'compute.x': 'Dimension Reduction X',
'compute.y': 'Dimension Reduction Y',
'compute.z': 'Dimension Reduction Z',
}
computeColumns = {'compute.x', 'compute.y', 'compute.z'}

def itemNameIDSelector(self, isName, selector):
"""
Expand Down Expand Up @@ -1068,6 +1091,78 @@ def _getColumnsFromDataFiles(self, columns):
countsPerDataFile[dfidx] = count - startcount
return count

def _computeFunction(self, rows):
    """
    Run the configured compute function over the collected rows.

    On success the result is stored in ``self._computed``, with one output
    entry per input row in the iteration order of ``rows``.

    :param rows: a dictionary whose values are lists of column values, one
        list per row.
    :returns: True if the computation ran and ``self._computed`` was set;
        False if the configured function is not supported or the library
        it needs is not installed.
    """
    if self._compute['function'] != 'umap':
        # umap is the only function implemented; report failure explicitly
        # so the caller does not read a stale or missing self._computed.
        return False
    try:
        # umap-learn is only installed with the optional "compute" extra,
        # so it is imported on demand.
        import umap
    except ImportError:
        logger.warning('umap is not installed; cannot compute columns')
        return False

    logger.info('Calling umap on %d rows', len(rows))
    reducer = umap.UMAP(**self._compute['params'])
    self._computed = reducer.fit_transform(list(rows.values()))
    logger.info('Called umap')
    return True

def _getColumnsFromCompute(self, columns):  # noqa
    """
    Collect columns and data from compute actions.

    If ``self._datacolumns`` is empty or unset, the compute columns are
    only registered with placeholder statistics so that they are listed.
    Otherwise, if a compute specification exists and at least one compute
    column is required, the selected numeric columns are gathered into
    rows, passed to ``self._computeFunction``, and the results are written
    into ``self._datacolumns`` for each required compute column.

    :param columns: a dictionary of column records keyed by column key.
        This is modified in place.
    :returns: the number of rows for which values were computed; 0 if no
        computation was performed.
    """

    def axisIndex(key):
        # Keys end in x, y, or z, which select output axis 0, 1, or 2.
        return ord(key[-1:]) - ord('x')

    def computeGetData(record):
        return {}

    def computeLength(record, data):
        return len(self._computed)

    def computeSelector(key):
        axis = axisIndex(key)

        def computeSelectorAxis(record, data, row):
            return self._computed[row][axis]

        return computeSelectorAxis

    def registerColumn(key):
        self._ensureColumn(
            columns, key, self.commonColumns[key], 'compute',
            computeGetData, computeSelector(key), computeLength)

    if not self._datacolumns:
        for key in self.computeColumns:
            registerColumn(key)
            columns[key]['count'] = 1
            columns[key]['min'] = columns[key]['max'] = 0
        return 0
    if self._compute is None or not (self._requiredColumns & self.computeColumns):
        return 0
    numeric = {
        key for key, col in columns.items()
        if col['type'] == 'number' and col.get('min') is not None
    } & set(self._compute['columns'])
    if not numeric:
        return 0
    # Only numeric columns are passed to the reducer; a non-numeric column
    # named in the compute specification would make the reduction fail.
    cols = sorted(col for col in numeric if col in self._datacolumns)
    rows = {}
    for kidx, key in enumerate(cols):
        for row, value in self._datacolumns[key].items():
            if not kidx:
                rows[row] = [value]
            elif row in rows and len(rows[row]) == kidx:
                # Only extend rows that had a value in every earlier column
                rows[row].append(value)
    # Keep just the rows that have a value for all of the columns
    rows = {k: row for k, row in rows.items() if len(row) == len(cols)}
    if not rows:
        return 0
    if not self._computeFunction(rows):
        return 0
    # The result may have fewer axes than were asked for (for instance, if
    # the caller fixed the number of components); skip axes that are absent
    # rather than indexing past the end of each result row.
    width = len(self._computed[0]) if len(self._computed) else 0
    for key in self.computeColumns:
        if key not in self._requiredColumns or key not in self._datacolumns:
            continue
        cidx = axisIndex(key)
        if cidx >= width:
            continue
        registerColumn(key)
        # rows preserves the order that was passed to the compute function
        for ridx, row in enumerate(rows):
            self._datacolumns[key][row] = float(self._computed[ridx][cidx])
        columns[key]['count'] = len(rows)
        columns[key]['min'] = columns[key]['max'] = 0
    return len(rows)

def _getColumns(self):
"""
Get a sorted list of plottable columns with some metadata for each.
Expand All @@ -1086,6 +1181,7 @@ def _getColumns(self):
count += self._collectColumns(columns, [item], 'item', first=False)
count += self._getColumnsFromAnnotations(columns)
count += self._getColumnsFromDataFiles(columns)
count += self._getColumnsFromCompute(columns)
for result in columns.values():
if len(result['distinct']) <= self.maxDistinct:
result['distinct'] = sorted(result['distinct'])
Expand All @@ -1095,7 +1191,9 @@ def _getColumns(self):
if result['type'] != 'number' or result['min'] is None:
result.pop('min', None)
result.pop('max', None)
prefixOrder = {'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3, 'bbox': 4}
prefixOrder = {
'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3,
'bbox': 4, 'compute': 5}
columns = sorted(columns.values(), key=lambda x: (
prefixOrder.get(x['key'].split('.', 1)[0], len(prefixOrder)), x['key']))
return columns
Expand Down Expand Up @@ -1168,7 +1266,7 @@ def _collectData(self, rows, colsout):
rows = [row for ridx, row in enumerate(rows) if rows[ridx] not in discard]
return data, rows

def data(self, columns, requiredColumns=None):
def data(self, columns, requiredColumns=None): # noqa
"""
Get plottable data.

Expand All @@ -1182,8 +1280,14 @@ def data(self, columns, requiredColumns=None):
columns = columns.split(',')
if not isinstance(requiredColumns, list):
requiredColumns = requiredColumns.split(',') if requiredColumns is not None else []
requiredColumns = set(requiredColumns)
specifiedReqColumns = set(requiredColumns)
self._requiredColumns = set(requiredColumns)
if self._compute:
if ('compute.z' in specifiedReqColumns and
self._compute['function'] == 'umap' and
'n_components' not in self._compute['params']):
self._compute['params']['n_components'] = 3
self._requiredColumns.update(self._compute['columns'])
with self._dataLock:
self._datacolumns = {c: {} for c in columns}
rows = set()
Expand All @@ -1201,7 +1305,7 @@ def data(self, columns, requiredColumns=None):
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in requiredColumns:
if colkey in specifiedReqColumns:
data = [row for row in data if row[cidx] is not None]
if len(data) < numrows:
logger.info(f'Reduced row count from {numrows} to {len(data)} '
Expand All @@ -1210,7 +1314,7 @@ def data(self, columns, requiredColumns=None):
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in self._requiredColumns and colkey not in requiredColumns:
if colkey in self._requiredColumns and colkey not in specifiedReqColumns:
subdata = [row for row in subdata if row[cidx] is not None]
if len(subdata) and len(subdata) < len(data):
logger.info(f'Reduced row count from {len(data)} to {len(subdata)} '
Expand Down
3 changes: 3 additions & 0 deletions girder_annotation/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def prerelease_local_scheme(version):
'orjson',
],
extras_require={
'compute': [
'umap-learn',
],
'tasks': [
f'girder-large-image[tasks]{limit_version}',
],
Expand Down
20 changes: 10 additions & 10 deletions girder_annotation/test_annotation/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ def testPlottableDataAccess(admin):
plottable = girder_large_image_annotation.utils.PlottableItemData(admin, item)
col = plottable.columns
# Also contains item id, name, and description
assert len(col) == 12
assert len(col) == 15

data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 12
Expand Down Expand Up @@ -937,71 +937,71 @@ def testPlottableDataMultipleItems(admin):
plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item')
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 1

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems=True)
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems='__all__')
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 3

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, adjacentItems=True)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 4

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])])
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 6

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])], adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__')
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__', adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 12
2 changes: 1 addition & 1 deletion girder_annotation/test_annotation/test_annotations_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def testPlottableEndpoints(self, server, admin):
},
)
assert utilities.respStatus(resp) == 200
assert len(resp.json) == 2
assert len(resp.json) == 5

resp = server.request(
path=f'/annotation/item/{itemSrc["_id"]}/plot/list',
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ girder-jobs>=3.0.3
# Girder and worker dependencies are already installed above
-e utilities/tasks[girder]
-e girder/.
-e girder_annotation/.
-e girder_annotation/.[compute]

# Extras from main setup.py
pylibmc>=1.5.1
Expand Down
4 changes: 2 additions & 2 deletions sources/multi/large_image_source_multi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@ def _getTransformedTile(self, ts, transform, corners, scale, frame, crop=None):
# we only need every 1/srcscale pixel.
srcscale = int(2 ** math.log2(max(1, srcscale)))
# Pad to reduce edge effects at tile boundaries
border = int(math.ceil(2 * srcscale))
border = int(math.ceil(4 * srcscale))
region = {
'left': int(max(0, minx - border) // srcscale) * srcscale,
'top': int(max(0, miny - border) // srcscale) * srcscale,
Expand Down Expand Up @@ -1119,8 +1119,8 @@ def _getTransformedTile(self, ts, transform, corners, scale, frame, crop=None):
# Recompute where the source corners will land
destcorners = (np.dot(transform, regioncorners.T).T).tolist()
destShape = [
max(max(math.ceil(c[1]) for c in destcorners), srcImage.shape[0]),
max(max(math.ceil(c[0]) for c in destcorners), srcImage.shape[1]),
max(max(math.ceil(c[1]) for c in destcorners), srcImage.shape[0]),
]
if max(0, -x) or max(0, -y):
transform[0][2] -= max(0, -x)
Expand Down
Loading