Skip to content

Commit

Permalink
Fix bugs in per-round-max-decoder (#1602)
Browse files Browse the repository at this point in the history
1. When every channel of a (feature, round) row is NaN, the decoder fails.  This is remedied by running the argmax on a copy of the intensities in which the NaN values are replaced with 0s.
2. When every channel of a (feature, round) row has equal intensity, `np.argmax` returns the first column as the winner, so the row is erroneously decoded as if ch=0 had the max intensity.  This code detects that scenario and rewrites the ch to an impossible value (one past the largest valid channel index) so that such spots decode to `NaN`.

Test plan: Wrote tests that failed with the existing code, applied fixes and verified that they now work.
Fixes #1485
  • Loading branch information
Tony Tung authored Oct 8, 2019
1 parent 81b1f76 commit 505f73c
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 3 deletions.
2 changes: 1 addition & 1 deletion notebooks/ISS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -450,4 +450,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
11 changes: 10 additions & 1 deletion starfish/core/codebook/codebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,16 @@ def _view_row_as_element(array: np.ndarray) -> np.ndarray:
distances=(Features.AXIS, np.empty(0, dtype=np.float64)),
passes_threshold=(Features.AXIS, np.empty(0, dtype=bool)))

max_channels = intensities.argmax(Axes.CH.value)
intensities_without_nans = intensities.fillna(0)
max_channels = intensities_without_nans.argmax(Axes.CH.value)
# this snippet of code finds all the (feature, round) spots that have uniform illumination,
# and assigns them to a ch number that's one larger than max possible to ensure that such
# spots decode to `NaN`.
max_channels_max = intensities_without_nans.reduce(np.amax, Axes.CH.value)
max_channels_min = intensities_without_nans.reduce(np.amin, Axes.CH.value)
uniform_illumination_mask = (max_channels_max == max_channels_min).values

max_channels.values[uniform_illumination_mask] = intensities.sizes[Axes.CH.value]
codes = self.argmax(Axes.CH.value)

# TODO ambrosejcarr, dganguli: explore this quality score further
Expand Down
39 changes: 39 additions & 0 deletions starfish/core/codebook/test/test_per_round_max_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,42 @@ def test_argmax_selects_the_last_equal_intensity_channel_and_decodes_consistentl

decoded_intensities = codebook.decode_per_round_max(intensities)
assert np.array_equal(decoded_intensities[Features.TARGET].values, ['nan', 'GENE_A'])


def test_argmax_does_not_select_first_code():
    """
    Check that a round whose channels all have the same intensity is not decoded as channel 0.

    A plain argmax over a uniform round returns the first channel, which would assign a code to
    a round that has no real maximum.  The first feature below contains such a round and is
    expected to decode to 'nan'; the second feature has a distinct maximum in every round and is
    expected to decode to 'GENE_A'.
    """
    # Layout is presumably (feature, round, ch) -- confirm against intensity_table_factory.
    feature_with_uniform_round = [
        [0.0, 1.0],
        [1.0, 1.0],  # uniform round: a bare argmax would report ch 0 here
    ]
    feature_with_clear_maxima = [
        [0.0, 1.0],
        [1.0, 0.0],
    ]
    data = np.array([feature_with_uniform_round, feature_with_clear_maxima])

    table = intensity_table_factory(data)
    decoded = codebook_factory().decode_per_round_max(table)

    targets = decoded[Features.TARGET].values
    assert np.array_equal(targets, ['nan', 'GENE_A'])


def test_feature_round_all_nan():
    """
    Check that a round whose channels are all NaN does not break per-round-max decoding.

    The first feature below contains an all-NaN round and is expected to decode to 'nan'; the
    second feature has a distinct maximum in every round and is expected to decode to 'GENE_A'.
    """
    # Layout is presumably (feature, round, ch) -- confirm against intensity_table_factory.
    feature_with_nan_round = [
        [0.0, 1.0],
        [np.nan, np.nan],  # no channel in this round carries a usable intensity
    ]
    feature_with_clear_maxima = [
        [0.0, 1.0],
        [1.0, 0.0],
    ]
    data = np.array([feature_with_nan_round, feature_with_clear_maxima])

    table = intensity_table_factory(data)
    decoded = codebook_factory().decode_per_round_max(table)

    targets = decoded[Features.TARGET].values
    assert np.array_equal(targets, ['nan', 'GENE_A'])
2 changes: 1 addition & 1 deletion starfish/test/full_pipelines/api/test_iss_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_iss_pipeline_cropped_data(tmpdir):
assert np.array_equal(genes, np.array(['ACTB', 'CD68', 'CTSL2', 'EPCAM',
'ETV4', 'GAPDH', 'GUS', 'HER2', 'RAC1',
'TFRC', 'TP53', 'VEGF']))
assert np.array_equal(gene_counts, [20, 1, 5, 2, 1, 11, 1, 3, 2, 1, 1, 2])
assert np.array_equal(gene_counts, [19, 1, 5, 2, 1, 11, 1, 3, 2, 1, 1, 2])

masks = iss.masks

Expand Down

0 comments on commit 505f73c

Please sign in to comment.