From a7e9e8a46761187476123640a18afce522fce695 Mon Sep 17 00:00:00 2001 From: Tony Tung Date: Mon, 20 Aug 2018 13:49:14 -0700 Subject: [PATCH] Upgrade to nbencdec 0.0.5 (#454) Picks up this sort-of [format change](https://github.com/ttung/nbencdec/commit/9e3548c1aef920f3bf4cbec4f9894504777a23d2). Reason it's not completely breaking is that it still decodes correctly, just that each markdown cell will have a space in front of each line. Rationale for this change: markdown cells get prefixed with "# ", and if it's just an empty line, and an editor comes along and strips trailing whitespace, then it becomes "#", which doesn't decode the same as "# ". --- REQUIREMENTS-DEV.txt | 2 +- ...peline_-_Human_Occipital_Cortex_-_1_FOV.py | 30 ++--- notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py | 82 ++++++------- ..._tutorial_-_Mouse_vs._Human_Fibroblasts.py | 34 +++--- ...SH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.py | 112 +++++++++--------- notebooks/py/allen_smFISH.py | 32 ++--- notebooks/py/osmFISH.py | 6 +- 7 files changed, 149 insertions(+), 149 deletions(-) diff --git a/REQUIREMENTS-DEV.txt b/REQUIREMENTS-DEV.txt index b9a145ddd..5729367be 100644 --- a/REQUIREMENTS-DEV.txt +++ b/REQUIREMENTS-DEV.txt @@ -4,5 +4,5 @@ jsonpath_rw pytest-cov>=2.5.1 pytest-xdist mypy -nbencdec +nbencdec >= 0.0.5 -r REQUIREMENTS.txt diff --git a/notebooks/py/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.py b/notebooks/py/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.py index 9273a781d..89301109d 100644 --- a/notebooks/py/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.py +++ b/notebooks/py/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.py @@ -4,7 +4,7 @@ # EPY: stripped_notebook: {"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", 
"version": "3.6.5"}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# ## Reproduce DARTFISH results with a Pixel Decoding Method +### Reproduce DARTFISH results with a Pixel Decoding Method # EPY: END markdown # EPY: START code @@ -33,9 +33,9 @@ # EPY: END code # EPY: START markdown -# ### Load image stack -# -# Note that the data here corresopond to DARTFISHv1 2017. The group is actively working on improving the protocol. +#### Load image stack +# +#Note that the data here corresopond to DARTFISHv1 2017. The group is actively working on improving the protocol. # EPY: END markdown # EPY: START code @@ -54,7 +54,7 @@ # EPY: END code # EPY: START markdown -# ### Load codebook +#### Load codebook # EPY: END markdown # EPY: START code @@ -63,7 +63,7 @@ # EPY: END code # EPY: START markdown -# ### Load copy number benchmark results +#### Load copy number benchmark results # EPY: END markdown # EPY: START code @@ -72,7 +72,7 @@ # EPY: END code # EPY: START markdown -# ### Filter Image Stack +#### Filter Image Stack # EPY: END markdown # EPY: START code @@ -84,7 +84,7 @@ # EPY: END code # EPY: START markdown -# #### Visualize barcode magnitudes to help determine an appropriate threshold for decoding +##### Visualize barcode magnitudes to help determine an appropriate threshold for decoding # EPY: END markdown # EPY: START code @@ -106,7 +106,7 @@ def compute_magnitudes(stack, norm_order=2): # EPY: END code # EPY: START markdown -# ### Decode +#### Decode # EPY: END markdown # EPY: START code @@ -136,11 +136,11 @@ def compute_magnitudes(stack, norm_order=2): # EPY: END code # EPY: START markdown -# ### QC Plots +#### QC Plots # EPY: END markdown # EPY: START markdown -# #### parameter tuning plots +##### parameter tuning plots # EPY: END markdown # EPY: START code @@ -176,7 +176,7 @@ def compute_magnitudes(stack, norm_order=2): # EPY: END code # EPY: START markdown -# #### Copy number comparisons +##### Copy number comparisons # EPY: END markdown # EPY: START 
code @@ -205,7 +205,7 @@ def compute_magnitudes(stack, norm_order=2): # EPY: END code # EPY: START markdown -# #### visualization of rolonies +##### visualization of rolonies # EPY: END markdown # EPY: START code @@ -249,8 +249,8 @@ def compute_magnitudes(stack, norm_order=2): # EPY: END code # EPY: START markdown -# ### visualization of matched barcodes -# here, we 1. pick a rolony that was succesfully decoded to a gene. 2. pull out the average pixel trace for that rolony and 3. plot that pixel trace against the barcode of that gene +#### visualization of matched barcodes +#here, we 1. pick a rolony that was successfully decoded to a gene. 2. pull out the average pixel trace for that rolony and 3. plot that pixel trace against the barcode of that gene # EPY: END markdown # EPY: START code diff --git a/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py b/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py index 0c07b2d8e..0a0e04095 100644 --- a/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py +++ b/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py @@ -4,11 +4,11 @@ # EPY: stripped_notebook: {"metadata": {"hide_input": false, "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5"}, "toc": {"nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "toc_cell": false, "toc_position": {}, "toc_section_display": "block", "toc_window_display": false}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# ## Reproduce In-situ Sequencing results with Starfish -# -# This notebook walks through a work flow that reproduces an ISS result for one field of view using the starfish package. 
-# -# ## Load tiff stack and visualize one field of view +### Reproduce In-situ Sequencing results with Starfish +# +#This notebook walks through a work flow that reproduces an ISS result for one field of view using the starfish package. +# +### Load tiff stack and visualize one field of view # EPY: END markdown # EPY: START code @@ -32,9 +32,9 @@ # EPY: END code # EPY: START markdown -# ## Show input file format that specifies how the tiff stack is organized -# -# The stack contains multiple single plane images, one for each color channel, 'c', (columns in above image) and imaging round, 'r', (rows in above image). This protocol assumes that genes are encoded with a length 4 quatenary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. The goal is now to process these image data into spatially organized barcodes, e.g., ACTG, which can then be mapped back to a codebook that specifies what gene this codeword corresponds to. +### Show input file format that specifies how the tiff stack is organized +# +#The stack contains multiple single plane images, one for each color channel, 'c', (columns in above image) and imaging round, 'r', (rows in above image). This protocol assumes that genes are encoded with a length 4 quaternary barcode that can be read out from the images. Each round encodes a position in the codeword. The maximum signal in each color channel (columns in the above image) corresponds to a letter in the codeword. The channels, in order, correspond to the letters: 'T', 'G', 'C', 'A'. The goal is now to process these image data into spatially organized barcodes, e.g., ACTG, which can then be mapped back to a codebook that specifies what gene this codeword corresponds to. 
# EPY: END markdown # EPY: START code @@ -43,7 +43,7 @@ # EPY: END code # EPY: START markdown -# The flat TIFF files are loaded into a 4-d tensor with dimensions corresponding to imaging round, channel, x, and y. For other volumetric approaches that image the z-plane, this would be a 5-d tensor. +#The flat TIFF files are loaded into a 4-d tensor with dimensions corresponding to imaging round, channel, x, and y. For other volumetric approaches that image the z-plane, this would be a 5-d tensor. # EPY: END markdown # EPY: START code @@ -59,11 +59,11 @@ # EPY: END code # EPY: START markdown -# ## Show auxiliary images captured during the experiment +### Show auxiliary images captured during the experiment # EPY: END markdown # EPY: START markdown -# 'dots' is a general stain for all possible transcripts. This image should correspond to the maximum projcection of all color channels within a single imaging round. This auxiliary image is useful for registering images from multiple imaging rounds to this reference image. We'll see an example of this further on in the notebook +#'dots' is a general stain for all possible transcripts. This image should correspond to the maximum projection of all color channels within a single imaging round. This auxiliary image is useful for registering images from multiple imaging rounds to this reference image. We'll see an example of this further on in the notebook # EPY: END markdown # EPY: START code @@ -71,7 +71,7 @@ # EPY: END code # EPY: START markdown -# Below is a DAPI auxiliary image, which specifically marks nuclei. This is useful cell segmentation later on in the processing. +#Below is a DAPI auxiliary image, which specifically marks nuclei. This is useful for cell segmentation later on in the processing. 
# EPY: END markdown # EPY: START code @@ -79,11 +79,11 @@ # EPY: END code # EPY: START markdown -# ## Examine the codebook +### Examine the codebook # EPY: END markdown # EPY: START markdown -# Each 4 letter quatenary code (as read out from the 4 imaging rounds and 4 color channels) represents a gene. This relationship is stored in a codebook +#Each 4 letter quaternary code (as read out from the 4 imaging rounds and 4 color channels) represents a gene. This relationship is stored in a codebook # EPY: END markdown # EPY: START code @@ -92,9 +92,9 @@ # EPY: END code # EPY: START markdown -# ## Filter and scale raw data -# -# Now apply the white top hat filter to both the spots image and the individual channels. White top had enhances white spots on a black background. +### Filter and scale raw data +# +#Now apply the white top hat filter to both the spots image and the individual channels. White top hat enhances white spots on a black background. # EPY: END markdown # EPY: START code @@ -108,16 +108,16 @@ # EPY: END code # EPY: START markdown -# ## Register data +### Register data # EPY: END markdown # EPY: START markdown -# For each imaging round, the max projection across color channels should look like the dots stain. -# Below, this computes the max projection across the color channels of an imaging round and learns the linear transformation to maps the resulting image onto the dots image. -# -# The Fourier shift registration approach can be thought of as maximizing the cross-correlation of two images. -# -# In the below table, Error is the minimum mean-squared error, and shift reports changes in x and y dimension. +#For each imaging round, the max projection across color channels should look like the dots stain. +#Below, this computes the max projection across the color channels of an imaging round and learns the linear transformation that maps the resulting image onto the dots image. 
+# +#The Fourier shift registration approach can be thought of as maximizing the cross-correlation of two images. +# +#In the below table, Error is the minimum mean-squared error, and shift reports changes in x and y dimension. # EPY: END markdown # EPY: START code @@ -131,11 +131,11 @@ # EPY: END code # EPY: START markdown -# ## Use spot-detector to create 'encoder' table for standardized input to decoder +### Use spot-detector to create 'encoder' table for standardized input to decoder # EPY: END markdown # EPY: START markdown -# Each pipeline exposes an encoder that translates an image into spots with intensities. This approach uses a Gaussian spot detector. +#Each pipeline exposes an encoder that translates an image into spots with intensities. This approach uses a Gaussian spot detector. # EPY: END markdown # EPY: START code @@ -173,7 +173,7 @@ # EPY: END code # EPY: START markdown -# This visualizes a single spot (#100) across all imaging rounds and channels. It contains the intensity and bit index, which allow it to be mapped onto the correct barcode. +#This visualizes a single spot (#100) across all imaging rounds and channels. It contains the intensity and bit index, which allow it to be mapped onto the correct barcode. 
# EPY: END markdown # EPY: START code @@ -181,25 +181,25 @@ # EPY: END code # EPY: START markdown -# The Encoder table is the hypothesized standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images +#The Encoder table is the hypothesized standardized file format for the output of a spot detector, and is the first output file format in the pipeline that is not an image or set of images # EPY: END markdown # EPY: START markdown -# `attributes` is produced by the encoder and contains all the information necessary to map the encoded spots back to the original image -# -# `x, y` describe the position, while `x_min` through `y_max` describe the bounding box for the spot, which is refined by a radius `r`. This table also stores the intensity and spot_id. +#`attributes` is produced by the encoder and contains all the information necessary to map the encoded spots back to the original image +# +#`x, y` describe the position, while `x_min` through `y_max` describe the bounding box for the spot, which is refined by a radius `r`. This table also stores the intensity and spot_id. # EPY: END markdown # EPY: START markdown -# ## Decode +### Decode # EPY: END markdown # EPY: START markdown -# Each assay type also exposes a decoder. A decoder translates each spot (spot_id) in the Encoder table into a gene (that matches a barcode) and associates this information with the stored position. The goal is to decode and output a quality score that describes the confidence in the decoding. +#Each assay type also exposes a decoder. A decoder translates each spot (spot_id) in the Encoder table into a gene (that matches a barcode) and associates this information with the stored position. The goal is to decode and output a quality score that describes the confidence in the decoding. 
# EPY: END markdown # EPY: START markdown -# There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance (in intensity). Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions. +#There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance (in intensity). Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions. # EPY: END markdown # EPY: START code @@ -207,11 +207,11 @@ # EPY: END code # EPY: START markdown -# ## Compare to results from paper +### Compare to results from paper # EPY: END markdown # EPY: START markdown -# Besides house keeping genes, VIM and HER2 should be most highly expessed, which is consistent here. +#Besides house keeping genes, VIM and HER2 should be most highly expressed, which is consistent here. # EPY: END markdown # EPY: START code @@ -226,11 +226,11 @@ # EPY: END code # EPY: START markdown -# ### Segment +#### Segment # EPY: END markdown # EPY: START markdown -# After calling spots and decoding their gene information, cells must be segmented to assign genes to cells. This paper used a seeded watershed approach. +#After calling spots and decoding their gene information, cells must be segmented to assign genes to cells. This paper used a seeded watershed approach. 
# EPY: END markdown # EPY: START code @@ -254,9 +254,9 @@ # EPY: END code # EPY: START markdown -# ### Visualize results -# -# This FOV was selected to make sure that we can visualize the tumor/stroma boundary, below this is described by pseudo-coloring `HER2` (tumor) and vimentin (`VIM`, stroma) +#### Visualize results +# +#This FOV was selected to make sure that we can visualize the tumor/stroma boundary, below this is described by pseudo-coloring `HER2` (tumor) and vimentin (`VIM`, stroma) # EPY: END markdown # EPY: START code diff --git a/notebooks/py/ISS_Simple_tutorial_-_Mouse_vs._Human_Fibroblasts.py b/notebooks/py/ISS_Simple_tutorial_-_Mouse_vs._Human_Fibroblasts.py index 6bc3277d3..e03c317fd 100644 --- a/notebooks/py/ISS_Simple_tutorial_-_Mouse_vs._Human_Fibroblasts.py +++ b/notebooks/py/ISS_Simple_tutorial_-_Mouse_vs._Human_Fibroblasts.py @@ -4,15 +4,15 @@ # EPY: stripped_notebook: {"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5"}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# # User note: This notebook is currently broken -# -# For a working ISS demonstration, please see the ISS_Pipeline notebook in the same directory +## User note: This notebook is currently broken +# +#For a working ISS demonstration, please see the ISS_Pipeline notebook in the same directory # EPY: END markdown # EPY: START markdown -# # Starfish re-creation of an in-situ sequencing pipeline -# -# Here, we reproduce the results of a pipeline run on data collected using the gap filling and padlock probe litigation method described in [Ke, Mignardi, et. al, 2013](http://www.nature.com/nmeth/journal/v10/n9/full/nmeth.2563.html). 
These data represent 5 co-cultured mouse and human cells -- the main idea is to detect a single nucleotide polymorphism (SNP) in the Beta-Actin (ACTB) gene across species. The Python code below correctly re-produces the same results from the original cell profiler - matlab - imagej [pipeline](http://cellprofiler.org/examples/#InSitu) that is publicly accessible. +## Starfish re-creation of an in-situ sequencing pipeline +# +#Here, we reproduce the results of a pipeline run on data collected using the gap filling and padlock probe ligation method described in [Ke, Mignardi, et. al, 2013](http://www.nature.com/nmeth/journal/v10/n9/full/nmeth.2563.html). These data represent 5 co-cultured mouse and human cells -- the main idea is to detect a single nucleotide polymorphism (SNP) in the Beta-Actin (ACTB) gene across species. The Python code below correctly re-produces the same results from the original cell profiler - matlab - imagej [pipeline](http://cellprofiler.org/examples/#InSitu) that is publicly accessible. 
# EPY: END markdown # EPY: START code @@ -29,9 +29,9 @@ # EPY: END code # EPY: START markdown -# ## Raw Data -# -# The raw data can be downloaded and formatted for analysis by running: ```python examples/get_iss_data.py > --d 1``` from the Starfish directory +### Raw Data +# +#The raw data can be downloaded and formatted for analysis by running: ```python examples/get_iss_data.py > --d 1``` from the Starfish directory # EPY: END markdown # EPY: START code @@ -49,7 +49,7 @@ # EPY: END code # EPY: START markdown -# ## Register +### Register # EPY: END markdown # EPY: START code @@ -62,7 +62,7 @@ # EPY: END code # EPY: START markdown -# ## Filter +### Filter # EPY: END markdown # EPY: START code @@ -93,7 +93,7 @@ # EPY: END code # EPY: START markdown -# ## Detect +### Detect # EPY: END markdown # EPY: START code @@ -120,7 +120,7 @@ # EPY: END code # EPY: START markdown -# ## Segmentation +### Segmentation # EPY: END markdown # EPY: START code @@ -139,7 +139,7 @@ # EPY: END code # EPY: START markdown -# ## Assignment +### Assignment # EPY: END markdown # EPY: START code @@ -157,7 +157,7 @@ # EPY: END code # EPY: START markdown -# ## Decode +### Decode # EPY: END markdown # EPY: START code @@ -172,7 +172,7 @@ # EPY: END code # EPY: START markdown -# ## Visualization +### Visualization # EPY: END markdown # EPY: START code @@ -211,7 +211,7 @@ # EPY: END code # EPY: START markdown -# ## Cell by gene expression table +### Cell by gene expression table # EPY: END markdown # EPY: START code diff --git a/notebooks/py/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.py b/notebooks/py/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.py index d59311637..5ce9c062f 100644 --- a/notebooks/py/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.py +++ b/notebooks/py/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.py @@ -4,9 +4,9 @@ # EPY: stripped_notebook: {"metadata": {"hide_input": false, "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": 
{"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5"}, "toc": {"nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "toc_cell": false, "toc_position": {}, "toc_section_display": "block", "toc_window_display": false}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# ## Reproduce Published results with Starfish -# -# This notebook walks through a workflow that reproduces a MERFISH result for one field of view using the starfish package. +### Reproduce Published results with Starfish +# +#This notebook walks through a workflow that reproduces a MERFISH result for one field of view using the starfish package. # EPY: END markdown # EPY: START code @@ -35,7 +35,7 @@ # EPY: END code # EPY: START markdown -# Individual imaging rounds and channels can also be visualized +#Individual imaging rounds and channels can also be visualized # EPY: END markdown # EPY: START code @@ -48,13 +48,13 @@ # EPY: END code # EPY: START markdown -# ## Show input file format that specifies how the tiff stack is organized -# -# The stack contains multiple images corresponding to the channel and imaging rounds. MERFISH builds a 16 bit barcode from 8 imaging rounds, each of which measures two channels that correspond to contiguous (but not necessarily consistently ordered) bits of the barcode. -# -# The MERFISH computational pipeline also constructs a scalar that corrects for intensity differences across each of the 16 images, e.g., one scale factor per bit position. -# -# The stacks in this example are pre-registered using fiduciary beads. +### Show input file format that specifies how the tiff stack is organized +# +#The stack contains multiple images corresponding to the channel and imaging rounds. 
MERFISH builds a 16 bit barcode from 8 imaging rounds, each of which measures two channels that correspond to contiguous (but not necessarily consistently ordered) bits of the barcode. +# +#The MERFISH computational pipeline also constructs a scalar that corrects for intensity differences across each of the 16 images, e.g., one scale factor per bit position. +# +#The stacks in this example are pre-registered using fiduciary beads. # EPY: END markdown # EPY: START code @@ -63,11 +63,11 @@ # EPY: END code # EPY: START markdown -# ## Visualize codebook +### Visualize codebook # EPY: END markdown # EPY: START markdown -# The MERFISH codebook maps each barcode to a gene (or blank) feature. The codes in the MERFISH codebook are constructed from a 4-hamming error correcting code with exactly 4 "on" bits per barcode +#The MERFISH codebook maps each barcode to a gene (or blank) feature. The codes in the MERFISH codebook are constructed from a 4-hamming error correcting code with exactly 4 "on" bits per barcode # EPY: END markdown # EPY: START code @@ -77,11 +77,11 @@ # EPY: END code # EPY: START markdown -# ## Filter and scale raw data before decoding +### Filter and scale raw data before decoding # EPY: END markdown # EPY: START markdown -# Begin filtering with a high pass filter to remove background signal. +#Begin filtering with a high pass filter to remove background signal. # EPY: END markdown # EPY: START code @@ -91,7 +91,7 @@ # EPY: END code # EPY: START markdown -# The below algorithm deconvolves out the point spread function introduced by the microcope and is specifically designed for this use case. The number of iterations is an important parameter that needs careful optimization. +#The below algorithm deconvolves out the point spread function introduced by the microcope and is specifically designed for this use case. The number of iterations is an important parameter that needs careful optimization. 
# EPY: END markdown # EPY: START code @@ -100,9 +100,9 @@ # EPY: END code # EPY: START markdown -# Recall that the image is pre-registered, as stated above. Despite this, individual RNA molecules may still not be perfectly aligned across imaging rounds. This is crucial in order to read out a measure of the itended barcode (across imaging rounds) in order to map it to the codebook. To solve for potential mis-alignment, the images can be blurred with a 1-pixel Gaussian kernel. The risk here is that this will obfuscate signals from nearby molecules. -# -# A local search in pixel space across imaging rounds can also solve this. +#Recall that the image is pre-registered, as stated above. Despite this, individual RNA molecules may still not be perfectly aligned across imaging rounds. This is crucial in order to read out a measure of the itended barcode (across imaging rounds) in order to map it to the codebook. To solve for potential mis-alignment, the images can be blurred with a 1-pixel Gaussian kernel. The risk here is that this will obfuscate signals from nearby molecules. +# +#A local search in pixel space across imaging rounds can also solve this. # EPY: END markdown # EPY: START code @@ -111,7 +111,7 @@ # EPY: END code # EPY: START markdown -# Use MERFISH-calculated size factors to scale the channels across the imaging rounds and visualize the resulting filtered and scaled images. Right now we have to extract this information from the metadata and apply this transformation manually. +#Use MERFISH-calculated size factors to scale the channels across the imaging rounds and visualize the resulting filtered and scaled images. Right now we have to extract this information from the metadata and apply this transformation manually. 
# EPY: END markdown # EPY: START code @@ -146,37 +146,37 @@ # EPY: END code # EPY: START markdown -# ## Use spot-detector to create 'encoder' table for standardized input to decoder -# -# Each pipeline exposes a spot detector, and this spot detector translates the filtered image into an encoded table by detecting spots. The table contains the spot_id, the corresponding intensity (v) and the channel (c), imaging round (r) of each spot. -# -# The MERFISH pipeline merges these two steps together by finding pixel-based features, and then later collapsing these into spots and filtering out undesirable (non-spot) features. -# -# Therefore, no encoder table is generated, but a robust SpotAttribute and DecodedTable are both produced: +### Use spot-detector to create 'encoder' table for standardized input to decoder +# +#Each pipeline exposes a spot detector, and this spot detector translates the filtered image into an encoded table by detecting spots. The table contains the spot_id, the corresponding intensity (v) and the channel (c), imaging round (r) of each spot. +# +#The MERFISH pipeline merges these two steps together by finding pixel-based features, and then later collapsing these into spots and filtering out undesirable (non-spot) features. +# +#Therefore, no encoder table is generated, but a robust SpotAttribute and DecodedTable are both produced: # EPY: END markdown # EPY: START markdown -# ## Decode -# -# Each assay type also exposes a decoder. A decoder translates each spot (spot_id) in the encoded table into a gene that matches a barcode in the codebook. The goal is to decode and output a quality score, per spot, that describes the confidence in the decoding. Recall that in the MERFISH pipeline, each 'spot' is actually a 16 dimensional vector, one per pixel in the image. From here on, we will refer to these as pixel vectors. 
Once these pixel vectors are decoded into gene values, contiguous pixels that are decoded to the same gene are labeled as 'spots' via a connected components labeler. We shall refer to the latter as spots. -# -# There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance in intensity. Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions. -# -# For MERFISH, which uses soft decoding, there are several parameters which are important to determining the result of the decoding method: -# -# ### Distance threshold -# In MERFISH, each pixel vector is a 16d vector that we want to map onto a barcode via minimum euclidean distance. Each barcode in the codebook, and each pixel vector is first mapped to the unit sphere by L2 normalization. As such, the maximum distance between a pixel vector and the nearest single-bit error barcode is 0.5176. As such, the decoder only accepts pixel vectors that are below this distance for assignment to a codeword in the codebook. -# -# ### Magnitude threshold -# This is a signal floor for decoding. Pixel vectors with an L2 norm below this floor are not considered for decoding. -# -# ### Area threshold -# Contiguous pixels that decode to the same gene are called as spots via connected components labeling. The minimum area of these spots are set by this parameter. The intuition is that pixel vectors, that pass the distance and magnitude thresholds, shold probably not be trusted as genes as the mRNA transcript would be too small for them to be real. This parameter can be set based on microscope resolution and signal amplification strategy. -# -# ### Crop size -# The crop size crops the image by a number of pixels large enough to eliminate parts of the image that suffer from boundary effects from both signal aquisition (e.g., FOV overlap) and image processing. 
Here this value is 40. -# -# Given these three thresholds, for each pixel vector, the decoder picks the closest code (minimum distance) that satisfies each of the above thresholds, where the distance is calculated between the code and a normalized intensity vector and throws away subsequent spots that are too small. +### Decode +# +#Each assay type also exposes a decoder. A decoder translates each spot (spot_id) in the encoded table into a gene that matches a barcode in the codebook. The goal is to decode and output a quality score, per spot, that describes the confidence in the decoding. Recall that in the MERFISH pipeline, each 'spot' is actually a 16 dimensional vector, one per pixel in the image. From here on, we will refer to these as pixel vectors. Once these pixel vectors are decoded into gene values, contiguous pixels that are decoded to the same gene are labeled as 'spots' via a connected components labeler. We shall refer to the latter as spots. +# +#There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance in intensity. Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions. +# +#For MERFISH, which uses soft decoding, there are several parameters which are important to determining the result of the decoding method: +# +#### Distance threshold +#In MERFISH, each pixel vector is a 16d vector that we want to map onto a barcode via minimum euclidean distance. Each barcode in the codebook, and each pixel vector is first mapped to the unit sphere by L2 normalization. As such, the maximum distance between a pixel vector and the nearest single-bit error barcode is 0.5176. As such, the decoder only accepts pixel vectors that are below this distance for assignment to a codeword in the codebook. +# +#### Magnitude threshold +#This is a signal floor for decoding. 
Pixel vectors with an L2 norm below this floor are not considered for decoding. +# +#### Area threshold +#Contiguous pixels that decode to the same gene are called as spots via connected components labeling. The minimum area of these spots is set by this parameter. The intuition is that pixel vectors, that pass the distance and magnitude thresholds, should probably not be trusted as genes as the mRNA transcript would be too small for them to be real. This parameter can be set based on microscope resolution and signal amplification strategy. +# +#### Crop size +#The crop size crops the image by a number of pixels large enough to eliminate parts of the image that suffer from boundary effects from both signal acquisition (e.g., FOV overlap) and image processing. Here this value is 40. +# +#Given these three thresholds, for each pixel vector, the decoder picks the closest code (minimum distance) that satisfies each of the above thresholds, where the distance is calculated between the code and a normalized intensity vector and throws away subsequent spots that are too small. # EPY: END markdown # EPY: START code @@ -197,11 +197,11 @@ # EPY: END code # EPY: START markdown -# ## Compare to results from paper -# -# The below plot aggregates gene copy number across single cells in the field of view and compares the results to the published intensities in the MERFISH paper. -# -# To make this match perfectly, run deconvolution 15 times instead of 14. As presented below, STARFISH displays a lower detection rate. +### Compare to results from paper +# +#The below plot aggregates gene copy number across single cells in the field of view and compares the results to the published intensities in the MERFISH paper. +# +#To make this match perfectly, run deconvolution 15 times instead of 14. As presented below, STARFISH displays a lower detection rate.
# EPY: END markdown # EPY: START code @@ -228,9 +228,9 @@ # EPY: END code # EPY: START markdown -# ## Visualize results -# -# This image applies a pseudo-color to each gene channel to visualize the position and size of all called spots in a subset of the test image +### Visualize results +# +#This image applies a pseudo-color to each gene channel to visualize the position and size of all called spots in a subset of the test image # EPY: END markdown # EPY: START code diff --git a/notebooks/py/allen_smFISH.py b/notebooks/py/allen_smFISH.py index d3ee2b837..a174d1f45 100644 --- a/notebooks/py/allen_smFISH.py +++ b/notebooks/py/allen_smFISH.py @@ -4,9 +4,9 @@ # EPY: stripped_notebook: {"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5"}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# # Reproduce Allen smFISH results with Starfish -# -# This notebook walks through a work flow that reproduces the smFISH result for one field of view using the starfish package. +## Reproduce Allen smFISH results with Starfish +# +#This notebook walks through a work flow that reproduces the smFISH result for one field of view using the starfish package. # EPY: END markdown # EPY: START code @@ -31,9 +31,9 @@ # EPY: END code # EPY: START markdown -# Load the Stack object, which while not well-named right now, should be thought of as an access point to an "ImageDataSet". In practice, we expect the Stack object or something similar to it to be an access point for _multiple_ fields of view. In practice, the thing we talk about as a "TileSet" is the `Stack.image` object. The data are currently stored in-memory in a `numpy.ndarray`, and that is where most of our operations are done. 
-# -# The numpy array can be accessed through Stack.image.numpy\_array (public method, read only) or Stack.image.\_data (read and write) +#Load the Stack object, which while not well-named right now, should be thought of as an access point to an "ImageDataSet". In practice, we expect the Stack object or something similar to it to be an access point for _multiple_ fields of view. In practice, the thing we talk about as a "TileSet" is the `Stack.image` object. The data are currently stored in-memory in a `numpy.ndarray`, and that is where most of our operations are done. +# +#The numpy array can be accessed through Stack.image.numpy\_array (public method, read only) or Stack.image.\_data (read and write) # EPY: END markdown # EPY: START code @@ -43,7 +43,7 @@ # EPY: END code # EPY: START markdown -# We're ready now to load the experiment into starfish (This experiment is big, it takes a few minutes): +#We're ready now to load the experiment into starfish (This experiment is big, it takes a few minutes): # EPY: END markdown # EPY: START code @@ -52,9 +52,9 @@ # EPY: END code # EPY: START markdown -# All of our implemented operations leverage the `Stack.image.apply` method to apply a single function over each of the tiles or volumes in the FOV, depending on whether the method accepts a 2d or 3d array. Below, we're clipping each image independently at the 10th percentile. I've placed the imports next to the methods so that you can easily locate the code, should you want to look under the hood and understand what parameters have been chosen. -# -# The verbose flag for our apply loops could use a bit more refinement. We should be able to tell it how many images it needs to process from looking at the image stack, but for now it's dumb so just reports the number of tiles or volumes it's processed. This FOV has 102 images over 3 volumes. 
+#All of our implemented operations leverage the `Stack.image.apply` method to apply a single function over each of the tiles or volumes in the FOV, depending on whether the method accepts a 2d or 3d array. Below, we're clipping each image independently at the 10th percentile. I've placed the imports next to the methods so that you can easily locate the code, should you want to look under the hood and understand what parameters have been chosen. +# +#The verbose flag for our apply loops could use a bit more refinement. We should be able to tell it how many images it needs to process from looking at the image stack, but for now it's dumb so just reports the number of tiles or volumes it's processed. This FOV has 102 images over 3 volumes. # EPY: END markdown # EPY: START code @@ -64,9 +64,9 @@ # EPY: END code # EPY: START markdown -# If you ever want to visualize the image in the notebook, we've added a widget to do that. The first parameter is an indices dict that specifies which imaging round, channel, z-slice you want to view. The result is a pageable visualization across that arbitrary set of slices. Below I'm visualizing the first channel, which your codebook tells me is Nmnt. -# -# [N.B. once you click on the slider, you can page with the arrow keys on the keyboard.] +#If you ever want to visualize the image in the notebook, we've added a widget to do that. The first parameter is an indices dict that specifies which imaging round, channel, z-slice you want to view. The result is a pageable visualization across that arbitrary set of slices. Below I'm visualizing the first channel, which your codebook tells me is Nmnt. +# +#[N.B. once you click on the slider, you can page with the arrow keys on the keyboard.] # EPY: END markdown # EPY: START code @@ -79,7 +79,7 @@ # EPY: END code # EPY: START markdown -# For bandpass, there's a point where things get weird, at `c == 0; z <= 14`. In that range the images look mostly like noise. 
However, _above_ that, they look great + background subtracted! The later stages of the pipeline appear robust to this, though, as no spots are called for the noisy sections. +#For bandpass, there's a point where things get weird, at `c == 0; z <= 14`. In that range the images look mostly like noise. However, _above_ that, they look great + background subtracted! The later stages of the pipeline appear robust to this, though, as no spots are called for the noisy sections. # EPY: END markdown # EPY: START code @@ -96,7 +96,7 @@ # EPY: END code # EPY: START markdown -# Below, because spot finding is so slow when single-plex, we'll pilot this on a max projection to show that the parameters work. Here's what trackpy.locate, which we wrap, produces for a z-projection of channel 1. To do use our plotting methods on z-projections we have to expose some of the starfish internals, which will be improved upon. +#Below, because spot finding is so slow when single-plex, we'll pilot this on a max projection to show that the parameters work. Here's what trackpy.locate, which we wrap, produces for a z-projection of channel 1. To use our plotting methods on z-projections we have to expose some of the starfish internals, which will be improved upon. # EPY: END markdown # EPY: START code @@ -128,7 +128,7 @@ # EPY: END code # EPY: START markdown -# Below spot finding is on the _volumes_ for each channel. This will take about `11m30s` +#Below spot finding is on the _volumes_ for each channel.
This will take about `11m30s` # EPY: END markdown # EPY: START code diff --git a/notebooks/py/osmFISH.py b/notebooks/py/osmFISH.py index c14a6ee5b..311c7b282 100644 --- a/notebooks/py/osmFISH.py +++ b/notebooks/py/osmFISH.py @@ -4,7 +4,7 @@ # EPY: stripped_notebook: {"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5"}}, "nbformat": 4, "nbformat_minor": 2} # EPY: START markdown -# ## Loading the data into Starfish +### Loading the data into Starfish # EPY: END markdown # EPY: START code @@ -15,7 +15,7 @@ # EPY: END code # EPY: START markdown -# This notebook demonstrates how to load osmFISH data into starfish. Below loads fov_001, however fovs 002 and 003 are also converted and can be loaded by exchanging the number in the cloudflare link. The data can be dumped for local loading with `s.image.write` +#This notebook demonstrates how to load osmFISH data into starfish. Below loads fov_001, however fovs 002 and 003 are also converted and can be loaded by exchanging the number in the cloudflare link. The data can be dumped for local loading with `s.image.write` # EPY: END markdown # EPY: START code @@ -23,7 +23,7 @@ # EPY: END code # EPY: START markdown -# The below plot displays the z-volume for channel 0 of fov_001 +#The below plot displays the z-volume for channel 0 of fov_001 # EPY: END markdown # EPY: START code