Saxelrod hybridization to primary (#494)

* changing hybridization_images key to primary_images * changed date in notebook s3 urls * rebasing from master changes * fixing notebooks
spacetx · Aug 31, 2018 · cfe1ff5 · cfe1ff5
1 parent 58db7c0
commit cfe1ff5
Show file tree

Hide file tree

Showing 21 changed files with 117 additions and 111 deletions.
diff --git a/examples/get_iss_data.py b/examples/get_iss_data.py
@@ -85,7 +85,7 @@ def add_codebook(experiment_json_doc):
 
         # TODO: (ttung) remove the following unholy hacks.  this is because we want to point at a
         # tileset rather than a collection.
-        experiment_json_doc['hybridization_images'] = "hybridization-fov_000.json"
+        experiment_json_doc['primary_images'] = "hybridization-fov_000.json"
         experiment_json_doc['auxiliary_images']['nuclei'] = "nuclei-fov_000.json"
         experiment_json_doc['auxiliary_images']['dots'] = "dots-fov_000.json"
         return experiment_json_doc

diff --git a/notebooks/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.ipynb b/notebooks/DARTFISH_Pipeline_-_Human_Occipital_Cortex_-_1_FOV.ipynb
@@ -52,8 +52,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "exp = Experiment.from_json(\"https://dmf0bdeheu4zf.cloudfront.net/20180827/DARTFISH/experiment.json\")\n",
+    "\n",
+    "exp = Experiment.from_json('https://dmf0bdeheu4zf.cloudfront.net/20180828/DARTFISH/experiment.json')\n",
     "stack = exp.fov().primary_image\n",
+    "\n",
     "# TODO the latter will be fixed by https://github.com/spacetx/starfish/issues/316\n",
     "stack._data = stack._data.astype(float)"
    ]
@@ -105,7 +107,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "cnts_benchmark = pd.read_csv('https://dmf0bdeheu4zf.cloudfront.net/20180827/DARTFISH/fov_001/counts.csv')\n",
+    "\n",
+    "cnts_benchmark = pd.read_csv('https://dmf0bdeheu4zf.cloudfront.net/20180828/DARTFISH/fov_001/counts.csv')\n",
     "cnts_benchmark.head()"
    ]
   },
@@ -143,7 +146,7 @@
    "outputs": [],
    "source": [
     "def compute_magnitudes(stack, norm_order=2):\n",
-    "    \n",
+    "\n",
     "    pixel_intensities = IntensityTable.from_image_stack(zero_norm_stack)\n",
     "    feature_traces = pixel_intensities.stack(traces=(Indices.CH.value, Indices.ROUND.value))\n",
     "    norm = np.linalg.norm(feature_traces.values, ord=norm_order, axis=1)\n",
@@ -172,9 +175,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# how much magnitude should a barcode have for it to be considered by decoding? this was set by looking at \n",
+    "# how much magnitude should a barcode have for it to be considered by decoding? this was set by looking at\n",
     "# the plot above\n",
-    "magnitude_threshold = 0.5 \n",
+    "magnitude_threshold = 0.5\n",
     "# how big do we expect our spots to me, min/max size. this was set to be equivalent to the parameters\n",
     "# determined by the Zhang lab.\n",
     "area_threshold = (5, 30)\n",

diff --git a/notebooks/ISS_Pipeline_-_Breast_-_1_FOV.ipynb b/notebooks/ISS_Pipeline_-_Breast_-_1_FOV.ipynb
@@ -36,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "experiment = Experiment.from_json(\"https://dmf0bdeheu4zf.cloudfront.net/20180827/ISS/experiment.json\")\n",
+    "experiment = Experiment.from_json(\"https://dmf0bdeheu4zf.cloudfront.net/20180828/ISS/experiment.json\")\n",
     "# s.image.squeeze() simply converts the 4D tensor H*C*X*Y into a list of len(H*C) image planes for rendering by 'tile'"
    ]
   },
@@ -203,7 +203,7 @@
     "from starfish.image import Registration\n",
     "\n",
     "registration = Registration.FourierShiftRegistration(\n",
-    "    upsampling=1000, \n",
+    "    upsampling=1000,\n",
     "    reference_stack=dots,\n",
     "    verbose=True)\n",
     "registration.run(primary_image)"

diff --git a/notebooks/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.ipynb b/notebooks/MERFISH_Pipeline_-_U2O2_Cell_Culture_-_1_FOV.ipynb
@@ -30,7 +30,7 @@
     "import pprint\n",
     "\n",
     "import numpy as np\n",
-    "import pandas as pd \n",
+    "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "from showit import image as show_image\n",
@@ -45,7 +45,7 @@
    "outputs": [],
    "source": [
     "# load the data from cloudfront\n",
-    "experiment = Experiment.from_json(\"https://dmf0bdeheu4zf.cloudfront.net/20180827/MERFISH/experiment.json\")"
+    "experiment = Experiment.from_json(\"https://dmf0bdeheu4zf.cloudfront.net/20180828/MERFISH/experiment.json\")"
    ]
   },
   {
@@ -80,11 +80,11 @@
    "source": [
     "## Show input file format that specifies how the tiff stack is organized\n",
     "\n",
-    "The stack contains multiple images corresponding to the channel and imaging rounds. MERFISH builds a 16 bit barcode from 8 imaging rounds, each of which measures two channels that correspond to contiguous (but not necessarily consistently ordered) bits of the barcode. \n",
+    "The stack contains multiple images corresponding to the channel and imaging rounds. MERFISH builds a 16 bit barcode from 8 imaging rounds, each of which measures two channels that correspond to contiguous (but not necessarily consistently ordered) bits of the barcode.\n",
     "\n",
     "The MERFISH computational pipeline also constructs a scalar that corrects for intensity differences across each of the 16 images, e.g., one scale factor per bit position.\n",
     "\n",
-    "The stacks in this example are pre-registered using fiduciary beads. "
+    "The stacks in this example are pre-registered using fiduciary beads."
    ]
   },
   {
@@ -149,7 +149,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The below algorithm deconvolves out the point spread function introduced by the microcope and is specifically designed for this use case. The number of iterations is an important parameter that needs careful optimization. "
+    "The below algorithm deconvolves out the point spread function introduced by the microcope and is specifically designed for this use case. The number of iterations is an important parameter that needs careful optimization."
    ]
   },
   {
@@ -166,9 +166,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Recall that the image is pre-registered, as stated above. Despite this, individual RNA molecules may still not be perfectly aligned across imaging rounds. This is crucial in order to read out a measure of the itended barcode (across imaging rounds) in order to map it to the codebook. To solve for potential mis-alignment, the images can be blurred with a 1-pixel Gaussian kernel. The risk here is that this will obfuscate signals from nearby molecules. \n",
+    "Recall that the image is pre-registered, as stated above. Despite this, individual RNA molecules may still not be perfectly aligned across imaging rounds. This is crucial in order to read out a measure of the itended barcode (across imaging rounds) in order to map it to the codebook. To solve for potential mis-alignment, the images can be blurred with a 1-pixel Gaussian kernel. The risk here is that this will obfuscate signals from nearby molecules.\n",
     "\n",
-    "A local search in pixel space across imaging rounds can also solve this. "
+    "A local search in pixel space across imaging rounds can also solve this."
    ]
   },
   {
@@ -195,7 +195,7 @@
    "outputs": [],
    "source": [
     "scale_factors = {\n",
-    "    (t[Indices.ROUND], t[Indices.CH]): t['scale_factor'] \n",
+    "    (t[Indices.ROUND], t[Indices.CH]): t['scale_factor']\n",
     "    for index, t in primary_image.tile_metadata.iterrows()\n",
     "}"
    ]
@@ -210,7 +210,7 @@
     "# will operate entirely on float data and this cast can be removed\n",
     "primary_image._data = primary_image._data.astype(np.float64)\n",
     "\n",
-    "# this is a scaling method. It would be great to use image.apply here. It's possible, but we need to expose H & C to \n",
+    "# this is a scaling method. It would be great to use image.apply here. It's possible, but we need to expose H & C to\n",
     "# at least we can do it with get_slice and set_slice right now.\n",
     "\n",
     "for indices in primary_image._iter_indices():\n",
@@ -245,9 +245,9 @@
    "source": [
     "## Use spot-detector to create 'encoder' table  for standardized input  to decoder\n",
     "\n",
-    "Each pipeline exposes a spot detector, and this spot detector translates the filtered image into an encoded table by detecting spots. The table contains the spot_id, the corresponding intensity (v) and the channel (c), imaging round (r) of each spot. \n",
+    "Each pipeline exposes a spot detector, and this spot detector translates the filtered image into an encoded table by detecting spots. The table contains the spot_id, the corresponding intensity (v) and the channel (c), imaging round (r) of each spot.\n",
     "\n",
-    "The MERFISH pipeline merges these two steps together by finding pixel-based features, and then later collapsing these into spots and filtering out undesirable (non-spot) features. \n",
+    "The MERFISH pipeline merges these two steps together by finding pixel-based features, and then later collapsing these into spots and filtering out undesirable (non-spot) features.\n",
     "\n",
     "Therefore, no encoder table is generated, but a robust SpotAttribute and DecodedTable are both produced:"
    ]
@@ -260,20 +260,20 @@
     "\n",
     "Each assay type also exposes a decoder. A decoder translates each spot (spot_id) in the encoded table into a gene that matches a barcode in the codebook. The goal is to decode and output a quality score, per spot, that describes the confidence in the decoding. Recall that in the MERFISH pipeline, each 'spot' is actually a 16 dimensional vector, one per pixel in the image. From here on, we will refer to these as pixel vectors. Once these pixel vectors are decoded into gene values, contiguous pixels that are decoded to the same gene are labeled as 'spots' via a connected components labeler. We shall refer to the latter as spots.\n",
     "\n",
-    "There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance in intensity. Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions. \n",
+    "There are hard and soft decodings -- hard decoding is just looking for the max value in the code book. Soft decoding, by contrast, finds the closest code by distance in intensity. Because different assays each have their own intensities and error modes, we leave decoders as user-defined functions.\n",
     "\n",
-    "For MERFISH, which uses soft decoding, there are several parameters which are important to determining the result of the decoding method: \n",
+    "For MERFISH, which uses soft decoding, there are several parameters which are important to determining the result of the decoding method:\n",
     "\n",
     "### Distance threshold\n",
-    "In MERFISH, each pixel vector is a 16d vector that we want to map onto a barcode via minimum euclidean distance. Each barcode in the codebook, and each pixel vector is first mapped to the unit sphere by L2 normalization. As such, the maximum distance between a pixel vector and the nearest single-bit error barcode is 0.5176. As such, the decoder only accepts pixel vectors that are below this distance for assignment to a codeword in the codebook. \n",
+    "In MERFISH, each pixel vector is a 16d vector that we want to map onto a barcode via minimum euclidean distance. Each barcode in the codebook, and each pixel vector is first mapped to the unit sphere by L2 normalization. As such, the maximum distance between a pixel vector and the nearest single-bit error barcode is 0.5176. As such, the decoder only accepts pixel vectors that are below this distance for assignment to a codeword in the codebook.\n",
     "\n",
     "### Magnitude threshold\n",
-    "This is a signal floor for decoding. Pixel vectors with an L2 norm below this floor are not considered for decoding. \n",
+    "This is a signal floor for decoding. Pixel vectors with an L2 norm below this floor are not considered for decoding.\n",
     "\n",
     "### Area threshold\n",
     "Contiguous pixels that decode to the same gene are called as spots via connected components labeling. The minimum area of these spots are set by this parameter. The intuition is that pixel vectors, that pass the distance and magnitude thresholds, shold probably not be trusted as genes as the mRNA transcript would be too small for them to be real. This parameter can be set based on microscope resolution and signal amplification strategy.\n",
     "\n",
-    "### Crop size \n",
+    "### Crop size\n",
     "The crop size crops the image by a number of pixels large enough to eliminate parts of the image that suffer from boundary effects from both signal aquisition (e.g., FOV overlap) and image processing. Here this value is 40.\n",
     "\n",
     "Given these three thresholds, for each pixel vector, the decoder picks the closest code (minimum distance) that satisfies each of the above thresholds, where the distance is calculated between the code and a normalized intensity vector and throws away subsequent spots that are too small."
@@ -293,7 +293,7 @@
     "    magnitude_threshold=1,\n",
     "    min_area=2,\n",
     "    max_area=np.inf,\n",
-    "    norm_order=2, \n",
+    "    norm_order=2,\n",
     "    crop_size=(0, 40, 40)\n",
     ")\n",
     "\n",
@@ -305,11 +305,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Compare to results from paper \n",
+    "## Compare to results from paper\n",
     "\n",
-    "The below plot aggregates gene copy number across single cells in the field of view and compares the results to the published intensities in the MERFISH paper. \n",
+    "The below plot aggregates gene copy number across single cells in the field of view and compares the results to the published intensities in the MERFISH paper.\n",
     "\n",
-    "To make this match perfectly, run deconvolution 15 times instead of 14. As presented below, STARFISH displays a lower detection rate.  "
+    "To make this match perfectly, run deconvolution 15 times instead of 14. As presented below, STARFISH displays a lower detection rate."
    ]
   },
   {
@@ -318,7 +318,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "bench = pd.read_csv('https://dmf0bdeheu4zf.cloudfront.net/MERFISH/benchmark_results.csv', \n",
+    "bench = pd.read_csv('https://dmf0bdeheu4zf.cloudfront.net/MERFISH/benchmark_results.csv',\n",
     "                    dtype = {'barcode':object})\n",
     "\n",
     "benchmark_counts = bench.groupby('gene')['gene'].count()\n",