scanimagetiff_utils

catalystneuro · Sep 28, 2023 · 630e9e5 · 630e9e5
1 parent 8f4f37b
commit 630e9e5
Showing 1 changed file with 193 additions and 0 deletions.
diff --git a/src/roiextractors/extractors/tiffimagingextractors/scanimagetiff_utils.py b/src/roiextractors/extractors/tiffimagingextractors/scanimagetiff_utils.py
@@ -0,0 +1,193 @@
+import numpy as np
+
+from ...extraction_tools import PathType, get_package
+
+
+def _get_scanimage_reader() -> type:
+    """Import the scanimage-tiff-reader package and return the ScanImageTiffReader class."""
+    return get_package(
+        package_name="ScanImageTiffReader", installation_instructions="pip install scanimage-tiff-reader"
+    ).ScanImageTiffReader
+
+
+def extract_extra_metadata(
+    file_path: PathType,
+) -> dict:  # TODO: Refactor neuroconv to reference this implementation to avoid duplication
+    """Extract metadata from a ScanImage TIFF file.
+
+    Parameters
+    ----------
+    file_path : PathType
+        Path to the TIFF file.
+
+    Returns
+    -------
+    extra_metadata: dict
+        Dictionary of metadata extracted from the TIFF file.
+
+    Notes
+    -----
+    Known to work on SI versions v3.8.0, v2019bR0, v2022.0.0, and v2023.0.0
+    """
+    ScanImageTiffReader = _get_scanimage_reader()
+    io = ScanImageTiffReader(str(file_path))
+    extra_metadata = {}
+    for metadata_string in (io.description(iframe=0), io.metadata()):
+        metadata_dict = {
+            x.split("=")[0].strip(): x.split("=")[1].strip()
+            for x in metadata_string.replace("\n", "\r").split("\r")
+            if "=" in x
+        }
+        extra_metadata = dict(**extra_metadata, **metadata_dict)
+    return extra_metadata
+
+
+def parse_matlab_vector(matlab_vector: str) -> list:
+    """Parse a MATLAB vector string into a list of integer values.
+
+    Parameters
+    ----------
+    matlab_vector : str
+        MATLAB vector string.
+
+    Returns
+    -------
+    vector: list of int
+        List of integer values.
+
+    Raises
+    ------
+    ValueError
+        If the MATLAB vector string cannot be parsed.
+
+    Notes
+    -----
+    MATLAB vector string is of the form "[1 2 3 ... N]" or "[1,2,3,...,N]" or "[1;2;3;...;N]".
+    There may or may not be whitespace between the values. Ex. "[1, 2, 3]" or "[1,2,3]".
+    """
+    vector = matlab_vector.strip("[]")
+    if ";" in vector:
+        vector = vector.split(";")
+    elif "," in vector:
+        vector = vector.split(",")
+    elif " " in vector:
+        vector = vector.split(" ")
+    elif len(vector) == 1:
+        pass
+    else:
+        raise ValueError(f"Could not parse vector from {matlab_vector}.")
+    vector = [int(x.strip()) for x in vector if x != ""]
+    return vector
+
+
+def parse_metadata(metadata: dict) -> dict:
+    """Parse metadata dictionary to extract relevant information and store it standard keys for ImagingExtractors.
+
+    Currently supports
+    - sampling_frequency
+    - num_planes
+    - frames_per_slice
+    - channel_names
+    - num_channels
+
+    Parameters
+    ----------
+    metadata : dict
+        Dictionary of metadata extracted from the TIFF file.
+
+    Returns
+    -------
+    metadata_parsed: dict
+        Dictionary of parsed metadata.
+
+    Notes
+    -----
+    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
+    SI.hChannels.channelsActive = string of MATLAB-style vector with channel integers (see parse_matlab_vector).
+    SI.hChannels.channelName = "{'channel_name_1' 'channel_name_2' ... 'channel_name_M'}"
+        where M is the number of channels (active or not).
+    """
+    sampling_frequency = float(metadata["SI.hRoiManager.scanFrameRate"])
+    num_planes = int(metadata["SI.hStackManager.numSlices"])
+    frames_per_slice = int(metadata["SI.hStackManager.framesPerSlice"])
+    active_channels = parse_matlab_vector(metadata["SI.hChannels.channelsActive"])
+    channel_indices = np.array(active_channels) - 1  # Account for MATLAB indexing
+    channel_names = np.array(metadata["SI.hChannels.channelName"].split("'")[1::2])
+    channel_names = channel_names[channel_indices].tolist()
+    num_channels = len(channel_names)
+    metadata_parsed = dict(
+        sampling_frequency=sampling_frequency,
+        num_channels=num_channels,
+        num_planes=num_planes,
+        frames_per_slice=frames_per_slice,
+        channel_names=channel_names,
+    )
+    return metadata_parsed
+
+
+def parse_metadata_v3_8(metadata: dict) -> dict:
+    """Parse metadata dictionary to extract relevant information and store it standard keys for ImagingExtractors.
+
+    Requires old version of metadata (v3.8).
+    Currently supports
+    - sampling frequency
+    - num_channels
+    - num_planes
+
+    Parameters
+    ----------
+    metadata : dict
+        Dictionary of metadata extracted from the TIFF file.
+
+    Returns
+    -------
+    metadata_parsed: dict
+        Dictionary of parsed metadata.
+    """
+    sampling_frequency = float(metadata["state.acq.frameRate"])
+    num_channels = int(metadata["state.acq.numberOfChannelsSave"])
+    num_planes = int(metadata["state.acq.numberOfZSlices"])
+    metadata_parsed = dict(
+        sampling_frequency=sampling_frequency,
+        num_channels=num_channels,
+        num_planes=num_planes,
+    )
+    return metadata_parsed
+
+
+def extract_timestamps_from_file(file_path: PathType) -> np.ndarray:
+    """Extract the frame timestamps from a ScanImage TIFF file.
+
+    Parameters
+    ----------
+    file_path : PathType
+        Path to the TIFF file.
+
+    Returns
+    -------
+    timestamps : numpy.ndarray
+        Array of frame timestamps in seconds.
+
+    Raises
+    ------
+    AssertionError
+        If the frame timestamps are not found in the TIFF file.
+
+    Notes
+    -----
+    Known to work on SI versions v2019bR0, v2022.0.0, and v2023.0.0. Fails on v3.8.0.
+    """
+    ScanImageTiffReader = _get_scanimage_reader()
+    io = ScanImageTiffReader(str(file_path))
+    assert "frameTimestamps_sec" in io.description(iframe=0), "frameTimestamps_sec not found in TIFF file"
+    num_frames = io.shape()[0]
+    timestamps = np.zeros(num_frames)
+    for iframe in range(num_frames):
+        description = io.description(iframe=iframe)
+        description_lines = description.split("\n")
+        for line in description_lines:
+            if "frameTimestamps_sec" in line:
+                timestamps[iframe] = float(line.split("=")[1].strip())
+                break
+
+    return timestamps