Skip to content

Commit

Permalink
Merge pull request #221 from RichieHakim/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
RichieHakim authored Apr 10, 2024
2 parents 53d6937 + 628873a commit 3ee0b40
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 67 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N'\n",
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0916N/'\n",
"\n",
"pathSuffixToStat = 'stat.npy'\n",
"pathSuffixToOps = 'ops.npy'\n",
Expand Down Expand Up @@ -355,7 +355,7 @@
" data=emb,\n",
" idx_images_overlay=idx_images_overlay,\n",
" images_overlay=images_overlay[:, 6:30][:,:,6:30],\n",
" size_images_overlay=0.4,\n",
" size_images_overlay=0.35,\n",
" frac_overlap_allowed=0.5,\n",
" figsize=(1200,1200),\n",
" alpha_points=1.0,\n",
Expand Down Expand Up @@ -453,7 +453,8 @@
"The results file can be opened using any of the following methods:\n",
"1. `roicat.helpers.pickle_load(path)`\n",
"2. `np.load(path)`\n",
"3. ```\n",
"3. \n",
"```\n",
" import pickle\n",
" with open(path_save, mode='rb') as f:\n",
" test = pickle.load(f)\n",
Expand All @@ -467,7 +468,7 @@
"metadata": {},
"outputs": [],
"source": [
"mouse = 'mouse_0322N'"
"mouse = 'mouse_0916N'"
]
},
{
Expand Down
35 changes: 17 additions & 18 deletions notebooks/jupyter/tracking/tracking_interactive_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"metadata": {},
"source": [
"In this example we are using suite2p output files, but other data types can be used (CaImAn, etc.) \\\n",
"See the notebook on ingesting diverse data: https://github.com/RichieHakim/ROICaT/blob/main/notebooks/jupyter/other/demo_custom_data_importing.ipynb\n",
"See the notebook on ingesting diverse data: https://github.com/RichieHakim/ROICaT/blob/main/notebooks/jupyter/other/demo_data_importing.ipynb\n",
"\n",
"Make a list containing the paths to all the input files.\n",
"\n",
Expand All @@ -127,7 +127,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N/stat_and_ops/'\n",
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_2_6/stat_and_ops/'\n",
"\n",
"pathSuffixToStat = 'stat.npy'\n",
"pathSuffixToOps = 'ops.npy'\n",
Expand Down Expand Up @@ -178,7 +178,6 @@
" new_or_old_suite2p='new',\n",
" type_meanImg='meanImgE',\n",
"# FOV_images=FOVs_mixed,\n",
"\n",
" verbose=True,\n",
")\n",
"\n",
Expand Down Expand Up @@ -691,7 +690,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "88fc1ff7-1379-4d49-826f-ac22e188d7f6",
"id": "3fbfaa15",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -704,8 +703,8 @@
")\n",
"\n",
"kwargs_makeConjunctiveDistanceMatrix_best = clusterer.find_optimal_parameters_for_pruning(\n",
" n_bins=None, ## Number of bins to use for the histograms of the distributions\n",
" smoothing_window_bins=None, ## Number of bins to use to smooth the distributions\n",
" n_bins=None, ## Number of bins to use for the histograms of the distributions. If None, then a heuristic is used.\n",
" smoothing_window_bins=None, ## Number of bins to use to smooth the distributions. If None, then a heuristic is used.\n",
" kwargs_findParameters={\n",
"        'n_patience': 300, ## Number of optimization epochs to wait for tol_frac to converge\n",
" 'tol_frac': 0.001, ## Fractional change below which optimization will conclude\n",
Expand All @@ -728,7 +727,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5af66e1d-bbaa-48b4-992d-c1662d9ead68",
"id": "df8f4741",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -760,7 +759,7 @@
},
{
"cell_type": "markdown",
"id": "a0a19d6c",
"id": "d2511098",
"metadata": {},
"source": [
"##### 2. Prune the distance matrix\n",
Expand All @@ -774,14 +773,14 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0ef7ab14-cadf-4f64-899a-00980e6bee0e",
"id": "bed653db",
"metadata": {},
"outputs": [],
"source": [
"clusterer.make_pruned_similarity_graphs(\n",
" d_cutoff=None, ## Optionally manually specify a distance cutoff\n",
" kwargs_makeConjunctiveDistanceMatrix=kwargs_mcdm_tmp,\n",
" stringency=1.0, ## \n",
" stringency=1.0, ## Modifies the threshold for pruning the distance matrix. Higher values result in LESS pruning. New d_cutoff = stringency * truncated d_cutoff.\n",
" convert_to_probability=False, \n",
")"
]
Expand All @@ -807,7 +806,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "2729208f-d551-4509-922e-8649afcb90b7",
"id": "9c8e872c",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -846,7 +845,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "dc074a72-b6a4-4899-9321-ef97731724e1",
"id": "d91c8543",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -868,7 +867,7 @@
"metadata": {},
"source": [
"1. Make different versions of the labels for convenience.\n",
"2. Put all the useful results and info into a dictionary to save later\n",
"2. Put all the useful results and info into a dictionary to save later. ADJUST THIS ANY WAY YOU WANT.\n",
"3. Put all the class objects from the run into a dictionary to save later"
]
},
Expand Down Expand Up @@ -934,7 +933,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "77bb3272",
"id": "6e9a72ab",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -960,7 +959,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "1e2b6166-7231-4641-972a-b4984f2cb07e",
"id": "88dcdef4",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -988,7 +987,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "85dc3e4d-aacc-4ef9-8d15-ff963e7067cc",
"id": "74359fc8",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -1028,7 +1027,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a4bf1634-f14e-42b1-87e7-df3f80207833",
"id": "75c0d8b2",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -1078,7 +1077,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_save = Path('/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N').resolve()\n",
"dir_save = Path('/media/rich/bigSSD/analysis_data/face_rhythm/mouse_2_6').resolve()\n",
"name_save = Path(dir_allOuterFolders).resolve().name\n",
"\n",
"path_save = dir_save / (name_save + '.ROICaT.tracking.results' + '.pkl')\n",
Expand Down
32 changes: 16 additions & 16 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,32 @@ hdbscan==0.8.33
holoviews[recommended]==1.18.3
jupyter==1.0.0
kymatio==0.3.0
matplotlib==3.8.3
matplotlib==3.8.4
natsort==8.4.0
numpy==1.26.4
opencv_contrib_python<=4.9.0.80
optuna==3.5.0
Pillow==10.2.0
pytest==8.0.2
scikit_learn==1.4.1.post1
scipy==1.12.0
optuna==3.6.1
Pillow==10.3.0
pytest==8.1.1
scikit_learn==1.4.2
scipy==1.13.0
seaborn==0.13.2
sparse==0.15.1
tqdm==4.66.2
umap_learn==0.5.5
umap_learn==0.5.6
xxhash==3.4.1
bokeh==3.3.4
bokeh==3.4.0
psutil==5.9.8
py_cpuinfo==9.0.0
GPUtil==1.4.0
PyYAML==6.0.1
mat73==0.62
torch==2.2.1
torchvision==0.17.1
torchaudio==2.2.1
selenium==4.18.1
mat73==0.63
torch==2.2.2
torchvision==0.17.2
torchaudio==2.2.2
selenium==4.19.0
skl2onnx==1.16.0
onnx==1.15.0
onnx==1.16.0
onnxruntime==1.17.1
jupyter_bokeh==4.0.0
onnx2torch==1.5.13
jupyter_bokeh==4.0.1
onnx2torch==1.5.14
2 changes: 1 addition & 1 deletion roicat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
for pkg in __all__:
exec('from . import ' + pkg)

__version__ = '1.1.36'
__version__ = '1.1.37'
6 changes: 4 additions & 2 deletions roicat/data_importing.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,8 +808,10 @@ class Data_suite2p(Data_roicat):
Type of suite2p output files. Matlab=old, Python=new. Should be:
``'new'`` or ``'old'``.
out_height_width (tuple of int):
Height and width of output ROI images. Should be: *(int, int)* *(y,
x)*.
Height and width of output ROI images. These are the little images
of centered ROIs that are typically used for passing through the
neural net. Unless your ROIs are larger than the default size, it's
best to just leave it as default. Should be: *(int, int)* *(y, x)*.
type_meanImg (str):
Type of mean image to use. Should be: ``'meanImgE'`` or
``'meanImg'``.
Expand Down
75 changes: 51 additions & 24 deletions roicat/tracking/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class Clusterer(util.ROICaT_Module):
The similarity matrix for session similarity. Shape: *(n_rois,
n_rois)*. Boolean, with 1s where the two ROIs are from different
sessions.
n_bins (int):
Number of bins to use for the pairwise similarity distribution. If
using automatic parameter finding, then using a large number of bins
makes finding the separation point more noisy, and only slightly
more accurate. If ``None``, then a heuristic is used to estimate the
value based on the number of ROIs. (Default is ``50``)
smoothing_window_bins (int):
Number of bins to use when smoothing the distribution. Using a small
number of bins makes finding the separation point more noisy, and
only slightly more accurate. Aim for 5-10% of the number of bins. If
``None``, then a heuristic is used. (Default is ``5``)
verbose (bool):
Specifies whether to print out information about the clustering
process. (Default is ``True``)
Expand All @@ -65,19 +76,28 @@ class Clusterer(util.ROICaT_Module):
s_sesh (scipy.sparse.csr_matrix):
The similarity matrix for session similarity. It is symmetric and
has a shape of *(n_rois, n_rois)*.
s_sesh_inv (scipy.sparse.csr_matrix):
The inverse of the session similarity matrix. It is symmetric and
has a shape of *(n_rois, n_rois)*.
        n_bins (Optional[int]):
            Number of bins to use for the pairwise similarity distribution.
        smoothing_window_bins (Optional[int]):
            Number of bins to use when smoothing the distribution.
verbose (bool):
Specifies how much information to print out:
0/False: Warnings only
1/True: Basic info, progress bar
2: All info
Specifies how much information to print out: \n
* 0/False: Warnings only
* 1/True: Basic info, progress bar
* 2: All info
"""
def __init__(
self,
s_sf=None,
s_NN_z=None,
s_SWT_z=None,
s_sesh=None,
verbose=True,
s_sf: Optional[scipy.sparse.csr_matrix] = None,
s_NN_z: Optional[scipy.sparse.csr_matrix] = None,
s_SWT_z: Optional[scipy.sparse.csr_matrix] = None,
s_sesh: Optional[scipy.sparse.csr_matrix] = None,
n_bins: Optional[int] = None,
smoothing_window_bins: Optional[int] = None,
verbose: bool = True,
):
"""
Initializes the Clusterer with the given similarity matrices and verbosity setting.
Expand All @@ -103,10 +123,12 @@ def __init__(

self._verbose = verbose

self.n_bins = max(min(self.s_sf.nnz // 10000, 200), 20) if n_bins is None else n_bins
self.smooth_window = self.n_bins // 10 if smoothing_window_bins is None else smoothing_window_bins
# print(f'Pruning similarity graphs with {self.n_bins} bins and smoothing window {smoothing_window}...') if self._verbose else None

def find_optimal_parameters_for_pruning(
self,
n_bins: int = 50,
smoothing_window_bins: int = 5,
kwargs_findParameters: Dict[str, Union[int, float, bool]] = {
'n_patience': 100,
'tol_frac': 0.05,
Expand All @@ -124,6 +146,8 @@ def find_optimal_parameters_for_pruning(
'sig_SWT_kwargs_b': (0.05, 2),
},
n_jobs_findParameters: int = -1,
n_bins: Optional[int] = None,
smoothing_window_bins: Optional[int] = None,
seed=None,
) -> Dict:
"""
Expand All @@ -143,22 +167,25 @@ def find_optimal_parameters_for_pruning(
RH 2023
Args:
n_bins (int):
Number of bins to use when estimating the distributions. Using a
large number of bins makes finding the separation point more
noisy, and only slightly more accurate. (Default is ``50``)
smoothing_window_bins (int):
Number of bins to use when smoothing the distributions. Using a
small number of bins makes finding the separation point more
noisy, and only slightly more accurate. Aim for 5-10% of the
number of bins. (Default is ``5``)
kwargs_findParameters (Dict[str, Union[int, float, bool]]):
Keyword arguments for the Convergence_checker class __init__.
bounds_findParameters (Dict[str, Tuple[float, float]]):
Bounds for the parameters to be optimized.
n_jobs_findParameters (int):
Number of jobs to use when finding the optimal parameters. If
-1, use all available cores.
            n_bins (Optional[int]):
Overwrites ``n_bins`` specified in __init__. \n
Number of bins to use when estimating the distributions. Using a
large number of bins makes finding the separation point more
noisy, and only slightly more accurate. (Default is ``None`` or
``50``)
            smoothing_window_bins (Optional[int]):
Overwrites ``smoothing_window_bins`` specified in __init__. \n
Number of bins to use when smoothing the distributions. Using a
small number of bins makes finding the separation point more
noisy, and only slightly more accurate. Aim for 5-10% of the
number of bins. (Default is ``None`` or ``5``)
seed (int):
Seed for the random number generator in the optuna sampler.
None: use a random seed.
Expand All @@ -170,15 +197,15 @@ def find_optimal_parameters_for_pruning(
self.make_conjunctive_distance_matrix function.
"""
import optuna

self.n_bins = self.n_bins if n_bins is None else n_bins
self.smoothing_window_bins = self.smooth_window if smoothing_window_bins is None else smoothing_window_bins

self.bounds_findParameters = bounds_findParameters

self._seed = seed
np.random.seed(self._seed)

self.n_bins = max(min(self.s_sf.nnz // 30000, 1000), 30) if n_bins is None else n_bins
self.smooth_window = self.n_bins // 10 if smoothing_window_bins is None else smoothing_window_bins
# print(f'Pruning similarity graphs with {self.n_bins} bins and smoothing window {smoothing_window}...') if self._verbose else None

print('Finding mixing parameters using automated hyperparameter tuning...') if self._verbose else None
optuna.logging.set_verbosity(optuna.logging.WARNING)
self.checker = helpers.Convergence_checker_optuna(verbose=self._verbose>=2, **kwargs_findParameters)
Expand Down
Loading

0 comments on commit 3ee0b40

Please sign in to comment.