Skip to content

Commit

Permalink
Merge pull request #221 from RichieHakim/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
RichieHakim authored Apr 10, 2024
2 parents 53d6937 + 628873a commit 3ee0b40
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 67 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N'\n",
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0916N/'\n",
"\n",
"pathSuffixToStat = 'stat.npy'\n",
"pathSuffixToOps = 'ops.npy'\n",
Expand Down Expand Up @@ -355,7 +355,7 @@
" data=emb,\n",
" idx_images_overlay=idx_images_overlay,\n",
" images_overlay=images_overlay[:, 6:30][:,:,6:30],\n",
" size_images_overlay=0.4,\n",
" size_images_overlay=0.35,\n",
" frac_overlap_allowed=0.5,\n",
" figsize=(1200,1200),\n",
" alpha_points=1.0,\n",
Expand Down Expand Up @@ -453,7 +453,8 @@
"The results file can be opened using any of the following methods:\n",
"1. `roicat.helpers.pickle_load(path)`\n",
"2. `np.load(path)`\n",
"3. ```\n",
"3. \n",
"```\n",
" import pickle\n",
" with open(path_save, mode='rb') as f:\n",
" test = pickle.load(f)\n",
Expand All @@ -467,7 +468,7 @@
"metadata": {},
"outputs": [],
"source": [
"mouse = 'mouse_0322N'"
"mouse = 'mouse_0916N'"
]
},
{
Expand Down
35 changes: 17 additions & 18 deletions notebooks/jupyter/tracking/tracking_interactive_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"metadata": {},
"source": [
"In this example we are using suite2p output files, but other data types can be used (CaImAn, etc.) \\\n",
"See the notebook on ingesting diverse data: https://github.com/RichieHakim/ROICaT/blob/main/notebooks/jupyter/other/demo_custom_data_importing.ipynb\n",
"See the notebook on ingesting diverse data: https://github.com/RichieHakim/ROICaT/blob/main/notebooks/jupyter/other/demo_data_importing.ipynb\n",
"\n",
"Make a list containing the paths to all the input files.\n",
"\n",
Expand All @@ -127,7 +127,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N/stat_and_ops/'\n",
"dir_allOuterFolders = r'/media/rich/bigSSD/analysis_data/face_rhythm/mouse_2_6/stat_and_ops/'\n",
"\n",
"pathSuffixToStat = 'stat.npy'\n",
"pathSuffixToOps = 'ops.npy'\n",
Expand Down Expand Up @@ -178,7 +178,6 @@
" new_or_old_suite2p='new',\n",
" type_meanImg='meanImgE',\n",
"# FOV_images=FOVs_mixed,\n",
"\n",
" verbose=True,\n",
")\n",
"\n",
Expand Down Expand Up @@ -691,7 +690,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "88fc1ff7-1379-4d49-826f-ac22e188d7f6",
"id": "3fbfaa15",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -704,8 +703,8 @@
")\n",
"\n",
"kwargs_makeConjunctiveDistanceMatrix_best = clusterer.find_optimal_parameters_for_pruning(\n",
" n_bins=None, ## Number of bins to use for the histograms of the distributions\n",
" smoothing_window_bins=None, ## Number of bins to use to smooth the distributions\n",
" n_bins=None, ## Number of bins to use for the histograms of the distributions. If None, then a heuristic is used.\n",
" smoothing_window_bins=None, ## Number of bins to use to smooth the distributions. If None, then a heuristic is used.\n",
" kwargs_findParameters={\n",
"        'n_patience': 300, ## Number of optimization epochs to wait for tol_frac to converge\n",
" 'tol_frac': 0.001, ## Fractional change below which optimization will conclude\n",
Expand All @@ -728,7 +727,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5af66e1d-bbaa-48b4-992d-c1662d9ead68",
"id": "df8f4741",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -760,7 +759,7 @@
},
{
"cell_type": "markdown",
"id": "a0a19d6c",
"id": "d2511098",
"metadata": {},
"source": [
"##### 2. Prune the distance matrix\n",
Expand All @@ -774,14 +773,14 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0ef7ab14-cadf-4f64-899a-00980e6bee0e",
"id": "bed653db",
"metadata": {},
"outputs": [],
"source": [
"clusterer.make_pruned_similarity_graphs(\n",
" d_cutoff=None, ## Optionally manually specify a distance cutoff\n",
" kwargs_makeConjunctiveDistanceMatrix=kwargs_mcdm_tmp,\n",
" stringency=1.0, ## \n",
" stringency=1.0, ## Modifies the threshold for pruning the distance matrix. Higher values result in LESS pruning. New d_cutoff = stringency * truncated d_cutoff.\n",
" convert_to_probability=False, \n",
")"
]
Expand All @@ -807,7 +806,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "2729208f-d551-4509-922e-8649afcb90b7",
"id": "9c8e872c",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -846,7 +845,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "dc074a72-b6a4-4899-9321-ef97731724e1",
"id": "d91c8543",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -868,7 +867,7 @@
"metadata": {},
"source": [
"1. Make different versions of the labels for convenience.\n",
"2. Put all the useful results and info into a dictionary to save later\n",
"2. Put all the useful results and info into a dictionary to save later. ADJUST THIS ANY WAY YOU WANT.\n",
"3. Put all the class objects from the run into a dictionary to save later"
]
},
Expand Down Expand Up @@ -934,7 +933,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "77bb3272",
"id": "6e9a72ab",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -960,7 +959,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "1e2b6166-7231-4641-972a-b4984f2cb07e",
"id": "88dcdef4",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -988,7 +987,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "85dc3e4d-aacc-4ef9-8d15-ff963e7067cc",
"id": "74359fc8",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -1028,7 +1027,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a4bf1634-f14e-42b1-87e7-df3f80207833",
"id": "75c0d8b2",
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -1078,7 +1077,7 @@
"metadata": {},
"outputs": [],
"source": [
"dir_save = Path('/media/rich/bigSSD/analysis_data/face_rhythm/mouse_0322N').resolve()\n",
"dir_save = Path('/media/rich/bigSSD/analysis_data/face_rhythm/mouse_2_6').resolve()\n",
"name_save = Path(dir_allOuterFolders).resolve().name\n",
"\n",
"path_save = dir_save / (name_save + '.ROICaT.tracking.results' + '.pkl')\n",
Expand Down
32 changes: 16 additions & 16 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,32 @@ hdbscan==0.8.33
holoviews[recommended]==1.18.3
jupyter==1.0.0
kymatio==0.3.0
matplotlib==3.8.3
matplotlib==3.8.4
natsort==8.4.0
numpy==1.26.4
opencv_contrib_python<=4.9.0.80
optuna==3.5.0
Pillow==10.2.0
pytest==8.0.2
scikit_learn==1.4.1.post1
scipy==1.12.0
optuna==3.6.1
Pillow==10.3.0
pytest==8.1.1
scikit_learn==1.4.2
scipy==1.13.0
seaborn==0.13.2
sparse==0.15.1
tqdm==4.66.2
umap_learn==0.5.5
umap_learn==0.5.6
xxhash==3.4.1
bokeh==3.3.4
bokeh==3.4.0
psutil==5.9.8
py_cpuinfo==9.0.0
GPUtil==1.4.0
PyYAML==6.0.1
mat73==0.62
torch==2.2.1
torchvision==0.17.1
torchaudio==2.2.1
selenium==4.18.1
mat73==0.63
torch==2.2.2
torchvision==0.17.2
torchaudio==2.2.2
selenium==4.19.0
skl2onnx==1.16.0
onnx==1.15.0
onnx==1.16.0
onnxruntime==1.17.1
jupyter_bokeh==4.0.0
onnx2torch==1.5.13
jupyter_bokeh==4.0.1
onnx2torch==1.5.14
2 changes: 1 addition & 1 deletion roicat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
for pkg in __all__:
exec('from . import ' + pkg)

__version__ = '1.1.36'
__version__ = '1.1.37'
6 changes: 4 additions & 2 deletions roicat/data_importing.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,8 +808,10 @@ class Data_suite2p(Data_roicat):
Type of suite2p output files. Matlab=old, Python=new. Should be:
``'new'`` or ``'old'``.
out_height_width (tuple of int):
Height and width of output ROI images. Should be: *(int, int)* *(y,
x)*.
Height and width of output ROI images. These are the little images
of centered ROIs that are typically used for passing through the
neural net. Unless your ROIs are larger than the default size, it's
best to just leave it as default. Should be: *(int, int)* *(y, x)*.
type_meanImg (str):
Type of mean image to use. Should be: ``'meanImgE'`` or
``'meanImg'``.
Expand Down
75 changes: 51 additions & 24 deletions roicat/tracking/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class Clusterer(util.ROICaT_Module):
The similarity matrix for session similarity. Shape: *(n_rois,
n_rois)*. Boolean, with 1s where the two ROIs are from different
sessions.
n_bins (int):
Number of bins to use for the pairwise similarity distribution. If
using automatic parameter finding, then using a large number of bins
makes finding the separation point more noisy, and only slightly
more accurate. If ``None``, then a heuristic is used to estimate the
value based on the number of ROIs. (Default is ``50``)
smoothing_window_bins (int):
Number of bins to use when smoothing the distribution. Using a small
number of bins makes finding the separation point more noisy, and
only slightly more accurate. Aim for 5-10% of the number of bins. If
``None``, then a heuristic is used. (Default is ``5``)
verbose (bool):
Specifies whether to print out information about the clustering
process. (Default is ``True``)
Expand All @@ -65,19 +76,28 @@ class Clusterer(util.ROICaT_Module):
s_sesh (scipy.sparse.csr_matrix):
The similarity matrix for session similarity. It is symmetric and
has a shape of *(n_rois, n_rois)*.
s_sesh_inv (scipy.sparse.csr_matrix):
The inverse of the session similarity matrix. It is symmetric and
has a shape of *(n_rois, n_rois)*.
        n_bins (Optional[int]):
            Number of bins to use for the pairwise similarity distribution.
        smoothing_window_bins (Optional[int]):
            Number of bins to use when smoothing the distribution.
verbose (bool):
Specifies how much information to print out:
0/False: Warnings only
1/True: Basic info, progress bar
2: All info
Specifies how much information to print out: \n
* 0/False: Warnings only
* 1/True: Basic info, progress bar
* 2: All info
"""
def __init__(
self,
s_sf=None,
s_NN_z=None,
s_SWT_z=None,
s_sesh=None,
verbose=True,
s_sf: Optional[scipy.sparse.csr_matrix] = None,
s_NN_z: Optional[scipy.sparse.csr_matrix] = None,
s_SWT_z: Optional[scipy.sparse.csr_matrix] = None,
s_sesh: Optional[scipy.sparse.csr_matrix] = None,
n_bins: Optional[int] = None,
smoothing_window_bins: Optional[int] = None,
verbose: bool = True,
):
"""
Initializes the Clusterer with the given similarity matrices and verbosity setting.
Expand All @@ -103,10 +123,12 @@ def __init__(

self._verbose = verbose

self.n_bins = max(min(self.s_sf.nnz // 10000, 200), 20) if n_bins is None else n_bins
self.smooth_window = self.n_bins // 10 if smoothing_window_bins is None else smoothing_window_bins
# print(f'Pruning similarity graphs with {self.n_bins} bins and smoothing window {smoothing_window}...') if self._verbose else None

def find_optimal_parameters_for_pruning(
self,
n_bins: int = 50,
smoothing_window_bins: int = 5,
kwargs_findParameters: Dict[str, Union[int, float, bool]] = {
'n_patience': 100,
'tol_frac': 0.05,
Expand All @@ -124,6 +146,8 @@ def find_optimal_parameters_for_pruning(
'sig_SWT_kwargs_b': (0.05, 2),
},
n_jobs_findParameters: int = -1,
n_bins: Optional[int] = None,
smoothing_window_bins: Optional[int] = None,
seed=None,
) -> Dict:
"""
Expand All @@ -143,22 +167,25 @@ def find_optimal_parameters_for_pruning(
RH 2023
Args:
n_bins (int):
Number of bins to use when estimating the distributions. Using a
large number of bins makes finding the separation point more
noisy, and only slightly more accurate. (Default is ``50``)
smoothing_window_bins (int):
Number of bins to use when smoothing the distributions. Using a
small number of bins makes finding the separation point more
noisy, and only slightly more accurate. Aim for 5-10% of the
number of bins. (Default is ``5``)
kwargs_findParameters (Dict[str, Union[int, float, bool]]):
Keyword arguments for the Convergence_checker class __init__.
bounds_findParameters (Dict[str, Tuple[float, float]]):
Bounds for the parameters to be optimized.
n_jobs_findParameters (int):
Number of jobs to use when finding the optimal parameters. If
-1, use all available cores.
            n_bins (Optional[int]):
Overwrites ``n_bins`` specified in __init__. \n
Number of bins to use when estimating the distributions. Using a
large number of bins makes finding the separation point more
noisy, and only slightly more accurate. (Default is ``None`` or
``50``)
            smoothing_window_bins (Optional[int]):
Overwrites ``smoothing_window_bins`` specified in __init__. \n
Number of bins to use when smoothing the distributions. Using a
small number of bins makes finding the separation point more
noisy, and only slightly more accurate. Aim for 5-10% of the
number of bins. (Default is ``None`` or ``5``)
seed (int):
Seed for the random number generator in the optuna sampler.
None: use a random seed.
Expand All @@ -170,15 +197,15 @@ def find_optimal_parameters_for_pruning(
self.make_conjunctive_distance_matrix function.
"""
import optuna

self.n_bins = self.n_bins if n_bins is None else n_bins
self.smoothing_window_bins = self.smooth_window if smoothing_window_bins is None else smoothing_window_bins

self.bounds_findParameters = bounds_findParameters

self._seed = seed
np.random.seed(self._seed)

self.n_bins = max(min(self.s_sf.nnz // 30000, 1000), 30) if n_bins is None else n_bins
self.smooth_window = self.n_bins // 10 if smoothing_window_bins is None else smoothing_window_bins
# print(f'Pruning similarity graphs with {self.n_bins} bins and smoothing window {smoothing_window}...') if self._verbose else None

print('Finding mixing parameters using automated hyperparameter tuning...') if self._verbose else None
optuna.logging.set_verbosity(optuna.logging.WARNING)
self.checker = helpers.Convergence_checker_optuna(verbose=self._verbose>=2, **kwargs_findParameters)
Expand Down
Loading

0 comments on commit 3ee0b40

Please sign in to comment.