From c752761416c88c52cbd4bc6cb7108dbe4e057f9b Mon Sep 17 00:00:00 2001 From: Anwai Archit Date: Fri, 7 Jun 2024 11:13:49 +0200 Subject: [PATCH 1/6] Add first draft of loaders --- scripts/misc/get_loaders_for_lora.py | 126 +++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 scripts/misc/get_loaders_for_lora.py diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py new file mode 100644 index 00000000..02552fae --- /dev/null +++ b/scripts/misc/get_loaders_for_lora.py @@ -0,0 +1,126 @@ +import os + +from torch_em.util.debug import check_loader +from torch_em.data import MinInstanceSampler +from torch_em.data.datasets import light_microscopy, electron_microscopy + + +ROOT = "/media/anwai/ANWAI/data/" + + +def _fetch_loaders(dataset_name): + if dataset_name == "covid_if": + # 1, Covid IF does ot have internal splits. For this example I chose first 10 samples for training, + # and next 3 samples for validation, left the rest for testing. + train_loader = light_microscopy.get_covid_if_loader( + path=os.path.join(ROOT, "covid_if"), + patch_shape=(512, 512), + batch_size=2, + sample_range=(None, 10), + target="cells", + num_workers=16, + shuffle=True, + download=True, + ) + val_loader = light_microscopy.get_covid_if_loader( + path=os.path.join(ROOT, "covid_if"), + patch_shape=(512, 512), + batch_size=1, + sample_range=(10, 13), + target="cells", + num_workers=16, + download=True, + ) + + elif dataset_name == "orgasegment": + # 2. OrgaSegment has internal splits provided. We follow the respective splits for our experiments. + train_loader = light_microscopy.get_orgasegment_loader( + path=os.path.join(ROOT, "orgasegment"), + patch_shape=(512, 512), + split="train", + batch_size=2, + num_workers=16, + shuffle=True, + download=True, + ) + val_loader = light_microscopy.get_orgasegment_loader( + path=os.path.join(ROOT, "orgasegment"), + patch_shape=(512, 512), + split="val", + batch_size=1, + num_workers=16, + download=True, + ) + + elif dataset_name == "mouse_embryo": + # 3. Mouse Embryo + # TODO: @AA: one particular volume seens to have annotations for the bg, need to investigate this. + # TODO: make roi splits in favor of using "val" for testing purposes. + train_loader = light_microscopy.get_mouse_embryo_loader( + path=os.path.join(ROOT, "mouse_embryo"), + name="membrane", + split="train", + patch_shape=(1, 512, 512), + batch_size=1, + download=True, + num_workers=16, + shuffle=True, + sampler=MinInstanceSampler(min_num_instances=3) + ) + val_loader = light_microscopy.get_mouse_embryo_loader( + path=os.path.join(ROOT, "mouse_embryo"), + name="membrane", + split="train", + patch_shape=(1, 512, 512), + batch_size=1, + download=True, + num_workers=16, + sampler=MinInstanceSampler(min_num_instances=3) + ) + + elif dataset_name == "mitolab_glycotic_muscle": + # 4. This dataset would need aspera-cli to be installed, I'll provide you with this data + # TODO: @AA: test this and check this one out. + loader = electron_microscopy.cem.get_benchmark_loader( + path=os.path.join(ROOT, "mitolab"), + dataset_id=2, + batch_size=2, + patch_shape=(1, 512, 512), + download=True, + num_workers=16, + shuffle=True, + ) + train_loader = loader + val_loader = loader + + elif dataset_name == "platy_cilia": + # 5. Platynereis (Cilia) + # TODO: @AA: check this + loader = electron_microscopy.get_platynereis_cilia_loader( + path=os.path.join(ROOT, "platynereis"), + patch_shape=(1, 512, 512), + ndim=2, + batch_size=2, + sample_ids=..., + rois=..., + download=True, + num_workers=16, + shuffle=True, + ) + + return train_loader, val_loader + + +def _verify_loaders(): + dataset_name = "mouse_embryo" + + train_loader, val_loader = _fetch_loaders(dataset_name=dataset_name) + + # NOTE: if using on the cluster, napari visualization won't work with "check_loader". + # turn "plt=True" and provide path to save the matplotlib outputs of the loader. + check_loader(train_loader, 8) + check_loader(val_loader, 8) + + +if __name__ == "__main__": + _verify_loaders() From 49cad2e81b791a1506d784d40a4f83a9c1a03eda Mon Sep 17 00:00:00 2001 From: anwai98 Date: Mon, 10 Jun 2024 09:41:20 +0200 Subject: [PATCH 2/6] Add mouse embryo train-val rois from the train split --- scripts/misc/get_loaders_for_lora.py | 52 +++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py index 02552fae..bc1d3a2f 100644 --- a/scripts/misc/get_loaders_for_lora.py +++ b/scripts/misc/get_loaders_for_lora.py @@ -1,16 +1,21 @@ import os +from glob import glob +from pathlib import Path + +import h5py +import numpy as np from torch_em.util.debug import check_loader from torch_em.data import MinInstanceSampler from torch_em.data.datasets import light_microscopy, electron_microscopy -ROOT = "/media/anwai/ANWAI/data/" +ROOT = "/scratch/projects/nim00007/sam/data" def _fetch_loaders(dataset_name): if dataset_name == "covid_if": - # 1, Covid IF does ot have internal splits. For this example I chose first 10 samples for training, + # 1, Covid IF does not have internal splits. For this example I chose first 10 samples for training, # and next 3 samples for validation, left the rest for testing. train_loader = light_microscopy.get_covid_if_loader( path=os.path.join(ROOT, "covid_if"), @@ -52,12 +57,24 @@ def _fetch_loaders(dataset_name): download=True, ) - elif dataset_name == "mouse_embryo": + elif dataset_name == "mouse-embryo": # 3. Mouse Embryo - # TODO: @AA: one particular volume seens to have annotations for the bg, need to investigate this. - # TODO: make roi splits in favor of using "val" for testing purposes. + + train_rois = { + "fused_paral_tp00073_raw_nuclei_membrane_crop_label_corrected_postprocessed_crop": np.s_[0:100, :, :], + "ginst_membrane_filt_E2_last_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :], + "ginst_membrane_filt_E3_first_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :], + "ginst_membrane_filt_E3_last_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :] + } + val_rois = { + "fused_paral_tp00073_raw_nuclei_membrane_crop_label_corrected_postprocessed_crop": np.s_[100:, :, :], + "ginst_membrane_filt_E2_last_predictions_gasp_average_raw_corrected": np.s_[100:, :, :], + "ginst_membrane_filt_E3_first_predictions_gasp_average_raw_corrected": np.s_[100:, :, :], + "ginst_membrane_filt_E3_last_predictions_gasp_average_raw_corrected": np.s_[100:, :, :] + } + train_loader = light_microscopy.get_mouse_embryo_loader( - path=os.path.join(ROOT, "mouse_embryo"), + path=os.path.join(ROOT, "mouse-embryo"), name="membrane", split="train", patch_shape=(1, 512, 512), @@ -65,7 +82,8 @@ def _fetch_loaders(dataset_name): download=True, num_workers=16, shuffle=True, - sampler=MinInstanceSampler(min_num_instances=3) + sampler=MinInstanceSampler(min_num_instances=3), + rois=train_rois, ) val_loader = light_microscopy.get_mouse_embryo_loader( path=os.path.join(ROOT, "mouse_embryo"), @@ -75,7 +93,8 @@ def _fetch_loaders(dataset_name): batch_size=1, download=True, num_workers=16, - sampler=MinInstanceSampler(min_num_instances=3) + sampler=MinInstanceSampler(min_num_instances=3), + rois=val_rois, ) elif dataset_name == "mitolab_glycotic_muscle": @@ -108,6 +127,9 @@ def _fetch_loaders(dataset_name): shuffle=True, ) + else: + raise ValueError(f"{dataset_name} is not a valid dataset name.") + return train_loader, val_loader @@ -122,5 +144,17 @@ def _verify_loaders(): check_loader(val_loader, 8) +def _check_samples(): + all_volpaths = sorted(glob(os.path.join(ROOT, "mouse-embryo", "Membrane", "train", "*.h5"))) + + for volpath in all_volpaths: + with h5py.File(volpath, "r") as f: + raw = f["raw"][:] + labels = f["label"][:] + + print(raw.shape, Path(volpath).stem) + + if __name__ == "__main__": - _verify_loaders() + # _verify_loaders() + _check_samples() From 171882d73daf2811c6dea5165224965889ec026f Mon Sep 17 00:00:00 2001 From: Anwai Archit Date: Mon, 10 Jun 2024 09:56:06 +0200 Subject: [PATCH 3/6] Finalize mouse embryo dataset --- scripts/misc/get_loaders_for_lora.py | 41 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py index bc1d3a2f..0b320731 100644 --- a/scripts/misc/get_loaders_for_lora.py +++ b/scripts/misc/get_loaders_for_lora.py @@ -10,7 +10,8 @@ from torch_em.data.datasets import light_microscopy, electron_microscopy -ROOT = "/scratch/projects/nim00007/sam/data" +# ROOT = "/scratch/projects/nim00007/sam/data" +ROOT = "/media/anwai/ANWAI/data/" def _fetch_loaders(dataset_name): @@ -59,19 +60,11 @@ def _fetch_loaders(dataset_name): elif dataset_name == "mouse-embryo": # 3. Mouse Embryo - - train_rois = { - "fused_paral_tp00073_raw_nuclei_membrane_crop_label_corrected_postprocessed_crop": np.s_[0:100, :, :], - "ginst_membrane_filt_E2_last_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :], - "ginst_membrane_filt_E3_first_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :], - "ginst_membrane_filt_E3_last_predictions_gasp_average_raw_corrected": np.s_[0:100, :, :] - } - val_rois = { - "fused_paral_tp00073_raw_nuclei_membrane_crop_label_corrected_postprocessed_crop": np.s_[100:, :, :], - "ginst_membrane_filt_E2_last_predictions_gasp_average_raw_corrected": np.s_[100:, :, :], - "ginst_membrane_filt_E3_first_predictions_gasp_average_raw_corrected": np.s_[100:, :, :], - "ginst_membrane_filt_E3_last_predictions_gasp_average_raw_corrected": np.s_[100:, :, :] - } + # the logic used here is: I use the first 100 slices per volume from the training split for training + # and the next ~20/30 slices per volume from the training split for validation + # and we use the whole volume from the val set for testing + train_rois = [np.s_[0:100, :, :], np.s_[0:100, :, :], np.s_[0:100, :, :], np.s_[0:100, :, :]] + val_rois = [np.s_[100:, :, :], np.s_[100:, :, :], np.s_[100:, :, :], np.s_[100:, :, :]] train_loader = light_microscopy.get_mouse_embryo_loader( path=os.path.join(ROOT, "mouse-embryo"), @@ -86,7 +79,7 @@ def _fetch_loaders(dataset_name): rois=train_rois, ) val_loader = light_microscopy.get_mouse_embryo_loader( - path=os.path.join(ROOT, "mouse_embryo"), + path=os.path.join(ROOT, "mouse-embryo"), name="membrane", split="train", patch_shape=(1, 512, 512), @@ -134,27 +127,33 @@ def _fetch_loaders(dataset_name): def _verify_loaders(): - dataset_name = "mouse_embryo" + dataset_name = "mouse-embryo" train_loader, val_loader = _fetch_loaders(dataset_name=dataset_name) # NOTE: if using on the cluster, napari visualization won't work with "check_loader". # turn "plt=True" and provide path to save the matplotlib outputs of the loader. - check_loader(train_loader, 8) + # check_loader(train_loader, 8) check_loader(val_loader, 8) def _check_samples(): - all_volpaths = sorted(glob(os.path.join(ROOT, "mouse-embryo", "Membrane", "train", "*.h5"))) + all_volpaths = sorted(glob(os.path.join(ROOT, "mouse-embryo", "Membrane", "val", "*.h5"))) for volpath in all_volpaths: with h5py.File(volpath, "r") as f: raw = f["raw"][:] labels = f["label"][:] - print(raw.shape, Path(volpath).stem) + fname = Path(volpath).stem + + import napari + v = napari.Viewer() + v.add_image(raw, name=fname) + v.add_labels(labels) + napari.run() if __name__ == "__main__": - # _verify_loaders() - _check_samples() + _verify_loaders() + # _check_samples() From 725818e21657500a0c56b169c969be475fdcfd2c Mon Sep 17 00:00:00 2001 From: Anwai Archit Date: Mon, 10 Jun 2024 10:56:10 +0200 Subject: [PATCH 4/6] Add platy cilia roi splits --- scripts/misc/get_loaders_for_lora.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py index 0b320731..0462e715 100644 --- a/scripts/misc/get_loaders_for_lora.py +++ b/scripts/misc/get_loaders_for_lora.py @@ -107,18 +107,32 @@ def _fetch_loaders(dataset_name): elif dataset_name == "platy_cilia": # 5. Platynereis (Cilia) - # TODO: @AA: check this - loader = electron_microscopy.get_platynereis_cilia_loader( + train_rois = { + 1: np.s_[0:100, :, :], 2: np.s_[0:100, :, :], 3: np.s_[0:100, :, :] + } + val_rois = { + 1: np.s_[100:, :, :], 2: np.s_[100:, :, :], 3: np.s_[100:, :, :] + } + + train_loader = electron_microscopy.get_platynereis_cilia_loader( path=os.path.join(ROOT, "platynereis"), patch_shape=(1, 512, 512), ndim=2, batch_size=2, - sample_ids=..., - rois=..., + rois=train_rois, download=True, num_workers=16, shuffle=True, ) + val_loader = electron_microscopy.get_platynereis_cilia_loader( + path=os.path.join(ROOT, "platynereis"), + patch_shape=(1, 512, 512), + ndim=2, + batch_size=2, + rois=val_rois, + download=True, + num_workers=16, + ) else: raise ValueError(f"{dataset_name} is not a valid dataset name.") @@ -127,7 +141,7 @@ def _fetch_loaders(dataset_name): def _verify_loaders(): - dataset_name = "mouse-embryo" + dataset_name = "platy_cilia" train_loader, val_loader = _fetch_loaders(dataset_name=dataset_name) From 54d95074b0846dd394040f3d4f08eae841d62674 Mon Sep 17 00:00:00 2001 From: Anwai Archit Date: Mon, 10 Jun 2024 13:54:47 +0200 Subject: [PATCH 5/6] Minor fix to platynereis cilia dataset --- scripts/misc/get_loaders_for_lora.py | 34 +++++-------------- .../electron_microscopy/platynereis.py | 1 + 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py index 0462e715..8e92cfde 100644 --- a/scripts/misc/get_loaders_for_lora.py +++ b/scripts/misc/get_loaders_for_lora.py @@ -1,8 +1,5 @@ import os -from glob import glob -from pathlib import Path -import h5py import numpy as np from torch_em.util.debug import check_loader @@ -107,11 +104,14 @@ def _fetch_loaders(dataset_name): elif dataset_name == "platy_cilia": # 5. Platynereis (Cilia) + # the logic used here is: I use the first 85 slices per volume from the training split for training + # and the next ~10-15 slices per volume from the training split for validation + # and we use the whole volume from the val set for testing train_rois = { - 1: np.s_[0:100, :, :], 2: np.s_[0:100, :, :], 3: np.s_[0:100, :, :] + 1: np.s_[0:85, :, :], 2: np.s_[0:85, :, :], 3: np.s_[0:85, :, :] } val_rois = { - 1: np.s_[100:, :, :], 2: np.s_[100:, :, :], 3: np.s_[100:, :, :] + 1: np.s_[85:, :, :], 2: np.s_[85:, :, :], 3: np.s_[85:, :, :] } train_loader = electron_microscopy.get_platynereis_cilia_loader( @@ -123,15 +123,17 @@ def _fetch_loaders(dataset_name): download=True, num_workers=16, shuffle=True, + sampler=MinInstanceSampler(), ) val_loader = electron_microscopy.get_platynereis_cilia_loader( path=os.path.join(ROOT, "platynereis"), patch_shape=(1, 512, 512), ndim=2, - batch_size=2, + batch_size=1, rois=val_rois, download=True, num_workers=16, + sampler=MinInstanceSampler(), ) else: @@ -147,27 +149,9 @@ def _verify_loaders(): # NOTE: if using on the cluster, napari visualization won't work with "check_loader". # turn "plt=True" and provide path to save the matplotlib outputs of the loader. - # check_loader(train_loader, 8) + check_loader(train_loader, 8) check_loader(val_loader, 8) -def _check_samples(): - all_volpaths = sorted(glob(os.path.join(ROOT, "mouse-embryo", "Membrane", "val", "*.h5"))) - - for volpath in all_volpaths: - with h5py.File(volpath, "r") as f: - raw = f["raw"][:] - labels = f["label"][:] - - fname = Path(volpath).stem - - import napari - v = napari.Viewer() - v.add_image(raw, name=fname) - v.add_labels(labels) - napari.run() - - if __name__ == "__main__": _verify_loaders() - # _check_samples() diff --git a/torch_em/data/datasets/electron_microscopy/platynereis.py b/torch_em/data/datasets/electron_microscopy/platynereis.py index cfe8a66b..342bad86 100644 --- a/torch_em/data/datasets/electron_microscopy/platynereis.py +++ b/torch_em/data/datasets/electron_microscopy/platynereis.py @@ -193,6 +193,7 @@ def get_platynereis_cilia_dataset( raw_key = "volumes/raw" label_key = "volumes/labels/segmentation" + kwargs = util.update_kwargs(kwargs, "rois", rois) kwargs, _ = util.add_instance_label_transform( kwargs, add_binary_target=True, boundaries=boundaries, offsets=offsets, binary=binary, ) From 62f61da3ee808a72bd4b2c34ca26419ffb1a0048 Mon Sep 17 00:00:00 2001 From: anwai98 Date: Mon, 10 Jun 2024 14:14:18 +0200 Subject: [PATCH 6/6] Add mitolab glycolytic loader --- scripts/misc/get_loaders_for_lora.py | 40 +++++++++++++++++++--------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/scripts/misc/get_loaders_for_lora.py b/scripts/misc/get_loaders_for_lora.py index 8e92cfde..5ccfa70c 100644 --- a/scripts/misc/get_loaders_for_lora.py +++ b/scripts/misc/get_loaders_for_lora.py @@ -7,8 +7,8 @@ from torch_em.data.datasets import light_microscopy, electron_microscopy -# ROOT = "/scratch/projects/nim00007/sam/data" -ROOT = "/media/anwai/ANWAI/data/" +ROOT = "/scratch/projects/nim00007/sam/data" +# ROOT = "/media/anwai/ANWAI/data/" def _fetch_loaders(dataset_name): @@ -87,20 +87,34 @@ def _fetch_loaders(dataset_name): rois=val_rois, ) - elif dataset_name == "mitolab_glycotic_muscle": + elif dataset_name == "mitolab_glycolytic_muscle": # 4. This dataset would need aspera-cli to be installed, I'll provide you with this data - # TODO: @AA: test this and check this one out. - loader = electron_microscopy.cem.get_benchmark_loader( + # ... + train_rois = np.s_[0:175, :, :] + val_rois = np.s_[175:225, :, :] + test_rois = np.s_[225:, :, :] + train_loader = electron_microscopy.cem.get_benchmark_loader( path=os.path.join(ROOT, "mitolab"), - dataset_id=2, + dataset_id=3, batch_size=2, patch_shape=(1, 512, 512), - download=True, + download=False, + num_workers=16, + shuffle=True, + sampler=MinInstanceSampler(), + rois=train_rois, + ) + val_loader = electron_microscopy.cem.get_benchmark_loader( + path=os.path.join(ROOT, "mitolab"), + dataset_id=3, + batch_size=2, + patch_shape=(1, 512, 512), + download=False, num_workers=16, shuffle=True, + sampler=MinInstanceSampler(), + rois=val_rois, ) - train_loader = loader - val_loader = loader elif dataset_name == "platy_cilia": # 5. Platynereis (Cilia) @@ -143,14 +157,16 @@ def _fetch_loaders(dataset_name): def _verify_loaders(): - dataset_name = "platy_cilia" + dataset_name = "mitolab_glycolytic_muscle" train_loader, val_loader = _fetch_loaders(dataset_name=dataset_name) + breakpoint() + # NOTE: if using on the cluster, napari visualization won't work with "check_loader". # turn "plt=True" and provide path to save the matplotlib outputs of the loader. - check_loader(train_loader, 8) - check_loader(val_loader, 8) + check_loader(train_loader, 8, plt=True, save_path="./train_loader.png") + check_loader(val_loader, 8, plt=True, save_path="./val_loader.png") if __name__ == "__main__":