From 5f13501a1d51ea65134d5502e4fe51fb4116b3f8 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:38:55 -0800 Subject: [PATCH 01/34] Starting to refactor logo_split --- src/eddymotion/data/dmri.py | 88 +++++++++++++++++++------------------ src/eddymotion/estimator.py | 3 +- 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 0f407bde..133e2ee4 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -39,47 +39,14 @@ def _data_repr(value): return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>" -@attr.s(slots=True) -class DWI: - """Data representation structure for dMRI data.""" - - dataobj = attr.ib(default=None, repr=_data_repr) - """A numpy ndarray object for the data array, without *b=0* volumes.""" - affine = attr.ib(default=None, repr=_data_repr) - """Best affine for RAS-to-voxel conversion of coordinates (NIfTI header).""" - brainmask = attr.ib(default=None, repr=_data_repr) - """A boolean ndarray object containing a corresponding brainmask.""" - bzero = attr.ib(default=None, repr=_data_repr) - """ - A *b=0* reference map, preferably obtained by some smart averaging. - If the :math:`B_0` fieldmap is set, this *b=0* reference map should also - be unwarped. - """ - gradients = attr.ib(default=None, repr=_data_repr) - """A 2D numpy array of the gradient table in RAS+B format.""" - em_affines = attr.ib(default=None) - """ - List of :obj:`nitransforms.linear.Affine` objects that bring - DWIs (i.e., no b=0) into alignment. - """ - fieldmap = attr.ib(default=None, repr=_data_repr) - """A 3D displacements field to unwarp susceptibility distortions.""" - _filepath = attr.ib( - factory=lambda: Path(mkdtemp()) / "em_cache.h5", - repr=False, - ) - """A path to an HDF5 file to store the whole dataset.""" - - def __len__(self): - """Obtain the number of high-*b* orientations.""" - return self.dataobj.shape[-1] - - def logo_split(self, index, with_b0=False): +def logo_split(dwdata, index, with_b0=False): """ Produce one fold of LOGO (leave-one-gradient-out). Parameters ---------- + dwdata : :obj:`DWI` + DWI object index : :obj:`int` Index of the DWI orientation to be left out in this fold. with_b0 : :obj:`bool` @@ -95,25 +62,25 @@ def logo_split(self, index, with_b0=False): The test data/gradient come **from the original dataset**. """ - if not Path(self._filepath).exists(): - self.to_filename(self._filepath) + if not Path(dwdata._filepath).exists(): + dwdata.to_filename(dwdata._filepath) # read original DWI data & b-vector - with h5py.File(self._filepath, "r") as in_file: + with h5py.File(dwdata._filepath, "r") as in_file: root = in_file["/0"] dwframe = np.asanyarray(root["dataobj"][..., index]) bframe = np.asanyarray(root["gradients"][..., index]) # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(self), dtype=bool) + mask = np.zeros(len(dwdata), dtype=bool) mask[index] = True - train_data = self.dataobj[..., ~mask] - train_gradients = self.gradients[..., ~mask] + train_data = dwdata.dataobj[..., ~mask] + train_gradients = dwdata.gradients[..., ~mask] if with_b0: train_data = np.concatenate( - (np.asanyarray(self.bzero)[..., np.newaxis], train_data), + (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), axis=-1, ) b0vec = np.zeros((4, 1)) @@ -128,6 +95,41 @@ def logo_split(self, index, with_b0=False): (dwframe, bframe), ) +@attr.s(slots=True) +class DWI: + """Data representation structure for dMRI data.""" + + dataobj = attr.ib(default=None, repr=_data_repr) + """A numpy ndarray object for the data array, without *b=0* volumes.""" + affine = attr.ib(default=None, repr=_data_repr) + """Best affine for RAS-to-voxel conversion of coordinates (NIfTI header).""" + brainmask = attr.ib(default=None, repr=_data_repr) + """A boolean ndarray object containing a corresponding brainmask.""" + bzero = attr.ib(default=None, repr=_data_repr) + """ + A *b=0* reference map, preferably obtained by some smart averaging. + If the :math:`B_0` fieldmap is set, this *b=0* reference map should also + be unwarped. + """ + gradients = attr.ib(default=None, repr=_data_repr) + """A 2D numpy array of the gradient table in RAS+B format.""" + em_affines = attr.ib(default=None) + """ + List of :obj:`nitransforms.linear.Affine` objects that bring + DWIs (i.e., no b=0) into alignment. + """ + fieldmap = attr.ib(default=None, repr=_data_repr) + """A 3D displacements field to unwarp susceptibility distortions.""" + _filepath = attr.ib( + factory=lambda: Path(mkdtemp()) / "em_cache.h5", + repr=False, + ) + """A path to an HDF5 file to store the whole dataset.""" + + def __len__(self): + """Obtain the number of high-*b* orientations.""" + return self.dataobj.shape[-1] + def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 8e812b67..a28ce527 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -34,6 +34,7 @@ from tqdm import tqdm from eddymotion.model import ModelFactory +from eddymotion.dmri import logo_split class EddyMotionEstimator: @@ -150,7 +151,7 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - data_train, data_test = dwdata.logo_split(i, with_b0=True) + data_train, data_test = logo_split(dwdata, i, with_b0=True) grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") From 7fa9b05cbe421bc8857eaccd921d29d0ad8575ac Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:43:48 -0800 Subject: [PATCH 02/34] Fixed flake8 errors --- src/eddymotion/data/dmri.py | 103 ++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 133e2ee4..8e42ee5d 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -40,60 +40,61 @@ def _data_repr(value): def logo_split(dwdata, index, with_b0=False): - """ - Produce one fold of LOGO (leave-one-gradient-out). - - Parameters - ---------- - dwdata : :obj:`DWI` - DWI object - index : :obj:`int` - Index of the DWI orientation to be left out in this fold. - with_b0 : :obj:`bool` - Insert the *b=0* reference at the beginning of the training dataset. - - Returns - ------- - (train_data, train_gradients) : :obj:`tuple` - Training DWI and corresponding gradients. - Training data/gradients come **from the updated dataset**. - (test_data, test_gradients) :obj:`tuple` - Test 3D map (one DWI orientation) and corresponding b-vector/value. - The test data/gradient come **from the original dataset**. - - """ - if not Path(dwdata._filepath).exists(): - dwdata.to_filename(dwdata._filepath) - - # read original DWI data & b-vector - with h5py.File(dwdata._filepath, "r") as in_file: - root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bframe = np.asanyarray(root["gradients"][..., index]) - - # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(dwdata), dtype=bool) - mask[index] = True + """ + Produce one fold of LOGO (leave-one-gradient-out). + + Parameters + ---------- + dwdata : :obj:`DWI` + DWI object + index : :obj:`int` + Index of the DWI orientation to be left out in this fold. + with_b0 : :obj:`bool` + Insert the *b=0* reference at the beginning of the training dataset. + + Returns + ------- + (train_data, train_gradients) : :obj:`tuple` + Training DWI and corresponding gradients. + Training data/gradients come **from the updated dataset**. + (test_data, test_gradients) :obj:`tuple` + Test 3D map (one DWI orientation) and corresponding b-vector/value. + The test data/gradient come **from the original dataset**. - train_data = dwdata.dataobj[..., ~mask] - train_gradients = dwdata.gradients[..., ~mask] + """ + if not Path(dwdata._filepath).exists(): + dwdata.to_filename(dwdata._filepath) + + # read original DWI data & b-vector + with h5py.File(dwdata._filepath, "r") as in_file: + root = in_file["/0"] + dwframe = np.asanyarray(root["dataobj"][..., index]) + bframe = np.asanyarray(root["gradients"][..., index]) + + # if the size of the mask does not match data, cache is stale + mask = np.zeros(len(dwdata), dtype=bool) + mask[index] = True + + train_data = dwdata.dataobj[..., ~mask] + train_gradients = dwdata.gradients[..., ~mask] + + if with_b0: + train_data = np.concatenate( + (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), + axis=-1, + ) + b0vec = np.zeros((4, 1)) + b0vec[0, 0] = 1 + train_gradients = np.concatenate( + (b0vec, train_gradients), + axis=-1, + ) - if with_b0: - train_data = np.concatenate( - (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), - axis=-1, - ) - b0vec = np.zeros((4, 1)) - b0vec[0, 0] = 1 - train_gradients = np.concatenate( - (b0vec, train_gradients), - axis=-1, - ) + return ( + (train_data, train_gradients), + (dwframe, bframe), + ) - return ( - (train_data, train_gradients), - (dwframe, bframe), - ) @attr.s(slots=True) class DWI: From 5956168a229f3b45d7d80911773d0c6ea250f8ce Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:53:16 -0800 Subject: [PATCH 03/34] Moved h5 file read --- src/eddymotion/data/dmri.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 8e42ee5d..60535a1c 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -62,14 +62,9 @@ def logo_split(dwdata, index, with_b0=False): The test data/gradient come **from the original dataset**. """ - if not Path(dwdata._filepath).exists(): - dwdata.to_filename(dwdata._filepath) - # read original DWI data & b-vector - with h5py.File(dwdata._filepath, "r") as in_file: - root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bframe = np.asanyarray(root["gradients"][..., index]) + dwframe = np.asanyarray(dwdata.root["dataobj"][..., index]) + bframe = np.asanyarray(dwdata.root["gradients"][..., index]) # if the size of the mask does not match data, cache is stale mask = np.zeros(len(dwdata), dtype=bool) @@ -130,6 +125,15 @@ class DWI: def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] + + def set_data(self): + # Generate dwframe and bframe + if not Path(self._filepath).exists(): + self.to_filename(self._filepath) + + # read original DWI data & b-vector + with h5py.File(self._filepath, "r") as in_file: + self.root = in_file["/0"] def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" From bc1482080d512a83713c9e36ab236f8c9edd8454 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 1 Dec 2022 14:07:39 -0800 Subject: [PATCH 04/34] Pull dwframe, bframe, bvec into estimator.py --- src/eddymotion/data/dmri.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 60535a1c..e87043a2 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -39,7 +39,7 @@ def _data_repr(value): return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>" -def logo_split(dwdata, index, with_b0=False): +def logo_split(dwdata, dwframe, bframe, index, with_b0=False): """ Produce one fold of LOGO (leave-one-gradient-out). @@ -63,9 +63,6 @@ def logo_split(dwdata, index, with_b0=False): """ - dwframe = np.asanyarray(dwdata.root["dataobj"][..., index]) - bframe = np.asanyarray(dwdata.root["gradients"][..., index]) - # if the size of the mask does not match data, cache is stale mask = np.zeros(len(dwdata), dtype=bool) mask[index] = True @@ -130,12 +127,12 @@ def set_data(self): # Generate dwframe and bframe if not Path(self._filepath).exists(): self.to_filename(self._filepath) - + # read original DWI data & b-vector with h5py.File(self._filepath, "r") as in_file: - self.root = in_file["/0"] - - def set_transform(self, index, affine, order=3): + self._root = in_file["/0"] + + def set_transform(self, dwframe, bvec, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( shape=self.dataobj.shape[:3], affine=self.affine @@ -148,15 +145,6 @@ def set_transform(self, index, affine, order=3): else: xform = Affine(matrix=affine, reference=reference) - if not Path(self._filepath).exists(): - self.to_filename(self._filepath) - - # read original DWI data & b-vector - with h5py.File(self._filepath, "r") as in_file: - root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bvec = np.asanyarray(root["gradients"][:3, index]) - dwmoving = nb.Nifti1Image(dwframe, self.affine, None) # resample and update orientation at index From 200b9f95e7535ff2244184f2db74f197cae4a662 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 1 Dec 2022 14:17:50 -0800 Subject: [PATCH 05/34] Fixed flake8 errors --- src/eddymotion/data/dmri.py | 4 ++-- src/eddymotion/estimator.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index e87043a2..f7ea6571 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -122,7 +122,7 @@ class DWI: def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] - + def set_data(self): # Generate dwframe and bframe if not Path(self._filepath).exists(): @@ -131,7 +131,7 @@ def set_data(self): # read original DWI data & b-vector with h5py.File(self._filepath, "r") as in_file: self._root = in_file["/0"] - + def set_transform(self, dwframe, bvec, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index a28ce527..e6142445 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -33,8 +33,8 @@ from pkg_resources import resource_filename as pkg_fn from tqdm import tqdm -from eddymotion.model import ModelFactory from eddymotion.dmri import logo_split +from eddymotion.model import ModelFactory class EddyMotionEstimator: From 38b427612c3c62ca3282ce53726758d27f9b2cb3 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:38:55 -0800 Subject: [PATCH 06/34] Starting to refactor logo_split --- src/eddymotion/data/dmri.py | 30 +++++++++++++++++++----------- src/eddymotion/estimator.py | 1 + 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index f7ea6571..8e42ee5d 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -39,7 +39,7 @@ def _data_repr(value): return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>" -def logo_split(dwdata, dwframe, bframe, index, with_b0=False): +def logo_split(dwdata, index, with_b0=False): """ Produce one fold of LOGO (leave-one-gradient-out). @@ -62,6 +62,14 @@ def logo_split(dwdata, dwframe, bframe, index, with_b0=False): The test data/gradient come **from the original dataset**. """ + if not Path(dwdata._filepath).exists(): + dwdata.to_filename(dwdata._filepath) + + # read original DWI data & b-vector + with h5py.File(dwdata._filepath, "r") as in_file: + root = in_file["/0"] + dwframe = np.asanyarray(root["dataobj"][..., index]) + bframe = np.asanyarray(root["gradients"][..., index]) # if the size of the mask does not match data, cache is stale mask = np.zeros(len(dwdata), dtype=bool) @@ -123,16 +131,7 @@ def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] - def set_data(self): - # Generate dwframe and bframe - if not Path(self._filepath).exists(): - self.to_filename(self._filepath) - - # read original DWI data & b-vector - with h5py.File(self._filepath, "r") as in_file: - self._root = in_file["/0"] - - def set_transform(self, dwframe, bvec, index, affine, order=3): + def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( shape=self.dataobj.shape[:3], affine=self.affine @@ -145,6 +144,15 @@ def set_transform(self, dwframe, bvec, index, affine, order=3): else: xform = Affine(matrix=affine, reference=reference) + if not Path(self._filepath).exists(): + self.to_filename(self._filepath) + + # read original DWI data & b-vector + with h5py.File(self._filepath, "r") as in_file: + root = in_file["/0"] + dwframe = np.asanyarray(root["dataobj"][..., index]) + bvec = np.asanyarray(root["gradients"][:3, index]) + dwmoving = nb.Nifti1Image(dwframe, self.affine, None) # resample and update orientation at index diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index e6142445..4ca5b277 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -35,6 +35,7 @@ from eddymotion.dmri import logo_split from eddymotion.model import ModelFactory +from eddymotion.dmri import logo_split class EddyMotionEstimator: From 56f9fd04349f36ed4cd436858397d18541eed63e Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:53:16 -0800 Subject: [PATCH 07/34] Moved h5 file read --- src/eddymotion/data/dmri.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 8e42ee5d..60535a1c 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -62,14 +62,9 @@ def logo_split(dwdata, index, with_b0=False): The test data/gradient come **from the original dataset**. """ - if not Path(dwdata._filepath).exists(): - dwdata.to_filename(dwdata._filepath) - # read original DWI data & b-vector - with h5py.File(dwdata._filepath, "r") as in_file: - root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bframe = np.asanyarray(root["gradients"][..., index]) + dwframe = np.asanyarray(dwdata.root["dataobj"][..., index]) + bframe = np.asanyarray(dwdata.root["gradients"][..., index]) # if the size of the mask does not match data, cache is stale mask = np.zeros(len(dwdata), dtype=bool) @@ -130,6 +125,15 @@ class DWI: def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] + + def set_data(self): + # Generate dwframe and bframe + if not Path(self._filepath).exists(): + self.to_filename(self._filepath) + + # read original DWI data & b-vector + with h5py.File(self._filepath, "r") as in_file: + self.root = in_file["/0"] def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" From 6aeb4d76804590eba813cd74ae5f2fcf7c0a2641 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 1 Dec 2022 14:07:39 -0800 Subject: [PATCH 08/34] Pull dwframe, bframe, bvec into estimator.py --- src/eddymotion/data/dmri.py | 22 +++++----------------- src/eddymotion/estimator.py | 9 ++++++++- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 60535a1c..e87043a2 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -39,7 +39,7 @@ def _data_repr(value): return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>" -def logo_split(dwdata, index, with_b0=False): +def logo_split(dwdata, dwframe, bframe, index, with_b0=False): """ Produce one fold of LOGO (leave-one-gradient-out). @@ -63,9 +63,6 @@ def logo_split(dwdata, index, with_b0=False): """ - dwframe = np.asanyarray(dwdata.root["dataobj"][..., index]) - bframe = np.asanyarray(dwdata.root["gradients"][..., index]) - # if the size of the mask does not match data, cache is stale mask = np.zeros(len(dwdata), dtype=bool) mask[index] = True @@ -130,12 +127,12 @@ def set_data(self): # Generate dwframe and bframe if not Path(self._filepath).exists(): self.to_filename(self._filepath) - + # read original DWI data & b-vector with h5py.File(self._filepath, "r") as in_file: - self.root = in_file["/0"] - - def set_transform(self, index, affine, order=3): + self._root = in_file["/0"] + + def set_transform(self, dwframe, bvec, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( shape=self.dataobj.shape[:3], affine=self.affine @@ -148,15 +145,6 @@ def set_transform(self, index, affine, order=3): else: xform = Affine(matrix=affine, reference=reference) - if not Path(self._filepath).exists(): - self.to_filename(self._filepath) - - # read original DWI data & b-vector - with h5py.File(self._filepath, "r") as in_file: - root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bvec = np.asanyarray(root["gradients"][:3, index]) - dwmoving = nb.Nifti1Image(dwframe, self.affine, None) # resample and update orientation at index diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 4ca5b277..e571fa0c 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -94,6 +94,8 @@ def fit( if "num_threads" not in align_kwargs and omp_nthreads is not None: align_kwargs["num_threads"] = omp_nthreads + orig = dwdata + n_iter = len(models) for i_iter, model in enumerate(models): reg_target_type = ( @@ -152,7 +154,12 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - data_train, data_test = logo_split(dwdata, i, with_b0=True) + dwframe = np.asanyarray(orig.dataobj[..., i]) + bframe = np.asanyarray(orig.gradients[..., i]) + data_train, data_test = logo_split( + dwdata, dwframe, bframe, i, with_b0=True + ) + grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") From 1d5c6eac7ea7e059329970f2ff017946062b0205 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 1 Dec 2022 14:17:50 -0800 Subject: [PATCH 09/34] Fixed flake8 errors --- src/eddymotion/data/dmri.py | 4 ++-- src/eddymotion/estimator.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index e87043a2..f7ea6571 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -122,7 +122,7 @@ class DWI: def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] - + def set_data(self): # Generate dwframe and bframe if not Path(self._filepath).exists(): @@ -131,7 +131,7 @@ def set_data(self): # read original DWI data & b-vector with h5py.File(self._filepath, "r") as in_file: self._root = in_file["/0"] - + def set_transform(self, dwframe, bvec, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index e571fa0c..858efc00 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -35,7 +35,6 @@ from eddymotion.dmri import logo_split from eddymotion.model import ModelFactory -from eddymotion.dmri import logo_split class EddyMotionEstimator: From 26a0dc441093aab00b60a4de4192e5dffd571555 Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:20:17 -0800 Subject: [PATCH 10/34] Preallocate em_affines as array, not Affine object --- src/eddymotion/estimator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 858efc00..c81e54c6 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -223,6 +223,7 @@ def fit( matrix=dwdata.em_affines[i], reference=reference ) mat_file = tmp_dir / f"init_{i_iter}_{i:05d}.mat" + initial_xform.to_filename(mat_file, fmt="itk") registration.inputs.initial_moving_transform = str(mat_file) From c0693a145ad8959fe9bcd094584a7177560b1cda Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:27:55 -0800 Subject: [PATCH 11/34] Removed orig copy of dwdata --- src/eddymotion/estimator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index c81e54c6..648bd12f 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -93,8 +93,6 @@ def fit( if "num_threads" not in align_kwargs and omp_nthreads is not None: align_kwargs["num_threads"] = omp_nthreads - orig = dwdata - n_iter = len(models) for i_iter, model in enumerate(models): reg_target_type = ( @@ -153,8 +151,8 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - dwframe = np.asanyarray(orig.dataobj[..., i]) - bframe = np.asanyarray(orig.gradients[..., i]) + dwframe = np.asanyarray(dwdata.dataobj[..., i]) + bframe = np.asanyarray(dwdata.gradients[..., i]) data_train, data_test = logo_split( dwdata, dwframe, bframe, i, with_b0=True ) From 21406737ba52ba03c9d976349946bdf49df884e2 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Thu, 15 Dec 2022 17:24:50 +0100 Subject: [PATCH 12/34] enh: move logo_split to new submodule --- src/eddymotion/data/dmri.py | 49 --------------------- src/eddymotion/data/splitting.py | 73 ++++++++++++++++++++++++++++++++ src/eddymotion/estimator.py | 2 +- test/test_model.py | 3 +- 4 files changed, 76 insertions(+), 51 deletions(-) create mode 100644 src/eddymotion/data/splitting.py diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index f7ea6571..0da7ad14 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -39,55 +39,6 @@ def _data_repr(value): return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>" -def logo_split(dwdata, dwframe, bframe, index, with_b0=False): - """ - Produce one fold of LOGO (leave-one-gradient-out). - - Parameters - ---------- - dwdata : :obj:`DWI` - DWI object - index : :obj:`int` - Index of the DWI orientation to be left out in this fold. - with_b0 : :obj:`bool` - Insert the *b=0* reference at the beginning of the training dataset. - - Returns - ------- - (train_data, train_gradients) : :obj:`tuple` - Training DWI and corresponding gradients. - Training data/gradients come **from the updated dataset**. - (test_data, test_gradients) :obj:`tuple` - Test 3D map (one DWI orientation) and corresponding b-vector/value. - The test data/gradient come **from the original dataset**. - - """ - - # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(dwdata), dtype=bool) - mask[index] = True - - train_data = dwdata.dataobj[..., ~mask] - train_gradients = dwdata.gradients[..., ~mask] - - if with_b0: - train_data = np.concatenate( - (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), - axis=-1, - ) - b0vec = np.zeros((4, 1)) - b0vec[0, 0] = 1 - train_gradients = np.concatenate( - (b0vec, train_gradients), - axis=-1, - ) - - return ( - (train_data, train_gradients), - (dwframe, bframe), - ) - - @attr.s(slots=True) class DWI: """Data representation structure for dMRI data.""" diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py new file mode 100644 index 00000000..90ed00b9 --- /dev/null +++ b/src/eddymotion/data/splitting.py @@ -0,0 +1,73 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2022 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +"""Data splitting helpers.""" +import numpy as np + + +def lovo_split(dwdata, dwframe, bframe, index, with_b0=False): + """ + Produce one fold of LOVO (leave-one-volume-out). + + Parameters + ---------- + dwdata : :obj:`DWI` + DWI object + index : :obj:`int` + Index of the DWI orientation to be left out in this fold. + with_b0 : :obj:`bool` + Insert the *b=0* reference at the beginning of the training dataset. + + Returns + ------- + (train_data, train_gradients) : :obj:`tuple` + Training DWI and corresponding gradients. + Training data/gradients come **from the updated dataset**. + (test_data, test_gradients) :obj:`tuple` + Test 3D map (one DWI orientation) and corresponding b-vector/value. + The test data/gradient come **from the original dataset**. + + """ + + # if the size of the mask does not match data, cache is stale + mask = np.zeros(len(dwdata), dtype=bool) + mask[index] = True + + train_data = dwdata.dataobj[..., ~mask] + train_gradients = dwdata.gradients[..., ~mask] + + if with_b0: + train_data = np.concatenate( + (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), + axis=-1, + ) + b0vec = np.zeros((4, 1)) + b0vec[0, 0] = 1 + train_gradients = np.concatenate( + (b0vec, train_gradients), + axis=-1, + ) + + return ( + (train_data, train_gradients), + (dwframe, bframe), + ) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 648bd12f..ad49454e 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -33,7 +33,7 @@ from pkg_resources import resource_filename as pkg_fn from tqdm import tqdm -from eddymotion.dmri import logo_split +from eddymotion.data.splitting import lovo_split as logo_split from eddymotion.model import ModelFactory diff --git a/test/test_model.py b/test/test_model.py index 4b3187d1..12801eaf 100644 --- a/test/test_model.py +++ b/test/test_model.py @@ -25,6 +25,7 @@ import pytest from eddymotion import model +from eddymotion.data.splitting import lovo_split from eddymotion.data.dmri import DWI @@ -94,7 +95,7 @@ def test_two_initialisations(datadir): dmri_dataset = DWI.from_filename(datadir / "dwi.h5") # Split data into test and train set - data_train, data_test = dmri_dataset.logo_split(10) + data_train, data_test = lovo_split(dmri_dataset, 10) # Direct initialisation model1 = model.AverageDWModel( From 374864adf57cc312d690778395e32eb1b6e10ff8 Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:13:44 -0800 Subject: [PATCH 13/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 90ed00b9..f4e5c5cc 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -24,7 +24,7 @@ import numpy as np -def lovo_split(dwdata, dwframe, bframe, index, with_b0=False): +def lovo_split(data, index): """ Produce one fold of LOVO (leave-one-volume-out). From bc845d927c61bc3b908a77ae44def479e8b5090e Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:14:08 -0800 Subject: [PATCH 14/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index f4e5c5cc..ab798a8f 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -52,7 +52,7 @@ def lovo_split(data, index): mask = np.zeros(len(dwdata), dtype=bool) mask[index] = True - train_data = dwdata.dataobj[..., ~mask] + train_data = data.dataobj[..., ~mask] train_gradients = dwdata.gradients[..., ~mask] if with_b0: From 6191fa16845600ca4dae1ea17b244924a63c83de Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:14:16 -0800 Subject: [PATCH 15/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index ab798a8f..0afb9c00 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -53,7 +53,7 @@ def lovo_split(data, index): mask[index] = True train_data = data.dataobj[..., ~mask] - train_gradients = dwdata.gradients[..., ~mask] + train_gradients = data.gradients[..., ~mask] if with_b0: train_data = np.concatenate( From 175d1dddd538b7e0c30550ac7c689ec7f1dae39b Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:14:28 -0800 Subject: [PATCH 16/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 0afb9c00..e93a90b5 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -69,5 +69,5 @@ def lovo_split(data, index): return ( (train_data, train_gradients), - (dwframe, bframe), + (data.dataobj[..., mask], data.gradients[..., mask]), ) From cb19fb1bdb8914842e2aa81df5ab3cfcf063b2a0 Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:14:40 -0800 Subject: [PATCH 17/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index e93a90b5..5cd73122 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -30,7 +30,7 @@ def lovo_split(data, index): Parameters ---------- - dwdata : :obj:`DWI` + data : :obj:`eddymotion.data.dmri.DWI` DWI object index : :obj:`int` Index of the DWI orientation to be left out in this fold. From 64ccc58e043ba8dc577cac7d0baa93f361e06dad Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:14:54 -0800 Subject: [PATCH 18/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 5cd73122..9458db56 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -34,8 +34,6 @@ def lovo_split(data, index): DWI object index : :obj:`int` Index of the DWI orientation to be left out in this fold. - with_b0 : :obj:`bool` - Insert the *b=0* reference at the beginning of the training dataset. Returns ------- From 88bf1dcf905f26321fedae194a70c18f270c1709 Mon Sep 17 00:00:00 2001 From: teresamg <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:15:33 -0800 Subject: [PATCH 19/34] Update src/eddymotion/data/splitting.py Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 9458db56..d6229575 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -53,18 +53,6 @@ def lovo_split(data, index): train_data = data.dataobj[..., ~mask] train_gradients = data.gradients[..., ~mask] - if with_b0: - train_data = np.concatenate( - (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data), - axis=-1, - ) - b0vec = np.zeros((4, 1)) - b0vec[0, 0] = 1 - train_gradients = np.concatenate( - (b0vec, train_gradients), - axis=-1, - ) - return ( (train_data, train_gradients), (data.dataobj[..., mask], data.gradients[..., mask]), From a3a3f1b719bdc3c1986edf1f6f18ade57393f9ea Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:28:39 -0800 Subject: [PATCH 20/34] Fixed logo_split() call and dwdata->data --- src/eddymotion/data/splitting.py | 2 +- src/eddymotion/estimator.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index d6229575..aec47059 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -47,7 +47,7 @@ def lovo_split(data, index): """ # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(dwdata), dtype=bool) + mask = np.zeros(len(data), dtype=bool) mask[index] = True train_data = data.dataobj[..., ~mask] diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index ad49454e..b864c266 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -151,11 +151,7 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - dwframe = np.asanyarray(dwdata.dataobj[..., i]) - bframe = np.asanyarray(dwdata.gradients[..., i]) - data_train, data_test = logo_split( - dwdata, dwframe, bframe, i, with_b0=True - ) + data_train, data_test = logo_split(dwdata, i) grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") From f18029eb7db820cdcd045c750e2a1c87a9db27ea Mon Sep 17 00:00:00 2001 From: Teresa Gomez <46339554+teresamg@users.noreply.github.com> Date: Thu, 15 Dec 2022 14:42:56 -0800 Subject: [PATCH 21/34] Updated set_transform(), removed pbar grad_str text --- src/eddymotion/data/dmri.py | 5 ++++- src/eddymotion/estimator.py | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index 0da7ad14..bfc4c788 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -83,7 +83,7 @@ def set_data(self): with h5py.File(self._filepath, "r") as in_file: self._root = in_file["/0"] - def set_transform(self, dwframe, bvec, index, affine, order=3): + def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( shape=self.dataobj.shape[:3], affine=self.affine @@ -96,6 +96,9 @@ def set_transform(self, dwframe, bvec, index, affine, order=3): else: xform = Affine(matrix=affine, reference=reference) + dwframe = np.asanyarray(self.dataobj[..., index]) + bvec = np.asanyarray(self.gradients[:3, index]) + dwmoving = nb.Nifti1Image(dwframe, self.affine, None) # resample and update orientation at index diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index b864c266..57cc4cd6 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -153,9 +153,6 @@ def fit( ) data_train, data_test = logo_split(dwdata, i) - grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" - pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") - if not single_model: # A true LOGO estimator if hasattr(dwdata, "gradients"): kwargs["gtab"] = data_train[1] From a8538bf988da406d635ad810647c7b67289e0f03 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 27 Mar 2024 11:59:09 +0100 Subject: [PATCH 22/34] fix: revise merge conflicts and get ready for final revision --- src/eddymotion/data/dmri.py | 19 ++++++++----------- src/eddymotion/estimator.py | 1 - 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index bfc4c788..f84767fe 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -74,15 +74,6 @@ def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] - def set_data(self): - # Generate dwframe and bframe - if not Path(self._filepath).exists(): - self.to_filename(self._filepath) - - # read original DWI data & b-vector - with h5py.File(self._filepath, "r") as in_file: - self._root = in_file["/0"] - def set_transform(self, index, affine, order=3): """Set an affine, and update data object and gradients.""" reference = namedtuple("ImageGrid", ("shape", "affine"))( @@ -96,8 +87,14 @@ def set_transform(self, index, affine, order=3): else: xform = Affine(matrix=affine, reference=reference) - dwframe = np.asanyarray(self.dataobj[..., index]) - bvec = np.asanyarray(self.gradients[:3, index]) + if not Path(self._filepath).exists(): + self.to_filename(self._filepath) + + # read original DWI data & b-vector + with h5py.File(self._filepath, "r") as in_file: + root = in_file["/0"] + dwframe = np.asanyarray(root["dataobj"][..., index]) + bvec = np.asanyarray(root["gradients"][:3, index]) dwmoving = nb.Nifti1Image(dwframe, self.affine, None) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 57cc4cd6..1ad42d13 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -214,7 +214,6 @@ def fit( matrix=dwdata.em_affines[i], reference=reference ) mat_file = tmp_dir / f"init_{i_iter}_{i:05d}.mat" - initial_xform.to_filename(mat_file, fmt="itk") registration.inputs.initial_moving_transform = str(mat_file) From 542caa887cee4cd774e6f46ff924d640bca56278 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 27 Mar 2024 12:14:57 +0100 Subject: [PATCH 23/34] fix: revert accidental removal of two lines --- src/eddymotion/estimator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 1ad42d13..b1aeb583 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -152,6 +152,8 @@ def fit( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) data_train, data_test = logo_split(dwdata, i) + grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" + pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") if not single_model: # A true LOGO estimator if hasattr(dwdata, "gradients"): From 82a557e654725c10cd21f8d72d957d5e0d68a8cc Mon Sep 17 00:00:00 2001 From: esavary Date: Wed, 27 Mar 2024 13:15:17 +0100 Subject: [PATCH 24/34] Apply suggestions from code review Co-authored-by: Oscar Esteban --- src/eddymotion/estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index b1aeb583..79852618 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -33,7 +33,7 @@ from pkg_resources import resource_filename as pkg_fn from tqdm import tqdm -from eddymotion.data.splitting import lovo_split as logo_split +from eddymotion.data.splitting import lovo_split from eddymotion.model import ModelFactory @@ -151,7 +151,7 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - data_train, data_test = logo_split(dwdata, i) + data_train, data_test = lovo_split(dwdata, i) grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs") From d45ec01fca08573ace18e7413c92c00ecbabc65f Mon Sep 17 00:00:00 2001 From: esavary Date: Wed, 27 Mar 2024 14:35:56 +0100 Subject: [PATCH 25/34] fix: Restore access to HDF5 file --- src/eddymotion/data/dmri.py | 4 ++++ src/eddymotion/data/splitting.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py index f84767fe..884ca953 100644 --- a/src/eddymotion/data/dmri.py +++ b/src/eddymotion/data/dmri.py @@ -70,6 +70,10 @@ class DWI: ) """A path to an HDF5 file to store the whole dataset.""" + def get_filename(self): + """Get the filepath of the HDF5 file.""" + return self._filepath + def __len__(self): """Obtain the number of high-*b* orientations.""" return self.dataobj.shape[-1] diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index aec47059..90e179ab 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -21,7 +21,9 @@ # https://www.nipreps.org/community/licensing/ # """Data splitting helpers.""" +from pathlib import Path import numpy as np +import h5py def lovo_split(data, index): @@ -46,14 +48,23 @@ def lovo_split(data, index): """ + if not Path(data.get_filename()).exists(): + data.to_filename(data.get_filename()) + # if the size of the mask does not match data, cache is stale mask = np.zeros(len(data), dtype=bool) mask[index] = True + # read original DWI data & b-vector + with h5py.File(data.get_filename(), "r") as in_file: + root = in_file["/0"] + dwframe = np.asanyarray(root["dataobj"][..., mask]) + bframe = np.asanyarray(root["gradients"][..., mask]) + train_data = data.dataobj[..., ~mask] train_gradients = data.gradients[..., ~mask] return ( (train_data, train_gradients), - (data.dataobj[..., mask], data.gradients[..., mask]), + (dwframe, bframe), ) From e7761559752c521d225e76c31b71f345871cdd2d Mon Sep 17 00:00:00 2001 From: esavary Date: Wed, 27 Mar 2024 14:50:12 +0100 Subject: [PATCH 26/34] fix: Restore b0 argument --- src/eddymotion/data/splitting.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 90e179ab..e1ae9a47 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -26,7 +26,7 @@ import h5py -def lovo_split(data, index): +def lovo_split(data, index, with_b0=False): """ Produce one fold of LOVO (leave-one-volume-out). @@ -50,7 +50,7 @@ def lovo_split(data, index): if not Path(data.get_filename()).exists(): data.to_filename(data.get_filename()) - + # if the size of the mask does not match data, cache is stale mask = np.zeros(len(data), dtype=bool) mask[index] = True @@ -64,6 +64,18 @@ def lovo_split(data, index): train_data = data.dataobj[..., ~mask] train_gradients = data.gradients[..., ~mask] + if with_b0: + train_data = np.concatenate( + (np.asanyarray(data.bzero)[..., np.newaxis], train_data), + axis=-1, + ) + b0vec = np.zeros((4, 1)) + b0vec[0, 0] = 1 + train_gradients = np.concatenate( + (b0vec, train_gradients), + axis=-1, + ) + return ( (train_data, train_gradients), (dwframe, bframe), From b4a18ad6863ff59ce0e04f33a9522475c7f44c90 Mon Sep 17 00:00:00 2001 From: esavary Date: Wed, 27 Mar 2024 15:03:23 +0100 Subject: [PATCH 27/34] fix: typos --- src/eddymotion/data/splitting.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index e1ae9a47..adb88d28 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -51,15 +51,15 @@ def lovo_split(data, index, with_b0=False): if not Path(data.get_filename()).exists(): data.to_filename(data.get_filename()) - # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(data), dtype=bool) - mask[index] = True - # read original DWI data & b-vector with h5py.File(data.get_filename(), "r") as in_file: root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., mask]) - bframe = np.asanyarray(root["gradients"][..., mask]) + dwframe = np.asanyarray(root["dataobj"][..., index]) + bframe = np.asanyarray(root["gradients"][..., index]) + + # if the size of the mask does not match data, cache is stale + mask = np.zeros(len(data), dtype=bool) + mask[index] = True train_data = data.dataobj[..., ~mask] train_gradients = data.gradients[..., ~mask] From 3a96d51feb616722034036702036b4db14327046 Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 12:12:18 +0100 Subject: [PATCH 28/34] Add: test for lovo_split --- test/test_splitting.py | 63 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 test/test_splitting.py diff --git a/test/test_splitting.py b/test/test_splitting.py new file mode 100644 index 00000000..de5707b4 --- /dev/null +++ b/test/test_splitting.py @@ -0,0 +1,63 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2021 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +"""Unit test testing the lovo_split function.""" +import pytest +import numpy as np +from eddymotion.data.dmri import DWI +from eddymotion.data.splitting import lovo_split + + +def test_lovo_split(datadir): + """ + Test the lovo_split function. + + Parameters: + - datadir: The directory containing the test data. + + Returns: + None + """ + data = DWI.from_filename(datadir / "dwi.h5") + + # Set zeros in dataobj and gradients of the dwi object + data.dataobj[:] = 0 + data.gradients[:] = 0 + + # Select a random index + index = np.random.randint(len(data)) + + # Set 1 in dataobj and gradients of the dwi object at this specific index + data.dataobj[..., index] = 1 + data.gradients[..., index] = 1 + + # Apply the lovo_split function at the specified index + (train_data, train_gradients), \ + (test_data, test_gradients) = lovo_split(data, index) + + # Check if the test data contains only 1s + # and the train data contains only 0s after the split + assert np.all(test_data == 1) + assert np.all(train_data == 0) + assert np.all(test_gradients == 1) + assert np.all(train_gradients == 0) + From 1d6762c82bc8544774029354ed05e831cb8b22f5 Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 12:14:26 +0100 Subject: [PATCH 29/34] Fix: remove unused import --- test/test_splitting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_splitting.py b/test/test_splitting.py index de5707b4..155a8794 100644 --- a/test/test_splitting.py +++ b/test/test_splitting.py @@ -21,7 +21,6 @@ # https://www.nipreps.org/community/licensing/ # """Unit test testing the lovo_split function.""" -import pytest import numpy as np from eddymotion.data.dmri import DWI from eddymotion.data.splitting import lovo_split From bcff16d15195a16fa9344312df95c5186491d98d Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 13:23:04 +0100 Subject: [PATCH 30/34] Apply suggestions from code review Co-authored-by: Oscar Esteban --- src/eddymotion/data/splitting.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index adb88d28..b8f5db01 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -26,13 +26,13 @@ import h5py -def lovo_split(data, index, with_b0=False): +def lovo_split(dataset, index, with_b0=False): """ Produce one fold of LOVO (leave-one-volume-out). Parameters ---------- - data : :obj:`eddymotion.data.dmri.DWI` + dataset : :obj:`eddymotion.data.dmri.DWI` DWI object index : :obj:`int` Index of the DWI orientation to be left out in this fold. @@ -48,25 +48,25 @@ def lovo_split(data, index, with_b0=False): """ - if not Path(data.get_filename()).exists(): - data.to_filename(data.get_filename()) + if not Path(dataset.get_filename()).exists(): + dataset.to_filename(data.get_filename()) # read original DWI data & b-vector - with h5py.File(data.get_filename(), "r") as in_file: + with h5py.File(dataset.get_filename(), "r") as in_file: root = in_file["/0"] - dwframe = np.asanyarray(root["dataobj"][..., index]) - bframe = np.asanyarray(root["gradients"][..., index]) + data = np.asanyarray(root["dataobj"]) + gradients = np.asanyarray(root["gradients"]) # if the size of the mask does not match data, cache is stale - mask = np.zeros(len(data), dtype=bool) + mask = np.zeros(data.shape[-1], dtype=bool) mask[index] = True - train_data = data.dataobj[..., ~mask] - train_gradients = data.gradients[..., ~mask] + train_data = data[..., ~mask] + train_gradients = gradients[..., ~mask] if with_b0: train_data = np.concatenate( - (np.asanyarray(data.bzero)[..., np.newaxis], train_data), + (np.asanyarray(dataset.bzero)[..., np.newaxis], train_data), axis=-1, ) b0vec = np.zeros((4, 1)) From 316eb57f5b524fdfed84e6c58205e7e6ee7542bb Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 13:43:50 +0100 Subject: [PATCH 31/34] Fix: return test data and gradient --- src/eddymotion/data/splitting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index b8f5db01..b6f9e834 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -62,7 +62,9 @@ def lovo_split(dataset, index, with_b0=False): mask[index] = True train_data = data[..., ~mask] - train_gradients = gradients[..., ~mask] + train_gradients = gradients[..., mask] + test_data = data[..., ~mask] + test_gradients = gradients[..., mask] if with_b0: train_data = np.concatenate( @@ -78,5 +80,5 @@ def lovo_split(dataset, index, with_b0=False): return ( (train_data, train_gradients), - (dwframe, bframe), + (test_data, test_gradients), ) From 421763544e93b4acfc294febdc58021122581925 Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 13:45:12 +0100 Subject: [PATCH 32/34] Fix: typo --- src/eddymotion/data/splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index b6f9e834..253cb035 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -49,7 +49,7 @@ def lovo_split(dataset, index, with_b0=False): """ if not Path(dataset.get_filename()).exists(): - dataset.to_filename(data.get_filename()) + dataset.to_filename(dataset.get_filename()) # read original DWI data & b-vector with h5py.File(dataset.get_filename(), "r") as in_file: From 14cd4d05eeeb09ed541264899043e42bc4382b7d Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 14:32:05 +0100 Subject: [PATCH 33/34] Fix: masking --- src/eddymotion/data/splitting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py index 253cb035..7d7ff8b4 100644 --- a/src/eddymotion/data/splitting.py +++ b/src/eddymotion/data/splitting.py @@ -62,8 +62,8 @@ def lovo_split(dataset, index, with_b0=False): mask[index] = True train_data = data[..., ~mask] - train_gradients = gradients[..., mask] - test_data = data[..., ~mask] + train_gradients = gradients[..., ~mask] + test_data = data[..., mask] test_gradients = gradients[..., mask] if with_b0: From 9591b818ede1b4f8571a783cd8b93c2464e8ee34 Mon Sep 17 00:00:00 2001 From: esavary Date: Thu, 28 Mar 2024 15:00:10 +0100 Subject: [PATCH 34/34] Update src/eddymotion/estimator.py Co-authored-by: Oscar Esteban --- src/eddymotion/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py index 79852618..1cafb233 100644 --- a/src/eddymotion/estimator.py +++ b/src/eddymotion/estimator.py @@ -151,7 +151,7 @@ def fit( pbar.set_description_str( f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>" ) - data_train, data_test = lovo_split(dwdata, i) + data_train, data_test = lovo_split(dwdata, i, with_b0=True) grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}" pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")