From 5f13501a1d51ea65134d5502e4fe51fb4116b3f8 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Wed, 30 Nov 2022 12:38:55 -0800
Subject: [PATCH 01/34] Starting to refactor logo_split

---
 src/eddymotion/data/dmri.py | 88 +++++++++++++++++++------------------
 src/eddymotion/estimator.py |  3 +-
 2 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 0f407bde..133e2ee4 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -39,47 +39,14 @@ def _data_repr(value):
     return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
 
 
-@attr.s(slots=True)
-class DWI:
-    """Data representation structure for dMRI data."""
-
-    dataobj = attr.ib(default=None, repr=_data_repr)
-    """A numpy ndarray object for the data array, without *b=0* volumes."""
-    affine = attr.ib(default=None, repr=_data_repr)
-    """Best affine for RAS-to-voxel conversion of coordinates (NIfTI header)."""
-    brainmask = attr.ib(default=None, repr=_data_repr)
-    """A boolean ndarray object containing a corresponding brainmask."""
-    bzero = attr.ib(default=None, repr=_data_repr)
-    """
-    A *b=0* reference map, preferably obtained by some smart averaging.
-    If the :math:`B_0` fieldmap is set, this *b=0* reference map should also
-    be unwarped.
-    """
-    gradients = attr.ib(default=None, repr=_data_repr)
-    """A 2D numpy array of the gradient table in RAS+B format."""
-    em_affines = attr.ib(default=None)
-    """
-    List of :obj:`nitransforms.linear.Affine` objects that bring
-    DWIs (i.e., no b=0) into alignment.
-    """
-    fieldmap = attr.ib(default=None, repr=_data_repr)
-    """A 3D displacements field to unwarp susceptibility distortions."""
-    _filepath = attr.ib(
-        factory=lambda: Path(mkdtemp()) / "em_cache.h5",
-        repr=False,
-    )
-    """A path to an HDF5 file to store the whole dataset."""
-
-    def __len__(self):
-        """Obtain the number of high-*b* orientations."""
-        return self.dataobj.shape[-1]
-
-    def logo_split(self, index, with_b0=False):
+def logo_split(dwdata, index, with_b0=False):
         """
         Produce one fold of LOGO (leave-one-gradient-out).
 
         Parameters
         ----------
+        dwdata : :obj:`DWI`
+            DWI object
         index : :obj:`int`
             Index of the DWI orientation to be left out in this fold.
         with_b0 : :obj:`bool`
@@ -95,25 +62,25 @@ def logo_split(self, index, with_b0=False):
             The test data/gradient come **from the original dataset**.
 
         """
-        if not Path(self._filepath).exists():
-            self.to_filename(self._filepath)
+        if not Path(dwdata._filepath).exists():
+            dwdata.to_filename(dwdata._filepath)
 
         # read original DWI data & b-vector
-        with h5py.File(self._filepath, "r") as in_file:
+        with h5py.File(dwdata._filepath, "r") as in_file:
             root = in_file["/0"]
             dwframe = np.asanyarray(root["dataobj"][..., index])
             bframe = np.asanyarray(root["gradients"][..., index])
 
         # if the size of the mask does not match data, cache is stale
-        mask = np.zeros(len(self), dtype=bool)
+        mask = np.zeros(len(dwdata), dtype=bool)
         mask[index] = True
 
-        train_data = self.dataobj[..., ~mask]
-        train_gradients = self.gradients[..., ~mask]
+        train_data = dwdata.dataobj[..., ~mask]
+        train_gradients = dwdata.gradients[..., ~mask]
 
         if with_b0:
             train_data = np.concatenate(
-                (np.asanyarray(self.bzero)[..., np.newaxis], train_data),
+                (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
                 axis=-1,
             )
             b0vec = np.zeros((4, 1))
@@ -128,6 +95,41 @@ def logo_split(self, index, with_b0=False):
             (dwframe, bframe),
         )
 
+@attr.s(slots=True)
+class DWI:
+    """Data representation structure for dMRI data."""
+
+    dataobj = attr.ib(default=None, repr=_data_repr)
+    """A numpy ndarray object for the data array, without *b=0* volumes."""
+    affine = attr.ib(default=None, repr=_data_repr)
+    """Best affine for RAS-to-voxel conversion of coordinates (NIfTI header)."""
+    brainmask = attr.ib(default=None, repr=_data_repr)
+    """A boolean ndarray object containing a corresponding brainmask."""
+    bzero = attr.ib(default=None, repr=_data_repr)
+    """
+    A *b=0* reference map, preferably obtained by some smart averaging.
+    If the :math:`B_0` fieldmap is set, this *b=0* reference map should also
+    be unwarped.
+    """
+    gradients = attr.ib(default=None, repr=_data_repr)
+    """A 2D numpy array of the gradient table in RAS+B format."""
+    em_affines = attr.ib(default=None)
+    """
+    List of :obj:`nitransforms.linear.Affine` objects that bring
+    DWIs (i.e., no b=0) into alignment.
+    """
+    fieldmap = attr.ib(default=None, repr=_data_repr)
+    """A 3D displacements field to unwarp susceptibility distortions."""
+    _filepath = attr.ib(
+        factory=lambda: Path(mkdtemp()) / "em_cache.h5",
+        repr=False,
+    )
+    """A path to an HDF5 file to store the whole dataset."""
+
+    def __len__(self):
+        """Obtain the number of high-*b* orientations."""
+        return self.dataobj.shape[-1]
+
     def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 8e812b67..a28ce527 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -34,6 +34,7 @@
 from tqdm import tqdm
 
 from eddymotion.model import ModelFactory
+from eddymotion.dmri import logo_split
 
 
 class EddyMotionEstimator:
@@ -150,7 +151,7 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        data_train, data_test = dwdata.logo_split(i, with_b0=True)
+                        data_train, data_test = logo_split(dwdata, i, with_b0=True)
                         grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
                         pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")
 

From 7fa9b05cbe421bc8857eaccd921d29d0ad8575ac Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Wed, 30 Nov 2022 12:43:48 -0800
Subject: [PATCH 02/34] Fixed flake8 errors

---
 src/eddymotion/data/dmri.py | 103 ++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 133e2ee4..8e42ee5d 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -40,60 +40,61 @@ def _data_repr(value):
 
 
 def logo_split(dwdata, index, with_b0=False):
-        """
-        Produce one fold of LOGO (leave-one-gradient-out).
-
-        Parameters
-        ----------
-        dwdata : :obj:`DWI`
-            DWI object
-        index : :obj:`int`
-            Index of the DWI orientation to be left out in this fold.
-        with_b0 : :obj:`bool`
-            Insert the *b=0* reference at the beginning of the training dataset.
-
-        Returns
-        -------
-        (train_data, train_gradients) : :obj:`tuple`
-            Training DWI and corresponding gradients.
-            Training data/gradients come **from the updated dataset**.
-        (test_data, test_gradients) :obj:`tuple`
-            Test 3D map (one DWI orientation) and corresponding b-vector/value.
-            The test data/gradient come **from the original dataset**.
-
-        """
-        if not Path(dwdata._filepath).exists():
-            dwdata.to_filename(dwdata._filepath)
-
-        # read original DWI data & b-vector
-        with h5py.File(dwdata._filepath, "r") as in_file:
-            root = in_file["/0"]
-            dwframe = np.asanyarray(root["dataobj"][..., index])
-            bframe = np.asanyarray(root["gradients"][..., index])
-
-        # if the size of the mask does not match data, cache is stale
-        mask = np.zeros(len(dwdata), dtype=bool)
-        mask[index] = True
+    """
+    Produce one fold of LOGO (leave-one-gradient-out).
+
+    Parameters
+    ----------
+    dwdata : :obj:`DWI`
+        DWI object
+    index : :obj:`int`
+        Index of the DWI orientation to be left out in this fold.
+    with_b0 : :obj:`bool`
+        Insert the *b=0* reference at the beginning of the training dataset.
+
+    Returns
+    -------
+    (train_data, train_gradients) : :obj:`tuple`
+        Training DWI and corresponding gradients.
+        Training data/gradients come **from the updated dataset**.
+    (test_data, test_gradients) :obj:`tuple`
+        Test 3D map (one DWI orientation) and corresponding b-vector/value.
+        The test data/gradient come **from the original dataset**.
 
-        train_data = dwdata.dataobj[..., ~mask]
-        train_gradients = dwdata.gradients[..., ~mask]
+    """
+    if not Path(dwdata._filepath).exists():
+        dwdata.to_filename(dwdata._filepath)
+
+    # read original DWI data & b-vector
+    with h5py.File(dwdata._filepath, "r") as in_file:
+        root = in_file["/0"]
+        dwframe = np.asanyarray(root["dataobj"][..., index])
+        bframe = np.asanyarray(root["gradients"][..., index])
+
+    # if the size of the mask does not match data, cache is stale
+    mask = np.zeros(len(dwdata), dtype=bool)
+    mask[index] = True
+
+    train_data = dwdata.dataobj[..., ~mask]
+    train_gradients = dwdata.gradients[..., ~mask]
+
+    if with_b0:
+        train_data = np.concatenate(
+            (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
+            axis=-1,
+        )
+        b0vec = np.zeros((4, 1))
+        b0vec[0, 0] = 1
+        train_gradients = np.concatenate(
+            (b0vec, train_gradients),
+            axis=-1,
+        )
 
-        if with_b0:
-            train_data = np.concatenate(
-                (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
-                axis=-1,
-            )
-            b0vec = np.zeros((4, 1))
-            b0vec[0, 0] = 1
-            train_gradients = np.concatenate(
-                (b0vec, train_gradients),
-                axis=-1,
-            )
+    return (
+        (train_data, train_gradients),
+        (dwframe, bframe),
+    )
 
-        return (
-            (train_data, train_gradients),
-            (dwframe, bframe),
-        )
 
 @attr.s(slots=True)
 class DWI:

From 5956168a229f3b45d7d80911773d0c6ea250f8ce Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Wed, 30 Nov 2022 12:53:16 -0800
Subject: [PATCH 03/34] Moved h5 file read

---
 src/eddymotion/data/dmri.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 8e42ee5d..60535a1c 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -62,14 +62,9 @@ def logo_split(dwdata, index, with_b0=False):
         The test data/gradient come **from the original dataset**.
 
     """
-    if not Path(dwdata._filepath).exists():
-        dwdata.to_filename(dwdata._filepath)
 
-    # read original DWI data & b-vector
-    with h5py.File(dwdata._filepath, "r") as in_file:
-        root = in_file["/0"]
-        dwframe = np.asanyarray(root["dataobj"][..., index])
-        bframe = np.asanyarray(root["gradients"][..., index])
+    dwframe = np.asanyarray(dwdata.root["dataobj"][..., index])
+    bframe = np.asanyarray(dwdata.root["gradients"][..., index])
 
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(dwdata), dtype=bool)
@@ -130,6 +125,15 @@ class DWI:
     def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
+    
+    def set_data(self):
+        # Generate dwframe and bframe
+        if not Path(self._filepath).exists():
+            self.to_filename(self._filepath)
+            
+        # read original DWI data & b-vector
+        with h5py.File(self._filepath, "r") as in_file:
+            self.root = in_file["/0"]
 
     def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""

From bc1482080d512a83713c9e36ab236f8c9edd8454 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 1 Dec 2022 14:07:39 -0800
Subject: [PATCH 04/34] Pull dwframe, bframe, bvec into estimator.py

---
 src/eddymotion/data/dmri.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 60535a1c..e87043a2 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -39,7 +39,7 @@ def _data_repr(value):
     return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
 
 
-def logo_split(dwdata, index, with_b0=False):
+def logo_split(dwdata, dwframe, bframe, index, with_b0=False):
     """
     Produce one fold of LOGO (leave-one-gradient-out).
 
@@ -63,9 +63,6 @@ def logo_split(dwdata, index, with_b0=False):
 
     """
 
-    dwframe = np.asanyarray(dwdata.root["dataobj"][..., index])
-    bframe = np.asanyarray(dwdata.root["gradients"][..., index])
-
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(dwdata), dtype=bool)
     mask[index] = True
@@ -130,12 +127,12 @@ def set_data(self):
         # Generate dwframe and bframe
         if not Path(self._filepath).exists():
             self.to_filename(self._filepath)
-            
+
         # read original DWI data & b-vector
         with h5py.File(self._filepath, "r") as in_file:
-            self.root = in_file["/0"]
-
-    def set_transform(self, index, affine, order=3):
+            self._root = in_file["/0"]
+            
+    def set_transform(self, dwframe, bvec, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
             shape=self.dataobj.shape[:3], affine=self.affine
@@ -148,15 +145,6 @@ def set_transform(self, index, affine, order=3):
         else:
             xform = Affine(matrix=affine, reference=reference)
 
-        if not Path(self._filepath).exists():
-            self.to_filename(self._filepath)
-
-        # read original DWI data & b-vector
-        with h5py.File(self._filepath, "r") as in_file:
-            root = in_file["/0"]
-            dwframe = np.asanyarray(root["dataobj"][..., index])
-            bvec = np.asanyarray(root["gradients"][:3, index])
-
         dwmoving = nb.Nifti1Image(dwframe, self.affine, None)
 
         # resample and update orientation at index

From 200b9f95e7535ff2244184f2db74f197cae4a662 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 1 Dec 2022 14:17:50 -0800
Subject: [PATCH 05/34] Fixed flake8 errors

---
 src/eddymotion/data/dmri.py | 4 ++--
 src/eddymotion/estimator.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index e87043a2..f7ea6571 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -122,7 +122,7 @@ class DWI:
     def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
-    
+
     def set_data(self):
         # Generate dwframe and bframe
         if not Path(self._filepath).exists():
@@ -131,7 +131,7 @@ def set_data(self):
         # read original DWI data & b-vector
         with h5py.File(self._filepath, "r") as in_file:
             self._root = in_file["/0"]
-            
+
     def set_transform(self, dwframe, bvec, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index a28ce527..e6142445 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -33,8 +33,8 @@
 from pkg_resources import resource_filename as pkg_fn
 from tqdm import tqdm
 
-from eddymotion.model import ModelFactory
 from eddymotion.dmri import logo_split
+from eddymotion.model import ModelFactory
 
 
 class EddyMotionEstimator:

From 38b427612c3c62ca3282ce53726758d27f9b2cb3 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Wed, 30 Nov 2022 12:38:55 -0800
Subject: [PATCH 06/34] Starting to refactor logo_split

---
 src/eddymotion/data/dmri.py | 30 +++++++++++++++++++-----------
 src/eddymotion/estimator.py |  1 +
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index f7ea6571..8e42ee5d 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -39,7 +39,7 @@ def _data_repr(value):
     return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
 
 
-def logo_split(dwdata, dwframe, bframe, index, with_b0=False):
+def logo_split(dwdata, index, with_b0=False):
     """
     Produce one fold of LOGO (leave-one-gradient-out).
 
@@ -62,6 +62,14 @@ def logo_split(dwdata, dwframe, bframe, index, with_b0=False):
         The test data/gradient come **from the original dataset**.
 
     """
+    if not Path(dwdata._filepath).exists():
+        dwdata.to_filename(dwdata._filepath)
+
+    # read original DWI data & b-vector
+    with h5py.File(dwdata._filepath, "r") as in_file:
+        root = in_file["/0"]
+        dwframe = np.asanyarray(root["dataobj"][..., index])
+        bframe = np.asanyarray(root["gradients"][..., index])
 
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(dwdata), dtype=bool)
@@ -123,16 +131,7 @@ def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
 
-    def set_data(self):
-        # Generate dwframe and bframe
-        if not Path(self._filepath).exists():
-            self.to_filename(self._filepath)
-
-        # read original DWI data & b-vector
-        with h5py.File(self._filepath, "r") as in_file:
-            self._root = in_file["/0"]
-
-    def set_transform(self, dwframe, bvec, index, affine, order=3):
+    def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
             shape=self.dataobj.shape[:3], affine=self.affine
@@ -145,6 +144,15 @@ def set_transform(self, dwframe, bvec, index, affine, order=3):
         else:
             xform = Affine(matrix=affine, reference=reference)
 
+        if not Path(self._filepath).exists():
+            self.to_filename(self._filepath)
+
+        # read original DWI data & b-vector
+        with h5py.File(self._filepath, "r") as in_file:
+            root = in_file["/0"]
+            dwframe = np.asanyarray(root["dataobj"][..., index])
+            bvec = np.asanyarray(root["gradients"][:3, index])
+
         dwmoving = nb.Nifti1Image(dwframe, self.affine, None)
 
         # resample and update orientation at index
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index e6142445..4ca5b277 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -35,6 +35,7 @@
 
 from eddymotion.dmri import logo_split
 from eddymotion.model import ModelFactory
+from eddymotion.dmri import logo_split
 
 
 class EddyMotionEstimator:

From 56f9fd04349f36ed4cd436858397d18541eed63e Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Wed, 30 Nov 2022 12:53:16 -0800
Subject: [PATCH 07/34] Moved h5 file read

---
 src/eddymotion/data/dmri.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 8e42ee5d..60535a1c 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -62,14 +62,9 @@ def logo_split(dwdata, index, with_b0=False):
         The test data/gradient come **from the original dataset**.
 
     """
-    if not Path(dwdata._filepath).exists():
-        dwdata.to_filename(dwdata._filepath)
 
-    # read original DWI data & b-vector
-    with h5py.File(dwdata._filepath, "r") as in_file:
-        root = in_file["/0"]
-        dwframe = np.asanyarray(root["dataobj"][..., index])
-        bframe = np.asanyarray(root["gradients"][..., index])
+    dwframe = np.asanyarray(dwdata.root["dataobj"][..., index])
+    bframe = np.asanyarray(dwdata.root["gradients"][..., index])
 
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(dwdata), dtype=bool)
@@ -130,6 +125,15 @@ class DWI:
     def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
+    
+    def set_data(self):
+        # Generate dwframe and bframe
+        if not Path(self._filepath).exists():
+            self.to_filename(self._filepath)
+            
+        # read original DWI data & b-vector
+        with h5py.File(self._filepath, "r") as in_file:
+            self.root = in_file["/0"]
 
     def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""

From 6aeb4d76804590eba813cd74ae5f2fcf7c0a2641 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 1 Dec 2022 14:07:39 -0800
Subject: [PATCH 08/34] Pull dwframe, bframe, bvec into estimator.py

---
 src/eddymotion/data/dmri.py | 22 +++++-----------------
 src/eddymotion/estimator.py |  9 ++++++++-
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 60535a1c..e87043a2 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -39,7 +39,7 @@ def _data_repr(value):
     return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
 
 
-def logo_split(dwdata, index, with_b0=False):
+def logo_split(dwdata, dwframe, bframe, index, with_b0=False):
     """
     Produce one fold of LOGO (leave-one-gradient-out).
 
@@ -63,9 +63,6 @@ def logo_split(dwdata, index, with_b0=False):
 
     """
 
-    dwframe = np.asanyarray(dwdata.root["dataobj"][..., index])
-    bframe = np.asanyarray(dwdata.root["gradients"][..., index])
-
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(dwdata), dtype=bool)
     mask[index] = True
@@ -130,12 +127,12 @@ def set_data(self):
         # Generate dwframe and bframe
         if not Path(self._filepath).exists():
             self.to_filename(self._filepath)
-            
+
         # read original DWI data & b-vector
         with h5py.File(self._filepath, "r") as in_file:
-            self.root = in_file["/0"]
-
-    def set_transform(self, index, affine, order=3):
+            self._root = in_file["/0"]
+            
+    def set_transform(self, dwframe, bvec, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
             shape=self.dataobj.shape[:3], affine=self.affine
@@ -148,15 +145,6 @@ def set_transform(self, index, affine, order=3):
         else:
             xform = Affine(matrix=affine, reference=reference)
 
-        if not Path(self._filepath).exists():
-            self.to_filename(self._filepath)
-
-        # read original DWI data & b-vector
-        with h5py.File(self._filepath, "r") as in_file:
-            root = in_file["/0"]
-            dwframe = np.asanyarray(root["dataobj"][..., index])
-            bvec = np.asanyarray(root["gradients"][:3, index])
-
         dwmoving = nb.Nifti1Image(dwframe, self.affine, None)
 
         # resample and update orientation at index
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 4ca5b277..e571fa0c 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -94,6 +94,8 @@ def fit(
         if "num_threads" not in align_kwargs and omp_nthreads is not None:
             align_kwargs["num_threads"] = omp_nthreads
 
+        orig = dwdata
+
         n_iter = len(models)
         for i_iter, model in enumerate(models):
             reg_target_type = (
@@ -152,7 +154,12 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        data_train, data_test = logo_split(dwdata, i, with_b0=True)
+                        dwframe = np.asanyarray(orig.dataobj[..., i])
+                        bframe = np.asanyarray(orig.gradients[..., i])
+                        data_train, data_test = logo_split(
+                            dwdata, dwframe, bframe, i, with_b0=True
+                        )
+
                         grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
                         pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")
 

From 1d5c6eac7ea7e059329970f2ff017946062b0205 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 1 Dec 2022 14:17:50 -0800
Subject: [PATCH 09/34] Fixed flake8 errors

---
 src/eddymotion/data/dmri.py | 4 ++--
 src/eddymotion/estimator.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index e87043a2..f7ea6571 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -122,7 +122,7 @@ class DWI:
     def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
-    
+
     def set_data(self):
         # Generate dwframe and bframe
         if not Path(self._filepath).exists():
@@ -131,7 +131,7 @@ def set_data(self):
         # read original DWI data & b-vector
         with h5py.File(self._filepath, "r") as in_file:
             self._root = in_file["/0"]
-            
+
     def set_transform(self, dwframe, bvec, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index e571fa0c..858efc00 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -35,7 +35,6 @@
 
 from eddymotion.dmri import logo_split
 from eddymotion.model import ModelFactory
-from eddymotion.dmri import logo_split
 
 
 class EddyMotionEstimator:

From 26a0dc441093aab00b60a4de4192e5dffd571555 Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 8 Dec 2022 15:20:17 -0800
Subject: [PATCH 10/34] Preallocate em_affines as array, not Affine object

---
 src/eddymotion/estimator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 858efc00..c81e54c6 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -223,6 +223,7 @@ def fit(
                                     matrix=dwdata.em_affines[i], reference=reference
                                 )
                             mat_file = tmp_dir / f"init_{i_iter}_{i:05d}.mat"
+
                             initial_xform.to_filename(mat_file, fmt="itk")
                             registration.inputs.initial_moving_transform = str(mat_file)
 

From c0693a145ad8959fe9bcd094584a7177560b1cda Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 8 Dec 2022 15:27:55 -0800
Subject: [PATCH 11/34] Removed orig copy of dwdata

---
 src/eddymotion/estimator.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index c81e54c6..648bd12f 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -93,8 +93,6 @@ def fit(
         if "num_threads" not in align_kwargs and omp_nthreads is not None:
             align_kwargs["num_threads"] = omp_nthreads
 
-        orig = dwdata
-
         n_iter = len(models)
         for i_iter, model in enumerate(models):
             reg_target_type = (
@@ -153,8 +151,8 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        dwframe = np.asanyarray(orig.dataobj[..., i])
-                        bframe = np.asanyarray(orig.gradients[..., i])
+                        dwframe = np.asanyarray(dwdata.dataobj[..., i])
+                        bframe = np.asanyarray(dwdata.gradients[..., i])
                         data_train, data_test = logo_split(
                             dwdata, dwframe, bframe, i, with_b0=True
                         )

From 21406737ba52ba03c9d976349946bdf49df884e2 Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Thu, 15 Dec 2022 17:24:50 +0100
Subject: [PATCH 12/34] enh: move logo_split to new submodule

---
 src/eddymotion/data/dmri.py      | 49 ---------------------
 src/eddymotion/data/splitting.py | 73 ++++++++++++++++++++++++++++++++
 src/eddymotion/estimator.py      |  2 +-
 test/test_model.py               |  3 +-
 4 files changed, 76 insertions(+), 51 deletions(-)
 create mode 100644 src/eddymotion/data/splitting.py

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index f7ea6571..0da7ad14 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -39,55 +39,6 @@ def _data_repr(value):
     return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
 
 
-def logo_split(dwdata, dwframe, bframe, index, with_b0=False):
-    """
-    Produce one fold of LOGO (leave-one-gradient-out).
-
-    Parameters
-    ----------
-    dwdata : :obj:`DWI`
-        DWI object
-    index : :obj:`int`
-        Index of the DWI orientation to be left out in this fold.
-    with_b0 : :obj:`bool`
-        Insert the *b=0* reference at the beginning of the training dataset.
-
-    Returns
-    -------
-    (train_data, train_gradients) : :obj:`tuple`
-        Training DWI and corresponding gradients.
-        Training data/gradients come **from the updated dataset**.
-    (test_data, test_gradients) :obj:`tuple`
-        Test 3D map (one DWI orientation) and corresponding b-vector/value.
-        The test data/gradient come **from the original dataset**.
-
-    """
-
-    # if the size of the mask does not match data, cache is stale
-    mask = np.zeros(len(dwdata), dtype=bool)
-    mask[index] = True
-
-    train_data = dwdata.dataobj[..., ~mask]
-    train_gradients = dwdata.gradients[..., ~mask]
-
-    if with_b0:
-        train_data = np.concatenate(
-            (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
-            axis=-1,
-        )
-        b0vec = np.zeros((4, 1))
-        b0vec[0, 0] = 1
-        train_gradients = np.concatenate(
-            (b0vec, train_gradients),
-            axis=-1,
-        )
-
-    return (
-        (train_data, train_gradients),
-        (dwframe, bframe),
-    )
-
-
 @attr.s(slots=True)
 class DWI:
     """Data representation structure for dMRI data."""
diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
new file mode 100644
index 00000000..90ed00b9
--- /dev/null
+++ b/src/eddymotion/data/splitting.py
@@ -0,0 +1,73 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+#
+# Copyright 2022 The NiPreps Developers <nipreps@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# We support and encourage derived works from this project, please read
+# about our expectations at
+#
+#     https://www.nipreps.org/community/licensing/
+#
+"""Data splitting helpers."""
+import numpy as np
+
+
+def lovo_split(dwdata, dwframe, bframe, index, with_b0=False):
+    """
+    Produce one fold of LOVO (leave-one-volume-out).
+
+    Parameters
+    ----------
+    dwdata : :obj:`DWI`
+        DWI object
+    index : :obj:`int`
+        Index of the DWI orientation to be left out in this fold.
+    with_b0 : :obj:`bool`
+        Insert the *b=0* reference at the beginning of the training dataset.
+
+    Returns
+    -------
+    (train_data, train_gradients) : :obj:`tuple`
+        Training DWI and corresponding gradients.
+        Training data/gradients come **from the updated dataset**.
+    (test_data, test_gradients) :obj:`tuple`
+        Test 3D map (one DWI orientation) and corresponding b-vector/value.
+        The test data/gradient come **from the original dataset**.
+
+    """
+
+    # if the size of the mask does not match data, cache is stale
+    mask = np.zeros(len(dwdata), dtype=bool)
+    mask[index] = True
+
+    train_data = dwdata.dataobj[..., ~mask]
+    train_gradients = dwdata.gradients[..., ~mask]
+
+    if with_b0:
+        train_data = np.concatenate(
+            (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
+            axis=-1,
+        )
+        b0vec = np.zeros((4, 1))
+        b0vec[0, 0] = 1
+        train_gradients = np.concatenate(
+            (b0vec, train_gradients),
+            axis=-1,
+        )
+
+    return (
+        (train_data, train_gradients),
+        (dwframe, bframe),
+    )
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 648bd12f..ad49454e 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -33,7 +33,7 @@
 from pkg_resources import resource_filename as pkg_fn
 from tqdm import tqdm
 
-from eddymotion.dmri import logo_split
+from eddymotion.data.splitting import lovo_split as logo_split
 from eddymotion.model import ModelFactory
 
 
diff --git a/test/test_model.py b/test/test_model.py
index 4b3187d1..12801eaf 100644
--- a/test/test_model.py
+++ b/test/test_model.py
@@ -25,6 +25,7 @@
 import pytest
 
 from eddymotion import model
+from eddymotion.data.splitting import lovo_split
 from eddymotion.data.dmri import DWI
 
 
@@ -94,7 +95,7 @@ def test_two_initialisations(datadir):
     dmri_dataset = DWI.from_filename(datadir / "dwi.h5")
 
     # Split data into test and train set
-    data_train, data_test = dmri_dataset.logo_split(10)
+    data_train, data_test = lovo_split(dmri_dataset, 10)
 
     # Direct initialisation
     model1 = model.AverageDWModel(

From 374864adf57cc312d690778395e32eb1b6e10ff8 Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:13:44 -0800
Subject: [PATCH 13/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 90ed00b9..f4e5c5cc 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -24,7 +24,7 @@
 import numpy as np
 
 
-def lovo_split(dwdata, dwframe, bframe, index, with_b0=False):
+def lovo_split(data, index):
     """
     Produce one fold of LOVO (leave-one-volume-out).
 

From bc845d927c61bc3b908a77ae44def479e8b5090e Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:14:08 -0800
Subject: [PATCH 14/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index f4e5c5cc..ab798a8f 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -52,7 +52,7 @@ def lovo_split(data, index):
     mask = np.zeros(len(dwdata), dtype=bool)
     mask[index] = True
 
-    train_data = dwdata.dataobj[..., ~mask]
+    train_data = data.dataobj[..., ~mask]
     train_gradients = dwdata.gradients[..., ~mask]
 
     if with_b0:

From 6191fa16845600ca4dae1ea17b244924a63c83de Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:14:16 -0800
Subject: [PATCH 15/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index ab798a8f..0afb9c00 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -53,7 +53,7 @@ def lovo_split(data, index):
     mask[index] = True
 
     train_data = data.dataobj[..., ~mask]
-    train_gradients = dwdata.gradients[..., ~mask]
+    train_gradients = data.gradients[..., ~mask]
 
     if with_b0:
         train_data = np.concatenate(

From 175d1dddd538b7e0c30550ac7c689ec7f1dae39b Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:14:28 -0800
Subject: [PATCH 16/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 0afb9c00..e93a90b5 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -69,5 +69,5 @@ def lovo_split(data, index):
 
     return (
         (train_data, train_gradients),
-        (dwframe, bframe),
+        (data.dataobj[..., mask], data.gradients[..., mask]),
     )

From cb19fb1bdb8914842e2aa81df5ab3cfcf063b2a0 Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:14:40 -0800
Subject: [PATCH 17/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index e93a90b5..5cd73122 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -30,7 +30,7 @@ def lovo_split(data, index):
 
     Parameters
     ----------
-    dwdata : :obj:`DWI`
+    data : :obj:`eddymotion.data.dmri.DWI`
         DWI object
     index : :obj:`int`
         Index of the DWI orientation to be left out in this fold.

From 64ccc58e043ba8dc577cac7d0baa93f361e06dad Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:14:54 -0800
Subject: [PATCH 18/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 5cd73122..9458db56 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -34,8 +34,6 @@ def lovo_split(data, index):
         DWI object
     index : :obj:`int`
         Index of the DWI orientation to be left out in this fold.
-    with_b0 : :obj:`bool`
-        Insert the *b=0* reference at the beginning of the training dataset.
 
     Returns
     -------

From 88bf1dcf905f26321fedae194a70c18f270c1709 Mon Sep 17 00:00:00 2001
From: teresamg <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:15:33 -0800
Subject: [PATCH 19/34] Update src/eddymotion/data/splitting.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 9458db56..d6229575 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -53,18 +53,6 @@ def lovo_split(data, index):
     train_data = data.dataobj[..., ~mask]
     train_gradients = data.gradients[..., ~mask]
 
-    if with_b0:
-        train_data = np.concatenate(
-            (np.asanyarray(dwdata.bzero)[..., np.newaxis], train_data),
-            axis=-1,
-        )
-        b0vec = np.zeros((4, 1))
-        b0vec[0, 0] = 1
-        train_gradients = np.concatenate(
-            (b0vec, train_gradients),
-            axis=-1,
-        )
-
     return (
         (train_data, train_gradients),
         (data.dataobj[..., mask], data.gradients[..., mask]),

From a3a3f1b719bdc3c1986edf1f6f18ade57393f9ea Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:28:39 -0800
Subject: [PATCH 20/34] Fixed logo_split() call and dwdata->data

---
 src/eddymotion/data/splitting.py | 2 +-
 src/eddymotion/estimator.py      | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index d6229575..aec47059 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -47,7 +47,7 @@ def lovo_split(data, index):
     """
 
     # if the size of the mask does not match data, cache is stale
-    mask = np.zeros(len(dwdata), dtype=bool)
+    mask = np.zeros(len(data), dtype=bool)
     mask[index] = True
 
     train_data = data.dataobj[..., ~mask]
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index ad49454e..b864c266 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -151,11 +151,7 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        dwframe = np.asanyarray(dwdata.dataobj[..., i])
-                        bframe = np.asanyarray(dwdata.gradients[..., i])
-                        data_train, data_test = logo_split(
-                            dwdata, dwframe, bframe, i, with_b0=True
-                        )
+                        data_train, data_test = logo_split(dwdata, i)
 
                         grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
                         pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")

From f18029eb7db820cdcd045c750e2a1c87a9db27ea Mon Sep 17 00:00:00 2001
From: Teresa Gomez <46339554+teresamg@users.noreply.github.com>
Date: Thu, 15 Dec 2022 14:42:56 -0800
Subject: [PATCH 21/34] Updated set_transform(), removed pbar grad_str text

---
 src/eddymotion/data/dmri.py | 5 ++++-
 src/eddymotion/estimator.py | 3 ---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index 0da7ad14..bfc4c788 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -83,7 +83,7 @@ def set_data(self):
         with h5py.File(self._filepath, "r") as in_file:
             self._root = in_file["/0"]
 
-    def set_transform(self, dwframe, bvec, index, affine, order=3):
+    def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
             shape=self.dataobj.shape[:3], affine=self.affine
@@ -96,6 +96,9 @@ def set_transform(self, dwframe, bvec, index, affine, order=3):
         else:
             xform = Affine(matrix=affine, reference=reference)
 
+        dwframe = np.asanyarray(self.dataobj[..., index])
+        bvec = np.asanyarray(self.gradients[:3, index])
+
         dwmoving = nb.Nifti1Image(dwframe, self.affine, None)
 
         # resample and update orientation at index
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index b864c266..57cc4cd6 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -153,9 +153,6 @@ def fit(
                         )
                         data_train, data_test = logo_split(dwdata, i)
 
-                        grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
-                        pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")
-
                         if not single_model:  # A true LOGO estimator
                             if hasattr(dwdata, "gradients"):
                                 kwargs["gtab"] = data_train[1]

From a8538bf988da406d635ad810647c7b67289e0f03 Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Wed, 27 Mar 2024 11:59:09 +0100
Subject: [PATCH 22/34] fix: revise merge conflicts and get ready for final
 revision

---
 src/eddymotion/data/dmri.py | 19 ++++++++-----------
 src/eddymotion/estimator.py |  1 -
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index bfc4c788..f84767fe 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -74,15 +74,6 @@ def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
 
-    def set_data(self):
-        # Generate dwframe and bframe
-        if not Path(self._filepath).exists():
-            self.to_filename(self._filepath)
-
-        # read original DWI data & b-vector
-        with h5py.File(self._filepath, "r") as in_file:
-            self._root = in_file["/0"]
-
     def set_transform(self, index, affine, order=3):
         """Set an affine, and update data object and gradients."""
         reference = namedtuple("ImageGrid", ("shape", "affine"))(
@@ -96,8 +87,14 @@ def set_transform(self, index, affine, order=3):
         else:
             xform = Affine(matrix=affine, reference=reference)
 
-        dwframe = np.asanyarray(self.dataobj[..., index])
-        bvec = np.asanyarray(self.gradients[:3, index])
+        if not Path(self._filepath).exists():
+            self.to_filename(self._filepath)
+
+        # read original DWI data & b-vector
+        with h5py.File(self._filepath, "r") as in_file:
+            root = in_file["/0"]
+            dwframe = np.asanyarray(root["dataobj"][..., index])
+            bvec = np.asanyarray(root["gradients"][:3, index])
 
         dwmoving = nb.Nifti1Image(dwframe, self.affine, None)
 
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 57cc4cd6..1ad42d13 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -214,7 +214,6 @@ def fit(
                                     matrix=dwdata.em_affines[i], reference=reference
                                 )
                             mat_file = tmp_dir / f"init_{i_iter}_{i:05d}.mat"
-
                             initial_xform.to_filename(mat_file, fmt="itk")
                             registration.inputs.initial_moving_transform = str(mat_file)
 

From 542caa887cee4cd774e6f46ff924d640bca56278 Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Wed, 27 Mar 2024 12:14:57 +0100
Subject: [PATCH 23/34] fix: revert accidental removal of two lines

---
 src/eddymotion/estimator.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 1ad42d13..b1aeb583 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -152,6 +152,8 @@ def fit(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
                         data_train, data_test = logo_split(dwdata, i)
+                        grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
+                        pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")
 
                         if not single_model:  # A true LOGO estimator
                             if hasattr(dwdata, "gradients"):

From 82a557e654725c10cd21f8d72d957d5e0d68a8cc Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Wed, 27 Mar 2024 13:15:17 +0100
Subject: [PATCH 24/34] Apply suggestions from code review

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index b1aeb583..79852618 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -33,7 +33,7 @@
 from pkg_resources import resource_filename as pkg_fn
 from tqdm import tqdm
 
-from eddymotion.data.splitting import lovo_split as logo_split
+from eddymotion.data.splitting import lovo_split
 from eddymotion.model import ModelFactory
 
 
@@ -151,7 +151,7 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        data_train, data_test = logo_split(dwdata, i)
+                        data_train, data_test = lovo_split(dwdata, i)
                         grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
                         pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")
 

From d45ec01fca08573ace18e7413c92c00ecbabc65f Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Wed, 27 Mar 2024 14:35:56 +0100
Subject: [PATCH 25/34] fix: Restore access to HDF5 file

---
 src/eddymotion/data/dmri.py      |  4 ++++
 src/eddymotion/data/splitting.py | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/eddymotion/data/dmri.py b/src/eddymotion/data/dmri.py
index f84767fe..884ca953 100644
--- a/src/eddymotion/data/dmri.py
+++ b/src/eddymotion/data/dmri.py
@@ -70,6 +70,10 @@ class DWI:
     )
     """A path to an HDF5 file to store the whole dataset."""
 
+    def get_filename(self):
+        """Get the filepath of the HDF5 file."""
+        return self._filepath
+
     def __len__(self):
         """Obtain the number of high-*b* orientations."""
         return self.dataobj.shape[-1]
diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index aec47059..90e179ab 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -21,7 +21,9 @@
 #     https://www.nipreps.org/community/licensing/
 #
 """Data splitting helpers."""
+from pathlib import Path
 import numpy as np
+import h5py
 
 
 def lovo_split(data, index):
@@ -46,14 +48,23 @@ def lovo_split(data, index):
 
     """
 
+    if not Path(data.get_filename()).exists():
+        data.to_filename(data.get_filename())
+    
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(data), dtype=bool)
     mask[index] = True
 
+    # read original DWI data & b-vector
+    with h5py.File(data.get_filename(), "r") as in_file:
+        root = in_file["/0"]
+        dwframe = np.asanyarray(root["dataobj"][..., mask])
+        bframe = np.asanyarray(root["gradients"][..., mask])
+
     train_data = data.dataobj[..., ~mask]
     train_gradients = data.gradients[..., ~mask]
 
     return (
         (train_data, train_gradients),
-        (data.dataobj[..., mask], data.gradients[..., mask]),
+        (dwframe, bframe),
     )

From e7761559752c521d225e76c31b71f345871cdd2d Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Wed, 27 Mar 2024 14:50:12 +0100
Subject: [PATCH 26/34] fix: Restore b0 argument

---
 src/eddymotion/data/splitting.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 90e179ab..e1ae9a47 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -26,7 +26,7 @@
 import h5py
 
 
-def lovo_split(data, index):
+def lovo_split(data, index, with_b0=False):
     """
     Produce one fold of LOVO (leave-one-volume-out).
 
@@ -50,7 +50,7 @@ def lovo_split(data, index):
 
     if not Path(data.get_filename()).exists():
         data.to_filename(data.get_filename())
-    
+
     # if the size of the mask does not match data, cache is stale
     mask = np.zeros(len(data), dtype=bool)
     mask[index] = True
@@ -64,6 +64,18 @@ def lovo_split(data, index):
     train_data = data.dataobj[..., ~mask]
     train_gradients = data.gradients[..., ~mask]
 
+    if with_b0:
+        train_data = np.concatenate(
+            (np.asanyarray(data.bzero)[..., np.newaxis], train_data),
+            axis=-1,
+        )
+        b0vec = np.zeros((4, 1))
+        b0vec[0, 0] = 1
+        train_gradients = np.concatenate(
+            (b0vec, train_gradients),
+            axis=-1,
+        )
+
     return (
         (train_data, train_gradients),
         (dwframe, bframe),

From b4a18ad6863ff59ce0e04f33a9522475c7f44c90 Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Wed, 27 Mar 2024 15:03:23 +0100
Subject: [PATCH 27/34] fix: typos

---
 src/eddymotion/data/splitting.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index e1ae9a47..adb88d28 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -51,15 +51,15 @@ def lovo_split(data, index, with_b0=False):
     if not Path(data.get_filename()).exists():
         data.to_filename(data.get_filename())
 
-    # if the size of the mask does not match data, cache is stale
-    mask = np.zeros(len(data), dtype=bool)
-    mask[index] = True
-
     # read original DWI data & b-vector
     with h5py.File(data.get_filename(), "r") as in_file:
         root = in_file["/0"]
-        dwframe = np.asanyarray(root["dataobj"][..., mask])
-        bframe = np.asanyarray(root["gradients"][..., mask])
+        dwframe = np.asanyarray(root["dataobj"][..., index])
+        bframe = np.asanyarray(root["gradients"][..., index])
+
+    # if the size of the mask does not match data, cache is stale
+    mask = np.zeros(len(data), dtype=bool)
+    mask[index] = True
 
     train_data = data.dataobj[..., ~mask]
     train_gradients = data.gradients[..., ~mask]

From 3a96d51feb616722034036702036b4db14327046 Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 12:12:18 +0100
Subject: [PATCH 28/34] Add: test for lovo_split

---
 test/test_splitting.py | 63 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 test/test_splitting.py

diff --git a/test/test_splitting.py b/test/test_splitting.py
new file mode 100644
index 00000000..de5707b4
--- /dev/null
+++ b/test/test_splitting.py
@@ -0,0 +1,63 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+#
+# Copyright 2021 The NiPreps Developers <nipreps@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# We support and encourage derived works from this project, please read
+# about our expectations at
+#
+#     https://www.nipreps.org/community/licensing/
+#
+"""Unit test testing the lovo_split function."""
+import pytest
+import numpy as np
+from eddymotion.data.dmri import DWI
+from eddymotion.data.splitting import lovo_split
+
+
+def test_lovo_split(datadir):
+    """
+    Test the lovo_split function.
+
+    Parameters:
+    - datadir: The directory containing the test data.
+
+    Returns:
+    None
+    """
+    data = DWI.from_filename(datadir / "dwi.h5")
+
+    # Set zeros in dataobj and gradients of the dwi object
+    data.dataobj[:] = 0
+    data.gradients[:] = 0
+
+    # Select a random index
+    index = np.random.randint(len(data))
+
+    # Set 1 in dataobj and gradients of the dwi object at this specific index
+    data.dataobj[..., index] = 1
+    data.gradients[..., index] = 1
+
+    # Apply the lovo_split function at the specified index
+    (train_data, train_gradients), \
+        (test_data, test_gradients) = lovo_split(data, index)
+
+    # Check if the test data contains only 1s
+    # and the train data contains only 0s after the split
+    assert np.all(test_data == 1)
+    assert np.all(train_data == 0)
+    assert np.all(test_gradients == 1)
+    assert np.all(train_gradients == 0)
+

From 1d6762c82bc8544774029354ed05e831cb8b22f5 Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 12:14:26 +0100
Subject: [PATCH 29/34] Fix: remove unused import

---
 test/test_splitting.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_splitting.py b/test/test_splitting.py
index de5707b4..155a8794 100644
--- a/test/test_splitting.py
+++ b/test/test_splitting.py
@@ -21,7 +21,6 @@
 #     https://www.nipreps.org/community/licensing/
 #
 """Unit test testing the lovo_split function."""
-import pytest
 import numpy as np
 from eddymotion.data.dmri import DWI
 from eddymotion.data.splitting import lovo_split

From bcff16d15195a16fa9344312df95c5186491d98d Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 13:23:04 +0100
Subject: [PATCH 30/34] Apply suggestions from code review

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/data/splitting.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index adb88d28..b8f5db01 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -26,13 +26,13 @@
 import h5py
 
 
-def lovo_split(data, index, with_b0=False):
+def lovo_split(dataset, index, with_b0=False):
     """
     Produce one fold of LOVO (leave-one-volume-out).
 
     Parameters
     ----------
-    data : :obj:`eddymotion.data.dmri.DWI`
+    dataset : :obj:`eddymotion.data.dmri.DWI`
         DWI object
     index : :obj:`int`
         Index of the DWI orientation to be left out in this fold.
@@ -48,25 +48,25 @@ def lovo_split(data, index, with_b0=False):
 
     """
 
-    if not Path(data.get_filename()).exists():
-        data.to_filename(data.get_filename())
+    if not Path(dataset.get_filename()).exists():
+        dataset.to_filename(data.get_filename())
 
     # read original DWI data & b-vector
-    with h5py.File(data.get_filename(), "r") as in_file:
+    with h5py.File(dataset.get_filename(), "r") as in_file:
         root = in_file["/0"]
-        dwframe = np.asanyarray(root["dataobj"][..., index])
-        bframe = np.asanyarray(root["gradients"][..., index])
+        data = np.asanyarray(root["dataobj"])
+        gradients = np.asanyarray(root["gradients"])
 
     # if the size of the mask does not match data, cache is stale
-    mask = np.zeros(len(data), dtype=bool)
+    mask = np.zeros(data.shape[-1], dtype=bool)
     mask[index] = True
 
-    train_data = data.dataobj[..., ~mask]
-    train_gradients = data.gradients[..., ~mask]
+    train_data = data[..., ~mask]
+    train_gradients = gradients[..., ~mask]
 
     if with_b0:
         train_data = np.concatenate(
-            (np.asanyarray(data.bzero)[..., np.newaxis], train_data),
+            (np.asanyarray(dataset.bzero)[..., np.newaxis], train_data),
             axis=-1,
         )
         b0vec = np.zeros((4, 1))

From 316eb57f5b524fdfed84e6c58205e7e6ee7542bb Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 13:43:50 +0100
Subject: [PATCH 31/34] Fix: return test data and gradient

---
 src/eddymotion/data/splitting.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index b8f5db01..b6f9e834 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -62,7 +62,9 @@ def lovo_split(dataset, index, with_b0=False):
     mask[index] = True
 
     train_data = data[..., ~mask]
-    train_gradients = gradients[..., ~mask]
+    train_gradients = gradients[..., mask]
+    test_data = data[..., ~mask]
+    test_gradients = gradients[..., mask]
 
     if with_b0:
         train_data = np.concatenate(
@@ -78,5 +80,5 @@ def lovo_split(dataset, index, with_b0=False):
 
     return (
         (train_data, train_gradients),
-        (dwframe, bframe),
+        (test_data, test_gradients),
     )

From 421763544e93b4acfc294febdc58021122581925 Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 13:45:12 +0100
Subject: [PATCH 32/34] Fix: typo

---
 src/eddymotion/data/splitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index b6f9e834..253cb035 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -49,7 +49,7 @@ def lovo_split(dataset, index, with_b0=False):
     """
 
     if not Path(dataset.get_filename()).exists():
-        dataset.to_filename(data.get_filename())
+        dataset.to_filename(dataset.get_filename())
 
     # read original DWI data & b-vector
     with h5py.File(dataset.get_filename(), "r") as in_file:

From 14cd4d05eeeb09ed541264899043e42bc4382b7d Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 14:32:05 +0100
Subject: [PATCH 33/34] Fix: masking

---
 src/eddymotion/data/splitting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/eddymotion/data/splitting.py b/src/eddymotion/data/splitting.py
index 253cb035..7d7ff8b4 100644
--- a/src/eddymotion/data/splitting.py
+++ b/src/eddymotion/data/splitting.py
@@ -62,8 +62,8 @@ def lovo_split(dataset, index, with_b0=False):
     mask[index] = True
 
     train_data = data[..., ~mask]
-    train_gradients = gradients[..., mask]
-    test_data = data[..., ~mask]
+    train_gradients = gradients[..., ~mask]
+    test_data = data[..., mask]
     test_gradients = gradients[..., mask]
 
     if with_b0:

From 9591b818ede1b4f8571a783cd8b93c2464e8ee34 Mon Sep 17 00:00:00 2001
From: esavary <elodie.savary@outlook.com>
Date: Thu, 28 Mar 2024 15:00:10 +0100
Subject: [PATCH 34/34] Update src/eddymotion/estimator.py

Co-authored-by: Oscar Esteban <code@oscaresteban.es>
---
 src/eddymotion/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 79852618..1cafb233 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -151,7 +151,7 @@ def fit(
                         pbar.set_description_str(
                             f"Pass {i_iter + 1}/{n_iter} | Fit and predict b-index <{i}>"
                         )
-                        data_train, data_test = lovo_split(dwdata, i)
+                        data_train, data_test = lovo_split(dwdata, i, with_b0=True)
                         grad_str = f"{i}, {data_test[1][:3]}, b={int(data_test[1][3])}"
                         pbar.set_description_str(f"[{grad_str}], {n_jobs} jobs")