MRG: implement ResidualizeTarget #11

Open
wants to merge 6 commits into base: master
Changes from 3 commits
4 changes: 4 additions & 0 deletions confounds/__init__.py
@@ -18,3 +18,7 @@
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions


from .tests import (test_residualize_linear,
test_residualize_targets_linear)
105 changes: 103 additions & 2 deletions confounds/base.py
@@ -293,12 +293,113 @@ class ResidualizeTarget(BaseDeconfound):
"""


def __init__(self):
def __init__(self, model='linear'):
"""Constructor"""

super().__init__(name='ResidualizeTarget')

raise NotImplementedError()
self.model = model


def fit(self,
X, # variable names chosen to correspond to sklearn when possible
y=None, # y is the confound variables here, not the target!
):
"""
Fits the residualizing model, estimating the contributions of the
confounding variables (y) to the given [training] target set X. The
variable names X and y follow scikit-learn conventions: y here refers
to the confound variables, not the prediction target. See the examples
in the docs.

Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_targets)
The training input samples.
Owner: make it clear X is targets here, although it is called X

y : ndarray
Array of covariates, shape (n_samples, n_covariates)
This does not refer to target as is typical in scikit-learn.

Returns
-------
self : object
Returns self
"""

return self._fit(X, y) # which itself must return self


def _fit(self, in_targets, confounds=None):
"""Actual fit method"""

in_targets = check_array(in_targets)
confounds = check_array(confounds, ensure_2d=False)

Owner: Add checks for data type to ensure it is numeric
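A minimal sketch of the kind of check being requested, using numpy dtype inspection (whether to raise or to coerce is an open design choice; this is not part of the PR):

if not (np.issubdtype(in_targets.dtype, np.number)
        and np.issubdtype(confounds.dtype, np.number)):
    # reject non-numeric inputs before fitting the regression model
    raise TypeError('in_targets and confounds must be numeric arrays')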

# turn it into 2D, in case it is just a single column
if confounds.ndim == 1:
confounds = confounds[:, np.newaxis]

try:
    check_consistent_length(in_targets, confounds)
except ValueError as exc:
    raise ValueError('X (targets) and y (confounds) '
                     'must have the same number of rows/samplets!') from exc

self.n_targets_ = in_targets.shape[1]

regr_model = clone(get_model(self.model))
regr_model.fit(confounds, in_targets)
self.model_ = regr_model

return self


def transform(self, X, y=None):
"""
Transforms the given [test] targets by residualizing them, i.e. by
subtracting the contributions estimated for their confounding variables.

The variable names X and y follow scikit-learn conventions: y here
refers to the confound variables for the [test] data to be transformed.
See the examples in the docs.

Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_targets)
The [test] target values to be deconfounded (X holds targets here, not features).
y : ndarray
Array of covariates, shape (n_samples, n_covariates)
This does not refer to target as is typical in scikit-learn.

Returns
-------
deconfounded : ndarray, shape (n_samples, n_targets)
    Residualized (deconfounded) targets
"""

return self._transform(X, y)


def _transform(self, test_targets, test_confounds):
"""Actual deconfounding of the test targets"""

check_is_fitted(self, ('model_', 'n_targets_'))
test_targets = check_array(test_targets, accept_sparse=True)

Owner: add another data type check

if test_targets.shape[1] != self.n_targets_:
raise ValueError('number of targets must be {}. Given {}'
''.format(self.n_targets_, test_targets.shape[1]))

if test_confounds is None: # during estimator checks
return test_targets # do nothing

test_confounds = check_array(test_confounds, ensure_2d=False)
check_consistent_length(test_targets, test_confounds)

# the portion of the test targets that can be explained/predicted by their covariates
test_target_predicted = self.model_.predict(test_confounds)
residuals = test_targets - test_target_predicted

return residuals


class DummyDeconfounding(BaseDeconfound):
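To make the intended API concrete, here is a minimal usage sketch of ResidualizeTarget as implemented above (the synthetic data and seed are illustrative assumptions, not part of the PR):

import numpy as np
from confounds.base import ResidualizeTarget

rng = np.random.default_rng(0)
n_samples, n_targets, n_confounds = 50, 3, 4

targets = rng.normal(size=(n_samples, n_targets))      # "X": the targets to deconfound
confounds = rng.normal(size=(n_samples, n_confounds))  # "y": the confounding covariates

resid = ResidualizeTarget(model='linear')
resid.fit(targets, confounds)                  # estimates confound -> target contributions
deconf_targets = resid.transform(targets, confounds)

# with the linear model, the residualized targets are orthogonal to the confounds
print(np.allclose(deconf_targets.T @ confounds, 0, atol=1e-6))

Note the reversed roles relative to scikit-learn: the confounds are passed where y normally goes.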
24 changes: 23 additions & 1 deletion confounds/tests/test_confounds.py
@@ -8,7 +8,8 @@
from sklearn.datasets import make_classification, make_sparse_uncorrelated
from sklearn.utils.estimator_checks import check_estimator

from confounds.base import Augment, DummyDeconfounding, Residualize
from confounds.base import (Augment, DummyDeconfounding,
Residualize, ResidualizeTarget)


def test_estimator_API():
@@ -62,6 +63,27 @@ def test_residualize_linear():
assert_almost_equal(residual_train_X.T.dot(train_confounds), 0)


def test_residualize_targets_linear():
"""sanity checks on implementation"""

min_dim = 6  # at least 4 features are required by make_sparse_uncorrelated
max_dim = 100
for n_samples in np.random.randint(0, 20, 1):
for num_confounds in np.random.randint(min_dim, max_dim, 3):
train_all, _ = make_sparse_uncorrelated(
    n_samples=n_samples, n_features=min_dim + num_confounds + 1)
Owner: this does not generate discrete numerical/integer values! We need a way to broaden the test with multiple data types!

Author: a bit confused here, does this look better? np.random.randint(0, 20, 1, dtype=int)
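For context: the dtype argument to np.random.randint above only changes how the sample count is drawn, not the feature values; make_sparse_uncorrelated still returns continuous floats. One way to broaden the test to integer-valued data (a sketch only; variable names follow the test) might be:

# discretize the generated features to get an integer-valued variant
train_all_int = np.round(train_all).astype(int)
# or generate discrete covariates directly
rng = np.random.default_rng(0)
discrete_confounds = rng.integers(0, 5, size=(n_samples, num_confounds))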

Owner: before updating the tests, how about you try making a jupyter notebook, and play with this class, and write some examples, to see how it works?

Author: hey @raamana sorry for the late response. this summer has been a bit crazy. probably need more time-out here. i noticed that some repos have been snapshotted in Arctic Code Vault, i guess it didn't snapshot PR :p

Owner: NP, take your time. Happy to chat with you to discuss this and make a more precise plan to finish it quickly. Why are you referencing the Arctic Code Vault? Did you lose your fork or what?

Author: thanks! i'm wondering if the snapshot in Arctic Code Vault would include PR? and no, i didn't lose forks.

Owner: I don't know - but I am sure your contribution will be clearly recognized :)

Owner: hi @zuxfoucault, would you have time this year to work on this?

Author: Hi, sorry I just saw this.


train_y, train_confounds = splitter_X_confounds(train_all, num_confounds)

resid = ResidualizeTarget(model='linear')
resid.fit(train_y, train_confounds)

residual_train_y = resid.transform(train_y, train_confounds)

# residual_train_y and train_confounds must be orthogonal now!
assert_almost_equal(residual_train_y.T.dot(train_confounds), 0)
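This assertion relies on the least-squares normal equations: the residual of an OLS fit is orthogonal to every regressor column. A quick standalone check with plain numpy (a sketch, independent of the class under test):

import numpy as np

rng = np.random.default_rng(42)
C = rng.normal(size=(30, 4))                # stand-in confounds / regressors
T = rng.normal(size=(30, 2))                # stand-in targets
beta, *_ = np.linalg.lstsq(C, T, rcond=None)
resid = T - C @ beta
print(np.allclose(C.T @ resid, 0))          # True: C.T @ (T - C @ beta) = 0 by construction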


def test_method_does_not_introduce_bias():
"""
Test to ensure any deconfounding method does NOT introduce bias in a sample