Skip to content

Commit

Permalink
Feature selection (#59)
Browse files Browse the repository at this point in the history
* change module name to feature_selection

* add range_cut_selector

* update tests for rangecut selector

* refactor range cut selector

* Add index selector

* test index selector

* Remove old range_cut and select_features

* update deprecated np.matrix with standard np.array, and adjust code accordingly

* rename functions in feature selection to follow sklearn
  • Loading branch information
paucablop authored Nov 17, 2023
1 parent d163681 commit 84a6b62
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 167 deletions.
6 changes: 3 additions & 3 deletions chemotools/baseline/air_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,14 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray:
return X_.reshape(-1, 1) if X_.ndim == 1 else X_

def _calculate_whittaker_smooth(self, x, w):
X = np.matrix(x)
X = np.array(x)
m = X.size
E = eye(m, format="csc")
for i in range(self.polynomial_order):
E = E[1:] - E[:-1]
W = diags(w, 0, shape=(m, m))
A = csc_matrix(W + (self.lam * E.T * E))
B = csc_matrix(W * X.T)
A = csc_matrix(W + (self.lam * E.T @ E))
B = csc_matrix(W @ X.T).toarray().ravel()
background = spsolve(A, B)
return np.array(background)

Expand Down
2 changes: 2 additions & 0 deletions chemotools/feature_selection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from ._index_selector import IndexSelector
from ._range_cut import RangeCut
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.base import BaseEstimator
from sklearn.feature_selection._base import SelectorMixin

from sklearn.utils.validation import check_is_fitted

from chemotools.utils.check_inputs import check_input


class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
class IndexSelector(BaseEstimator, SelectorMixin):
"""
A transformer that Selects the spectral data to a specified array of features. This
array can be continuous or discontinuous. The array of features is specified by:
Expand Down Expand Up @@ -52,7 +54,7 @@ def __init__(
self.features = features
self.wavenumbers = wavenumbers

def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":
def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
"""
Fit the transformer to the input data.
Expand All @@ -66,14 +68,11 @@ def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":
Returns
-------
self : SelectFeatures
self : IndexSelector
The fitted transformer.
"""
# Check that X is a 2D array and has only finite values
X = check_input(X)

# Set the number of features
self.n_features_in_ = X.shape[1]
# validate that X is a 2D array and has only finite values
X = self._validate_data(X)

# Set the fitted attribute to True
self._is_fitted = True
Expand All @@ -91,41 +90,23 @@ def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":

return self

def transform(self, X: np.ndarray, y=None) -> np.ndarray:
def _get_support_mask(self):
"""
Transform the input data by cutting it to the specified range.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data to transform.
y : None
Ignored.
Get the boolean mask indicating which features are selected.
Returns
-------
X_ : np.ndarray of shape (n_samples, n_features)
The transformed data.
mask : ndarray of shape (n_features,)
The mask indicating the selected features.
"""
# Check that the estimator is fitted
check_is_fitted(self, "_is_fitted")
check_is_fitted(self)

# Check that X is a 2D array and has only finite values
X = check_input(X)
X_ = X.copy()

# Check that the number of features is the same as the fitted data
if X_.shape[1] != self.n_features_in_:
raise ValueError(
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
)

# Select the features
if self.features is None:
return X_
# Create the mask
mask = np.zeros(self.n_features_in_, dtype=bool)
mask[self.features_index_] = True

return X_[:, self.features_index_]
return mask

def _find_index(self, target: float) -> int:
if self.wavenumbers is None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.base import BaseEstimator
from sklearn.feature_selection._base import SelectorMixin
from sklearn.utils.validation import check_is_fitted

from chemotools.utils.check_inputs import check_input


class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
class RangeCut(BaseEstimator, SelectorMixin):
"""
A transformer that cuts the input data to a specified range. The range is specified:
A selector that cuts the input data to a specified range. The range is specified:
- by the indices of the start and end of the range,
- by the wavenumbers of the start and end of the range. In this case, the wavenumbers
must be provided to the transformer when it is initialised. If the wavenumbers
Expand Down Expand Up @@ -35,19 +34,11 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
end_index_ : int
The index of the end of the range. It is -1 if the wavenumbers are not provided.
n_features_in_ : int
The number of features in the input data.
_is_fitted : bool
Whether the transformer has been fitted to data.
Methods
-------
fit(X, y=None)
Fit the transformer to the input data.
transform(X, y=0, copy=True)
Transform the input data by cutting it to the specified range.
"""

def __init__(
Expand Down Expand Up @@ -78,13 +69,7 @@ def fit(self, X: np.ndarray, y=None) -> "RangeCut":
The fitted transformer.
"""
# Check that X is a 2D array and has only finite values
X = check_input(X)

# Set the number of features
self.n_features_in_ = X.shape[1]

# Set the fitted attribute to True
self._is_fitted = True
X = self._validate_data(X)

# Set the start and end indices
if self.wavenumbers is None:
Expand All @@ -95,39 +80,25 @@ def fit(self, X: np.ndarray, y=None) -> "RangeCut":
self.end_index_ = self._find_index(self.end)

return self


def transform(self, X: np.ndarray, y=None) -> np.ndarray:
def _get_support_mask(self):
"""
Transform the input data by cutting it to the specified range.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data to transform.
y : None
Ignored.
Get the boolean mask indicating which features are selected.
Returns
-------
X_ : np.ndarray of shape (n_samples, n_features)
The transformed data.
mask : np.ndarray of shape (n_features,)
The boolean mask indicating which features are selected.
"""
# Check that the estimator is fitted
check_is_fitted(self, "_is_fitted")

# Check that X is a 2D array and has only finite values
X = check_input(X)
X_ = X.copy()
check_is_fitted(self, ["start_index_", "end_index_"])

# Check that the number of features is the same as the fitted data
if X_.shape[1] != self.n_features_in_:
raise ValueError(
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
)
# Create the mask
mask = np.zeros(self.n_features_in_, dtype=bool)
mask[self.start_index_ : self.end_index_] = True

# Range cut the spectra
return X_[:, self.start_index_ : self.end_index_]
return mask

def _find_index(self, target: float) -> int:
wavenumbers = np.array(self.wavenumbers)
Expand Down
6 changes: 3 additions & 3 deletions chemotools/smooth/whittaker_smooth.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,14 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray:
return X_.reshape(-1, 1) if X_.ndim == 1 else X_

def _calculate_whittaker_smooth(self, x):
X = np.matrix(x)
X = np.array(x)
m = X.size
E = eye(m, format="csc")
w = np.ones(m)
for i in range(self.differences):
E = E[1:] - E[:-1]
W = diags(w, 0, shape=(m, m))
A = csc_matrix(W + (self.lam * E.T * E))
B = csc_matrix(W * X.T)
A = csc_matrix(W + (self.lam * E.T @ E))
B = csc_matrix(W @ X.T).toarray().ravel()
background = spsolve(A, B)
return np.array(background)
2 changes: 0 additions & 2 deletions chemotools/variable_selection/__init__.py

This file was deleted.

Loading

0 comments on commit 84a6b62

Please sign in to comment.