fix: compatibility with sklearn 1.4 and 1.1
andrei-stoian-zama committed Nov 25, 2024
1 parent 1b7f47f commit a043f51
Showing 7 changed files with 85 additions and 22 deletions.
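The change, in brief: parameters that exist in only one of the two supported scikit-learn versions are stripped or passed through so that model conversion and instantiation work under both. Below is a minimal usage sketch of the call this commit fixes, assuming the public `concrete.ml.sklearn` import path and a `from_sklearn_model(sklearn_model, X, n_bits=8)` signature (only the method name appears in the diff):

import numpy
from sklearn.linear_model import LinearRegression as SklearnLinearRegression

from concrete.ml.sklearn import LinearRegression as ConcreteLinearRegression

# Fit a plain float scikit-learn model
X = numpy.random.rand(100, 4)
y = X @ numpy.array([1.0, -2.0, 0.5, 3.0])
sk_model = SklearnLinearRegression().fit(X, y)

# Under sklearn 1.1, sk_model.get_params() still contains `normalize`;
# the patched from_sklearn_model pops it before calling the 1.4-style
# Concrete ML constructor, so the conversion succeeds on both versions.
cml_model = ConcreteLinearRegression.from_sklearn_model(sk_model, X, n_bits=8)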
29 changes: 29 additions & 0 deletions src/concrete/ml/sklearn/base.py
@@ -1572,6 +1572,29 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:

return super().post_processing(y_preds)

def get_sklearn_params(self, deep: bool = True) -> dict:
"""Get parameters for this estimator.
This method is used to instantiate a scikit-learn model using the Concrete ML model's
parameters. It does not override scikit-learn's existing `get_params` method, so as not to
break its implementation of `set_params`.
Args:
deep (bool): If True, return the parameters for this estimator and contained
subobjects that are estimators. Defaults to True.
Returns:
params (dict): Parameter names mapped to their values.
"""
# pylint: disable-next=no-member
params = super().get_params(deep=deep) # type: ignore[misc]

params.pop("n_bits", None)
if "1.1." in sklearn.__version__:
params.pop("monotonic_cst", None)

return params


class BaseTreeRegressorMixin(BaseTreeEstimatorMixin, sklearn.base.RegressorMixin, ABC):
"""Mixin class for tree-based regressors.
@@ -1668,6 +1691,12 @@ def from_sklearn_model(
# Extract scikit-learn's initialization parameters
init_params = sklearn_model.get_params()

# Ensure compatibility with both sklearn 1.1 and >=1.4:
# the `normalize` parameter no longer exists in 1.4. If this package is
# installed with sklearn 1.1, which still has it, remove it when
# instantiating the 1.4-API-compatible Concrete ML model
init_params.pop("normalize", None)

# Instantiate the Concrete ML model and update initialization parameters
# This update is necessary as we currently store scikit-learn attributes in Concrete ML
# classes during initialization (for example: link or power attributes in GLMs)
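For context, a hedged sketch of how `get_sklearn_params` is meant to be consumed, assuming `sklearn_model_class` is set on the estimator as it is for the GLM classes below: the version-safe parameter dict re-creates the equivalent float scikit-learn estimator.

# cml_model: any Concrete ML tree-based estimator using the mixin above
params = cml_model.get_sklearn_params()  # `n_bits` stripped; `monotonic_cst` too on sklearn 1.1
sklearn_equivalent = cml_model.sklearn_model_class(**params)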
26 changes: 23 additions & 3 deletions src/concrete/ml/sklearn/glm.py
@@ -6,6 +6,7 @@
from typing import Any, Dict, Union

import numpy
import sklearn
import sklearn.linear_model

from ..common.debugging.custom_assert import assert_true
@@ -136,6 +137,19 @@ def load_dict(cls, metadata: Dict):

return obj

def get_sklearn_params(self, deep: bool = True) -> dict:
# Here, the `get_params` method is the `BaseEstimator.get_params` method from scikit-learn
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3373
params = super().get_params(deep=deep) # type: ignore[misc]

# Remove the parameters added by Concrete ML
params.pop("n_bits", None)
# Remove the `solver` parameter, which sklearn 1.1 does not accept
if "1.1." in sklearn.__version__:
params.pop("solver", None)

return params


class PoissonRegressor(_GeneralizedLinearRegressor):
"""A Poisson regression model with FHE.
@@ -156,12 +170,13 @@ class PoissonRegressor(_GeneralizedLinearRegressor):
sklearn_model_class = sklearn.linear_model.PoissonRegressor
_is_a_public_cml_model = True

def __init__(
def __init__( # pylint: disable=useless-parent-delegation
self,
*,
n_bits: Union[int, dict] = 8,
alpha: float = 1.0,
fit_intercept: bool = True,
solver="lbfgs",
max_iter: int = 100,
tol: float = 1e-4,
warm_start: bool = False,
@@ -171,6 +186,7 @@ def __init__(
n_bits=n_bits,
alpha=alpha,
fit_intercept=fit_intercept,
solver=solver,
max_iter=max_iter,
tol=tol,
warm_start=warm_start,
@@ -200,12 +216,13 @@ class GammaRegressor(_GeneralizedLinearRegressor):
sklearn_model_class = sklearn.linear_model.GammaRegressor
_is_a_public_cml_model = True

def __init__(
def __init__( # pylint: disable=useless-parent-delegation
self,
*,
n_bits: Union[int, dict] = 8,
alpha: float = 1.0,
fit_intercept: bool = True,
solver="lbfgs",
max_iter: int = 100,
tol: float = 1e-4,
warm_start: bool = False,
@@ -215,6 +232,7 @@ def __init__(
n_bits=n_bits,
alpha=alpha,
fit_intercept=fit_intercept,
solver=solver,
max_iter=max_iter,
tol=tol,
warm_start=warm_start,
@@ -245,14 +263,15 @@ class TweedieRegressor(_GeneralizedLinearRegressor):
sklearn_model_class = sklearn.linear_model.TweedieRegressor
_is_a_public_cml_model = True

def __init__(
def __init__( # pylint: disable=too-many-arguments
self,
*,
n_bits: Union[int, dict] = 8,
power: float = 0.0,
alpha: float = 1.0,
fit_intercept: bool = True,
link: str = "auto",
solver="lbfgs",
max_iter: int = 100,
tol: float = 1e-4,
warm_start: bool = False,
@@ -262,6 +281,7 @@ def __init__(
n_bits=n_bits,
alpha=alpha,
fit_intercept=fit_intercept,
solver=solver,
max_iter=max_iter,
tol=tol,
warm_start=warm_start,
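A hedged usage sketch of the GLM changes, assuming the public import path: the new `solver` argument is forwarded to scikit-learn, while `get_sklearn_params` strips whatever the installed version cannot accept.

from concrete.ml.sklearn import PoissonRegressor

model = PoissonRegressor(n_bits=8, solver="lbfgs", max_iter=200)

params = model.get_sklearn_params()
assert "n_bits" not in params  # always removed
# Under sklearn 1.1, "solver" is removed as well, so this instantiation
# works on either supported version:
sk_model = model.sklearn_model_class(**params)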
18 changes: 1 addition & 17 deletions src/concrete/ml/sklearn/linear_model.py
@@ -51,7 +51,6 @@ def __init__(
self,
n_bits=8,
fit_intercept=True,
normalize="deprecated",
copy_X=True,
n_jobs=None,
positive=False,
@@ -60,7 +59,6 @@
super().__init__(n_bits=n_bits)

self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
self.n_jobs = n_jobs
self.positive = positive
@@ -85,7 +83,6 @@ def dump_dict(self) -> Dict[str, Any]:

# scikit-learn
metadata["fit_intercept"] = self.fit_intercept
metadata["normalize"] = self.normalize
metadata["copy_X"] = self.copy_X
metadata["n_jobs"] = self.n_jobs
metadata["positive"] = self.positive
@@ -112,7 +109,6 @@ def load_dict(cls, metadata: Dict):

# scikit-learn
obj.fit_intercept = metadata["fit_intercept"]
obj.normalize = metadata["normalize"]
obj.copy_X = metadata["copy_X"]
obj.n_jobs = metadata["n_jobs"]
obj.positive = metadata["positive"]
@@ -1302,7 +1298,6 @@ def __init__(
alpha=1.0,
l1_ratio=0.5,
fit_intercept=True,
normalize="deprecated",
precompute=False,
max_iter=1000,
copy_X=True,
@@ -1318,7 +1313,6 @@ def __init__(
self.alpha = alpha
self.l1_ratio = l1_ratio
self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
self.positive = positive
self.precompute = precompute
@@ -1350,7 +1344,6 @@ def dump_dict(self) -> Dict[str, Any]:
metadata["alpha"] = self.alpha
metadata["l1_ratio"] = self.l1_ratio
metadata["fit_intercept"] = self.fit_intercept
metadata["normalize"] = self.normalize
metadata["copy_X"] = self.copy_X
metadata["positive"] = self.positive
metadata["precompute"] = self.precompute
@@ -1384,7 +1377,6 @@ def load_dict(cls, metadata: Dict):
obj.alpha = metadata["alpha"]
obj.l1_ratio = metadata["l1_ratio"]
obj.fit_intercept = metadata["fit_intercept"]
obj.normalize = metadata["normalize"]
obj.copy_X = metadata["copy_X"]
obj.positive = metadata["positive"]
obj.precompute = metadata["precompute"]
@@ -1422,7 +1414,6 @@ def __init__(
n_bits=8,
alpha: float = 1.0,
fit_intercept=True,
normalize="deprecated",
precompute=False,
copy_X=True,
max_iter=1000,
@@ -1437,7 +1428,6 @@

self.alpha = alpha
self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
self.positive = positive
self.max_iter = max_iter
@@ -1468,7 +1458,6 @@ def dump_dict(self) -> Dict[str, Any]:
# scikit-learn
metadata["alpha"] = self.alpha
metadata["fit_intercept"] = self.fit_intercept
metadata["normalize"] = self.normalize
metadata["copy_X"] = self.copy_X
metadata["positive"] = self.positive
metadata["max_iter"] = self.max_iter
@@ -1501,7 +1490,6 @@ def load_dict(cls, metadata: Dict):
# scikit-learn
obj.alpha = metadata["alpha"]
obj.fit_intercept = metadata["fit_intercept"]
obj.normalize = metadata["normalize"]
obj.copy_X = metadata["copy_X"]
obj.positive = metadata["positive"]
obj.max_iter = metadata["max_iter"]
@@ -1539,10 +1527,9 @@ def __init__(
n_bits=8,
alpha: float = 1.0,
fit_intercept=True,
normalize="deprecated",
copy_X=True,
max_iter=None,
tol=0.001,
tol=0.0001,
solver="auto",
positive=False,
random_state=None,
@@ -1552,7 +1539,6 @@

self.alpha = alpha
self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
self.positive = positive
self.max_iter = max_iter
@@ -1581,7 +1567,6 @@ def dump_dict(self) -> Dict[str, Any]:
# scikit-learn
metadata["alpha"] = self.alpha
metadata["fit_intercept"] = self.fit_intercept
metadata["normalize"] = self.normalize
metadata["copy_X"] = self.copy_X
metadata["positive"] = self.positive
metadata["max_iter"] = self.max_iter
@@ -1612,7 +1597,6 @@ def load_dict(cls, metadata: Dict):
# scikit-learn
obj.alpha = metadata["alpha"]
obj.fit_intercept = metadata["fit_intercept"]
obj.normalize = metadata["normalize"]
obj.copy_X = metadata["copy_X"]
obj.positive = metadata["positive"]
obj.max_iter = metadata["max_iter"]
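The net effect on the linear models, as a short sketch (import path assumed, and assuming `tol` is stored as an attribute like the other constructor arguments): `normalize` is gone from the constructors, attributes, and serialized metadata, and Ridge's default `tol` now matches scikit-learn 1.4.

from concrete.ml.sklearn import Ridge

ridge = Ridge(n_bits=8, alpha=1.0)
assert not hasattr(ridge, "normalize")  # attribute no longer stored
assert ridge.tol == 0.0001  # sklearn 1.4 default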
8 changes: 8 additions & 0 deletions src/concrete/ml/sklearn/rf.py
@@ -39,6 +39,7 @@ def __init__(
class_weight=None,
ccp_alpha=0.0,
max_samples=None,
monotonic_cst=None,
):
"""Initialize the RandomForestClassifier.
@@ -65,6 +66,7 @@ def __init__(
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
self.ccp_alpha = ccp_alpha
self.monotonic_cst = monotonic_cst

def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
# Here, we want to use BaseTreeEstimatorMixin's `post-processing` method as
@@ -106,6 +108,7 @@ def dump_dict(self) -> Dict[str, Any]:
metadata["max_leaf_nodes"] = self.max_leaf_nodes
metadata["min_impurity_decrease"] = self.min_impurity_decrease
metadata["ccp_alpha"] = self.ccp_alpha
metadata["monotonic_cst"] = self.monotonic_cst

return metadata

@@ -151,6 +154,7 @@ def load_dict(cls, metadata: Dict):
obj.max_leaf_nodes = metadata["max_leaf_nodes"]
obj.min_impurity_decrease = metadata["min_impurity_decrease"]
obj.ccp_alpha = metadata["ccp_alpha"]
obj.monotonic_cst = metadata["monotonic_cst"]

return obj

@@ -184,6 +188,7 @@ def __init__(
warm_start=False,
ccp_alpha=0.0,
max_samples=None,
monotonic_cst=None,
):
"""Initialize the RandomForestRegressor.
@@ -209,6 +214,7 @@
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
self.ccp_alpha = ccp_alpha
self.monotonic_cst = monotonic_cst

def dump_dict(self) -> Dict[str, Any]:
metadata: Dict[str, Any] = {}
@@ -243,6 +249,7 @@ def dump_dict(self) -> Dict[str, Any]:
metadata["max_leaf_nodes"] = self.max_leaf_nodes
metadata["min_impurity_decrease"] = self.min_impurity_decrease
metadata["ccp_alpha"] = self.ccp_alpha
metadata["monotonic_cst"] = self.monotonic_cst

return metadata

@@ -288,5 +295,6 @@ def load_dict(cls, metadata: Dict):
obj.max_leaf_nodes = metadata["max_leaf_nodes"]
obj.min_impurity_decrease = metadata["min_impurity_decrease"]
obj.ccp_alpha = metadata["ccp_alpha"]
obj.monotonic_cst = metadata["monotonic_cst"]

return obj
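A hedged example of the new forest parameter (effective with sklearn >= 1.4; import path assumed): `monotonic_cst` is accepted, stored as an attribute, and carried through dump_dict/load_dict. Here it would constrain the first feature to a monotonically increasing effect.

from concrete.ml.sklearn import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=20, monotonic_cst=[1, 0, 0])
assert clf.monotonic_cst == [1, 0, 0]  # serialized by dump_dict after fitting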
8 changes: 8 additions & 0 deletions src/concrete/ml/sklearn/tree.py
@@ -32,6 +32,7 @@ def __init__(
min_impurity_decrease=0.0,
class_weight=None,
ccp_alpha: float = 0.0,
monotonic_cst=None,
n_bits: Union[int, Dict[str, int]] = 6,
):
"""Initialize the DecisionTreeClassifier.
@@ -54,6 +55,7 @@
self.random_state = random_state
self.min_impurity_decrease = min_impurity_decrease
self.ccp_alpha = ccp_alpha
self.monotonic_cst = monotonic_cst

def __getattr__(self, attr: str):
# We directly expose the following methods as they are commonly used with decision trees
@@ -100,6 +102,7 @@ def dump_dict(self) -> Dict[str, Any]:
metadata["random_state"] = self.random_state
metadata["min_impurity_decrease"] = self.min_impurity_decrease
metadata["ccp_alpha"] = self.ccp_alpha
metadata["monotonic_cst"] = self.monotonic_cst

return metadata

@@ -140,6 +143,7 @@ def load_dict(cls, metadata: Dict):
obj.random_state = metadata["random_state"]
obj.min_impurity_decrease = metadata["min_impurity_decrease"]
obj.ccp_alpha = metadata["ccp_alpha"]
obj.monotonic_cst = metadata["monotonic_cst"]

return obj

@@ -166,6 +170,7 @@ def __init__(
max_leaf_nodes=None,
min_impurity_decrease=0.0,
ccp_alpha=0.0,
monotonic_cst=None,
n_bits: Union[int, Dict[str, int]] = 6,
):
"""Initialize the DecisionTreeRegressor.
@@ -187,6 +192,7 @@
self.random_state = random_state
self.min_impurity_decrease = min_impurity_decrease
self.ccp_alpha = ccp_alpha
self.monotonic_cst = monotonic_cst

def __getattr__(self, attr: str):
# We directly expose the following methods as they are commonly used with decision trees
@@ -226,6 +232,7 @@ def dump_dict(self) -> Dict[str, Any]:
metadata["random_state"] = self.random_state
metadata["min_impurity_decrease"] = self.min_impurity_decrease
metadata["ccp_alpha"] = self.ccp_alpha
metadata["monotonic_cst"] = self.monotonic_cst

return metadata

@@ -266,5 +273,6 @@ def load_dict(cls, metadata: Dict):
obj.random_state = metadata["random_state"]
obj.min_impurity_decrease = metadata["min_impurity_decrease"]
obj.ccp_alpha = metadata["ccp_alpha"]
obj.monotonic_cst = metadata["monotonic_cst"]

return obj
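Finally, a matching sketch for the decision trees (import path assumed): the new constructor argument defaults to None, is stored as an attribute, and round-trips through dump_dict/load_dict with the rest of the metadata.

from concrete.ml.sklearn import DecisionTreeClassifier

tree_clf = DecisionTreeClassifier(n_bits=6)  # monotonic_cst defaults to None
assert tree_clf.monotonic_cst is None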