Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: SGDClassifier post-processing with multi-class and improve linear models' predict method #585

Merged
merged 5 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from concrete.fhe.compilation import Circuit, Configuration
from concrete.fhe.mlir.utils import MAXIMUM_TLU_BIT_WIDTH
from sklearn.datasets import make_classification, make_regression
from sklearn.metrics import accuracy_score

from concrete.ml.common.utils import (
SUPPORTED_FLOAT_TYPES,
Expand Down Expand Up @@ -420,7 +421,7 @@ def check_accuracy():
"""Fixture to check the accuracy."""

def check_accuracy_impl(expected, actual, threshold=0.9):
accuracy = numpy.mean(expected == actual)
accuracy = accuracy_score(expected, actual)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better imo

assert accuracy >= threshold, f"Accuracy of {accuracy} is not high enough ({threshold})."

return check_accuracy_impl
Expand Down
120 changes: 79 additions & 41 deletions src/concrete/ml/sklearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,8 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
Returns:
numpy.ndarray: The post-processed predictions.
"""
assert isinstance(y_preds, numpy.ndarray), "Output predictions must be an array."

return y_preds


Expand Down Expand Up @@ -805,8 +807,10 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:

# If the prediction array is 1D, transform the output into a 2D array [1-p, p],
# with p the initial output probabilities
# This is similar to what is done in scikit-learn
if y_preds.ndim == 1 or y_preds.shape[1] == 1:
y_preds = numpy.concatenate((1 - y_preds, y_preds), axis=1)
y_preds = y_preds.flatten()
return numpy.vstack([1 - y_preds, y_preds]).T
RomanBredehoft marked this conversation as resolved.
Show resolved Hide resolved

# Else, apply the softmax operator
else:
Expand Down Expand Up @@ -1387,8 +1391,13 @@ def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray:
def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
self.check_model_is_fitted()

q_y_preds = self.output_quantizers[0].dequant(q_y_preds)
return q_y_preds
y_preds = self.output_quantizers[0].dequant(q_y_preds)

# If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
if y_preds.ndim == 2 and y_preds.shape[1] == 1:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

like in sklearn (same for the followings one)

return y_preds.ravel()

return y_preds

def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
assert self._tree_inference is not None, self._is_not_fitted_error_message()
Expand Down Expand Up @@ -1442,7 +1451,12 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
if not self._fhe_ensembling:
y_preds = numpy.sum(y_preds, axis=-1)

assert_true(y_preds.ndim == 2, "y_preds should be a 2D array")
assert isinstance(y_preds, numpy.ndarray), "Output predictions must be an array."

# If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
if y_preds.ndim == 2 and y_preds.shape[1] == 1:
return y_preds.ravel()

return y_preds

return super().post_processing(y_preds)
Expand Down Expand Up @@ -1693,6 +1707,10 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
# De-quantize the output values
y_preds = self.output_quantizers[0].dequant(q_y_preds)

# If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
if y_preds.ndim == 2 and y_preds.shape[1] == 1:
return y_preds.ravel()

return y_preds

def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
Expand Down Expand Up @@ -1735,38 +1753,6 @@ class SklearnLinearRegressorMixin(SklearnLinearModelMixin, sklearn.base.Regresso
"""


class SklearnSGDRegressorMixin(SklearnLinearRegressorMixin):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just moved it for better readability

"""A Mixin class for sklearn SGD regressors with FHE.

This class is used to create a SGD regressor class what can be exported
to ONNX using Hummingbird.
"""

# Remove once Hummingbird supports SGDRegressor
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
"""Retrieve the model's ONNX graph using Hummingbird conversion.

Args:
test_input (numpy.ndarray): An input data used to trace the model execution.
"""
# Check that the underlying sklearn model has been set and fit
assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message()

model_for_onnx = LinearRegression()
model_for_onnx.coef_ = self.sklearn_model.coef_
model_for_onnx.intercept_ = self.sklearn_model.intercept_

self.onnx_model_ = hb_convert(
model_for_onnx,
backend="onnx",
test_input=test_input,
extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT},
).model

self._clean_graph()


class SklearnLinearClassifierMixin(
BaseClassifier, SklearnLinearModelMixin, sklearn.base.ClassifierMixin, ABC
):
Expand Down Expand Up @@ -1807,14 +1793,61 @@ def decision_function(
"""
# Here, we want to use SklearnLinearModelMixin's `predict` method as confidence scores are
# the dot product's output values, without any post-processing
y_preds = SklearnLinearModelMixin.predict(self, X, fhe=fhe)
return y_preds
y_scores = SklearnLinearModelMixin.predict(self, X, fhe=fhe)

return y_scores

def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
y_logits = self.decision_function(X, fhe=fhe)
y_proba = self.post_processing(y_logits)
y_scores = self.decision_function(X, fhe=fhe)
y_proba = self.post_processing(y_scores)
return y_proba

# In scikit-learn, the argmax is done on the scores directly, not the probabilities
def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this fixes https://github.com/zama-ai/concrete-ml-internal/issues/4344 (reason is explained in main comment, but basically this is how sklearn does)

# Compute the predicted scores
y_scores = self.decision_function(X, fhe=fhe)

# Retrieve the class with the highest score
# If there is a single dimension, only compare the scores to 0
if y_scores.ndim == 1:
y_preds = (y_scores > 0).astype(int)
else:
y_preds = numpy.argmax(y_scores, axis=1)

return self.classes_[y_preds]


class SklearnSGDRegressorMixin(SklearnLinearRegressorMixin):
"""A Mixin class for sklearn SGD regressors with FHE.

This class is used to create a SGD regressor class what can be exported
to ONNX using Hummingbird.
"""

# Remove once Hummingbird supports SGDRegressor
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
"""Retrieve the model's ONNX graph using Hummingbird conversion.

Args:
test_input (numpy.ndarray): An input data used to trace the model execution.
"""
# Check that the underlying sklearn model has been set and fit
assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message()

model_for_onnx = LinearRegression()
model_for_onnx.coef_ = self.sklearn_model.coef_
model_for_onnx.intercept_ = self.sklearn_model.intercept_

self.onnx_model_ = hb_convert(
model_for_onnx,
backend="onnx",
test_input=test_input,
extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT},
).model

self._clean_graph()


class SklearnSGDClassifierMixin(SklearnLinearClassifierMixin):
"""A Mixin class for sklearn SGD classifiers with FHE.
Expand All @@ -1823,7 +1856,7 @@ class SklearnSGDClassifierMixin(SklearnLinearClassifierMixin):
to ONNX using Hummingbird.
"""

# Remove once Hummingbird supports SGDRegressor
# Remove once Hummingbird supports SGDClassifier
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
"""Retrieve the model's ONNX graph using Hummingbird conversion.
Expand Down Expand Up @@ -1988,6 +2021,11 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
self.check_model_is_fitted()
# We compute the sorted argmax in FHE, which are integers.
# No need to de-quantize the output values

# If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
if q_y_preds.ndim > 1 and q_y_preds.shape[1] == 1:
return q_y_preds.ravel()

return q_y_preds

def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
Expand Down
120 changes: 38 additions & 82 deletions src/concrete/ml/sklearn/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sklearn.linear_model import SGDClassifier as SklearnSGDClassifier
from sklearn.preprocessing import LabelEncoder

from ..common.check_inputs import check_array_and_assert
from ..common.utils import FheMode
from ..onnx.ops_impl import numpy_sigmoid
from ..quantization import QuantizedModule
Expand Down Expand Up @@ -253,27 +252,12 @@ def __init__(
"Setting 'parameter_range' is mandatory if FHE training is enabled "
f"({fit_encrypted=}). Got {parameters_range=}"
)

def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this post_processing was wrong + I moved it below

# If the prediction array is 1D, which happens with some models such as XGBCLassifier or
# LogisticRegression models, we have a binary classification problem
n_classes = y_preds.shape[1] if y_preds.ndim > 1 and y_preds.shape[1] > 1 else 2

# For binary classification problem, apply the sigmoid operator
if n_classes == 2:
y_preds = numpy_sigmoid(y_preds)[0]

# If the prediction array is 1D, transform the output into a 2D array [1-p, p],
# with p the initial output probabilities
if y_preds.ndim == 1 or y_preds.shape[1] == 1:
y_preds = numpy.concatenate((1 - y_preds, y_preds), axis=1)

# Else, apply the softmax operator
else:
y_preds = numpy_sigmoid(y_preds)[0]
y_preds = y_preds / y_preds.sum(axis=1)
RomanBredehoft marked this conversation as resolved.
Show resolved Hide resolved

return y_preds
supported_losses = ["log_loss", "modified_huber"]
if self.loss not in supported_losses:
raise NotImplementedError(
f"Only one of {supported_losses} loss is supported. Got {self.loss}."
)

def get_sklearn_params(self, deep: bool = True) -> dict:
# Here, the `get_params` method is the `BaseEstimator.get_params` method from scikit-learn
Expand Down Expand Up @@ -835,61 +819,24 @@ def partial_fit(
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4184
raise NotImplementedError("Partial fit is not currently supported for clear training.")

# This method is taken directly from scikit-learn
def _predict_proba_lr(self, X: Data, fhe: Union[FheMode, str]) -> numpy.ndarray:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this part should be included in our post_processing method

"""Probability estimation for OvR logistic regression.

Positive class probabilities are computed as
1. / (1. + np.exp(-self.decision_function(X)));
multiclass is handled by normalizing that over all classes.

Args:
X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame
or List. It mush have a shape of (n_samples, n_features).
fhe (Union[FheMode, str]): The mode to use for prediction.
Can be FheMode.DISABLE for Concrete ML Python inference,
FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution.
Can also be the string representation of any of these values.

Returns:
numpy.ndarray: The predicted class probabilities.
"""
prob = self.decision_function(X, fhe=fhe)
prob = numpy_sigmoid(prob)[0]

assert isinstance(prob, numpy.ndarray)

if prob.shape[1] == 1:
prob = prob.flatten()
return numpy.vstack([1 - prob, prob]).T

# OvR normalization, like LibLinear's predict_probability
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))

return prob

def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
"""Probability estimates.
def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

basically we don't need to define the predict_proba method as it is in sklearn, we only need to re-define post_processing with sklearn's implem

"""Apply post-processing to the de-quantized predictions.

This method is only available for log loss and modified Huber loss.
Multiclass probability estimates are derived from binary (one-vs.-rest)
estimates by simple normalization, as recommended by Zadrozny and Elkan.
This is called at the end of the `predict_proba` method and is only available for log loss
and modified Huber losses. Multiclass probability estimates are derived from binary
(one-vs.-rest) estimates by simple normalization, as recommended by Zadrozny and Elkan.

Binary probability estimates for loss="modified_huber" are given by
(clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions
it is necessary to perform proper probability calibration by wrapping
the classifier with `sklearn.calibration.CalibratedClassifierCV` instead.

Args:
X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame
or List. It mush have a shape of (n_samples, n_features).
fhe (Union[FheMode, str]): The mode to use for prediction.
Can be FheMode.DISABLE for Concrete ML Python inference,
FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution.
Can also be the string representation of any of these values.
y_preds (Data): The de-quantized predictions to post-process. It mush have a shape of
(n_samples, n_features).

Returns:
numpy.ndarray: The predicted class probabilities, with shape (n_samples, n_classes).
numpy.ndarray: The post-processed predictions, with shape (n_samples, n_classes).

Raises:
NotImplementedError: If the given loss is not supported.
Expand All @@ -903,52 +850,61 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->
case is in the appendix B in:
http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf
"""
X = check_array_and_assert(X)

# The following lines are taken directly from scikit-learn's source code
if self.loss == "log_loss":
return self._predict_proba_lr(X, fhe=fhe)
y_preds = numpy_sigmoid(y_preds)[0]

assert isinstance(y_preds, numpy.ndarray)

if y_preds.ndim == 1 or y_preds.shape[1] == 1:
y_preds = y_preds.flatten()
return numpy.vstack([1 - y_preds, y_preds]).T

if self.loss == "modified_huber":
# OvR normalization, like LibLinear's predict_probability
prob = y_preds / y_preds.sum(axis=1).reshape((y_preds.shape[0], -1))

# The following lines are taken directly from scikit-learn's source code
elif self.loss == "modified_huber":
assert isinstance(self.classes_, numpy.ndarray)
binary = len(self.classes_) == 2
scores = self.decision_function(X)

if binary:
scores = scores[:, 0]

prob2 = numpy.empty(tuple())
if binary:
prob2 = numpy.ones((scores.shape[0], 2))
prob2 = numpy.ones((y_preds.shape[0], 2))
prob = prob2[:, 1]

else:
prob = scores
prob = y_preds

numpy.clip(scores, -1, 1, prob)
numpy.clip(y_preds, -1, 1, prob)
prob += 1.0
prob /= 2.0

if binary:
prob2[:, 0] -= prob
prob = prob2

else:
# the above might assign zero to all classes, which doesn't
# normalize neatly; work around this to produce uniform
# probabilities
prob_sum = prob.sum(axis=1)
all_zero = prob_sum == 0

if numpy.any(all_zero): # pragma: no cover
prob[all_zero, :] = 1
prob_sum[all_zero] = len(self.classes_)

# normalize
prob /= prob_sum.reshape((prob.shape[0], -1))

return prob
else: # pragma: no cover
supported_losses = ["log_loss", "modified_huber"]
raise NotImplementedError(
f"Only one of {supported_losses} loss is supported. Got {self.loss}."
)

raise NotImplementedError(
f"Method 'predict_proba' currently only supports one of"
f" ['log_loss', 'modifier_huber'] loss. Got {self.loss}."
)
return prob

def dump_dict(self) -> Dict[str, Any]:
assert self._weight_quantizer is not None, self._is_not_fitted_error_message()
Expand Down
Loading
Loading