fix: SGDClassifier post-processing with multi-class and improve linear models' predict method #585
Changes from all commits: 3b2cfe8, 833eb09, a7b606c, a7dc084, c16ca0a
First changed file:
@@ -694,6 +694,8 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
         Returns:
             numpy.ndarray: The post-processed predictions.
         """
+        assert isinstance(y_preds, numpy.ndarray), "Output predictions must be an array."
+
         return y_preds
@@ -805,8 +807,10 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
         # If the prediction array is 1D, transform the output into a 2D array [1-p, p],
         # with p the initial output probabilities
+        # This is similar to what is done in scikit-learn
         if y_preds.ndim == 1 or y_preds.shape[1] == 1:
-            y_preds = numpy.concatenate((1 - y_preds, y_preds), axis=1)
+            y_preds = y_preds.flatten()
+            return numpy.vstack([1 - y_preds, y_preds]).T

         # Else, apply the softmax operator
         else:
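As a quick illustration of the transformation above (a minimal standalone numpy sketch with made-up probabilities, not code from the PR): a 1D array p of positive-class probabilities becomes a 2D array with rows [1 - p, p], matching scikit-learn's predict_proba layout for binary classifiers.

import numpy

# Made-up sigmoid outputs for the positive class, shape (3,)
p = numpy.array([0.9, 0.2, 0.55])

# Stack [1 - p, p] as columns, giving shape (3, 2) like scikit-learn
proba = numpy.vstack([1 - p, p]).T

print(proba)
# [[0.1  0.9 ]
#  [0.8  0.2 ]
#  [0.45 0.55]]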
@@ -1387,8 +1391,13 @@ def quantize_input(self, X: numpy.ndarray) -> numpy.ndarray:
     def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
         self.check_model_is_fitted()

-        q_y_preds = self.output_quantizers[0].dequant(q_y_preds)
-        return q_y_preds
+        y_preds = self.output_quantizers[0].dequant(q_y_preds)
+
+        # If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
+        if y_preds.ndim == 2 and y_preds.shape[1] == 1:
+            return y_preds.ravel()
+
+        return y_preds

     def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
         assert self._tree_inference is not None, self._is_not_fitted_error_message()
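The same squeeze shows up in several hunks below. A minimal sketch of the shape change (standalone numpy, assumed shapes only): a (n, 1) column of de-quantized predictions is flattened to (n,), which is what scikit-learn estimators return from predict.

import numpy

y_preds = numpy.array([[1.5], [2.0], [3.25]])  # shape (3, 1)

# Squeeze the single output column, mirroring the diff
if y_preds.ndim == 2 and y_preds.shape[1] == 1:
    y_preds = y_preds.ravel()

print(y_preds.shape)  # (3,), matching sklearn's 1D output convention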
@@ -1442,7 +1451,12 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
         if not self._fhe_ensembling:
             y_preds = numpy.sum(y_preds, axis=-1)

-            assert_true(y_preds.ndim == 2, "y_preds should be a 2D array")
+            assert isinstance(y_preds, numpy.ndarray), "Output predictions must be an array."
+
+            # If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
+            if y_preds.ndim == 2 and y_preds.shape[1] == 1:
+                return y_preds.ravel()
+
             return y_preds

         return super().post_processing(y_preds)
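For the tree-ensemble path above, a toy sketch (made-up array, assuming the per-tree outputs sit on the last axis) of the clear-side ensembling: when _fhe_ensembling is disabled, the tree contributions are summed outside FHE, then the (n, 1) column is squeezed.

import numpy

# Made-up de-quantized outputs: 2 samples, 1 output, 3 trees
y_preds = numpy.array([[[0.1, 0.3, 0.2]],
                       [[0.4, 0.1, 0.1]]])  # shape (2, 1, 3)

# Sum the per-tree contributions on the last axis
y_preds = numpy.sum(y_preds, axis=-1)       # shape (2, 1)

# Squeeze the (n, 1) column to (n,), as in the diff
print(y_preds.ravel())                      # [0.6 0.6]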
@@ -1693,6 +1707,10 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
         # De-quantize the output values
         y_preds = self.output_quantizers[0].dequant(q_y_preds)

+        # If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
+        if y_preds.ndim == 2 and y_preds.shape[1] == 1:
+            return y_preds.ravel()
+
         return y_preds

     def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
@@ -1735,38 +1753,6 @@ class SklearnLinearRegressorMixin(SklearnLinearModelMixin, sklearn.base.Regresso
     """

-
-class SklearnSGDRegressorMixin(SklearnLinearRegressorMixin):
-    """A Mixin class for sklearn SGD regressors with FHE.
-
-    This class is used to create an SGD regressor class that can be exported
-    to ONNX using Hummingbird.
-    """
-
-    # Remove once Hummingbird supports SGDRegressor
-    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
-    def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
-        """Retrieve the model's ONNX graph using Hummingbird conversion.
-
-        Args:
-            test_input (numpy.ndarray): Input data used to trace the model execution.
-        """
-        # Check that the underlying sklearn model has been set and fit
-        assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message()
-
-        model_for_onnx = LinearRegression()
-        model_for_onnx.coef_ = self.sklearn_model.coef_
-        model_for_onnx.intercept_ = self.sklearn_model.intercept_
-
-        self.onnx_model_ = hb_convert(
-            model_for_onnx,
-            backend="onnx",
-            test_input=test_input,
-            extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT},
-        ).model
-
-        self._clean_graph()
-
-
 class SklearnLinearClassifierMixin(
     BaseClassifier, SklearnLinearModelMixin, sklearn.base.ClassifierMixin, ABC
 ):

Reviewer note (resolved): the SklearnSGDRegressorMixin class was just moved below for better readability.
@@ -1807,14 +1793,61 @@ def decision_function(
         """
         # Here, we want to use SklearnLinearModelMixin's `predict` method as confidence scores are
         # the dot product's output values, without any post-processing
-        y_preds = SklearnLinearModelMixin.predict(self, X, fhe=fhe)
-        return y_preds
+        y_scores = SklearnLinearModelMixin.predict(self, X, fhe=fhe)
+
+        return y_scores

     def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
-        y_logits = self.decision_function(X, fhe=fhe)
-        y_proba = self.post_processing(y_logits)
+        y_scores = self.decision_function(X, fhe=fhe)
+        y_proba = self.post_processing(y_scores)
         return y_proba

+    # In scikit-learn, the argmax is done on the scores directly, not the probabilities
+    def predict(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
+        # Compute the predicted scores
+        y_scores = self.decision_function(X, fhe=fhe)
+
+        # Retrieve the class with the highest score
+        # If there is a single dimension, only compare the scores to 0
+        if y_scores.ndim == 1:
+            y_preds = (y_scores > 0).astype(int)
+        else:
+            y_preds = numpy.argmax(y_scores, axis=1)
+
+        return self.classes_[y_preds]

Reviewer note (resolved): the new predict method fixes https://github.com/zama-ai/concrete-ml-internal/issues/4344 (the reason is explained in the main comment; this is how scikit-learn does it).
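A self-contained sketch (toy scores, hypothetical class labels) of the behavior the new predict implements: scikit-learn thresholds the decision function at 0 in the binary case and takes the argmax of the raw scores in the multi-class case.

import numpy

classes = numpy.array([0, 1, 2])

# Hypothetical multi-class decision scores, shape (2, 3)
y_scores = numpy.array([[-0.2, 1.3, 0.1],
                        [0.7, -0.5, 0.9]])
print(classes[numpy.argmax(y_scores, axis=1)])  # [1 2]

# Hypothetical binary scores, shape (2,): compare against 0
y_scores_bin = numpy.array([-1.2, 0.4])
print(classes[:2][(y_scores_bin > 0).astype(int)])  # [0 1]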
+
+
+class SklearnSGDRegressorMixin(SklearnLinearRegressorMixin):
+    """A Mixin class for sklearn SGD regressors with FHE.
+
+    This class is used to create an SGD regressor class that can be exported
+    to ONNX using Hummingbird.
+    """
+
+    # Remove once Hummingbird supports SGDRegressor
+    # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
+    def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
+        """Retrieve the model's ONNX graph using Hummingbird conversion.
+
+        Args:
+            test_input (numpy.ndarray): Input data used to trace the model execution.
+        """
+        # Check that the underlying sklearn model has been set and fit
+        assert self.sklearn_model is not None, self._sklearn_model_is_not_fitted_error_message()
+
+        model_for_onnx = LinearRegression()
+        model_for_onnx.coef_ = self.sklearn_model.coef_
+        model_for_onnx.intercept_ = self.sklearn_model.intercept_
+
+        self.onnx_model_ = hb_convert(
+            model_for_onnx,
+            backend="onnx",
+            test_input=test_input,
+            extra_config={"onnx_target_opset": OPSET_VERSION_FOR_ONNX_EXPORT},
+        ).model
+
+        self._clean_graph()
+
+
 class SklearnSGDClassifierMixin(SklearnLinearClassifierMixin):
     """A Mixin class for sklearn SGD classifiers with FHE.
@@ -1823,7 +1856,7 @@ class SklearnSGDClassifierMixin(SklearnLinearClassifierMixin):
     to ONNX using Hummingbird.
     """

-    # Remove once Hummingbird supports SGDRegressor
+    # Remove once Hummingbird supports SGDClassifier
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4100
     def _set_onnx_model(self, test_input: numpy.ndarray) -> None:
         """Retrieve the model's ONNX graph using Hummingbird conversion.
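The workaround in _set_onnx_model relies on a fitted SGDRegressor and a LinearRegression carrying the same coef_/intercept_ computing the same affine prediction, so the latter can stand in for ONNX export. A minimal sketch of that equivalence (hypothetical random data; plain scikit-learn and numpy only, no Hummingbird call):

import numpy
from sklearn.linear_model import LinearRegression, SGDRegressor

rng = numpy.random.RandomState(0)
X, y = rng.randn(100, 3), rng.randn(100)

sgd = SGDRegressor(max_iter=1000, tol=1e-3).fit(X, y)

# Copy the learned affine parameters onto a LinearRegression shell
stand_in = LinearRegression()
stand_in.coef_ = sgd.coef_
stand_in.intercept_ = sgd.intercept_

# Same predictions, so the stand-in can be exported instead
assert numpy.allclose(sgd.predict(X), stand_in.predict(X))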
@@ -1988,6 +2021,11 @@ def dequantize_output(self, q_y_preds: numpy.ndarray) -> numpy.ndarray:
         self.check_model_is_fitted()

         # We compute the sorted argmax in FHE, which are integers.
         # No need to de-quantize the output values

+        # If the preds have shape (n, 1), squeeze it to shape (n,) like in scikit-learn
+        if q_y_preds.ndim > 1 and q_y_preds.shape[1] == 1:
+            return q_y_preds.ravel()
+
         return q_y_preds

     def _get_module_to_compile(self) -> Union[Compiler, QuantizedModule]:
Second changed file:
@@ -9,7 +9,6 @@
 from sklearn.linear_model import SGDClassifier as SklearnSGDClassifier
 from sklearn.preprocessing import LabelEncoder

-from ..common.check_inputs import check_array_and_assert
 from ..common.utils import FheMode
 from ..onnx.ops_impl import numpy_sigmoid
 from ..quantization import QuantizedModule
@@ -253,27 +252,12 @@ def __init__(
                 "Setting 'parameter_range' is mandatory if FHE training is enabled "
                 f"({fit_encrypted=}). Got {parameters_range=}"
             )

-    def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
-        # If the prediction array is 1D, which happens with some models such as XGBClassifier or
-        # LogisticRegression models, we have a binary classification problem
-        n_classes = y_preds.shape[1] if y_preds.ndim > 1 and y_preds.shape[1] > 1 else 2
-
-        # For binary classification problem, apply the sigmoid operator
-        if n_classes == 2:
-            y_preds = numpy_sigmoid(y_preds)[0]
-
-        # If the prediction array is 1D, transform the output into a 2D array [1-p, p],
-        # with p the initial output probabilities
-        if y_preds.ndim == 1 or y_preds.shape[1] == 1:
-            y_preds = numpy.concatenate((1 - y_preds, y_preds), axis=1)
-
-        # Else, apply the softmax operator
-        else:
-            y_preds = numpy_sigmoid(y_preds)[0]
-            y_preds = y_preds / y_preds.sum(axis=1)
-
-        return y_preds
+        supported_losses = ["log_loss", "modified_huber"]
+        if self.loss not in supported_losses:
+            raise NotImplementedError(
+                f"Only one of {supported_losses} loss is supported. Got {self.loss}."
+            )

     def get_sklearn_params(self, deep: bool = True) -> dict:
         # Here, the `get_params` method is the `BaseEstimator.get_params` method from scikit-learn

Reviewer note (resolved): this post_processing was wrong, and it has been moved below.
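For context on why the removed post_processing was wrong, here is a standalone numpy sketch (made-up one-vs-rest sigmoid outputs, not code from the PR): for a multi-class array of shape (n, c), dividing by sum(axis=1) without a reshape does not broadcast row-wise and raises, which the reshape in the new implementation avoids.

import numpy

# Made-up OvR sigmoid outputs, shape (2, 3)
y_preds = numpy.array([[0.8, 0.6, 0.1],
                       [0.2, 0.9, 0.3]])

row_sums = y_preds.sum(axis=1)  # shape (2,)

# The removed code effectively did this, which fails to broadcast row-wise:
# y_preds / row_sums  -> ValueError for shapes (2, 3) and (2,)

# The corrected normalization reshapes the sums into a column first:
prob = y_preds / row_sums.reshape((-1, 1))
print(prob.sum(axis=1))  # [1. 1.]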
@@ -835,61 +819,24 @@ def partial_fit(
         # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4184
         raise NotImplementedError("Partial fit is not currently supported for clear training.")

-    # This method is taken directly from scikit-learn
-    def _predict_proba_lr(self, X: Data, fhe: Union[FheMode, str]) -> numpy.ndarray:
-        """Probability estimation for OvR logistic regression.
-
-        Positive class probabilities are computed as
-        1. / (1. + np.exp(-self.decision_function(X)));
-        multiclass is handled by normalizing that over all classes.
-
-        Args:
-            X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame
-                or List. It must have a shape of (n_samples, n_features).
-            fhe (Union[FheMode, str]): The mode to use for prediction.
-                Can be FheMode.DISABLE for Concrete ML Python inference,
-                FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution.
-                Can also be the string representation of any of these values.
-
-        Returns:
-            numpy.ndarray: The predicted class probabilities.
-        """
-        prob = self.decision_function(X, fhe=fhe)
-        prob = numpy_sigmoid(prob)[0]
-
-        assert isinstance(prob, numpy.ndarray)
-
-        if prob.shape[1] == 1:
-            prob = prob.flatten()
-            return numpy.vstack([1 - prob, prob]).T
-
-        # OvR normalization, like LibLinear's predict_probability
-        prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
-
-        return prob
-
-    def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) -> numpy.ndarray:
-        """Probability estimates.
+    def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray:
+        """Apply post-processing to the de-quantized predictions.

-        This method is only available for log loss and modified Huber loss.
-        Multiclass probability estimates are derived from binary (one-vs.-rest)
-        estimates by simple normalization, as recommended by Zadrozny and Elkan.
+        This is called at the end of the `predict_proba` method and is only available for log loss
+        and modified Huber losses. Multiclass probability estimates are derived from binary
+        (one-vs.-rest) estimates by simple normalization, as recommended by Zadrozny and Elkan.

         Binary probability estimates for loss="modified_huber" are given by
         (clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions
         it is necessary to perform proper probability calibration by wrapping
         the classifier with `sklearn.calibration.CalibratedClassifierCV` instead.

-        Args:
-            X (Data): The input values to predict, as a Numpy array, Torch tensor, Pandas DataFrame
-                or List. It must have a shape of (n_samples, n_features).
-            fhe (Union[FheMode, str]): The mode to use for prediction.
-                Can be FheMode.DISABLE for Concrete ML Python inference,
-                FheMode.SIMULATE for FHE simulation and FheMode.EXECUTE for actual FHE execution.
-                Can also be the string representation of any of these values.
+        Args:
+            y_preds (Data): The de-quantized predictions to post-process. It must have a shape of
+                (n_samples, n_features).

         Returns:
-            numpy.ndarray: The predicted class probabilities, with shape (n_samples, n_classes).
+            numpy.ndarray: The post-processed predictions, with shape (n_samples, n_classes).

         Raises:
             NotImplementedError: If the given loss is not supported.

Reviewer note (resolved): this part should be included in our post_processing method.
Reviewer note (resolved): basically, we don't need to define the predict_proba method here anymore.
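A small sketch of the binary modified-Huber estimate quoted in the docstring, (clip(decision_function(X), -1, 1) + 1) / 2, applied to hypothetical decision scores:

import numpy

# Hypothetical decision_function outputs
scores = numpy.array([-2.0, -0.5, 0.0, 0.8, 3.0])

# Clip to [-1, 1], then rescale to [0, 1] for the positive class
p_pos = (numpy.clip(scores, -1, 1) + 1) / 2
proba = numpy.vstack([1 - p_pos, p_pos]).T

print(proba)
# [[1.   0.  ]
#  [0.75 0.25]
#  [0.5  0.5 ]
#  [0.1  0.9 ]
#  [0.   1.  ]]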
@@ -903,52 +850,61 @@ def predict_proba(self, X: Data, fhe: Union[FheMode, str] = FheMode.DISABLE) ->
             case is in the appendix B in:
             http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf
         """
-        X = check_array_and_assert(X)
-
-        # The following lines are taken directly from scikit-learn's source code
         if self.loss == "log_loss":
-            return self._predict_proba_lr(X, fhe=fhe)
+            y_preds = numpy_sigmoid(y_preds)[0]
+
+            assert isinstance(y_preds, numpy.ndarray)
+
+            if y_preds.ndim == 1 or y_preds.shape[1] == 1:
+                y_preds = y_preds.flatten()
+                return numpy.vstack([1 - y_preds, y_preds]).T

-        if self.loss == "modified_huber":
+            # OvR normalization, like LibLinear's predict_probability
+            prob = y_preds / y_preds.sum(axis=1).reshape((y_preds.shape[0], -1))
+
+        # The following lines are taken directly from scikit-learn's source code
+        elif self.loss == "modified_huber":
             assert isinstance(self.classes_, numpy.ndarray)
             binary = len(self.classes_) == 2
-            scores = self.decision_function(X)
-
-            if binary:
-                scores = scores[:, 0]

             prob2 = numpy.empty(tuple())
             if binary:
-                prob2 = numpy.ones((scores.shape[0], 2))
+                prob2 = numpy.ones((y_preds.shape[0], 2))
                 prob = prob2[:, 1]
             else:
-                prob = scores
+                prob = y_preds

-            numpy.clip(scores, -1, 1, prob)
+            numpy.clip(y_preds, -1, 1, prob)
             prob += 1.0
             prob /= 2.0

             if binary:
                 prob2[:, 0] -= prob
                 prob = prob2
             else:
                 # the above might assign zero to all classes, which doesn't
                 # normalize neatly; work around this to produce uniform
                 # probabilities
                 prob_sum = prob.sum(axis=1)
                 all_zero = prob_sum == 0

                 if numpy.any(all_zero):  # pragma: no cover
                     prob[all_zero, :] = 1
                     prob_sum[all_zero] = len(self.classes_)

                 # normalize
                 prob /= prob_sum.reshape((prob.shape[0], -1))

-            return prob
+        else:  # pragma: no cover
+            supported_losses = ["log_loss", "modified_huber"]
+            raise NotImplementedError(
+                f"Only one of {supported_losses} loss is supported. Got {self.loss}."
+            )

-        raise NotImplementedError(
-            f"Method 'predict_proba' currently only supports one of"
-            f" ['log_loss', 'modifier_huber'] loss. Got {self.loss}."
-        )
+        return prob

     def dump_dict(self) -> Dict[str, Any]:
         assert self._weight_quantizer is not None, self._is_not_fitted_error_message()
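Finally, a sketch of the log-loss path through the new post_processing (per-class sigmoid followed by the simple one-vs-rest normalization recommended by Zadrozny and Elkan), on hypothetical multi-class scores. Note that the normalization keeps each row summing to 1 and preserves the per-row argmax.

import numpy

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

# Hypothetical (n_samples, n_classes) decision scores
y_preds = numpy.array([[2.0, -1.0, 0.5],
                       [-0.5, 1.5, 0.0]])

prob = sigmoid(y_preds)
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))

print(prob.sum(axis=1))   # [1. 1.], rows normalized
print(prob.argmax(axis=1))  # [0 1], argmax preserved by normalization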