diff --git a/daal4py/sklearn/linear_model/logistic_path.py b/daal4py/sklearn/linear_model/logistic_path.py
index 8ab97ea9d2..a010311c11 100755
--- a/daal4py/sklearn/linear_model/logistic_path.py
+++ b/daal4py/sklearn/linear_model/logistic_path.py
@@ -426,6 +426,10 @@ def __logistic_regression_path(
             (classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype
         )
 
+    # Adoption of https://github.com/scikit-learn/scikit-learn/pull/26721
+    if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
+        sw_sum = len(X) if sample_weight is None else np.sum(sample_weight)
+
     if coef is not None:
         # it must work both giving the bias term and not
         if multi_class == "ovr":
@@ -592,7 +596,7 @@ def grad(x, *args):
                     X,
                     target,
                     0.0,
-                    1.0 / (2 * C * C_daal_multiplier),
+                    1.0 / (2 * C * C_daal_multiplier * sw_sum),
                     fit_intercept,
                     value=True,
                     gradient=True,
@@ -600,10 +604,10 @@ def grad(x, *args):
                 )
             else:
                 if sklearn_check_version("1.1"):
-                    l2_reg_strength = 1.0 / C
+                    l2_reg_strength = 1.0 / (C * sw_sum)
                     extra_args = (X, target, sample_weight, l2_reg_strength, n_threads)
                 else:
-                    extra_args = (X, target, 1.0 / C, sample_weight)
+                    extra_args = (X, target, 1.0 / (C * sw_sum), sample_weight)
 
             iprint = [-1, 50, 1, 100, 101][
                 np.searchsorted(np.array([0, 1, 2, 3]), verbose)
@@ -614,7 +618,13 @@ def grad(x, *args):
                 method="L-BFGS-B",
                 jac=True,
                 args=extra_args,
-                options={"iprint": iprint, "gtol": tol, "maxiter": max_iter},
+                options={
+                    "maxiter": max_iter,
+                    "maxls": 50,
+                    "iprint": iprint,
+                    "gtol": tol,
+                    "ftol": 64 * np.finfo(float).eps,
+                },
             )
             n_iter_i = _check_optimize_result(
                 solver,
@@ -629,7 +639,7 @@ def grad(x, *args):
             if _dal_ready:
 
                 def make_ncg_funcs(f, value=False, gradient=False, hessian=False):
-                    daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier)
+                    daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier * sw_sum)
                     _obj_, X_, y_, n_samples = daal_extra_args_func(
                         classes.size,
                         w0,
@@ -662,10 +672,10 @@ def _func_(x, *args):
                 )
             else:
                 if sklearn_check_version("1.1"):
-                    l2_reg_strength = 1.0 / C
+                    l2_reg_strength = 1.0 / (C * sw_sum)
                     args = (X, target, sample_weight, l2_reg_strength, n_threads)
                 else:
-                    args = (X, target, 1.0 / C, sample_weight)
+                    args = (X, target, 1.0 / (C * sw_sum), sample_weight)
 
             w0, n_iter_i = _newton_cg(
                 hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol
diff --git a/deselected_tests.yaml b/deselected_tests.yaml
index 9976dc9b9a..0fd71dd443 100755
--- a/deselected_tests.yaml
+++ b/deselected_tests.yaml
@@ -190,6 +190,12 @@ deselected_tests:
   # margin above the test threshold, see https://github.com/scikit-learn/scikit-learn/pull/13645
   - linear_model/tests/test_logistic.py::test_dtype_match
 
+  # Logistic Regression coeffs change due to fix for loss scaling
+  # (https://github.com/scikit-learn/scikit-learn/pull/26721)
+  - feature_selection/tests/test_from_model.py::test_importance_getter[estimator0-named_steps.logisticregression.coef_]
+  - inspection/_plot/tests/test_boundary_decision_display.py::test_class_of_interest_binary[predict_proba]
+  - linear_model/tests/test_sag.py::test_sag_pobj_matches_logistic_regression
+
   # This fails on certain platforms. While weighted data does not go through DAAL,
   # unweighted does. Since convergence does not occur (comment in the test
   # suggests that) and because coefficients are slightly different,
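
The patch above adopts scikit-learn's switch from a summed to a (weighted-)mean log-loss objective (scikit-learn/scikit-learn#26721), which is why every `1.0 / C`-style regularization strength is divided by `sw_sum`. A minimal sketch of why this rescaling leaves the optimum unchanged, assuming a binary problem with labels in {-1, +1}; the function names here are illustrative, not from the patched file:

```python
import numpy as np


def objective_sum(w, X, y, C, sample_weight=None):
    # Old convention: total (summed) log-loss plus penalty 1/(2C) * ||w||^2.
    sw = np.ones(X.shape[0]) if sample_weight is None else sample_weight
    loss = np.logaddexp(0.0, -y * (X @ w))  # log(1 + exp(-y * Xw)), y in {-1, +1}
    return sw @ loss + 0.5 / C * (w @ w)


def objective_mean(w, X, y, C, sample_weight=None):
    # New convention (PR 26721): weighted-mean log-loss; the penalty is divided
    # by sw_sum as well, so this equals objective_sum / sw_sum exactly.
    sw = np.ones(X.shape[0]) if sample_weight is None else sample_weight
    sw_sum = sw.sum()
    loss = np.logaddexp(0.0, -y * (X @ w))
    return (sw @ loss) / sw_sum + 0.5 / (C * sw_sum) * (w @ w)


# The two objectives differ only by the constant factor sw_sum, so they share
# the same minimizer; only gradient magnitudes shrink by that factor, which is
# why the patch also tightens the L-BFGS stopping options (ftol, maxls) and why
# coefficients in the deselected tests shift slightly rather than drastically.
rng = np.random.default_rng(0)
X = rng.standard_normal((8, 3))
y = np.sign(rng.standard_normal(8))
w = rng.standard_normal(3)
assert np.isclose(objective_mean(w, X, y, C=0.5) * len(X), objective_sum(w, X, y, C=0.5))
```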