Skip to content

Commit

Permalink
SOLVER add newton-lsmr+CLN up (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomMoral authored Oct 23, 2023
1 parent e71f091 commit b96fe4d
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 36 deletions.
4 changes: 2 additions & 2 deletions datasets/insurance.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ def get_data(self):
],
remainder="drop",
)
w = df["Exposure"]
w = df["Exposure"].values
X = linear_model_preprocessor.fit_transform(df)
y = df["Frequency"].values

X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
X, y, w, train_size=5_000, test_size=10_000, random_state=0
X, y, w, test_size=10_000, random_state=0
)
return dict(
X_train=X_train, y_train=y_train, w_train=w_train,
Expand Down
20 changes: 9 additions & 11 deletions objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,16 @@ class Objective(BaseObjective):
install_cmd = "conda"
requirements = [
'pip:git+https://github.com/lorentzenchr/'
'scikit-learn@glm_newton_cholesky'
'scikit-learn@glm_newton_lsmr_only'
]

parameters = {
'datafit': ['binom', 'poisson'],
'reg': [1e-4, 1e-12],
'fit_intercept': [True, False]
'reg': [1e-4, 1e-12]
}

def get_one_solution(self):
return np.zeros(self.X_train.shape[1] + self.fit_intercept)
def get_one_result(self):
return dict(beta=np.zeros(self.X_train.shape[1] + 1))

def set_data(self, X_train, y_train, w_train, X_test, y_test, w_test):
# The keyword arguments of this function are the keys of the `data`
Expand All @@ -43,14 +42,14 @@ def set_data(self, X_train, y_train, w_train, X_test, y_test, w_test):
self.y_test = (y_test > y_thresh).astype(np.float64)

self.lml = LinearModelLoss(
base_loss=HalfBinomialLoss(), fit_intercept=self.fit_intercept
base_loss=HalfBinomialLoss(), fit_intercept=True
)
elif self.datafit == "poisson":
self.lml = LinearModelLoss(
base_loss=HalfPoissonLoss(), fit_intercept=self.fit_intercept
base_loss=HalfPoissonLoss(), fit_intercept=True
)

def compute(self, beta):
def evaluate_result(self, beta):
# The arguments of this function are the outputs of the
# `get_result` method of the solver.
# They are customizable.
Expand All @@ -68,12 +67,11 @@ def compute(self, beta):
)
return dict(value=train_loss, test_loss=test_loss)

def to_dict(self):
def get_objective(self):
# The outputs of this function are the keyword arguments
# for the `set_objective` method of the solver.
# They are customizable.
return dict(
X=self.X_train, y=self.y_train, w=self.w_train,
datafit=self.datafit, reg=self.reg,
fit_intercept=self.fit_intercept
datafit=self.datafit, reg=self.reg
)
36 changes: 13 additions & 23 deletions solvers/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ class Solver(BaseSolver):
install_cmd = "conda"
requirements = [
'pip:git+https://github.com/lorentzenchr/'
'scikit-learn@glm_newton_cholesky'
'scikit-learn@glm_newton_lsmr_only'
]

# any parameter defined here is accessible as a class attribute
parameters = {'solver': [
'lbfgs', 'lbfgs2', 'newton-cg', 'newton-cholesky'
'lbfgs', 'newton-lsmr', 'newton-cg', 'newton-cholesky'
]}

stopping_criterion = SufficientProgressCriterion(
Expand All @@ -43,53 +43,43 @@ class Solver(BaseSolver):
def get_next(stop_val):
return int(max(stop_val + 1, stop_val * 1.3))

def skip(self, X, y, w, datafit, reg, fit_intercept=False):
if datafit == "poisson" and self.solver in ["lbfgs2", "newton-cg"]:
def skip(self, X, y, w, datafit, reg):
if datafit == "poisson" and self.solver in ["newton-cg"]:
return True, "solvers only compared for binom datafit"
return False, None

def set_objective(self, X, y, w, datafit, reg, fit_intercept=False):
def set_objective(self, X, y, w, datafit, reg):
# The arguments of this function are the results of the
# `get_objective` method of the objective.
# They are customizable.
self.X, self.y, self.w = X, y, w
self.fit_intercept = fit_intercept

if datafit == "binom":
if self.solver in ['lbfgs', 'newton-cg']:
self.clf = LogisticRegression(
C=2 / reg / X.shape[0], solver=self.solver, tol=1e-16,
fit_intercept=fit_intercept
)
else:
solver = self.solver.replace('2', '')
self.clf = BinomialRegressor(
solver=solver, alpha=reg, tol=1e-16, max_iter=1,
fit_intercept=fit_intercept
)
self.clf = LogisticRegression(
C=2 / reg / X.shape[0], solver=self.solver, tol=1e-16,
fit_intercept=True
)
else:
self.clf = PoissonRegressor(
solver=self.solver, alpha=reg, tol=1e-16, max_iter=1,
fit_intercept=fit_intercept
fit_intercept=True
)

def run(self, n_iter):
if n_iter == 0:
self.coef_ = np.zeros(self.X.shape[1] + self.fit_intercept)
self.coef_ = np.zeros(self.X.shape[1] + 1)
return

self.clf.set_params(max_iter=n_iter)

with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=ConvergenceWarning)
self.clf.fit(self.X, self.y, sample_weight=self.w)
self.coef_ = self.clf.coef_.flatten()

if self.fit_intercept:
self.coef_ = np.r_[self.coef_, self.clf.intercept_]
self.coef_ = np.r_[self.clf.coef_.flatten(), self.clf.intercept_]

def get_result(self):
# The outputs of this function are the arguments of the
# `evaluate_result` method of the objective.
# They are customizable.
return self.coef_
return dict(beta=self.coef_)

0 comments on commit b96fe4d

Please sign in to comment.