Commit: add psi test case
oaksharks committed Apr 27, 2023
1 parent 015699c · commit 1c3e870
Showing 4 changed files with 53 additions and 14 deletions.
8 changes: 6 additions & 2 deletions hypernets/experiment/_maker.py
@@ -53,7 +53,6 @@ def to_objective_object(o, force_minimize=False, **kwargs):
 def to_search_object(search_space, optimize_direction, searcher, searcher_options,
                      reward_metric=None, scorer=None, objectives=None, task=None, pos_label=None):
-
 
     def to_searcher(cls, options):
         assert search_space is not None, '"search_space" should be specified if "searcher" is None or str.'
         assert optimize_direction in {'max', 'min'}
@@ -77,10 +76,11 @@ def to_searcher(cls, options):
     if objectives is None:
         objectives = ['nf']
     objectives_instance = []
-    force_minimize = search_cls == MOEADSearcher
+    force_minimize = (search_cls == MOEADSearcher)
     for o in objectives:
         objectives_instance.append(to_objective_object(o, force_minimize=force_minimize,
                                                        task=task, pos_label=pos_label))
+
     objectives_instance.insert(0, PredictionObjective.create(reward_metric, force_minimize=force_minimize,
                                                              task=task, pos_label=pos_label))
     searcher_options['objectives'] = objectives_instance
@@ -333,6 +333,10 @@ def append_early_stopping_callbacks(cbs):
                                 reward_metric=reward_metric, scorer=scorer, objectives=objectives, task=task,
                                 pos_label=kwargs.get('pos_label'))
 
+    if searcher.kind() == const.SEARCHER_MOO:
+        if 'psi' in [_.name for _ in searcher.objectives]:
+            assert X_test is not None, "psi objective requires test dataset"
+
     if cfg.experiment_auto_down_sample_enabled and not isinstance(searcher, PlaybackSearcher) \
             and 'down_sample_search' not in kwargs.keys():
         train_data_shape = tb.get_shape(X_train)
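
Note (illustration, not part of the commit): the new guard above means a multi-objective search that includes the 'psi' objective must be given a test set, because PSI compares the score distribution on X_train against X_test. A minimal sketch of a call that satisfies the check, mirroring the new test case further down (import paths for CatPlainModel and PlainSearchSpace are omitted here because they are not shown in this diff):

    from hypernets.experiment import make_experiment

    # Sketch only: df_train/df_test are assumed pandas DataFrames containing a 'y' target column.
    experiment = make_experiment(CatPlainModel, df_train,
                                 target='y',
                                 test_data=df_test.drop('y', axis=1),  # required once 'psi' is among the objectives
                                 searcher='nsga2',
                                 reward_metric='auc',
                                 objectives=['psi'])
    # Omitting test_data now fails fast with: "psi objective requires test dataset"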
2 changes: 1 addition & 1 deletion hypernets/experiment/compete.py
@@ -1029,7 +1029,7 @@ def search(self, X_train, y_train, X_test=None, X_eval=None, y_eval=None, **kwargs):
         es = self.find_early_stopping_callback(model.callbacks)
         if es is not None and es.time_limit is not None and es.time_limit > 0:
             es.time_limit = self.estimate_time_limit(es.time_limit)
-        model.search(X_train, y_train, X_eval, y_eval, cv=self.cv, num_folds=self.num_folds, **kwargs)
+        model.search(X_train, y_train, X_eval, y_eval, X_test=X_test, cv=self.cv, num_folds=self.num_folds, **kwargs)
         return model
 
     def from_fitted_step(self, fitted_step):
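
Note (illustration, not part of the commit): forwarding X_test into model.search is what makes the test set reachable by the objectives evaluated for each trial. A rough sketch of the interface a custom objective can then rely on, assuming the Objective base class defined in hypernets/model/objectives.py (its constructor requirements are not shown in this diff):

    # Sketch only: everything except the call() signature shown in the diff below is an assumption.
    class MeanShiftObjective(Objective):
        def call(self, trial, estimator, X_eval, y_val, X_train, y_train, X_test, **kwargs) -> float:
            # X_test is populated when test_data was passed to the experiment
            train_scores = estimator.predict_proba(X_train)[:, 1]
            test_scores = estimator.predict_proba(X_test)[:, 1]
            return float(abs(train_scores.mean() - test_scores.mean()))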
34 changes: 23 additions & 11 deletions hypernets/model/objectives.py
@@ -67,22 +67,24 @@ def __init__(self, n_bins=10, task=const.TASK_BINARY, average='macro', eps=1e-6)
         self.eps = eps
 
     def call(self, trial, estimator, X_eval, y_val, X_train, y_train, X_test, **kwargs) -> float:
+        def to_2d(array_data):
+            if array_data.ndim == 1:
+                return array_data.reshape((-1, 1))
+            else:
+                return array_data
+
         if self.task == const.TASK_BINARY:
             train_proba = estimator.predict_proba(X_train)
             test_proba = estimator.predict_proba(X_test)
-            return float(calc_psi(train_proba[:, 1], test_proba[:, 1]))
+            return float(calc_psi(to_2d(train_proba[:, 1]), to_2d(test_proba[:, 1])))
         elif self.task == const.TASK_REGRESSION:
-            train_result = estimator.predict(X_train)
-            test_result = estimator.predict(X_test)
-            if train_result.ndim == 1:
-                train_result = train_result.reshape((-1, 1))
-            if test_result.ndim == 1:
-                test_result = test_result.reshape((-1, 1))
+            train_result = to_2d(estimator.predict(X_train))
+            test_result = to_2d(estimator.predict(X_test))
             return float(calc_psi(train_result, test_result))
         elif self.task == const.TASK_MULTICLASS:
             train_proba = estimator.predict_proba(X_train)
             test_proba = estimator.predict_proba(X_test)
-            psis = [float(calc_psi(train_proba[:, i], test_proba[:, 1])) for i in range(train_proba.shape[1])]
+            psis = [float(calc_psi(to_2d(train_proba[:, i]), to_2d(test_proba[:, 1]))) for i in range(train_proba.shape[1])]
             return float(np.mean(psis))
         else:
             raise RuntimeError(f"unseen task type {self.task}")
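
Note (illustration, not part of the commit): the to_2d helper exists because calc_psi bins its inputs column by column, so 1-D prediction vectors have to become single-column matrices first. A toy stand-in for the PSI computation, shown only to illustrate the shapes involved (hypernets' real calc_psi lives elsewhere and may differ):

    import numpy as np

    def to_2d(array_data):
        # 1-D scores become an (n, 1) matrix so column-wise binning works
        return array_data.reshape((-1, 1)) if array_data.ndim == 1 else array_data

    def toy_psi(expected, actual, n_bins=10, eps=1e-6):
        # population stability index between two score samples, quantile-binned on `expected`
        cuts = np.quantile(expected, np.linspace(0.0, 1.0, n_bins + 1))
        e = np.histogram(expected, bins=cuts)[0] / len(expected) + eps
        a = np.histogram(actual, bins=cuts)[0] / len(actual) + eps
        return float(np.sum((e - a) * np.log(e / a)))

    train_scores = to_2d(np.random.rand(1000))   # e.g. predict_proba(X_train)[:, 1]
    test_scores = to_2d(np.random.rand(500))     # e.g. predict_proba(X_test)[:, 1]
    print(toy_psi(train_scores[:, 0], test_scores[:, 0]))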
@@ -351,17 +353,27 @@ def __repr__(self):
         return f"{self.__class__.__name__}(name={self.name}, sample_size={self.sample_size}, direction={self.direction})"
 
 
-def create_objective(name, **kwargs):
+def create_objective(name, **kwargs):
+    def copy_opt(opt_names):
+        for opt_name in opt_names:
+            if opt_name in kwargs:
+                opts[opt_name] = kwargs.get(opt_name)
+
     name = name.lower()
+    opts = {}
+
     if name == 'elapsed':
         return ElapsedObjective()
     elif name == 'nf':
-        return NumOfFeatures(**kwargs)
+        copy_opt(['sample_size'])
+        return NumOfFeatures(**opts)
     elif name == 'psi':
-        return PSIObjective(**kwargs)
+        copy_opt(['n_bins', 'task', 'average', 'eps'])
+        return PSIObjective(**opts)
     elif name == 'feature_usage':
         return FeatureUsageObjective()
     elif name == 'pred_perf':
         return PredictionPerformanceObjective()
     else:
+        copy_opt(['task', 'pos_label', 'force_minimize'])
         return PredictionObjective.create(name, **kwargs)
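
Note (illustration, not part of the commit): copy_opt whitelists which keyword arguments reach each objective's constructor, so callers can pass a mixed bag of options to create_objective without tripping a TypeError in a constructor that does not accept them. A small usage sketch under that assumption:

    # Sketch only: keyword values are examples, not defaults taken from this commit.
    psi_obj = create_objective('psi', n_bins=20, task='binary', pos_label=1)
    # 'n_bins' and 'task' are copied into opts and forwarded to PSIObjective;
    # 'pos_label' is not in the PSI whitelist, so it is silently dropped.

    nf_obj = create_objective('nf', sample_size=5000, n_bins=20)
    # only 'sample_size' reaches NumOfFeatures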
23 changes: 23 additions & 0 deletions hypernets/tests/experiment/make_experiment_test.py
@@ -334,6 +334,28 @@ def test_nsga2(self):
         estimators = experiment.run(max_trials=10)
         self.check_exp(experiment, estimators)
 
+    def test_nsga2_psi(self):
+        df_train = self.df_train.copy()
+        df_test = self.df_test.copy()
+        X_test = df_test.copy().drop('y', axis=1)
+        experiment = make_experiment(CatPlainModel, df_train,
+                                     eval_data=df_test,
+                                     test_data=X_test,
+                                     callbacks=[],
+                                     random_state=1234,
+                                     search_callbacks=[],
+                                     target='y',
+                                     searcher='nsga2',  # available MOO searcher: moead, nsga2, rnsga2
+                                     searcher_options={'population_size': 5},
+                                     reward_metric='auc',
+                                     objectives=['psi'],
+                                     drift_detection=False,
+                                     early_stopping_rounds=10,
+                                     search_space=PlainSearchSpace(enable_dt=True, enable_lr=False, enable_nn=True))
+
+        estimators = experiment.run(max_trials=10)
+        self.check_exp(experiment, estimators)
+
     def test_rnsga2(self):
         df_train = self.df_train.copy()
         df_test = self.df_test.copy()
@@ -373,3 +395,4 @@ def test_moead(self):
 
         estimators = experiment.run(max_trials=10)
         self.check_exp(experiment, estimators)
+
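
Note (illustration, not part of the commit): the new test mirrors test_nsga2 but adds test_data and the 'psi' objective. Assuming a standard pytest setup, something like the following runs just that case:

    # Sketch only: selects the new case by name; the enclosing test-class name is not shown in this diff.
    import pytest
    pytest.main(['hypernets/tests/experiment/make_experiment_test.py', '-k', 'test_nsga2_psi', '-s'])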