Commit: add psi test case
oaksharks committed Apr 27, 2023
1 parent 015699c · commit 1c3e870
Showing 4 changed files with 53 additions and 14 deletions.
8 changes: 6 additions & 2 deletions hypernets/experiment/_maker.py
@@ -53,7 +53,6 @@ def to_objective_object(o, force_minimize=False, **kwargs):
 def to_search_object(search_space, optimize_direction, searcher, searcher_options,
                      reward_metric=None, scorer=None, objectives=None, task=None, pos_label=None):
-
 
     def to_searcher(cls, options):
         assert search_space is not None, '"search_space" should be specified if "searcher" is None or str.'
         assert optimize_direction in {'max', 'min'}
@@ -77,10 +76,11 @@ def to_searcher(cls, options):
     if objectives is None:
         objectives = ['nf']
     objectives_instance = []
-    force_minimize = search_cls == MOEADSearcher
+    force_minimize = (search_cls == MOEADSearcher)
     for o in objectives:
         objectives_instance.append(to_objective_object(o, force_minimize=force_minimize,
                                                        task=task, pos_label=pos_label))
+
     objectives_instance.insert(0, PredictionObjective.create(reward_metric, force_minimize=force_minimize,
                                                              task=task, pos_label=pos_label))
     searcher_options['objectives'] = objectives_instance
@@ -333,6 +333,10 @@ def append_early_stopping_callbacks(cbs):
                                 reward_metric=reward_metric, scorer=scorer, objectives=objectives, task=task,
                                 pos_label=kwargs.get('pos_label'))
 
+    if searcher.kind() == const.SEARCHER_MOO:
+        if 'psi' in [_.name for _ in searcher.objectives]:
+            assert X_test is not None, "psi objective requires test dataset"
+
     if cfg.experiment_auto_down_sample_enabled and not isinstance(searcher, PlaybackSearcher) \
             and 'down_sample_search' not in kwargs.keys():
         train_data_shape = tb.get_shape(X_train)
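
Note (illustration, not part of the commit): the new guard above means a multi-objective search that includes the 'psi' objective must be given a test set, because PSI compares the score distribution on X_train against X_test. A minimal sketch of a call that satisfies the check, mirroring the new test case further down (import paths for CatPlainModel and PlainSearchSpace are omitted here because they are not shown in this diff):

    from hypernets.experiment import make_experiment

    # Sketch only: df_train/df_test are assumed pandas DataFrames containing a 'y' target column.
    experiment = make_experiment(CatPlainModel, df_train,
                                 target='y',
                                 test_data=df_test.drop('y', axis=1),  # required once 'psi' is among the objectives
                                 searcher='nsga2',
                                 reward_metric='auc',
                                 objectives=['psi'])
    # Omitting test_data now fails fast with: "psi objective requires test dataset"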
2 changes: 1 addition & 1 deletion hypernets/experiment/compete.py
@@ -1029,7 +1029,7 @@ def search(self, X_train, y_train, X_test=None, X_eval=None, y_eval=None, **kwargs):
         es = self.find_early_stopping_callback(model.callbacks)
         if es is not None and es.time_limit is not None and es.time_limit > 0:
             es.time_limit = self.estimate_time_limit(es.time_limit)
-        model.search(X_train, y_train, X_eval, y_eval, cv=self.cv, num_folds=self.num_folds, **kwargs)
+        model.search(X_train, y_train, X_eval, y_eval, X_test=X_test, cv=self.cv, num_folds=self.num_folds, **kwargs)
         return model
 
     def from_fitted_step(self, fitted_step):
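
Note (illustration, not part of the commit): forwarding X_test into model.search is what makes the test set reachable by the objectives evaluated for each trial. A rough sketch of the interface a custom objective can then rely on, assuming the Objective base class defined in hypernets/model/objectives.py (its constructor requirements are not shown in this diff):

    # Sketch only: everything except the call() signature shown in the diff below is an assumption.
    class MeanShiftObjective(Objective):
        def call(self, trial, estimator, X_eval, y_val, X_train, y_train, X_test, **kwargs) -> float:
            # X_test is populated when test_data was passed to the experiment
            train_scores = estimator.predict_proba(X_train)[:, 1]
            test_scores = estimator.predict_proba(X_test)[:, 1]
            return float(abs(train_scores.mean() - test_scores.mean()))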
34 changes: 23 additions & 11 deletions hypernets/model/objectives.py
@@ -67,22 +67,24 @@ def __init__(self, n_bins=10, task=const.TASK_BINARY, average='macro', eps=1e-6)
         self.eps = eps
 
     def call(self, trial, estimator, X_eval, y_val, X_train, y_train, X_test, **kwargs) -> float:
+        def to_2d(array_data):
+            if array_data.ndim == 1:
+                return array_data.reshape((-1, 1))
+            else:
+                return array_data
+
         if self.task == const.TASK_BINARY:
             train_proba = estimator.predict_proba(X_train)
             test_proba = estimator.predict_proba(X_test)
-            return float(calc_psi(train_proba[:, 1], test_proba[:, 1]))
+            return float(calc_psi(to_2d(train_proba[:, 1]), to_2d(test_proba[:, 1])))
         elif self.task == const.TASK_REGRESSION:
-            train_result = estimator.predict(X_train)
-            test_result = estimator.predict(X_test)
-            if train_result.ndim == 1:
-                train_result = train_result.reshape((-1, 1))
-            if test_result.ndim == 1:
-                test_result = test_result.reshape((-1, 1))
+            train_result = to_2d(estimator.predict(X_train))
+            test_result = to_2d(estimator.predict(X_test))
             return float(calc_psi(train_result, test_result))
         elif self.task == const.TASK_MULTICLASS:
             train_proba = estimator.predict_proba(X_train)
             test_proba = estimator.predict_proba(X_test)
-            psis = [float(calc_psi(train_proba[:, i], test_proba[:, 1])) for i in range(train_proba.shape[1])]
+            psis = [float(calc_psi(to_2d(train_proba[:, i]), to_2d(test_proba[:, 1]))) for i in range(train_proba.shape[1])]
             return float(np.mean(psis))
         else:
             raise RuntimeError(f"unseen task type {self.task}")
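
Note (illustration, not part of the commit): the to_2d helper exists because calc_psi bins its inputs column by column, so 1-D prediction vectors have to become single-column matrices first. A toy stand-in for the PSI computation, shown only to illustrate the shapes involved (hypernets' real calc_psi lives elsewhere and may differ):

    import numpy as np

    def to_2d(array_data):
        # 1-D scores become an (n, 1) matrix so column-wise binning works
        return array_data.reshape((-1, 1)) if array_data.ndim == 1 else array_data

    def toy_psi(expected, actual, n_bins=10, eps=1e-6):
        # population stability index between two score samples, quantile-binned on `expected`
        cuts = np.quantile(expected, np.linspace(0.0, 1.0, n_bins + 1))
        e = np.histogram(expected, bins=cuts)[0] / len(expected) + eps
        a = np.histogram(actual, bins=cuts)[0] / len(actual) + eps
        return float(np.sum((e - a) * np.log(e / a)))

    train_scores = to_2d(np.random.rand(1000))   # e.g. predict_proba(X_train)[:, 1]
    test_scores = to_2d(np.random.rand(500))     # e.g. predict_proba(X_test)[:, 1]
    print(toy_psi(train_scores[:, 0], test_scores[:, 0]))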
@@ -351,17 +353,27 @@ def __repr__(self):
         return f"{self.__class__.__name__}(name={self.name}, sample_size={self.sample_size}, direction={self.direction})"
 
 
-def create_objective(name, **kwargs):
+def create_objective(name, **kwargs):
+    def copy_opt(opt_names):
+        for opt_name in opt_names:
+            if opt_name in kwargs:
+                opts[opt_name] = kwargs.get(opt_name)
+
     name = name.lower()
+    opts = {}
+
     if name == 'elapsed':
         return ElapsedObjective()
     elif name == 'nf':
-        return NumOfFeatures(**kwargs)
+        copy_opt(['sample_size'])
+        return NumOfFeatures(**opts)
     elif name == 'psi':
-        return PSIObjective(**kwargs)
+        copy_opt(['n_bins', 'task', 'average', 'eps'])
+        return PSIObjective(**opts)
     elif name == 'feature_usage':
         return FeatureUsageObjective()
     elif name == 'pred_perf':
         return PredictionPerformanceObjective()
     else:
+        copy_opt(['task', 'pos_label', 'force_minimize'])
         return PredictionObjective.create(name, **kwargs)
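
Note (illustration, not part of the commit): copy_opt whitelists which keyword arguments reach each objective's constructor, so callers can pass a mixed bag of options to create_objective without tripping a TypeError in a constructor that does not accept them. A small usage sketch under that assumption:

    # Sketch only: keyword values are examples, not defaults taken from this commit.
    psi_obj = create_objective('psi', n_bins=20, task='binary', pos_label=1)
    # 'n_bins' and 'task' are copied into opts and forwarded to PSIObjective;
    # 'pos_label' is not in the PSI whitelist, so it is silently dropped.

    nf_obj = create_objective('nf', sample_size=5000, n_bins=20)
    # only 'sample_size' reaches NumOfFeatures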
23 changes: 23 additions & 0 deletions hypernets/tests/experiment/make_experiment_test.py
@@ -334,6 +334,28 @@ def test_nsga2(self):
         estimators = experiment.run(max_trials=10)
         self.check_exp(experiment, estimators)
 
+    def test_nsga2_psi(self):
+        df_train = self.df_train.copy()
+        df_test = self.df_test.copy()
+        X_test = df_test.copy().drop('y', axis=1)
+        experiment = make_experiment(CatPlainModel, df_train,
+                                     eval_data=df_test,
+                                     test_data=X_test,
+                                     callbacks=[],
+                                     random_state=1234,
+                                     search_callbacks=[],
+                                     target='y',
+                                     searcher='nsga2',  # available MOO searcher: moead, nsga2, rnsga2
+                                     searcher_options={'population_size': 5},
+                                     reward_metric='auc',
+                                     objectives=['psi'],
+                                     drift_detection=False,
+                                     early_stopping_rounds=10,
+                                     search_space=PlainSearchSpace(enable_dt=True, enable_lr=False, enable_nn=True))
+
+        estimators = experiment.run(max_trials=10)
+        self.check_exp(experiment, estimators)
+
     def test_rnsga2(self):
         df_train = self.df_train.copy()
         df_test = self.df_test.copy()
@@ -373,3 +395,4 @@ def test_moead(self):
 
         estimators = experiment.run(max_trials=10)
         self.check_exp(experiment, estimators)
+
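
Note (illustration, not part of the commit): the new test mirrors test_nsga2 but adds test_data and the 'psi' objective. Assuming a standard pytest setup, something like the following runs just that case:

    # Sketch only: selects the new case by name; the enclosing test-class name is not shown in this diff.
    import pytest
    pytest.main(['hypernets/tests/experiment/make_experiment_test.py', '-k', 'test_nsga2_psi', '-s'])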