Skip to content

Commit

Permalink
make featuretools optional
Browse files Browse the repository at this point in the history
  • Loading branch information
lixfz committed May 8, 2023
1 parent a02c348 commit a4a9aeb
Show file tree
Hide file tree
Showing 10 changed files with 50 additions and 13 deletions.
3 changes: 2 additions & 1 deletion hypernets/experiment/compete.py
Original file line number Diff line number Diff line change
Expand Up @@ -1894,7 +1894,8 @@ def __init__(self, hyper_model, X_train, y_train, X_eval=None, y_eval=None, X_te

if feature_generation:
if 'FeatureGenerationTransformer' not in tb.transformers.keys():
raise NotImplementedError('feature_generation is not supported for your data')
raise ValueError('feature_generation is not supported for your data, '
'or "featuretools" is not installed.')

if data_cleaner_args is None:
data_cleaner_args = {}
Expand Down
3 changes: 3 additions & 0 deletions hypernets/tabular/dask_ex/_feature_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
"""

from ..feature_generators import FeatureGenerationTransformer
from ..feature_generators import is_feature_generator_ready as _is_feature_generator_ready

is_feature_generator_ready = _is_feature_generator_ready


class DaskFeatureGenerationTransformer(FeatureGenerationTransformer):
Expand Down
5 changes: 4 additions & 1 deletion hypernets/tabular/dask_ex/_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,9 +820,12 @@ def compute_sample_weight(y):

# TfidfEncoder=sk_ex.TfidfEncoder,
# DatetimeEncoder=sk_ex.DatetimeEncoder,
FeatureGenerationTransformer=_feature_generators.DaskFeatureGenerationTransformer,
# FeatureGenerationTransformer=_feature_generators.DaskFeatureGenerationTransformer,
FeatureImportancesSelectionTransformer=sk_ex.FeatureImportancesSelectionTransformer,
)

if _feature_generators.is_feature_generator_ready:
_predefined_transformers['FeatureGenerationTransformer'] = _feature_generators.FeatureGenerationTransformer

for name, tf in _predefined_transformers.items():
register_transformer(tf, name=name, dtypes=dd.DataFrame)
21 changes: 19 additions & 2 deletions hypernets/tabular/feature_generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,22 @@
"""
"""
from ._primitives import CrossCategorical, GeoHashPrimitive, DaskCompatibleHaversine, TfidfPrimitive
from ._transformers import FeatureGenerationTransformer, is_geohash_installed
# from ._primitives import CrossCategorical, GeoHashPrimitive, DaskCompatibleHaversine, TfidfPrimitive
# from ._transformers import FeatureGenerationTransformer, is_geohash_installed

# featuretools is an optional dependency: try to load the real transformer and
# fall back to a placeholder when the import fails.
try:
    from ._transformers import FeatureGenerationTransformer, is_geohash_installed
except ImportError as e:
    from sklearn.base import BaseEstimator as _BaseEstimator

    # Keep the message text now; the name ``e`` is unbound once the except
    # clause finishes, so the stub below cannot refer to it directly.
    _msg = f'{e}, install featuretools and try again'

    is_feature_generator_ready = False
    is_geohash_installed = False


    class FeatureGenerationTransformer(_BaseEstimator):
        """Placeholder exposed when the real transformer could not be imported.

        Instantiating it always raises ``ImportError`` carrying the message
        recorded at import time.
        """

        def __init__(self, *args, **kwargs):
            raise ImportError(_msg)
else:
    # The import succeeded, so feature generation can be used.
    is_feature_generator_ready = True
5 changes: 4 additions & 1 deletion hypernets/tabular/toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,9 +724,12 @@ def greedy_ensemble(cls, task, estimators, need_fit=False, n_folds=5, method='so
# TfidfEncoder=sk_ex.TfidfEncoder,
# DatetimeEncoder=sk_ex.DatetimeEncoder,

FeatureGenerationTransformer=feature_generators_.FeatureGenerationTransformer,
# FeatureGenerationTransformer=feature_generators_.FeatureGenerationTransformer,
)

if feature_generators_.is_feature_generator_ready:
_predefined_transformers['FeatureGenerationTransformer'] = feature_generators_.FeatureGenerationTransformer

for name, tf in _predefined_transformers.items():
register_transformer(tf, name=name, dtypes=pd.DataFrame)

Expand Down
10 changes: 7 additions & 3 deletions hypernets/tests/experiment/compete_experiment_test.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from datetime import datetime

import numpy as np

import pytest
from sklearn.preprocessing import LabelEncoder

from hypernets.core import SummaryCallback
from hypernets.core.objective import Objective
from hypernets.examples.plain_model import PlainModel, PlainSearchSpace
from hypernets.experiment import CompeteExperiment
from hypernets.model.objectives import ElapsedObjective, PredictionObjective
from hypernets.model.objectives import PredictionObjective
from hypernets.searchers.nsga_searcher import NSGAIISearcher
from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.feature_generators import is_feature_generator_ready
from hypernets.tabular.sklearn_ex import MultiLabelEncoder
from hypernets.tests.model.plain_model_test import create_plain_model
from hypernets.tests.tabular.tb_dask import if_dask_ready, is_dask_installed, setup_dask
Expand Down Expand Up @@ -150,6 +151,7 @@ def test_without_cv():
experiment_with_bank_data(dict(cv=False), {})


@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
def test_with_feature_generation():
    """Run the movie-lens experiment with feature generation switched on.

    The 'title' column is passed as the text column for feature generation.
    Skipped when ``is_feature_generator_ready`` is false.
    """
    init_kwargs = {
        'feature_generation': True,
        'feature_generation_text_cols': ['title'],
    }
    experiment_with_movie_lens(init_kwargs, {})
Expand Down Expand Up @@ -199,7 +201,8 @@ def test_with_pi():
feature_reselection_threshold=0.0001), {})


def test_with_feature_generator():
@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
def test_with_feature_generation_and_selection():
    """Run the movie-lens experiment with feature generation and selection on.

    The 'title' column is passed as the text column for feature generation.
    Skipped when ``is_feature_generator_ready`` is false.
    """
    init_kwargs = {
        'feature_generation': True,
        'feature_selection': True,
        'feature_generation_text_cols': ['title'],
    }
    experiment_with_movie_lens(init_kwargs, {})

Expand All @@ -223,6 +226,7 @@ def test_with_cv_ensemble_dask():


@if_dask_ready
@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
def test_with_feature_generator_dask():
    """Run the movie-lens experiment with feature generation and selection, using Dask.

    The 'title' column is passed as the text column for feature generation and
    the helper is called with ``with_dask=True``. Skipped when
    ``is_feature_generator_ready`` is false.
    """
    init_kwargs = {
        'feature_generation': True,
        'feature_selection': True,
        'feature_generation_text_cols': ['title'],
    }
    experiment_with_movie_lens(init_kwargs, {}, with_dask=True)
Expand Down
4 changes: 3 additions & 1 deletion hypernets/tests/experiment/extractor_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from hypernets.tests.experiment import experiment_factory
from hypernets.experiment.compete import DataCleanStep, DriftDetectStep
from hypernets.experiment import ExperimentExtractor, StepMeta
import time
import pytest
from hypernets.tabular.feature_generators import is_feature_generator_ready


def _run_experiment(creator):
Expand Down Expand Up @@ -73,6 +74,7 @@ def test_multicollinearity_detect_extractor():
assert unselected_features['INDUS']['reserved'] == 'CRIM'


@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
def test_feature_generation_extractor():
exp_data, estimator = _run_experiment(experiment_factory.create_feature_generation_experiment)
fg_step = exp_data.steps[2]
Expand Down
3 changes: 2 additions & 1 deletion hypernets/tests/experiment/make_experiment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from hypernets.experiment.compete import StepNames
from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.feature_generators import is_feature_generator_ready
from hypernets.tabular.sklearn_ex import MultiLabelEncoder
from hypernets.utils import common as common_util
from hypernets.searchers.nsga_searcher import NSGAIISearcher
Expand Down Expand Up @@ -67,7 +68,7 @@ def test_experiment_with_blood_full_features():

experiment = make_experiment(PlainModel, df, target=target, search_space=PlainSearchSpace(),
test_data=df_test,
feature_generation=True,
feature_generation=is_feature_generator_ready,
collinearity_detection=True,
drift_detection=True,
feature_selection=True,
Expand Down
6 changes: 4 additions & 2 deletions hypernets/tests/tabular/feature_generator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import math
from datetime import datetime

import featuretools as ft
import numpy as np
import pandas as pd
import pytest
Expand All @@ -17,7 +16,8 @@
from hypernets.tabular.column_selector import column_object_category_bool, column_number_exclude_timedelta
from hypernets.tabular.dataframe_mapper import DataFrameMapper
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.feature_generators import FeatureGenerationTransformer, is_geohash_installed
from hypernets.tabular.feature_generators import FeatureGenerationTransformer, is_geohash_installed, \
is_feature_generator_ready
from hypernets.tabular.sklearn_ex import FeatureSelectionTransformer
from hypernets.utils import logging

Expand All @@ -38,6 +38,7 @@ def general_preprocessor():
return preprocessor


@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
class Test_FeatureGenerator():
def test_char_add(self):
x1 = ['1', '2']
Expand All @@ -46,6 +47,7 @@ def test_char_add(self):
assert list(x3) == ['1c', '2d']

def test_ft_primitives(self):
import featuretools as ft
tps = ft.primitives.get_transform_primitives()
assert tps

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.feature_generators import is_geohash_installed
from hypernets.tabular.feature_generators import is_geohash_installed, is_feature_generator_ready
from hypernets.utils import logging
from . import if_dask_ready, is_dask_installed, setup_dask

Expand All @@ -24,6 +24,7 @@


@if_dask_ready
@pytest.mark.skipif(not is_feature_generator_ready, reason='feature_generator is not ready')
class TestFeatureGeneratorWithDask:
@classmethod
def setup_class(cls):
Expand Down

0 comments on commit a4a9aeb

Please sign in to comment.