Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New search space def #122

Merged
merged 9 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,084 changes: 151 additions & 933 deletions Tutorial/1_Estimators_Overview.ipynb

Large diffs are not rendered by default.

478 changes: 0 additions & 478 deletions Tutorial/2_Defining_Search_Space_(config_dicts).ipynb

This file was deleted.

4,904 changes: 4,904 additions & 0 deletions Tutorial/2_Search_Spaces.ipynb

Large diffs are not rendered by default.

1,244 changes: 1,244 additions & 0 deletions Tutorial/3_Feature_Set_Selector.ipynb

Large diffs are not rendered by default.

1,147 changes: 0 additions & 1,147 deletions Tutorial/3_Genetic_Feature_Set_Selectors.ipynb

This file was deleted.

130 changes: 69 additions & 61 deletions Tutorial/4_Symbolic_Regression_and_Classification.ipynb

Large diffs are not rendered by default.

590 changes: 590 additions & 0 deletions Tutorial/5_Genetic_Feature_Selection.ipynb

Large diffs are not rendered by default.

121 changes: 0 additions & 121 deletions Tutorial/5_GraphPipeline.ipynb

This file was deleted.

121 changes: 121 additions & 0 deletions Tutorial/6_GraphPipeline.ipynb

Large diffs are not rendered by default.

107 changes: 102 additions & 5 deletions Tutorial/7_dask_parallelization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,27 @@
" scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n",
" X, y = sklearn.datasets.load_digits(return_X_y=True)\n",
" X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
" est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" \n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" \n",
" \n",
" est.fit(X_train, y_train)\n",
" print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -106,7 +126,27 @@
"X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
"\n",
"\n",
"est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" n_jobs=10,\n",
" memory_limit=\"4GB\"\n",
")\n",
"\n",
"est.fit(X_train, y_train)\n",
"print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -214,7 +254,27 @@
}
],
"source": [
"est = tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
")\n",
"\n",
"\n",
"# this is equivalent to: \n",
"# est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n",
"est.fit(X_train, y_train)\n",
Expand Down Expand Up @@ -283,7 +343,25 @@
" threads_per_worker=1,\n",
" memory_limit='4GB',\n",
") as cluster, Client(cluster) as client:\n",
" est = tpot2.TPOTEstimatorSteadyState(client=client, n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" est.fit(X_train, y_train)\n",
" print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -349,7 +427,26 @@
" X, y = sklearn.datasets.load_digits(return_X_y=True)\n",
" X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
"\n",
" est = tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" est.fit(X_train, y_train)\n",
" # this is equivalent to: \n",
" # est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n",
" est.fit(X_train, y_train)\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,26 +186,33 @@
"\n",
"X, y = sklearn.datasets.load_iris(return_X_y=True)\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" n_jobs=32,\n",
" cv=2,\n",
" max_eval_time_seconds=30,\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" generations = 50,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
"\n",
"\n",
" population_size=population_size,\n",
" initial_population_size=initial_population_size,\n",
" population_scaling = population_scaling,\n",
" generations_until_end_population = generations_until_end_population,\n",
" \n",
" budget_range = budget_range,\n",
" generations_until_end_budget=generations_until_end_budget,\n",
" )\n",
"\n",
" population_size=population_size,\n",
" initial_population_size=initial_population_size,\n",
" population_scaling = population_scaling,\n",
" generations_until_end_population = generations_until_end_population,\n",
" \n",
" budget_range = budget_range,\n",
" generations_until_end_budget=generations_until_end_budget,\n",
" verbose=0)\n",
"\n",
"\n",
"start = time.time()\n",
Expand Down Expand Up @@ -296,14 +303,20 @@
}
],
"source": [
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
" \n",
Expand Down Expand Up @@ -369,14 +382,15 @@
}
],
"source": [
"\n",
"\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
"\n",
Expand Down Expand Up @@ -447,9 +461,7 @@
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
"\n",
Expand Down
85 changes: 85 additions & 0 deletions Tutorial/Example_Search_Spaces/imputation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from ConfigSpace import ConfigurationSpace\n",
"from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n",
"\n",
"simple_imputer = ConfigurationSpace(\n",
" space = {\n",
" 'strategy' : Categorical('strategy', [['mean','median',], ['most_frequent'] ]),\n",
" 'add_indicator' : Categorical('add_indicator', [True, False]), \n",
" }\n",
")\n",
"\n",
"simple_imputer.sample_configuration()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Configuration(values={\n",
" '2': 2,\n",
" 'a': 2,\n",
"})"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from ConfigSpace import ConfigurationSpace, EqualsCondition\n",
"import ConfigSpace\n",
"\n",
"cs = ConfigurationSpace({\n",
"\n",
" \"1\": [1,2,3],\n",
" \"2\": ConfigSpace.Constant(\"2\", 2),\n",
"\n",
" \"a\": [1, 2, 3],\n",
"\n",
"})\n",
"\n",
"cond = EqualsCondition(cs['1'], cs['a'], 1)\n",
"cond2 = EqualsCondition(cs['2'], cs['a'], 2)\n",
"\n",
"cs.add_condition(cond)\n",
"cs.add_condition(cond2)\n",
"\n",
"cs.sample_configuration()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tpot2env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def calculate_version():

setup(
name='TPOT2',
python_requires='<3.12', #for configspace compatibility
version=package_version,
author='Pedro Ribeiro',
packages=find_packages(),
Expand Down Expand Up @@ -48,6 +49,7 @@ def calculate_version():
'dask-ml>=2022.5.27',
'dask-jobqueue>=0.8.1',
'func_timeout>=4.3.5',
'configspace>=0.7.1',
],
extras_require={
'skrebate': ['skrebate>=0.3.4'],
Expand Down
4 changes: 3 additions & 1 deletion tpot2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
#TODO: are all the imports in the init files done correctly?
#TODO clean up import organization

from .individual import BaseIndividual

from .graphsklearn import GraphPipeline
from .population import Population

from . import builtin_modules
from . import utils
from . import config
from . import individual_representations
from . import search_spaces
from . import evolvers
from . import objectives
from . import selectors
Expand Down
22 changes: 1 addition & 21 deletions tpot2/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1 @@
#TODO: make configuration dictionaries optionally based on strings?
from .classifiers import make_classifier_config_dictionary
from .transformers import make_transformer_config_dictionary
from .regressors import make_regressor_config_dictionary
from .selectors import make_selector_config_dictionary
from .special_configs import make_arithmetic_transformer_config_dictionary, make_FSS_config_dictionary, make_passthrough_config_dictionary
from .autoqtl_builtins import make_FeatureEncodingFrequencySelector_config_dictionary, make_genetic_encoders_config_dictionary
from .hyperparametersuggestor import *

try:
from .classifiers_sklearnex import make_sklearnex_classifier_config_dictionary
from .regressors_sklearnex import make_sklearnex_regressor_config_dictionary
except ModuleNotFoundError: #if optional packages are not installed
pass

try:
from .mdr_configs import make_skrebate_config_dictionary, make_MDR_config_dictionary, make_ContinuousMDR_config_dictionary
except: #if optional packages are not installed
pass

from .classifiers import *
from .get_configspace import get_search_space
Loading
Loading