diff --git a/gama/configuration/test_configuration_task/__init__.py b/gama/configuration/test_configuration_task/__init__.py
new file mode 100644
index 00000000..a7fdc527
--- /dev/null
+++ b/gama/configuration/test_configuration_task/__init__.py
@@ -0,0 +1,2 @@
+from .classifiers import TestClassifierConfig
+from .preprocessors import TestPreprocessorConfig
diff --git a/gama/configuration/test_configuration_task/classifiers.py b/gama/configuration/test_configuration_task/classifiers.py
new file mode 100644
index 00000000..b0a72f57
--- /dev/null
+++ b/gama/configuration/test_configuration_task/classifiers.py
@@ -0,0 +1,238 @@
+import ConfigSpace as cs
+import ConfigSpace.hyperparameters as csh
+
+
+class TestClassifierConfig:
+    def __init__(
+        self,
+        config_space: cs.ConfigurationSpace,
+    ):
+        if "estimators" not in config_space.meta:
+            raise ValueError("Expected 'estimators' key in meta of config_space")
+        self.config_space = config_space
+        self.classifiers_setup_map = {
+            "BernoulliNB": self.setup_bernoulliNB,
+            "MultinomialNB": self.setup_multinomialNB,
+            "GaussianNB": self.setup_gaussianNB,
+            "DecisionTreeClassifier": self.setup_decision_tree,
+            "ExtraTreesClassifier": self.setup_extra_trees,
+            "RandomForestClassifier": self.setup_random_forest,
+            "GradientBoostingClassifier": self.setup_gradient_boosting,
+            "KNeighborsClassifier": self.setup_k_neighbors,
+            "LinearSVC": self.setup_linear_svc,
+            "LogisticRegression": self.setup_logistic_regression,
+        }
+        self.cs_estimators_name = self.config_space.meta["estimators"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0],
+            "fit_prior": [True, False],
+            "criterion": ["gini", "entropy"],
+            "max_depth": {"lower": 1, "upper": 11},
+            "min_samples_split": {"lower": 2, "upper": 21},
+            "min_samples_leaf": {"lower": 1, "upper": 21},
+            "max_features": {"lower": 0.05, "upper": 1.01, "default_value": 1.0},
+            "n_estimators": [100],
+            "bootstrap": [True, False],
+            "dual": [True, False],
+            "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
+        }
+
+    def setup_classifiers(self):
+        classifiers_choices = list(self.classifiers_setup_map.keys())
+
+        if not classifiers_choices:
+            raise ValueError("No classifiers to add to config space")
+
+        classifiers = csh.CategoricalHyperparameter(
+            name=self.cs_estimators_name,
+            choices=classifiers_choices,
+        )
+        self.config_space.add_hyperparameter(classifiers)
+
+        for classifier_name in classifiers_choices:
+            if setup_func := self.classifiers_setup_map.get(classifier_name):
+                setup_func(classifiers)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, estimator_name: str
+    ):
+        if "classifiers" not in local_vars or not isinstance(
+            local_vars["classifiers"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'classifiers' key with a CategoricalHyperparameter "
+                "in local_vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["classifiers"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["classifiers"], estimator_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_bernoulliNB(self, classifiers: csh.CategoricalHyperparameter):
+        alpha_NB = csh.CategoricalHyperparameter(
+            "alpha__bernoulliNB",
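+            # the "__bernoulliNB" suffix keeps the name unique in the shared
+            # ConfigurationSpace (each estimator registers its own copy)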
self.shared_hyperparameters["alpha"] + ) + fit_prior = csh.CategoricalHyperparameter( + "fit_prior__bernoulliNB", self.shared_hyperparameters["fit_prior"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "BernoulliNB") + + def setup_multinomialNB(self, classifiers: csh.CategoricalHyperparameter): + alpha_NB = csh.CategoricalHyperparameter( + "alpha__multinomialNB", self.shared_hyperparameters["alpha"] + ) + fit_prior = csh.CategoricalHyperparameter( + "fit_prior__multinomialNB", self.shared_hyperparameters["fit_prior"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "MultinomialNB") + + def setup_gaussianNB(self, classifiers: csh.CategoricalHyperparameter): + # GaussianNB has no hyperparameters + pass + + def setup_decision_tree(self, classifiers: csh.CategoricalHyperparameter): + criterion = csh.CategoricalHyperparameter( + "criterion__decision_tree", self.shared_hyperparameters["criterion"] + ) + max_depth = csh.UniformIntegerHyperparameter( + "max_depth__decision_tree", **self.shared_hyperparameters["max_depth"] + ) + min_samples_split = csh.UniformIntegerHyperparameter( + "min_samples_split__decision_tree", + **self.shared_hyperparameters["min_samples_split"], + ) + min_samples_leaf = csh.UniformIntegerHyperparameter( + "min_samples_leaf__decision_tree", + **self.shared_hyperparameters["min_samples_leaf"], + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "DecisionTreeClassifier" + ) + + def setup_extra_trees(self, classifiers: csh.CategoricalHyperparameter): + criterion = csh.CategoricalHyperparameter( + "criterion__extra_trees", self.shared_hyperparameters["criterion"] + ) + max_depth = csh.UniformIntegerHyperparameter( + "max_depth__extra_trees", **self.shared_hyperparameters["max_depth"] + ) + min_samples_split = csh.UniformIntegerHyperparameter( + "min_samples_split__extra_trees", + **self.shared_hyperparameters["min_samples_split"], + ) + min_samples_leaf = csh.UniformIntegerHyperparameter( + "min_samples_leaf__extra_trees", + **self.shared_hyperparameters["min_samples_leaf"], + ) + max_features = csh.UniformFloatHyperparameter( + "max_features__extra_trees", **self.shared_hyperparameters["max_features"] + ) + n_estimators = csh.CategoricalHyperparameter( + "n_estimators__extra_trees", self.shared_hyperparameters["n_estimators"] + ) + bootstrap = csh.CategoricalHyperparameter( + "bootstrap__extra_trees", self.shared_hyperparameters["bootstrap"] + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "ExtraTreesClassifier" + ) + + def setup_random_forest(self, classifiers: csh.CategoricalHyperparameter): + criterion = csh.CategoricalHyperparameter( + "criterion__random_forest", self.shared_hyperparameters["criterion"] + ) + max_depth = csh.UniformIntegerHyperparameter( + "max_depth__random_forest", **self.shared_hyperparameters["max_depth"] + ) + min_samples_split = csh.UniformIntegerHyperparameter( + "min_samples_split", **self.shared_hyperparameters["min_samples_split"] + ) + min_samples_leaf = csh.UniformIntegerHyperparameter( + "min_samples_leaf", **self.shared_hyperparameters["min_samples_leaf"] + ) + max_features = csh.UniformFloatHyperparameter( + "max_features", **self.shared_hyperparameters["max_features"] + ) + n_estimators = csh.CategoricalHyperparameter( + "n_estimators__random_forest", self.shared_hyperparameters["n_estimators"] + ) + bootstrap = csh.CategoricalHyperparameter( + "bootstrap", self.shared_hyperparameters["bootstrap"] + ) + self._add_hyperparameters_and_equals_conditions( + locals(), 
"RandomForestClassifier" + ) + + def setup_gradient_boosting(self, classifiers: csh.CategoricalHyperparameter): + sub_sample = csh.CategoricalHyperparameter( + "subsample", [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] + ) + learning_rate = csh.CategoricalHyperparameter( + "learning_rate", [1e-3, 1e-2, 1e-1, 0.5, 1.0] + ) + max_features = csh.UniformFloatHyperparameter( + "max_features__gradient_boosting", + **self.shared_hyperparameters["max_features"], + ) + n_estimators = csh.CategoricalHyperparameter( + "n_estimators__gradient_boosting", + self.shared_hyperparameters["n_estimators"], + ) + self._add_hyperparameters_and_equals_conditions( + locals(), "GradientBoostingClassifier" + ) + + def setup_k_neighbors(self, classifiers: csh.CategoricalHyperparameter): + n_neighbors = csh.UniformIntegerHyperparameter("n_neighbors", 1, 51) + weights = csh.CategoricalHyperparameter("weights", ["uniform", "distance"]) + p = csh.UniformIntegerHyperparameter("p", 1, 2) + self._add_hyperparameters_and_equals_conditions( + locals(), "KNeighborsClassifier" + ) + + def setup_linear_svc(self, classifiers: csh.CategoricalHyperparameter): + loss = csh.CategoricalHyperparameter( + "loss__linear_svc", ["hinge", "squared_hinge"] + ) + penalty = csh.CategoricalHyperparameter("penalty__linear_svc", ["l1", "l2"]) + dual = csh.CategoricalHyperparameter( + "dual__svc", self.shared_hyperparameters["dual"] + ) + tol = csh.CategoricalHyperparameter("tol__svc", [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]) + C = csh.CategoricalHyperparameter("C__svc", self.shared_hyperparameters["C"]) + self._add_hyperparameters_and_equals_conditions(locals(), "LinearSVC") + + # Forbidden clause: Penalty 'l1' cannot be used with loss 'hinge' + forbidden_penalty_loss = cs.ForbiddenAndConjunction( + cs.ForbiddenEqualsClause(self.config_space["penalty__linear_svc"], "l1"), + cs.ForbiddenEqualsClause(self.config_space["loss__linear_svc"], "hinge"), + ) + self.config_space.add_forbidden_clause(forbidden_penalty_loss) + + def setup_logistic_regression(self, classifiers: csh.CategoricalHyperparameter): + penalty = csh.CategoricalHyperparameter( + "penalty__logistic_regression", ["l1", "l2"] + ) + C = csh.CategoricalHyperparameter( + "C__logistic_regression", self.shared_hyperparameters["C"] + ) + dual = csh.CategoricalHyperparameter( + "dual__logistic_regression", self.shared_hyperparameters["dual"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "LogisticRegression") diff --git a/gama/configuration/test_configuration_task/preprocessors.py b/gama/configuration/test_configuration_task/preprocessors.py new file mode 100644 index 00000000..a80a5849 --- /dev/null +++ b/gama/configuration/test_configuration_task/preprocessors.py @@ -0,0 +1,190 @@ +import ConfigSpace as cs +import ConfigSpace.hyperparameters as csh + + +class TestPreprocessorConfig: + def __init__( + self, + config_space: cs.ConfigurationSpace, + ): + if "preprocessors" not in config_space.meta: + raise ValueError("Expected 'preprocessors' key in meta of config_space") + self.config_space = config_space + self.preprocessors_setup_map = { + "SelectFwe": self.setup_select_fwe, + "Binarizer": self.setup_binarizer, + "FastICA": self.setup_fast_ica, + "FeatureAgglomeration": self.setup_feature_agglomeration, + "MaxAbsScaler": self.setup_max_abs_scaler, + "MinMaxScaler": self.setup_min_max_scaler, + "Normalizer": self.setup_normalizer, + "Nystroem": self.setup_nystroem, + "PCA": self.setup_pca, + "PolynomialFeatures": self.setup_polynomial_features, + "RBFSampler": 
self.setup_rbf_sampler,
+            "RobustScaler": self.setup_robust_scaler,
+            "StandardScaler": self.setup_standard_scaler,
+            "SelectPercentile": self.setup_select_percentile,
+            "VarianceThreshold": self.setup_variance_threshold,
+        }
+        self.cs_preprocessors_name = config_space.meta["preprocessors"]
+
+    @property
+    def shared_hyperparameters(self):
+        return {
+            "gamma": {"lower": 0.01, "upper": 1.01, "default_value": 1.0},
+        }
+
+    def setup_preprocessors(self):
+        preprocessors_choices = list(self.preprocessors_setup_map.keys())
+
+        if not preprocessors_choices:
+            raise ValueError("No preprocessors to add to config space")
+
+        preprocessors = csh.CategoricalHyperparameter(
+            name=self.cs_preprocessors_name,
+            choices=preprocessors_choices,
+        )
+        self.config_space.add_hyperparameter(preprocessors)
+
+        for preprocessor_name in preprocessors_choices:
+            if setup_func := self.preprocessors_setup_map.get(preprocessor_name):
+                setup_func(preprocessors)
+
+    def _add_hyperparameters_and_equals_conditions(
+        self, local_vars: dict, preprocessor_name: str
+    ):
+        if "preprocessors" not in local_vars or not isinstance(
+            local_vars["preprocessors"], csh.CategoricalHyperparameter
+        ):
+            raise ValueError(
+                "Expected 'preprocessors' key with a CategoricalHyperparameter "
+                "in local_vars"
+            )
+
+        hyperparameters_to_add = [
+            hyperparameter
+            for hyperparameter in local_vars.values()
+            if isinstance(hyperparameter, csh.Hyperparameter)
+            and hyperparameter != local_vars["preprocessors"]
+        ]
+
+        conditions_to_add = [
+            cs.EqualsCondition(
+                hyperparameter, local_vars["preprocessors"], preprocessor_name
+            )
+            for hyperparameter in hyperparameters_to_add
+        ]
+
+        self.config_space.add_hyperparameters(hyperparameters_to_add)
+        self.config_space.add_conditions(conditions_to_add)
+
+    def setup_select_fwe(self, preprocessors: csh.CategoricalHyperparameter):
+        alpha = csh.UniformFloatHyperparameter(
+            "alpha__SelectFwe", 0, 0.05, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "SelectFwe")
+
+    def setup_binarizer(self, preprocessors: csh.CategoricalHyperparameter):
+        threshold = csh.UniformFloatHyperparameter(
+            "threshold__binarizer", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "Binarizer")
+
+    def setup_fast_ica(self, preprocessors: csh.CategoricalHyperparameter):
+        whiten = csh.CategoricalHyperparameter("whiten", ["unit-variance"])
+        tol = csh.UniformFloatHyperparameter(
+            "tol__fast_ica", 0.0, 1.01, default_value=0.05
+        )
+        self._add_hyperparameters_and_equals_conditions(locals(), "FastICA")
+
+    def setup_feature_agglomeration(
+        self, preprocessors: csh.CategoricalHyperparameter
+    ):
+        linkage = csh.CategoricalHyperparameter(
+            "linkage__feature_agglomeration", ["ward", "complete", "average"]
+        )
+        affinity = csh.CategoricalHyperparameter(
+            "affinity__feature_agglomeration",
+            ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"],
+        )
+        self._add_hyperparameters_and_equals_conditions(
+            locals(), "FeatureAgglomeration"
+        )
+
+        # Forbidden clause: linkage 'ward' only accepts affinity 'euclidean'
+        forbidden_ward_affinity = cs.ForbiddenAndConjunction(
+            cs.ForbiddenEqualsClause(
+                self.config_space["linkage__feature_agglomeration"], "ward"
+            ),
+            cs.ForbiddenInClause(
+                self.config_space["affinity__feature_agglomeration"],
+                ["l1", "l2", "manhattan", "cosine", "precomputed"],
+            ),
+        )
+        self.config_space.add_forbidden_clause(forbidden_ward_affinity)
+
+    def setup_max_abs_scaler(self, preprocessors: csh.CategoricalHyperparameter):
+        # No
hyperparameters + pass + + def setup_min_max_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_normalizer(self, preprocessors: csh.CategoricalHyperparameter): + norm = csh.CategoricalHyperparameter("norm", ["l1", "l2", "max"]) + self._add_hyperparameters_and_equals_conditions(locals(), "Normalizer") + + def setup_nystroem(self, preprocessors: csh.CategoricalHyperparameter): + kernel = csh.CategoricalHyperparameter( + "kernel", + [ + "rbf", + "cosine", + "chi2", + "laplacian", + "polynomial", + "poly", + "linear", + "additive_chi2", + "sigmoid", + ], + ) + gamma = csh.UniformFloatHyperparameter( + "gamma__nystroem", **self.shared_hyperparameters["gamma"] + ) + n_components = csh.UniformIntegerHyperparameter("n_components", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "Nystroem") + + def setup_pca(self, preprocessors: csh.CategoricalHyperparameter): + svd_solver = csh.CategoricalHyperparameter("svd_solver", ["randomized"]) + iterated_power = csh.UniformIntegerHyperparameter("iterated_power", 1, 11) + self._add_hyperparameters_and_equals_conditions(locals(), "PCA") + + def setup_polynomial_features(self, preprocessors: csh.CategoricalHyperparameter): + include_bias = csh.CategoricalHyperparameter("include_bias", [False]) + interaction_only = csh.CategoricalHyperparameter("interaction_only", [False]) + self._add_hyperparameters_and_equals_conditions(locals(), "PolynomialFeatures") + + def setup_rbf_sampler(self, preprocessors: csh.CategoricalHyperparameter): + gamma = csh.UniformFloatHyperparameter( + "gamma__rbf_sampler", **self.shared_hyperparameters["gamma"] + ) + self._add_hyperparameters_and_equals_conditions(locals(), "RBFSampler") + + def setup_robust_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_standard_scaler(self, preprocessors: csh.CategoricalHyperparameter): + # No hyperparameters + pass + + def setup_select_percentile(self, preprocessors: csh.CategoricalHyperparameter): + percentile = csh.UniformIntegerHyperparameter("percentile", 1, 100) + self._add_hyperparameters_and_equals_conditions(locals(), "SelectPercentile") + + def setup_variance_threshold(self, preprocessors: csh.CategoricalHyperparameter): + threshold = csh.UniformFloatHyperparameter( + "threshold__variance_threshold", 0.05, 1.01, default_value=0.05 + ) + self._add_hyperparameters_and_equals_conditions(locals(), "VarianceThreshold") diff --git a/gama/configuration/testconfiguration.py b/gama/configuration/testconfiguration.py index 4c134db9..95a7dcfc 100644 --- a/gama/configuration/testconfiguration.py +++ b/gama/configuration/testconfiguration.py @@ -1,146 +1,22 @@ -import numpy as np +import ConfigSpace as cs -from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import ( - ExtraTreesClassifier, - RandomForestClassifier, - GradientBoostingClassifier, +from gama.configuration.test_configuration_task import ( + TestClassifierConfig, + TestPreprocessorConfig, ) -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import LinearSVC -from sklearn.linear_model import LogisticRegression -from sklearn.cluster import FeatureAgglomeration -from sklearn.preprocessing import ( - MaxAbsScaler, - MinMaxScaler, - Normalizer, - PolynomialFeatures, - RobustScaler, - StandardScaler, - Binarizer, -) -from sklearn.kernel_approximation import Nystroem, RBFSampler -from sklearn.decomposition import PCA, 
FastICA
-from sklearn.feature_selection import (
-    SelectFwe,
-    SelectPercentile,
-    f_classif,
-    VarianceThreshold,
+
+# A configuration with limited operators for unit tests.
+
+config_space = cs.ConfigurationSpace(
+    meta={
+        # "gama_system_name": "current_configuration_name",
+        "estimators": "classifiers",
+        "preprocessors": "preprocessors",
+    }
 )
-# A configuration with limited operators for unit tests.
+classifier_config = TestClassifierConfig(config_space)
+classifier_config.setup_classifiers()
-clf_config = {
-    "alpha": [1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0],
-    "fit_prior": [True, False],
-    "min_samples_split": range(2, 21),
-    "min_samples_leaf": range(1, 21),
-    # Classifiers
-    GaussianNB: {},
-    BernoulliNB: {"alpha": [], "fit_prior": []},
-    MultinomialNB: {"alpha": [], "fit_prior": []},
-    DecisionTreeClassifier: {
-        "criterion": ["gini", "entropy"],
-        "max_depth": range(1, 11),
-        "min_samples_split": [],
-        "min_samples_leaf": [],
-    },
-    ExtraTreesClassifier: {
-        "n_estimators": [100],
-        "criterion": ["gini", "entropy"],
-        "max_features": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "min_samples_split": [],
-        "min_samples_leaf": [],
-        "bootstrap": [True, False],
-    },
-    RandomForestClassifier: {
-        "n_estimators": [100],
-        "criterion": ["gini", "entropy"],
-        "max_features": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "min_samples_split": range(2, 21),
-        "min_samples_leaf": range(1, 21),
-        "bootstrap": [True, False],
-    },
-    GradientBoostingClassifier: {
-        "n_estimators": [100],
-        "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
-        "max_depth": range(1, 11),
-        "min_samples_split": range(2, 21),
-        "min_samples_leaf": range(1, 21),
-        "subsample": [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-        "max_features": np.arange(0.05, 1.01, 0.05),
-    },
-    KNeighborsClassifier: {
-        "n_neighbors": range(1, 51),
-        "weights": ["uniform", "distance"],
-        "p": [1, 2],
-    },
-    LinearSVC: {
-        "penalty": ["l1", "l2"],
-        "loss": ["hinge", "squared_hinge"],
-        "dual": [False, True],
-        "tol": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
-        "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
-        "param_check": [
-            lambda params: (not params["dual"] or params["penalty"] == "l2")
-            and not (params["penalty"] == "l1" and params["loss"] == "hinge")
-            and not (
-                params["penalty"] == "l2"
-                and params["loss"] == "hinge"
-                and not params["dual"]
-            )
-        ],
-    },
-    LogisticRegression: {
-        "penalty": ["l1", "l2"],
-        "C": [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0],
-        "dual": [False, True],
-        "param_check": [lambda params: not params["dual"] or params["penalty"] == "l2"],
-    },
-    # Preprocesssors
-    Binarizer: {"threshold": np.arange(0.0, 1.01, 0.05)},
-    FastICA: {
-        "tol": np.arange(0.0, 1.01, 0.05),
-        "whiten": ["unit-variance"],
-    },
-    FeatureAgglomeration: {
-        "linkage": ["ward", "complete", "average"],
-        "affinity": ["euclidean", "l1", "l2", "manhattan", "cosine", "precomputed"],
-        "param_check": [
-            lambda params: params["linkage"] != "ward"
-            or params["affinity"] == "euclidean"
-        ],
-    },
-    MaxAbsScaler: {},
-    MinMaxScaler: {},
-    Normalizer: {"norm": ["l1", "l2", "max"]},
-    Nystroem: {
-        "kernel": [
-            "rbf",
-            "cosine",
-            "chi2",
-            "laplacian",
-            "polynomial",
-            "poly",
-            "linear",
-            "additive_chi2",
-            "sigmoid",
-        ],
-        "gamma": np.arange(0.0, 1.01, 0.05),
-        "n_components": range(1, 11),
-    },
-    PCA: {"svd_solver": ["randomized"], "iterated_power": range(1, 11)},
-    PolynomialFeatures: {
-        "degree": [2],
-        "include_bias": [False],
-        "interaction_only": [False],
-    
}, - RBFSampler: {"gamma": np.arange(0.0, 1.01, 0.05)}, - RobustScaler: {}, - StandardScaler: {}, - # Selectors - SelectFwe: {"alpha": np.arange(0, 0.05, 0.001), "score_func": {f_classif: None}}, - SelectPercentile: {"percentile": range(1, 100), "score_func": {f_classif: None}}, - VarianceThreshold: {"threshold": np.arange(0.05, 1.01, 0.05)}, -} +preprocessor_config = TestPreprocessorConfig(config_space) +preprocessor_config.setup_preprocessors() diff --git a/tests/conftest.py b/tests/conftest.py index eb3dc76e..b1ead3f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,58 +1,62 @@ import pytest from gama import GamaClassifier from gama.genetic_programming.components import Individual -from gama.configuration.testconfiguration import clf_config +from gama.configuration.testconfiguration import config_space as test_config_space from gama.genetic_programming.compilers.scikitlearn import compile_individual @pytest.fixture -def pset(): - gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") - yield gc._pset +def config_space(): + gc = GamaClassifier( + search_space=test_config_space, scoring="accuracy", store="nothing" + ) + yield gc.search_space gc.cleanup("all") @pytest.fixture def opset(): - gc = GamaClassifier(search_space=clf_config, scoring="accuracy", store="nothing") + gc = GamaClassifier( + search_space=test_config_space, scoring="accuracy", store="nothing" + ) yield gc._operator_set gc.cleanup("all") @pytest.fixture -def GNB(pset): - return Individual.from_string("GaussianNB(data)", pset, compile_individual) +def GNB(config_space): + return Individual.from_string("GaussianNB(data)", config_space, compile_individual) @pytest.fixture -def RS_MNB(pset): +def RS_MNB(config_space): return Individual.from_string( "MultinomialNB(RobustScaler(data), alpha=1.0, fit_prior=True)", - pset, + config_space, compile_individual, ) @pytest.fixture -def SS_BNB(pset): +def SS_BNB(config_space): return Individual.from_string( "BernoulliNB(StandardScaler(data), alpha=0.1, fit_prior=True)", - pset, + config_space, compile_individual, ) @pytest.fixture -def SS_RBS_SS_BNB(pset): +def SS_RBS_SS_BNB(config_space): return Individual.from_string( "BernoulliNB(StandardScaler(RobustScaler(StandardScaler(data))), alpha=0.1, fit_prior=True)", # noqa: E501 - pset, + config_space, compile_individual, ) @pytest.fixture -def LinearSVC(pset): +def LinearSVC(config_space): individual_str = """LinearSVC(data, LinearSVC.C=0.001, LinearSVC.dual=True, @@ -60,11 +64,11 @@ def LinearSVC(pset): LinearSVC.penalty='l2', LinearSVC.tol=1e-05)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return Individual.from_string(individual_str, pset, None) + return Individual.from_string(individual_str, config_space, None) @pytest.fixture -def ForestPipeline(pset): +def ForestPipeline(config_space): individual_str = """RandomForestClassifier( FeatureAgglomeration( data, @@ -79,11 +83,11 @@ def ForestPipeline(pset): RandomForestClassifier.n_estimators=100)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return Individual.from_string(individual_str, pset, None) + return Individual.from_string(individual_str, config_space, None) @pytest.fixture -def InvalidLinearSVC(pset): +def InvalidLinearSVC(config_space): individual_str = """LinearSVC(data, LinearSVC.C=0.001, LinearSVC.dual=True, @@ -91,4 +95,4 @@ def InvalidLinearSVC(pset): LinearSVC.penalty='l1', LinearSVC.tol=1e-05)""" individual_str = "".join(individual_str.split()).replace(",", ", ") - return 
Individual.from_string(individual_str, pset, compile_individual)
+    return Individual.from_string(individual_str, config_space, compile_individual)
diff --git a/tests/system/test_gamaclassifier.py b/tests/system/test_gamaclassifier.py
index b77d2b56..4cdd9186 100644
--- a/tests/system/test_gamaclassifier.py
+++ b/tests/system/test_gamaclassifier.py
@@ -10,6 +10,8 @@
 from sklearn.metrics import accuracy_score, log_loss
 from sklearn.pipeline import Pipeline
 
+from gama.configuration.test_configuration_task import TestClassifierConfig
+import ConfigSpace as cs
 from gama.postprocessing import EnsemblePostProcessing
 from gama.search_methods import AsynchronousSuccessiveHalving, AsyncEA, RandomSearch
 from gama.search_methods.base_search import BaseSearch
@@ -237,3 +239,36 @@ def test_missing_value_classification_arff():
 def test_missing_value_classification():
     """Binary classification, log loss (probabilities), missing values."""
     _test_dataset_problem(breast_cancer_missing, "neg_log_loss", missing_values=True)
+
+
+def test_wrong_meta_estimators_config_space_gc():
+    """GamaClassifier raises ValueError when meta lacks 'estimators'."""
+    with pytest.raises(ValueError):
+        # Use a fresh space: mutating the shared, imported config_space
+        # would leak broken meta into every later test in the session.
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "dummy": "dummy",
+            }
+        )
+        GamaClassifier(
+            search_space=dummy_config_space,
+        )
+
+
+def test_wrong_meta_preprocessors_config_space_gc():
+    """GamaClassifier raises ValueError for a bad 'preprocessors' meta entry."""
+    with pytest.raises(ValueError):
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "estimators": "classifiers",
+                "preprocessors": "dummy",
+            }
+        )
+
+        dummy_classifier_config = TestClassifierConfig(dummy_config_space)
+        dummy_classifier_config.setup_classifiers()
+
+        GamaClassifier(
+            search_space=dummy_config_space,
+        )
diff --git a/tests/system/test_gamaregressor.py b/tests/system/test_gamaregressor.py
index 985178c8..3b0d1995 100644
--- a/tests/system/test_gamaregressor.py
+++ b/tests/system/test_gamaregressor.py
@@ -4,10 +4,14 @@
 
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error
 
+from gama.configuration.regression_task import RegressorConfig
+import ConfigSpace as cs
 from gama.postprocessing import EnsemblePostProcessing
 from gama.utilities.generic.stopwatch import Stopwatch
 from gama import GamaRegressor
 
+import pytest
+
 FIT_TIME_MARGIN = 1.1
 TOTAL_TIME_S = 60
@@ -74,3 +78,36 @@ def test_missing_value_regression():
         store="nothing",
     )
     _test_gama_regressor(gama, X_train, X_test, y_train, y_test, data, metric)
+
+
+def test_wrong_meta_estimators_config_space_gr():
+    """GamaRegressor raises ValueError when meta lacks 'estimators'."""
+    with pytest.raises(ValueError):
+        # Use a fresh space instead of mutating shared module-level state.
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "dummy": "dummy",
+            }
+        )
+        GamaRegressor(
+            search_space=dummy_config_space,
+        )
+
+
+def test_wrong_meta_preprocessors_config_space_gr():
+    """GamaRegressor raises ValueError for a bad 'preprocessors' meta entry."""
+    with pytest.raises(ValueError):
+        dummy_config_space = cs.ConfigurationSpace(
+            meta={
+                # "gama_system_name": "current_configuration_name",
+                "estimators": "regressors",
+                "preprocessors": "dummy",
+            }
+        )
+
+        dummy_regressor_config = RegressorConfig(dummy_config_space)
+        dummy_regressor_config.setup_regressors()
+
+        GamaRegressor(
+            search_space=dummy_config_space,
+        )
diff --git a/tests/unit/test_configuration_parser.py b/tests/unit/test_configuration_parser.py
index 5618be54..2876a54c 100644
--- 
a/tests/unit/test_configuration_parser.py +++ b/tests/unit/test_configuration_parser.py @@ -1,18 +1,26 @@ -from sklearn.naive_bayes import BernoulliNB, GaussianNB +from gama.utilities.config_space import merge_configurations -from gama.configuration.parser import merge_configurations +from gama.configuration.testconfiguration import ( + config_space as classification_config_space, +) +from gama.configuration.regression import config_space as regression_config_space def test_merge_configuration(): """Test merging two simple configurations works as expected.""" - one = {"alpha": [0, 1], BernoulliNB: {"fit_prior": [True, False]}} - two = {"alpha": [0, 2], GaussianNB: {"fit_prior": [True, False]}} - expected_merged = { - "alpha": [0, 1, 2], - GaussianNB: {"fit_prior": [True, False]}, - BernoulliNB: {"fit_prior": [True, False]}, - } + test_classification_config = classification_config_space + test_regression_config = regression_config_space - actual_merged = merge_configurations(one, two) - assert expected_merged == actual_merged + merged_config = merge_configurations( + test_classification_config, test_regression_config + ) + + assert ( + test_classification_config.meta["estimators"] + in merged_config.get_hyperparameters_dict() + ) + assert ( + test_regression_config.meta["estimators"] + in merged_config.get_hyperparameters_dict() + ) diff --git a/tests/unit/test_ea_mutation.py b/tests/unit/test_ea_mutation.py index 5440bfd8..8589094f 100644 --- a/tests/unit/test_ea_mutation.py +++ b/tests/unit/test_ea_mutation.py @@ -14,44 +14,50 @@ from gama.genetic_programming.compilers.scikitlearn import compile_individual -def test_mut_replace_terminal(ForestPipeline, pset): +def test_mut_replace_terminal(ForestPipeline, config_space): """Tests if mut_replace_terminal replaces exactly one terminal.""" _test_mutation( ForestPipeline, mut_replace_terminal, _mut_replace_terminal_is_applied, - pset, + config_space, ) -def test_mut_replace_terminal_none_available(GNB, pset): +def test_mut_replace_terminal_none_available(GNB, config_space): """mut_replace_terminal raises an exception if no valid mutation is possible.""" with pytest.raises(ValueError) as error: - mut_replace_terminal(GNB, pset) + mut_replace_terminal(GNB, config_space) assert "Individual has no terminals suitable for mutation." 
in str(error.value) -def test_mut_replace_primitive_len_1(LinearSVC, pset): +def test_mut_replace_primitive_len_1(LinearSVC, config_space): """mut_replace_primitive replaces exactly one primitive.""" _test_mutation( - LinearSVC, mut_replace_primitive, _mut_replace_primitive_is_applied, pset + LinearSVC, + mut_replace_primitive, + _mut_replace_primitive_is_applied, + config_space, ) -def test_mut_replace_primitive_len_2(ForestPipeline, pset): +def test_mut_replace_primitive_len_2(ForestPipeline, config_space): """mut_replace_primitive replaces exactly one primitive.""" _test_mutation( - ForestPipeline, mut_replace_primitive, _mut_replace_primitive_is_applied, pset + ForestPipeline, + mut_replace_primitive, + _mut_replace_primitive_is_applied, + config_space, ) -def test_mut_insert(ForestPipeline, pset): +def test_mut_insert(ForestPipeline, config_space): """mut_insert inserts at least one primitive.""" - _test_mutation(ForestPipeline, mut_insert, _mut_insert_is_applied, pset) + _test_mutation(ForestPipeline, mut_insert, _mut_insert_is_applied, config_space) -def test_random_valid_mutation_with_all(ForestPipeline, pset): +def test_random_valid_mutation_with_all(ForestPipeline, config_space): """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. @@ -63,7 +69,7 @@ def test_random_valid_mutation_with_all(ForestPipeline, pset): for i in range(_min_trials(n_mutations=4)): ind_clone = ForestPipeline.copy_as_new() - random_valid_mutation_in_place(ind_clone, pset) + random_valid_mutation_in_place(ind_clone, config_space) if _mut_shrink_is_applied(ForestPipeline, ind_clone)[0]: applied_mutation["shrink"] += 1 elif _mut_insert_is_applied(ForestPipeline, ind_clone)[0]: @@ -78,7 +84,7 @@ def test_random_valid_mutation_with_all(ForestPipeline, pset): assert all([count > 0 for (mut, count) in applied_mutation.items()]) -def test_random_valid_mutation_without_shrink(LinearSVC, pset): +def test_random_valid_mutation_without_shrink(LinearSVC, config_space): """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. @@ -90,7 +96,7 @@ def test_random_valid_mutation_without_shrink(LinearSVC, pset): for i in range(_min_trials(n_mutations=3)): ind_clone = LinearSVC.copy_as_new() - random_valid_mutation_in_place(ind_clone, pset) + random_valid_mutation_in_place(ind_clone, config_space) if _mut_insert_is_applied(LinearSVC, ind_clone)[0]: applied_mutation["insert"] += 1 elif _mut_replace_terminal_is_applied(LinearSVC, ind_clone)[0]: @@ -103,7 +109,7 @@ def test_random_valid_mutation_without_shrink(LinearSVC, pset): assert all([count > 0 for (mut, count) in applied_mutation.items()]) -def test_random_valid_mutation_without_terminal(GNB, pset): +def test_random_valid_mutation_without_terminal(GNB, config_space): """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. 
@@ -116,7 +122,7 @@ def test_random_valid_mutation_without_terminal(GNB, pset): for i in range(_min_trials(n_mutations=2)): ind_clone = GNB.copy_as_new() - random_valid_mutation_in_place(ind_clone, pset) + random_valid_mutation_in_place(ind_clone, config_space) if _mut_insert_is_applied(GNB, ind_clone)[0]: applied_mutation["insert"] += 1 elif _mut_replace_primitive_is_applied(GNB, ind_clone)[0]: @@ -127,7 +133,7 @@ def test_random_valid_mutation_without_terminal(GNB, pset): assert all([count > 0 for (mut, count) in applied_mutation.items()]) -def test_random_valid_mutation_without_insert(ForestPipeline, pset): +def test_random_valid_mutation_without_insert(ForestPipeline, config_space): """Test if a valid mutation is applied at random. I am honestly not sure of the best way to test this. @@ -141,7 +147,7 @@ def test_random_valid_mutation_without_insert(ForestPipeline, pset): for i in range(_min_trials(n_mutations=3)): ind_clone = ForestPipeline.copy_as_new() - random_valid_mutation_in_place(ind_clone, pset, max_length=2) + random_valid_mutation_in_place(ind_clone, config_space, max_length=2) if _mut_shrink_is_applied(ForestPipeline, ind_clone)[0]: applied_mutation["shrink"] += 1 elif _mut_replace_terminal_is_applied(ForestPipeline, ind_clone)[0]: @@ -245,7 +251,7 @@ def _mut_replace_primitive_is_applied(original, mutated): return True, None -def _test_mutation(individual: Individual, mutation, mutation_check, pset): +def _test_mutation(individual: Individual, mutation, mutation_check, config_space): """Test if an individual mutated by `mutation` passes `mutation_check` and compiles. :param individual: The individual to be mutated. @@ -255,10 +261,10 @@ def _test_mutation(individual: Individual, mutation, mutation_check, pset): see above functions. """ ind_clone = individual.copy_as_new() - mutation(ind_clone, pset) + mutation(ind_clone, config_space) applied, message = mutation_check(individual, ind_clone) assert applied, message # Should be able to compile the individual, will raise an Exception if not. - compile_individual(ind_clone, pset) + compile_individual(ind_clone, config_space)
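

A minimal sketch of how the rebuilt test space behaves once both setup calls
have run. This is plain ConfigSpace usage; only the meta keys and choice names
are taken from the diff above, nothing here is GAMA-specific:

    from gama.configuration.testconfiguration import config_space

    # The EqualsConditions added by _add_hyperparameters_and_equals_conditions
    # mean a sample only activates the hyperparameters of the chosen classifier
    # and preprocessor; inactive ones are absent from the dictionary.
    sample = config_space.sample_configuration()
    print(sample[config_space.meta["estimators"]])     # e.g. "RandomForestClassifier"
    print(sample[config_space.meta["preprocessors"]])  # e.g. "PCA"
    print(sample.get_dictionary())                     # active hyperparameters only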