Skip to content

Commit

Permalink
Sarah Segel: Handle configspace as dictionary in mlp example (#1057)
Browse files Browse the repository at this point in the history
  • Loading branch information
GitHub Actions committed Jul 20, 2023
1 parent 3fc7ee0 commit 64f4b91
Show file tree
Hide file tree
Showing 132 changed files with 658 additions and 9,814 deletions.
2 changes: 1 addition & 1 deletion development/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 7998ea861451546be11046d510c9eb9f
config: 9e450612d63cf85af7583a34de820462
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file modified development/.doctrees/environment.pickle
Binary file not shown.
Binary file not shown.
Binary file modified development/.doctrees/examples/1_basics/2_svm_cv.doctree
Binary file not shown.
Binary file modified development/.doctrees/examples/1_basics/3_ask_and_tell.doctree
Binary file not shown.
Binary file modified development/.doctrees/examples/1_basics/4_callback.doctree
Binary file not shown.
Binary file modified development/.doctrees/examples/1_basics/5_continue.doctree
Binary file not shown.
Binary file modified development/.doctrees/examples/1_basics/6_priors.doctree
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def configspace(self) -> ConfigurationSpace:
return cs

def train(self, config: Configuration, seed: int = 0, budget: int = 10) -> dict[str, float]:
lr = config["learning_rate"] if config["learning_rate"] else "constant"
lr_init = config["learning_rate_init"] if config["learning_rate_init"] else 0.001
batch_size = config["batch_size"] if config["batch_size"] else 200
lr = config.get("learning_rate", "constant")
lr_init = config.get("learning_rate_init", 0.001)
batch_size = config.get("batch_size", 200)

start_time = time.time()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def train(self, config: Configuration, instance: str, seed: int = 0) -> float:

# SGD classifier using given configuration
clf = SGDClassifier(
loss="log",
loss="log_loss",
penalty="elasticnet",
alpha=config["alpha"],
l1_ratio=config["l1_ratio"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
},
"outputs": [],
"source": [
"from __future__ import annotations\n\nimport itertools\nimport warnings\n\nimport numpy as np\nfrom ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float\nfrom sklearn import datasets\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.model_selection import StratifiedKFold, cross_val_score\n\nfrom smac import MultiFidelityFacade as MFFacade\nfrom smac import Scenario\n\n__copyright__ = \"Copyright 2021, AutoML.org Freiburg-Hannover\"\n__license__ = \"3-clause BSD\"\n\n\nclass DigitsDataset:\n def __init__(self) -> None:\n self._data = datasets.load_digits()\n\n def get_instances(self) -> list[str]:\n \"\"\"Create instances from the dataset which include two classes only.\"\"\"\n return [f\"{classA}-{classB}\" for classA, classB in itertools.combinations(self._data.target_names, 2)]\n\n def get_instance_features(self) -> dict[str, list[int | float]]:\n \"\"\"Returns the mean and variance of all instances as features.\"\"\"\n features = {}\n for instance in self.get_instances():\n data, _ = self.get_instance_data(instance)\n features[instance] = [np.mean(data), np.var(data)]\n\n return features\n\n def get_instance_data(self, instance: str) -> tuple[np.ndarray, np.ndarray]:\n \"\"\"Retrieve data from the passed instance.\"\"\"\n # We split the dataset into two classes\n classA, classB = instance.split(\"-\")\n indices = np.where(np.logical_or(int(classA) == self._data.target, int(classB) == self._data.target))\n\n data = self._data.data[indices]\n target = self._data.target[indices]\n\n return data, target\n\n\nclass SGD:\n def __init__(self, dataset: DigitsDataset) -> None:\n self.dataset = dataset\n\n @property\n def configspace(self) -> ConfigurationSpace:\n \"\"\"Build the configuration space which defines all parameters and their ranges for the SGD classifier.\"\"\"\n cs = ConfigurationSpace()\n\n # We define a few possible parameters for the SGD classifier\n alpha = Float(\"alpha\", (0, 1), default=1.0)\n l1_ratio = 
Float(\"l1_ratio\", (0, 1), default=0.5)\n learning_rate = Categorical(\"learning_rate\", [\"constant\", \"invscaling\", \"adaptive\"], default=\"constant\")\n eta0 = Float(\"eta0\", (0.00001, 1), default=0.1, log=True)\n # Add the parameters to configuration space\n cs.add_hyperparameters([alpha, l1_ratio, learning_rate, eta0])\n\n return cs\n\n def train(self, config: Configuration, instance: str, seed: int = 0) -> float:\n \"\"\"Creates a SGD classifier based on a configuration and evaluates it on the\n digits dataset using cross-validation.\"\"\"\n\n with warnings.catch_warnings():\n warnings.filterwarnings(\"ignore\")\n\n # SGD classifier using given configuration\n clf = SGDClassifier(\n loss=\"log\",\n penalty=\"elasticnet\",\n alpha=config[\"alpha\"],\n l1_ratio=config[\"l1_ratio\"],\n learning_rate=config[\"learning_rate\"],\n eta0=config[\"eta0\"],\n max_iter=30,\n early_stopping=True,\n random_state=seed,\n )\n\n # get instance\n data, target = self.dataset.get_instance_data(instance)\n\n cv = StratifiedKFold(n_splits=4, random_state=seed, shuffle=True) # to make CV splits consistent\n scores = cross_val_score(clf, data, target, cv=cv)\n\n return 1 - np.mean(scores)\n\n\nif __name__ == \"__main__\":\n dataset = DigitsDataset()\n model = SGD(dataset)\n\n scenario = Scenario(\n model.configspace,\n walltime_limit=30, # We want to optimize for 30 seconds\n n_trials=5000, # We want to try max 5000 different trials\n min_budget=1, # Use min one instance\n max_budget=45, # Use max 45 instances (if we have a lot of instances we could constraint it here)\n instances=dataset.get_instances(),\n instance_features=dataset.get_instance_features(),\n )\n\n # Create our SMAC object and pass the scenario and the train method\n smac = MFFacade(\n scenario,\n model.train,\n overwrite=True,\n )\n\n # Now we start the optimization process\n incumbent = smac.optimize()\n\n default_cost = smac.validate(model.configspace.get_default_configuration())\n print(f\"Default cost: 
{default_cost}\")\n\n incumbent_cost = smac.validate(incumbent)\n print(f\"Incumbent cost: {incumbent_cost}\")"
"from __future__ import annotations\n\nimport itertools\nimport warnings\n\nimport numpy as np\nfrom ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float\nfrom sklearn import datasets\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.model_selection import StratifiedKFold, cross_val_score\n\nfrom smac import MultiFidelityFacade as MFFacade\nfrom smac import Scenario\n\n__copyright__ = \"Copyright 2021, AutoML.org Freiburg-Hannover\"\n__license__ = \"3-clause BSD\"\n\n\nclass DigitsDataset:\n def __init__(self) -> None:\n self._data = datasets.load_digits()\n\n def get_instances(self) -> list[str]:\n \"\"\"Create instances from the dataset which include two classes only.\"\"\"\n return [f\"{classA}-{classB}\" for classA, classB in itertools.combinations(self._data.target_names, 2)]\n\n def get_instance_features(self) -> dict[str, list[int | float]]:\n \"\"\"Returns the mean and variance of all instances as features.\"\"\"\n features = {}\n for instance in self.get_instances():\n data, _ = self.get_instance_data(instance)\n features[instance] = [np.mean(data), np.var(data)]\n\n return features\n\n def get_instance_data(self, instance: str) -> tuple[np.ndarray, np.ndarray]:\n \"\"\"Retrieve data from the passed instance.\"\"\"\n # We split the dataset into two classes\n classA, classB = instance.split(\"-\")\n indices = np.where(np.logical_or(int(classA) == self._data.target, int(classB) == self._data.target))\n\n data = self._data.data[indices]\n target = self._data.target[indices]\n\n return data, target\n\n\nclass SGD:\n def __init__(self, dataset: DigitsDataset) -> None:\n self.dataset = dataset\n\n @property\n def configspace(self) -> ConfigurationSpace:\n \"\"\"Build the configuration space which defines all parameters and their ranges for the SGD classifier.\"\"\"\n cs = ConfigurationSpace()\n\n # We define a few possible parameters for the SGD classifier\n alpha = Float(\"alpha\", (0, 1), default=1.0)\n l1_ratio = 
Float(\"l1_ratio\", (0, 1), default=0.5)\n learning_rate = Categorical(\"learning_rate\", [\"constant\", \"invscaling\", \"adaptive\"], default=\"constant\")\n eta0 = Float(\"eta0\", (0.00001, 1), default=0.1, log=True)\n # Add the parameters to configuration space\n cs.add_hyperparameters([alpha, l1_ratio, learning_rate, eta0])\n\n return cs\n\n def train(self, config: Configuration, instance: str, seed: int = 0) -> float:\n \"\"\"Creates a SGD classifier based on a configuration and evaluates it on the\n digits dataset using cross-validation.\"\"\"\n\n with warnings.catch_warnings():\n warnings.filterwarnings(\"ignore\")\n\n # SGD classifier using given configuration\n clf = SGDClassifier(\n loss=\"log_loss\",\n penalty=\"elasticnet\",\n alpha=config[\"alpha\"],\n l1_ratio=config[\"l1_ratio\"],\n learning_rate=config[\"learning_rate\"],\n eta0=config[\"eta0\"],\n max_iter=30,\n early_stopping=True,\n random_state=seed,\n )\n\n # get instance\n data, target = self.dataset.get_instance_data(instance)\n\n cv = StratifiedKFold(n_splits=4, random_state=seed, shuffle=True) # to make CV splits consistent\n scores = cross_val_score(clf, data, target, cv=cv)\n\n return 1 - np.mean(scores)\n\n\nif __name__ == \"__main__\":\n dataset = DigitsDataset()\n model = SGD(dataset)\n\n scenario = Scenario(\n model.configspace,\n walltime_limit=30, # We want to optimize for 30 seconds\n n_trials=5000, # We want to try max 5000 different trials\n min_budget=1, # Use min one instance\n max_budget=45, # Use max 45 instances (if we have a lot of instances we could constraint it here)\n instances=dataset.get_instances(),\n instance_features=dataset.get_instance_features(),\n )\n\n # Create our SMAC object and pass the scenario and the train method\n smac = MFFacade(\n scenario,\n model.train,\n overwrite=True,\n )\n\n # Now we start the optimization process\n incumbent = smac.optimize()\n\n default_cost = smac.validate(model.configspace.get_default_configuration())\n print(f\"Default 
cost: {default_cost}\")\n\n incumbent_cost = smac.validate(incumbent)\n print(f\"Incumbent cost: {incumbent_cost}\")"
]
}
],
Expand Down
Loading

0 comments on commit 64f4b91

Please sign in to comment.