
Hyperoptim #26

Open

wants to merge 30 commits into master from hyperoptim

Changes from all commits (30 commits):
fe28678
rebased hyperoptimization
Nov 30, 2021
d9b1da8
rebased hyperoptimization
Nov 30, 2021
3bbc90e
refactored tvem->tvo
mknull May 24, 2022
a5441ea
move verbosity from DEBUG to INFO
mknull Jun 10, 2022
0c43cfb
comment polish + top_n variable change
mknull Jun 10, 2022
74c5f10
completing rebase
mknull Jun 13, 2022
f6b23d4
eem -> evo rename
mknull Jun 15, 2022
88db272
initial refactoring of FCDeconvnet 1/2
mknull Jun 15, 2022
70e1747
refactored and tested fully connected
mknull Jun 15, 2022
cebdc9d
removed logging line
mknull Jun 24, 2022
50f1cb4
joined models from merge
mknull Jun 27, 2022
e806748
removed unused **kwargs option from handle_data()
mknull Jun 27, 2022
f5af559
removed unnecessary directory
mknull Jun 27, 2022
97f1fed
sync device with TVO
mknull Jun 27, 2022
e75c306
fixed bug in FCDeconvnet
mknull Jun 27, 2022
b148bc0
set logging level to warning
mknull Jun 27, 2022
9a73cc6
changed TVAE worker's default hyperparameter ranges
mknull Jun 27, 2022
a7f41d1
Merge branch 'hyperoptim' of github.com:tvlearn/tvo into hyperoptim
mknull Jun 27, 2022
77963f7
black & pylama pass
mknull Jun 27, 2022
81c27f5
make mypy happy
mknull Jun 27, 2022
f4745a9
make mypy happy
mknull Jun 27, 2022
17e35ac
resolve all black, pylama, mypy warnings
mknull Jun 27, 2022
d304a9b
black pass over /tvo
mknull Jun 27, 2022
cde3885
allow flexible dtypes
mknull Jun 28, 2022
ce74531
cleanup
mknull Jun 28, 2022
37872d6
rename h -> x in forward
mknull Jun 28, 2022
37c6578
black pass with newer version
mknull Jun 29, 2022
7f71e94
revert changes back to master
mknull Jun 30, 2022
4a66aaf
Merge branch 'master' into hyperoptim
mknull Jul 11, 2022
1be02a9
black pass
mknull Jul 11, 2022
6 changes: 6 additions & 0 deletions hyperoptimization/__init__.py
@@ -0,0 +1,6 @@
from .neural_models import FCDeConvNetSigOut, FCDeConvNet
from .workers import TVAEWorker
from .explore import print_best
from .runs import local_sequential

__all__ = ["FCDeConvNet", "FCDeConvNetSigOut", "TVAEWorker", "print_best", "local_sequential"]
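
For reference, a downstream experiment script might consume this public API as follows (a minimal sketch; the `args` object is hypothetical and would come from argument parsing as in the test script below):

from hyperoptimization import TVAEWorker, local_sequential, print_best

# run the optimization locally, then inspect the top configurations
# local_sequential(worker=TVAEWorker, parsed_args=args, previous_run=None)
# print_best(path="results", top_n=5)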
197 changes: 197 additions & 0 deletions hyperoptimization/explore.py
@@ -0,0 +1,197 @@
import hpbandster.core.result as hpres
import hpbandster.visualization as hpvis
import json
import matplotlib.pyplot as plt
from collections import Counter
import os


class ValidFreeEnergy:
Member: For a general hyperparameter optimization framework, it may be desirable to provide an interface that allows implementing different optimization metrics (including free energies) as generically as possible.

Contributor Author: Agreed.
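
One possible shape for such an interface — a minimal sketch with hypothetical names, not part of this PR:

class Metric:
    """Maps an hpbandster run to a scalar score, or None if the run is invalid."""

    def __call__(self, run):
        raise NotImplementedError


class InfoMetric(Metric):
    """Reads a named entry (e.g. a free energy or an accuracy) from run["info"]."""

    def __init__(self, key):
        self.key = key

    def __call__(self, run):
        if "info" not in vars(run) or run["info"] is None:
            return None  # broken run
        return run["info"].get(self.key)


class NegativeLoss(Metric):
    """hpbandster minimizes the loss; negate it to recover the reported quantity."""

    def __call__(self, run):
        return None if run["loss"] is None else -run["loss"]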

"""
Extracts the free energy from an hpbandster run if the run was successful.
If the free energy of the loss is requested (i.e. training), the negative loss is returned.
"""

def __init__(self, key):
"""
:param key: The key of the free energy to extract.
(one of loss, validation,
"""
self.key = key

    def __call__(self, run):
        if self.key != "loss":
            if "info" not in vars(run) or run["info"] is None:
                return None  # broken run
            if run["info"][self.key] is None:
                return None  # run finished, but this metric was not recorded
            return run["info"][self.key]
        else:
            if run[self.key] is None:
                return None
            return -run[self.key]  # hpbandster minimizes, but we report -loglikelihood
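

# Example usage (values hypothetical): an instance doubles as a sort key.
#   get_val = ValidFreeEnergy("validation accuracy")
#   get_val(run)  # -> the recorded accuracy, or None for a broken run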


def result_and_runs(path):
    """
    :param path: directory of the results and config json files
    :return: the results and all_runs objects
    """
    result = hpres.logged_results_to_HBS_result(path)
    all_runs = result.get_all_runs()
    return result, all_runs


def sorted_by_value(runs, key="loss"):
    """
    :param runs: output of an hpbandster.core.result method
    :param key: the key by which to sort the results
    :return: a sorted list containing only valid results
    """
    get_if_valid = ValidFreeEnergy(key)
    return sorted([run for run in runs if get_if_valid(run) is not None], key=get_if_valid)
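

# A typical pipeline (path hypothetical): load a run directory, then rank its runs.
# The list is sorted ascending, so the best run is last:
#   result, all_runs = result_and_runs("results")
#   best = sorted_by_value(all_runs, key="validation accuracy")[-1]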


def print_best(path="", printable="loss", criterion="loss", show_config=True, top_n=10):
    """
    :param path: directory of the results and config json files
    :param printable: name of the value to print
    :param criterion: name of the value to sort by for the top-N selection
    :param show_config: if True, also print each model's config
    :param top_n: number of models to show
    :return: None
    """
    result, all_runs = result_and_runs(path)
    id2conf = result.get_id2config_mapping()

    by_criterion = sorted_by_value(all_runs, key=criterion)
    best_first = by_criterion[::-1]

    print("Good confs as judged by {}: ".format(criterion))
    for i in range(top_n):
        # get value
        if criterion == "loss":
            value = -best_first[i][printable]  # undo the sign flip from hpbandster minimization
        else:
            value = best_first[i]["info"][printable]

        # get config id
        config_id = "".join([str(id_).rjust(3, " ") for id_ in best_first[i]["config_id"]])

        # print result
        print(
            "{}. with {}/free energy= {} |id ({})".format(
                str(i + 1).rjust(2), printable, str(round(value, 6)).ljust(12), config_id
            )
        )

        if show_config:
            config = id2conf[best_first[i]["config_id"]]
            print(json.dumps(config, indent=4))


def print_error_configs(path, top_n_broken=10):
    """
    Picks out and prints the hyperparameters that occur most frequently in configs
    whose runs were interrupted.
    :param path: directory of the results and config json files
    :param top_n_broken: number of hyperparameters to print
    :return: None
    """
    result, all_runs = result_and_runs(path)
    id2conf = result.get_id2config_mapping()

    all_confs = [id2conf[run["config_id"]] for run in all_runs]
    broken_confs = [id2conf[run["config_id"]] for run in all_runs if run["info"] is None]

    # hyperparameters that appear in every run carry no signal about breakage
    all_hyperparameters_by_usage = Counter(
        [key for conf in all_confs for key in conf["config"].keys()]
    )
    constantly_used = [
        key
        for key in all_hyperparameters_by_usage
        if all_hyperparameters_by_usage[key] == len(all_runs)
    ]
    broken_hyperparams_used = [
        key for conf in broken_confs for key in conf["config"].keys() if key not in constantly_used
    ]

    max_len = max([len(key) for key in broken_hyperparams_used])
    broken_hyperparams_used = [key.ljust(max_len) for key in broken_hyperparams_used]

    print("{} broken runs found".format(len(broken_confs)))
    print("Top {} hyperparams by frequency:".format(top_n_broken))
    for key, count in Counter(broken_hyperparams_used).most_common(top_n_broken):
        print("{}: {}".format(count, key))


def visualize(path):
    """
    Visualizes the behaviour of an hpbandster run.
    :param path: directory of the results and config json files
    :return: None
    """
    # get results
    result, all_runs = result_and_runs(path)

    # losses by budget
    hpvis.losses_over_time(all_runs)

    # concurrent runs over time
    hpvis.concurrent_runs_over_time(all_runs)

    # finished runs over time
    hpvis.finished_runs_over_time(all_runs)

    # Spearman rank correlation across budgets
    hpvis.correlation_across_budgets(result)

    # model-based configs vs. random search
    hpvis.performance_histogram_model_vs_random(all_runs, result.get_id2config_mapping(), show=True)

    plt.show()


if __name__ == "__main__":
# path = 'results'
path = os.path.abspath(
os.path.join(os.path.abspath(__file__), "../../dynamically_binarized_mnist/results_2")
)
mknull marked this conversation as resolved.
Show resolved Hide resolved
print_error_configs(path)
print("\n")
print_best(
path,
printable="validation accuracy",
criterion="train accuracy",
show_config=False,
top_n=5,
)
print("\n")
print_best(
path,
printable="validation accuracy",
criterion="validation accuracy",
show_config=True,
top_n=5,
)
print("\n")
print_best(
path,
printable="test accuracy",
criterion="test accuracy",
show_config=False,
top_n=5,
)
visualize(path)
52 changes: 52 additions & 0 deletions hyperoptimization/hyperoptimize_tvae_bars_test.py
@@ -0,0 +1,52 @@
from argparse import ArgumentParser as Parser

from typing import Tuple

from hyperoptimization.workers import TVAEWorker
from hyperoptimization.utils import parse_hyperopt_args as hyperopt
from hyperoptimization.runs import local_sequential as run
import logging

logging.basicConfig(level=logging.WARNING)


def experiment(parser):
    parser.add_argument("dataset", help="HDF5 file as expected in input by tvo.Training")
    parser.add_argument("--Ksize", type=int, default=3, help="size of each K^n set")
    parser.add_argument("--epochs", type=int, default=40, help="number of training epochs")
    parser.add_argument(
        "--net-shape",
        required=True,
        type=parse_net_shape,
        help="colon-separated list of layer sizes",
    )
    parser.add_argument("--min_lr", type=float, help="MLP min learning rate", required=True)
    parser.add_argument("--max_lr", type=float, help="MLP max learning rate", required=True)
    parser.add_argument("--batch-size", type=int, required=True)
    parser.add_argument("--output", help="output file for train log", required=True)
    parser.add_argument(
        "--seed",
        type=int,
        help="seed value for random number generators. default is a random seed",
    )
    return parser


def parse_net_shape(net_shape: str) -> Tuple[int, ...]:
    """
    Parse a string with the TVAE shape into a tuple.

    :param net_shape: colon-separated list of integers, e.g. `"10:10:2"`
    :returns: a tuple with the shape as integers, e.g. `(10,10,2)`
    """
    return tuple(map(int, net_shape.split(":")))


parser = experiment(hyperopt(Parser()))
parsed_args = parser.parse_args()

worker = TVAEWorker
pr = None
# to warm-start from a previous run: import hpbandster.core.result as hpres, then
# pr = hpres.logged_results_to_HBS_result("results")
run(worker=worker, parsed_args=parsed_args, previous_run=pr)
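
# A hypothetical invocation (dataset path and values illustrative; parse_hyperopt_args
# may add further required flags not shown here):
#   python hyperoptimization/hyperoptimize_tvae_bars_test.py bars.h5 \
#       --net-shape 10:10:2 --min_lr 1e-4 --max_lr 1e-2 --batch-size 32 --output train.log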