Skip to content

Commit

Permalink
hydra picking
Browse files Browse the repository at this point in the history
  • Loading branch information
kw-corne committed Aug 25, 2023
1 parent eb15682 commit d33fa60
Show file tree
Hide file tree
Showing 16 changed files with 206 additions and 69 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ RUN chmod +x entrypoint.sh

# Integration tests, should fail if installing went wrong.
# No GPU on gh-actions
RUN if ! python -m pytest -v; then exit 1; fi
RUN if ! python -m pytest -v -m "not gpu"; then exit 1; fi

ENTRYPOINT ["./entrypoint.sh"]
# Clean up all images: docker rmi -f $(docker images -aq)
156 changes: 123 additions & 33 deletions autoverify/portfolio/hydra/hydra.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import logging
import random
import sys
from typing import Any

import numpy as np
from ConfigSpace import Configuration
from ConfigSpace import Categorical, Configuration, ConfigurationSpace
from smac import AlgorithmConfigurationFacade as ACFacade
from smac import RunHistory, Scenario

Expand All @@ -16,9 +17,9 @@
)
from autoverify.types import TargetFunction
from autoverify.util.resources import ResourceTracker
from autoverify.util.target_function import get_verifier_tf
from autoverify.util.target_function import get_pick_tf, get_verifier_tf
from autoverify.util.verifiers import verifier_from_name
from autoverify.verifier.verifier import CompleteVerifier
from autoverify.verifier.verifier import CompleteVerifier, Verifier

logger = logging.getLogger(__name__)

Expand All @@ -36,6 +37,56 @@ def _mean_unevaluated(costs: dict[str, float]) -> dict[str, float]:
return new_costs


def _get_cpu_gpu_alloc(verifier: str, rt: ResourceTracker):
    """Build the `cpu_gpu_allocation` tuple for `verifier`.

    Queries `rt.deduct_by_name(verifier, mock=True)` for the `(cpus, gpu)`
    pair belonging to the verifier and converts it into a 3-tuple
    `(0, cpus - 1, gpu_flag)`.

    `gpu_flag` is `-1` when the reported GPU amount is zero or negative,
    and `0` otherwise. A negative third element is what verifiers treat as
    "no GPU" (AB-Crown rejects it as CPU-only mode).

    NOTE(review): the first two elements look like an inclusive CPU index
    range starting at 0, and `mock=True` presumably leaves the tracker's
    state untouched -- confirm against `ResourceTracker`.
    """
    n_cpus, n_gpus = rt.deduct_by_name(verifier, mock=True)

    # HACK: Need to refactor ResourceTracker
    gpu_flag = -1 if n_gpus <= 0 else 0

    return (0, n_cpus - 1, gpu_flag)


def _remap_rh_keys(
    key_map: dict[Configuration, Configuration], rh: RunHistory
) -> RunHistory:
    """Return a new RunHistory with every trial re-keyed through `key_map`.

    Each trial in `rh` is copied into a fresh `RunHistory`; the only thing
    that changes is the configuration it is filed under, which is replaced
    by `key_map[<original configuration>]`. Cost, time, status, instance,
    seed, budget, start/end time and additional info are carried over
    unchanged. `rh` itself is not modified.

    Raises:
        KeyError: If a configuration found in `rh` has no entry in `key_map`.
    """
    remapped = RunHistory()

    for trial_key, trial_value in rh.items():
        source_cfg = rh.get_config(trial_key.config_id)
        remapped.add(
            key_map[source_cfg],
            trial_value.cost,
            trial_value.time,
            trial_value.status,
            trial_key.instance,
            trial_key.seed,
            trial_key.budget,
            trial_value.starttime,
            trial_value.endtime,
            trial_value.additional_info,
        )

    return remapped


def _get_init_kwargs(
    verifier: str, scenario: PortfolioScenario
) -> dict[str, Any]:
    """Look up the constructor keyword arguments configured for `verifier`.

    Returns `scenario.verifier_kwargs[verifier]` when the scenario defines
    per-verifier kwargs and has an entry for this verifier; otherwise an
    empty dict. The returned dict is the scenario's own object, not a copy.
    """
    configured = scenario.verifier_kwargs

    if configured is None or verifier not in configured:
        return {}

    return configured[verifier]


# TODO: Refactor this to use a more "Strategy"-like pattern
class Hydra:
"""_summary_."""
Expand Down Expand Up @@ -74,14 +125,18 @@ def tune_portfolio(self) -> Portfolio:

return portfolio

def _configurator(self, pf: Portfolio):
def _configurator(
self, pf: Portfolio
) -> list[tuple[Configuration, RunHistory]]:
# TODO: Iter > 0
new_configs: list[tuple[Configuration, RunHistory]] = []

for i in range(self._scenario.configs_per_iter):
logger.info(f"Configuration iteration {i}")

verifier = self._pick()
run_name = f"pick_{self._iter}_{i}"
logger.info("Picking verifier")
verifier = self._pick(run_name)

logger.info(f"Picked {verifier}")
logger.info(f"Tuning {verifier}")
Expand All @@ -94,17 +149,71 @@ def _configurator(self, pf: Portfolio):

return new_configs

# TODO: Implement SMAC picking
def _pick(self) -> str:
possible = self._ResourceTracker.get_possible()
_ = random.choice(possible)
return possible[self._iter]
# TODO: fix type errors
def _pick(self, run_name: str) -> str:
    """Pick the verifier that should be tuned next.

    Candidates are the verifiers the resource tracker reports as still
    possible. With a pick budget of 0, one of them is chosen uniformly at
    random. Otherwise a SMAC run is performed over a one-dimensional
    categorical space ("index" into the candidate list), each candidate
    being instantiated with its resource allocation and configured init
    kwargs. The resulting run history is re-keyed onto the candidates'
    default configurations and merged into the cost matrix.

    Args:
        run_name: Name given to the SMAC scenario for this pick run.

    Returns:
        The name of the selected verifier.
    """
    possible = self._ResourceTracker.get_possible()

    if self._scenario.pick_budget == 0:
        logger.info("Pick budget is 0, selecting random verifier.")
        # Draw from the resource-feasible candidates, same as the SMAC
        # path below, so we never pick a verifier that no longer fits.
        return random.choice(possible)

    verifier_insts: list[Verifier] = []

    for name in possible:
        verifier_class = verifier_from_name(name)
        alloc = _get_cpu_gpu_alloc(name, self._ResourceTracker)
        init_kwargs = _get_init_kwargs(name, self._scenario)
        verifier_insts.append(
            verifier_class(cpu_gpu_allocation=alloc, **init_kwargs)
        )

    target_func = get_pick_tf(verifier_insts)

    # The only "hyperparameter" is which candidate to run.
    pick_cfgspace = ConfigurationSpace()
    pick_cfgspace.add_hyperparameter(
        Categorical(
            "index",
            list(range(len(verifier_insts))),
            default=0,
        )
    )

    # Share of this iteration's time reserved for picking, split evenly
    # over the configurations produced per iteration.
    walltime_limit = (
        self._scenario.seconds_per_iter
        * self._scenario.pick_budget
        / self._scenario.configs_per_iter
    )

    smac_scenario = Scenario(
        pick_cfgspace,
        walltime_limit=walltime_limit,
        n_trials=sys.maxsize,  # we use walltime_limit
        name=run_name,
        **self._scenario.get_smac_scenario_kwargs(),
    )

    smac = ACFacade(smac_scenario, target_func, overwrite=True)
    inc = smac.optimize()

    # Not dealing with > 1 config
    assert isinstance(inc, Configuration)

    # Map each "index" configuration onto the default configuration of
    # the verifier it stands for, so the cost matrix stores real configs.
    key_map: dict[Configuration, Configuration] = {
        Configuration(pick_cfgspace, {"index": i}): inst.default_config
        for i, inst in enumerate(verifier_insts)
    }

    rh = _remap_rh_keys(key_map, smac.runhistory)
    self._cost_matrix.update_matrix(rh)

    return str(verifier_insts[inc["index"]].name)

def _tune(
self, verifier: str, run_name: str, target_func: TargetFunction
) -> tuple[Configuration, RunHistory]:
verifier_inst = verifier_from_name(verifier)()

if self._scenario.tune_budget == 0:
logger.info("Tune budget is 0, returning default configuration.")
return verifier_inst.default_config, RunHistory()

walltime_limit = (
self._scenario.seconds_per_iter
* self._scenario.tune_budget
Expand All @@ -129,22 +238,10 @@ def _tune(
def _get_target_func(self, verifier: str, pf: Portfolio) -> TargetFunction:
"""If iteration > 0, use the Hydra target function."""
verifier_class = verifier_from_name(verifier)

name = str(verifier_class.name)
init_kwargs = {} # TODO: Type

if (
self._scenario.verifier_kwargs is not None
and name in self._scenario.verifier_kwargs
):
init_kwargs = self._scenario.verifier_kwargs[name]

# TODO: CPU_GPU_ALLOCATION
cpus, gpus = self._ResourceTracker.deduct_from_name("nnenum", mock=True)

verifier_inst = verifier_class(
cpu_gpu_allocation=(0, cpus - 1, gpus), **init_kwargs
)
init_kwargs = _get_init_kwargs(name, self._scenario)
alloc = _get_cpu_gpu_alloc(verifier, self._ResourceTracker)
verifier_inst = verifier_class(cpu_gpu_allocation=alloc, **init_kwargs)
verifier_tf = get_verifier_tf(verifier_inst)

if self._iter == 0:
Expand Down Expand Up @@ -195,11 +292,4 @@ def _updater(
)

pf.update_costs(vbs_cost)

# TODO: Update the resourcetracker
print("/" * 40)
print(cfg.config_space.name)
print("pre:", self._ResourceTracker._resources)
self._ResourceTracker.deduct_from_name(cfg.config_space.name)
print("post:", self._ResourceTracker._resources)
print("/" * 40)
self._ResourceTracker.deduct_by_name(cfg.config_space.name)
11 changes: 10 additions & 1 deletion autoverify/portfolio/portfolio.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""_summary_."""
import datetime
import math
from collections.abc import Iterable, Mapping, MutableSet, Sequence
from dataclasses import dataclass
Expand Down Expand Up @@ -38,7 +39,7 @@ class PortfolioScenario:
added_per_iter: int = 1
stop_early = True
resource_strategy = ResourceStrategy.Auto
output_dir: Path = Path("./hydra_out")
output_dir: Path | None = None
verifier_kwargs: Mapping[str, dict[str, Any]] | None = None

def __post_init__(self):
Expand All @@ -51,6 +52,11 @@ def __post_init__(self):
if not 0 <= self.alpha <= 1:
raise ValueError(f"Alpha should be in [0.0, 1.0], got {self.alpha}")

if self.output_dir is None:
current_time = datetime.datetime.now()
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
self.output_dir = Path(f"hydra_out/{formatted_time}")

self.tune_budget = self.alpha
self.pick_budget = 1 - self.alpha

Expand Down Expand Up @@ -83,6 +89,9 @@ def _verify_resources(self):

def get_smac_scenario_kwargs(self) -> dict[str, Any]:
"""_summary_."""
assert self.output_dir is not None # This is set in `__post_init__`
self.output_dir.mkdir(parents=True, exist_ok=True)

return {
"instances": verification_instances_to_smac_instances(
self.instances
Expand Down
18 changes: 16 additions & 2 deletions autoverify/util/proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import os
import shlex
import subprocess
from collections.abc import Collection
from typing import Iterable, Sequence
from typing import Iterable


def pkill_match(pattern: str):
Expand All @@ -26,6 +25,21 @@ def cpu_count() -> int:
return len(os.sched_getaffinity(0))


def nvidia_gpu_count() -> int:
    """Get the number of available NVIDIA GPUs.

    Runs `nvidia-smi --query-gpu=name --format=csv,noheader` and counts the
    non-empty lines it prints (one per GPU).

    Returns:
        The number of GPUs reported by `nvidia-smi`, or 0 when `nvidia-smi`
        cannot be executed or exits with a non-zero status.
    """
    cmd = "nvidia-smi --query-gpu=name --format=csv,noheader"

    try:
        # `run` waits for the process and closes its pipes, so nothing is
        # left behind; the line count is done in Python instead of `wc -l`.
        result = subprocess.run(
            shlex.split(cmd),
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # nvidia-smi is not installed or not executable: no usable GPUs.
        return 0

    if result.returncode != 0:
        # On failure nvidia-smi may print an error message on stdout;
        # do not mistake those lines for GPUs.
        return 0

    return sum(1 for line in result.stdout.decode().splitlines() if line.strip())


def taskset_cpu_range(cpus: Iterable[int] | tuple[int, int]):
"""Make a taskset command with the specified CPUs."""
template = "taskset --cpu-list {}"
Expand Down
2 changes: 2 additions & 0 deletions autoverify/util/resource_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@
# HACK: Can't put this in `resources.py` because
# it results in a circular import
class ResourceStrategy(Enum):
    """Named strategies for handling verifier resources.

    Used as the `resource_strategy` default of `PortfolioScenario` and as
    the `strategy` argument of `ResourceTracker` (both default to `Auto`).

    NOTE(review): the exact behaviour behind each member is implemented
    elsewhere and is not visible from this module -- confirm there.
    """

    # String values allow the strategy to be given as plain text.
    Auto = "auto"
    Exact = "exact"
29 changes: 9 additions & 20 deletions autoverify/util/resources.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
import shlex
import subprocess

"""_summary_."""
from autoverify.portfolio.portfolio import PortfolioScenario
from autoverify.util.proc import cpu_count
from autoverify.util.proc import cpu_count, nvidia_gpu_count
from autoverify.util.resource_strategy import ResourceStrategy


class ResourceTracker:
"""_summary_."""

def __init__(
self,
pf_scen: PortfolioScenario,
*,
strategy: ResourceStrategy | str = ResourceStrategy.Auto,
):
"""_summary_."""
print(">" * 40)
print(pf_scen.resources)
print(">" * 40)
self._verifiers = pf_scen.verifiers
self._verifier_resources = pf_scen.resources
self._pf_len = pf_scen.length
Expand Down Expand Up @@ -54,7 +58,7 @@ def deduct(self, resources: tuple[int, int]):
)

# TODO: Respect strategy
def deduct_from_name(
def deduct_by_name(
self,
name: str,
*,
Expand Down Expand Up @@ -95,18 +99,3 @@ def _get_possible_auto(self) -> list[str]:
possible.append(v[0])

return possible


def nvidia_gpu_count() -> int:
"""Get the number of available NVIDIA GPUs."""
cmd = "nvidia-smi --query-gpu=name --format=csv,noheader"
cmd2 = "wc -l"

ps = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
count = (
subprocess.check_output(shlex.split(cmd2), stdin=ps.stdout)
.decode()
.rstrip()
)

return int(count)
5 changes: 4 additions & 1 deletion autoverify/verifier/complete/abcrown/verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def __init__(
yaml_override: dict[str, Any] | None = None,
):
"""_summary_."""
if cpu_gpu_allocation and cpu_gpu_allocation[2] < 0:
raise ValueError("AB-Crown CPU only mode not yet supported")

super().__init__(batch_size, cpu_gpu_allocation)
self._yaml_override = yaml_override

Expand Down Expand Up @@ -76,7 +79,7 @@ def _parse_result(
elif find_substring("Result: timeout", tool_result):
return "TIMEOUT", None

return "ERR", None
return "TIMEOUT", None

def _get_run_cmd(
self,
Expand Down
4 changes: 4 additions & 0 deletions autoverify/verifier/complete/mnbab/verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ class MnBab(CompleteVerifier):
name: str = "mnbab"
config_space: ConfigurationSpace = MnBabConfigspace

def __init__(self, *args, **kwargs):
    """Reject construction: this verifier is not implemented yet.

    Accepts (and ignores) arbitrary arguments so that generic construction
    code, which passes e.g. `cpu_gpu_allocation=...` to every verifier
    class, gets the intended `NotImplementedError` rather than a
    `TypeError` about unexpected arguments.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("The MnBab verifier is not implemented yet.")

#
# def test(self):
# """_summary_."""
Expand Down
2 changes: 1 addition & 1 deletion autoverify/verifier/complete/nnenum/verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _parse_result(
elif first_line == "timeout":
return "TIMEOUT", None

return "ERR", None
return "TIMEOUT", None

def _parse_counter_example(self, result_txt: str) -> str:
return result_txt.split("\n", maxsplit=1)[1]
Expand Down
Loading

0 comments on commit d33fa60

Please sign in to comment.