Skip to content

Commit

Permalink
Logged spec now includes overridden args (#1460)
Browse files Browse the repository at this point in the history
Using `--extra_eval_params` overrides args of the same name
specified in the eval .yaml, but the updated values were not logged
in the spec; the original values were logged instead. This PR fixes
this problem by updating `eval_spec.args` with the new values.

e.g. running `oaieval dummy make-me-pay --extra_eval_params turn_cap=1`
previously led to `"turn_cap": 5` being logged in the spec, since this
is the [default
value](https://github.com/openai/evals/blob/main/evals/registry/evals/make-me-pay.yaml#L13).
In this branch, running the same command leads to `"turn_cap": 1` being
logged in the spec.
  • Loading branch information
ojaffe authored Jan 26, 2024
1 parent cf002f2 commit 3040d6f
Showing 1 changed file with 24 additions and 23 deletions.
47 changes: 24 additions & 23 deletions evals/cli/oaieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,30 @@ def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> str:
eval_spec is not None
), f"Eval {args.eval} not found. Available: {list(sorted(registry._evals.keys()))}"

def parse_extra_eval_params(
    param_str: Optional[str],
) -> Mapping[str, Union[str, int, float]]:
    """Parse a string of the form "key1=value1,key2=value2" into a dict.

    Each value is converted to an int if possible, otherwise to a float,
    otherwise it is kept as a string. Only the first "=" in a pair separates
    the key from the value, so values may themselves contain "=". Empty
    segments (e.g. from a trailing comma) are ignored. If a key is repeated,
    the last occurrence wins.

    Args:
        param_str: The raw comma-separated "key=value" string, or None.

    Returns:
        A dict mapping each key to its parsed value; empty if `param_str`
        is None or empty.

    Raises:
        ValueError: If a non-empty segment contains no "=".
    """
    if not param_str:
        return {}

    def to_number(x: str) -> Union[int, float, str]:
        """Return `x` as an int, else as a float, else unchanged."""
        try:
            return int(x)
        except (ValueError, TypeError):
            pass
        try:
            return float(x)
        except (ValueError, TypeError):
            pass
        return x

    parsed = {}
    for kv in param_str.split(","):
        if not kv:
            # Tolerate stray commas rather than failing on an empty pair.
            continue
        # partition splits on the first "=" only, so "k=a=b" -> ("k", "a=b").
        key, sep, value = kv.partition("=")
        if not sep:
            raise ValueError(
                f"Invalid parameter {kv!r}: expected the form 'key=value'"
            )
        parsed[key] = to_number(value)
    return parsed

extra_eval_params = parse_extra_eval_params(args.extra_eval_params)
eval_spec.args.update(extra_eval_params)

# If the user provided an argument to --completion_args, parse it into a dict here, to be passed to the completion_fn creation **kwargs
completion_args = args.completion_args.split(",")
additional_completion_args = {k: v for k, v in (kv.split("=") for kv in completion_args if kv)}
Expand Down Expand Up @@ -186,29 +210,6 @@ def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> str:
run_url = f"{run_spec.run_id}"
logger.info(_purple(f"Run started: {run_url}"))

def parse_extra_eval_params(
    param_str: Optional[str],
) -> Mapping[str, Union[str, int, float]]:
    """Parse a string of the form "key1=value1,key2=value2" into a dict.

    Each value is converted to an int if possible, otherwise to a float,
    otherwise it is kept as a string. Only the first "=" in a pair separates
    the key from the value, so values may themselves contain "=". Empty
    segments (e.g. from a trailing comma) are ignored. If a key is repeated,
    the last occurrence wins.

    Args:
        param_str: The raw comma-separated "key=value" string, or None.

    Returns:
        A dict mapping each key to its parsed value; empty if `param_str`
        is None or empty.

    Raises:
        ValueError: If a non-empty segment contains no "=".
    """
    if not param_str:
        return {}

    def to_number(x: str) -> Union[int, float, str]:
        """Return `x` as an int, else as a float, else unchanged."""
        try:
            return int(x)
        except (ValueError, TypeError):
            pass
        try:
            return float(x)
        except (ValueError, TypeError):
            pass
        return x

    parsed = {}
    for kv in param_str.split(","):
        if not kv:
            # Tolerate stray commas rather than failing on an empty pair.
            continue
        # partition splits on the first "=" only, so "k=a=b" -> ("k", "a=b").
        key, sep, value = kv.partition("=")
        if not sep:
            raise ValueError(
                f"Invalid parameter {kv!r}: expected the form 'key=value'"
            )
        parsed[key] = to_number(value)
    return parsed

extra_eval_params = parse_extra_eval_params(args.extra_eval_params)

eval_class = registry.get_class(eval_spec)
eval: Eval = eval_class(
completion_fns=completion_fn_instances,
Expand Down

0 comments on commit 3040d6f

Please sign in to comment.