Logged spec now includes overridden args (#1460)

Using `--extra_eval_params` overrides args of the same name specified in the eval .yaml, but the updated values were not logged in the spec; the original values were logged instead. This PR fixes the problem by updating `eval_spec.args` with the new values. For example, running `oaieval dummy make-me-pay --extra_eval_params turn_cap=1` previously led to `"turn_cap": 5` being logged in the spec, since this is the [default value](https://github.com/openai/evals/blob/main/evals/registry/evals/make-me-pay.yaml#L13). In this branch, running the same command leads to `"turn_cap": 1` being logged in the spec.
openai · Jan 26, 2024 · 3040d6f · 3040d6f
1 parent cf002f2
commit 3040d6f
Showing 1 changed file with 24 additions and 23 deletions.
diff --git a/evals/cli/oaieval.py b/evals/cli/oaieval.py
@@ -133,6 +133,30 @@ def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> str:
         eval_spec is not None
     ), f"Eval {args.eval} not found. Available: {list(sorted(registry._evals.keys()))}"
 
+    def parse_extra_eval_params(
+        param_str: Optional[str],
+    ) -> Mapping[str, Union[str, int, float]]:
+        """Parse a string of the form "key1=value1,key2=value2" into a dict."""
+        if not param_str:
+            return {}
+
+        def to_number(x: str) -> Union[int, float, str]:
+            try:
+                return int(x)
+            except (ValueError, TypeError):
+                pass
+            try:
+                return float(x)
+            except (ValueError, TypeError):
+                pass
+            return x
+
+        str_dict = dict(kv.split("=") for kv in param_str.split(","))
+        return {k: to_number(v) for k, v in str_dict.items()}
+
+    extra_eval_params = parse_extra_eval_params(args.extra_eval_params)
+    eval_spec.args.update(extra_eval_params)
+
     # If the user provided an argument to --completion_args, parse it into a dict here, to be passed to the completion_fn creation **kwargs
     completion_args = args.completion_args.split(",")
     additional_completion_args = {k: v for k, v in (kv.split("=") for kv in completion_args if kv)}
@@ -186,29 +210,6 @@ def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> str:
     run_url = f"{run_spec.run_id}"
     logger.info(_purple(f"Run started: {run_url}"))
 
-    def parse_extra_eval_params(
-        param_str: Optional[str],
-    ) -> Mapping[str, Union[str, int, float]]:
-        """Parse a string of the form "key1=value1,key2=value2" into a dict."""
-        if not param_str:
-            return {}
-
-        def to_number(x: str) -> Union[int, float, str]:
-            try:
-                return int(x)
-            except (ValueError, TypeError):
-                pass
-            try:
-                return float(x)
-            except (ValueError, TypeError):
-                pass
-            return x
-
-        str_dict = dict(kv.split("=") for kv in param_str.split(","))
-        return {k: to_number(v) for k, v in str_dict.items()}
-
-    extra_eval_params = parse_extra_eval_params(args.extra_eval_params)
-
     eval_class = registry.get_class(eval_spec)
     eval: Eval = eval_class(
         completion_fns=completion_fn_instances,