From 5ea0984a6a582f5cad2d50a6863e596b1fb75b1a Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 15 Apr 2024 15:21:25 +0000 Subject: [PATCH 001/100] copied replay_mythril.py over --- tune/protox/agent/replay.py | 330 ++++++++++++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 tune/protox/agent/replay.py diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py new file mode 100644 index 00000000..55410544 --- /dev/null +++ b/tune/protox/agent/replay.py @@ -0,0 +1,330 @@ +import datetime +import logging +import time +import yaml +import os +import json +import pandas as pd +import tqdm +import argparse +import gymnasium as gym +from pathlib import Path +from dateutil.parser import parse + +import sys +sys.path.append("/home/wz2/mythril") + +from envs.spec import Spec +from envs.pg_env import PostgresEnv + +class DotDict(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + +def gogo(args): + maximal = args.maximal + maximal_only = args.maximal_only + threshold = args.threshold + + with open(f"{args.input}/config.yaml") as f: + mythril = yaml.safe_load(f) + mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" + mythril["mythril"]["verbose"] = True + mythril["mythril"]["postgres_path"] = args.pg_path + + with open(f"{args.input}/config.yaml2", "w") as f: + yaml.dump(mythril, stream=f, default_flow_style=False) + + if args.alternate: + horizon = args.horizon + per_query_timeout = args.pqt + else: + with open(f"{args.input}/stdout", "r") as f: + config = f.readlines()[0] + config = eval(config.split("HPO Configuration: ")[-1]) + horizon = config["horizon"] + + with open(f"{args.input}/stdout", "r") as f: + for line in f: + if "HPO Configuration: " in line: + hpo = eval(line.split("HPO Configuration: ")[-1].strip()) + per_query_timeout = hpo["mythril_args"]["timeout"] + + folders = [] + start_found = False + filename = "output.log" if args.alternate else "stderr" + last_evaluation = None + with open(f"{args.input}/{filename}", "r") as f: + for line in f: + if not start_found: + if "Baseilne Metric" in line: + start_time = parse(line.split("INFO:")[-1].split(" Baseilne Metric")[0]) + start_found = True + else: + if "mv" in line and "repository" in line: + repo = eval(line.split("Running ")[-1])[-1] + last_folder = repo.split("/")[-1] + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + last_evaluation = time_since_start + if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: + folders.append(last_folder) + + # Only apply threshold if time is less than. + threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) + + spec = Spec( + agent_type=None, + seed=0, + horizon=horizon, + config_path=f"{args.input}/config.yaml2", + benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", + workload_timeout=0) + + env = PostgresEnv( + spec, + horizon=horizon, + timeout=None, + reward_utility=None, + logger=None, + replay=True) + + if not args.simulated: + env.restore_pristine_snapshot() + env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) + spec.workload.reset() + + # Get the minimum reward. 
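+    # A run's reward is its end-to-end workload latency in seconds, and a run
+    # counts as timed out when its slowest query hit per_query_timeout. In
+    # other words (a sketch, assuming run.raw.csv holds one row per query):
+    #   reward    = run["Latency (microseconds)"].sum() / 1e6
+    #   timed_out = (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout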
+ runs = [Path(args.input) / "repository" / fold / "run.raw.csv" for fold in folders] + runs = [pd.read_csv(run) for run in runs] + rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] + rewards = sorted(rewards, key=lambda x: x[0]) + min_reward = min([r[0] for r in rewards]) + if maximal: + target = [r[1] for r in rewards if r[0] == min_reward] + assert len(target) >= 1 + if target[0]: + # Don't use maximal if the min maximal is timed out. + # Don't threshold either. + threshold = 0 + maximal = False + # Reject maximal only. + maximal_only = False + logging.warn("Maximal disabled.") + else: + logging.info(f"Maximal found: {min_reward}") + + num_lines = 0 + with open(f"{args.input}/{filename}", "r") as f: + for line in f: + if "Baseilne Metric" in line: + num_lines += 1 + elif "mv" in line and "repository" in line: + num_lines += 1 + + def run_sample(action, timeout): + samples = [] + # This should reliably check that we are loading the correct knobs... + ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} + for i in range(args.samples): + runtime = spec.workload._execute_workload( + connection=env.connection, + workload_timeout=timeout, + ql_knobs=ql_knobs, + env_spec=spec, + blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) + samples.append(runtime) + logging.info(f"Runtime: {runtime}") + + if runtime >= args.workload_timeout: + break + + if args.samples == 2 and runtime >= timeout: + break + elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: + break + + return samples + + run_data = [] + pbar = tqdm.tqdm(total=num_lines) + with open(f"{args.input}/{filename}", "r") as f: + current_step = 0 + + start_found = False + start_time = None + timeout = args.workload_timeout + cur_reward_max = timeout + selected_action_knobs = None + noop_index = False + maximal_repo = None + existing_indexes = [] + + for line in f: + # Keep going until we've found the start. + if not start_found: + if "Baseilne Metric" in line: + start_found = True + start_time = parse(line.split("INFO:")[-1].split(" Baseilne Metric")[0]) + pbar.update(1) + continue + + elif "Selected action: " in line: + act = eval(line.split("Selected action: ")[-1]) + selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] + noop_index = "NOOP" in act[1][0] + + elif (maximal and ("mv" in line and "repository" in line)): + maximal_repo = line + + elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "repository" in line)): + if "mv" in line and "repository" in line: + repo = eval(line.split("Running ")[-1])[-1] + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + elif "Found new maximal state with" in line: + repo = eval(maximal_repo.split("Running ")[-1])[-1] + time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) + maximal_repo = None + + # Get the evaluation reward. 
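+                # (Assumed layout, as above: run.raw.csv has one row per query;
+                # the assertions below guard against a malformed results file.)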
+                reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv")
+                assert len(reward.columns) == 6
+                has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout
+                reward = reward["Latency (microseconds)"].sum() / 1e6
+                assert reward > 0
+
+                if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout):
+                    index_sqls = []
+                    knobs = {}
+                    insert_knobs = False
+
+                    with open(f"{args.input}/{repo}/act_sql.txt", "r") as f:
+                        for line in f:
+                            line = line.strip()
+                            if len(line) == 0:
+                                insert_knobs = True
+                            elif not insert_knobs:
+                                index_sqls.append(line)
+                            else:
+                                k, v = line.split(" = ")
+                                knobs[k] = float(v)
+
+                    assert len(index_sqls) > 0
+                    assert len(knobs) > 0
+                    with open(f"{args.input}/{repo}/prior_state.txt", "r") as f:
+                        prior_states = eval(f.read())
+                        all_sc = [s.strip() for s in prior_states[1]]
+                        if not noop_index:
+                            all_sc.extend(index_sqls)
+
+                        all_sc = [a for a in all_sc if not "USING btree ()" in a]
+                        index_sqls = all_sc
+
+                    execute_sqls = []
+                    for index_sql in index_sqls:
+                        if index_sql in existing_indexes:
+                            continue
+                        execute_sqls.append(index_sql)
+                    for index_sql in existing_indexes:
+                        if index_sql not in index_sqls:
+                            indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0]
+                            execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}")
+
+                    if not args.simulated:
+                        # Reset snapshot.
+                        env.action_space.reset(connection=env.connection, workload=env.workload)
+                        cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {})
+                        env.shift_state(cc, execute_sqls, dump_page_cache=True)
+                    existing_indexes = index_sqls
+
+                    if not args.simulated:
+                        # Get samples.
+                        run_samples = samples = run_sample(knobs, timeout)
+                        logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}")
+                    else:
+                        run_samples = samples = [reward, reward]
+
+                    data = {
+                        "step": current_step,
+                        "orig_cost": reward,
+                        "time_since_start": (time_since_start - start_time).total_seconds(),
+                    }
+                    samples = {f"runtime{i}": s for i, s in enumerate(samples)}
+                    data.update(samples)
+                    run_data.append(data)
+
+                    current_step += 1
+
+                    if (not has_timeout) or (max(run_samples) < timeout):
+                        # Apply a tolerance.
+                        # If we've timed out, only apply the threshold if we've found a strictly better config.
+                        apply_threshold = threshold if time_since_start < threshold_limit else 0
+                        cur_reward_max = reward - apply_threshold
+
+                    if max(run_samples) < timeout:
+                        timeout = max(run_samples)
+
+                run_folder = repo.split("/")[-1]
+                if run_folder in folders and run_folder == folders[-1]:
+                    break
+                elif maximal_only and reward == min_reward:
+                    break
+            pbar.update(1)
+
+    if len(run_data) > 0:
+        data = {
+            "step": current_step,
+            "orig_cost": run_data[-1]["orig_cost"],
+            "time_since_start": -1,
+            "runtime0": run_data[-1]["runtime0"],
+        }
+        run_data.append(data)
+
+    # Output.
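+    # One CSV row per replayed step: step, orig_cost, time_since_start, then
+    # runtime0..runtimeN (one column per sample collected at that step).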
+ pd.DataFrame(run_data).to_csv(args.output, index=False) + env.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="UDO Replay") + parser.add_argument("--input", type=Path) + parser.add_argument("--benchmark", type=str) + parser.add_argument("--workload-timeout", type=int) + parser.add_argument("--samples", type=int) + parser.add_argument("--threshold", type=float) + parser.add_argument("--threshold-limit", type=float, default=0) + parser.add_argument("--maximal", action="store_true") + parser.add_argument("--simulated", action="store_true") + parser.add_argument("--maximal-only", action="store_true") + parser.add_argument("--alternate", action="store_true", default=False) + parser.add_argument("--pqt", type=int, default=0) + parser.add_argument("--horizon", type=int, default=0) + parser.add_argument("--cutoff", type=float, default=0) + parser.add_argument("--blocklist", default="") + parser.add_argument("--pg-path", type=str, default="/mnt/nvme0n1/wz2/noisepage") + + parser.add_argument("--output-path", type=str, default="out.csv") + args = parser.parse_args() + + while True: + pargs = DotDict(vars(args)) + output_path = args.output_path + + runs = Path(pargs.input).rglob("config.yaml") + runs = sorted([f for f in runs if not (f.parent / output_path).exists()]) + for run in tqdm.tqdm([f for f in runs], leave=False): + if args.simulated: + adjust_output = run.parent / "out_simulated.csv" + else: + adjust_output = run.parent / args.output_path + + if adjust_output.exists(): + continue + + print(f"Parsing {run.parent}") + new_args = pargs + new_args.input = run.parent + new_args.output = adjust_output + gogo(new_args) + + break \ No newline at end of file From 16648904f62ef8c38265f1a82f3f31ca7c260169 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 15 Apr 2024 15:32:42 +0000 Subject: [PATCH 002/100] added replay function --- tune/protox/agent/cli.py | 2 ++ tune/protox/agent/replay.py | 28 +++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tune/protox/agent/cli.py b/tune/protox/agent/cli.py index 968d2f12..a78814a0 100644 --- a/tune/protox/agent/cli.py +++ b/tune/protox/agent/cli.py @@ -3,6 +3,7 @@ from misc.utils import DBGymConfig from tune.protox.agent.hpo import hpo from tune.protox.agent.tune import tune +from tune.protox.agent.replay import replay @click.group("agent") @@ -13,3 +14,4 @@ def agent_group(dbgym_cfg: DBGymConfig): agent_group.add_command(hpo) agent_group.add_command(tune) +agent_group.add_command(replay) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 55410544..9478cb1b 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -1,21 +1,20 @@ import datetime import logging -import time +import click import yaml -import os -import json import pandas as pd import tqdm import argparse -import gymnasium as gym from pathlib import Path from dateutil.parser import parse import sys + +from misc.utils import DBGymConfig sys.path.append("/home/wz2/mythril") from envs.spec import Spec -from envs.pg_env import PostgresEnv +from tune.protox.env.pg_env import PostgresEnv class DotDict(dict): __getattr__ = dict.get @@ -23,6 +22,25 @@ class DotDict(dict): __delattr__ = dict.__delitem__ +@click.command() +@click.pass_obj +@click.argument("benchmark-name") +@click.option("--seed-start", type=int, default=15721, help="A workload consists of queries from multiple seeds. 
This is the starting seed (inclusive).") +@click.option("--seed-end", type=int, default=15721, help="A workload consists of queries from multiple seeds. This is the ending seed (inclusive).") +@click.option( + "--query-subset", + type=click.Choice(["all", "even", "odd"]), + default="all", +) +@click.option( + "--scale-factor", + default=1.0, + help=f"The scale factor used when generating the data of the benchmark.", +) +def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float) -> None: + pass + + def gogo(args): maximal = args.maximal maximal_only = args.maximal_only From 6dce17a4813b241a3ca333bcd667ca558b92dda0 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 15 Apr 2024 15:32:57 +0000 Subject: [PATCH 003/100] Baseilne -> Baseline --- tune/protox/agent/replay.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 9478cb1b..67214416 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -77,8 +77,8 @@ def gogo(args): with open(f"{args.input}/{filename}", "r") as f: for line in f: if not start_found: - if "Baseilne Metric" in line: - start_time = parse(line.split("INFO:")[-1].split(" Baseilne Metric")[0]) + if "Baseline Metric" in line: + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) start_found = True else: if "mv" in line and "repository" in line: @@ -136,7 +136,7 @@ def gogo(args): num_lines = 0 with open(f"{args.input}/{filename}", "r") as f: for line in f: - if "Baseilne Metric" in line: + if "Baseline Metric" in line: num_lines += 1 elif "mv" in line and "repository" in line: num_lines += 1 @@ -182,9 +182,9 @@ def run_sample(action, timeout): for line in f: # Keep going until we've found the start. 
if not start_found: - if "Baseilne Metric" in line: + if "Baseline Metric" in line: start_found = True - start_time = parse(line.split("INFO:")[-1].split(" Baseilne Metric")[0]) + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) pbar.update(1) continue From 59c3d349bfa88d8860a3609b79ee43ee48451ae8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 15 Apr 2024 15:44:04 +0000 Subject: [PATCH 004/100] pqt -> query_timeout --- tune/protox/agent/build_trial.py | 4 ++-- tune/protox/agent/replay.py | 4 ++-- tune/protox/env/mqo/mqo_wrapper.py | 6 +++--- tune/protox/env/pg_env.py | 8 ++++---- tune/protox/env/util/execute.py | 14 +++++++------- tune/protox/env/workload.py | 12 ++++++------ 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 0e44b517..d9fda58d 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -318,7 +318,7 @@ def _build_env( horizon=hpoed_params["horizon"], reward_utility=reward_utility, pgconn=pgconn, - pqt=hpoed_params["query_timeout"], + query_timeout=hpoed_params["query_timeout"], benchbase_config=hpoed_params["benchbase_config"], logger=logger, replay=False, @@ -336,7 +336,7 @@ def _build_env( workload_eval_inverse=hpoed_params["workload_eval_inverse"], workload_eval_reset=hpoed_params["workload_eval_reset"], benchbase_config=hpoed_params["benchbase_config"], - pqt=hpoed_params["query_timeout"], + query_timeout=hpoed_params["query_timeout"], env=env, logger=logger, ) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 67214416..751c47e6 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -57,7 +57,7 @@ def gogo(args): if args.alternate: horizon = args.horizon - per_query_timeout = args.pqt + per_query_timeout = args.query_timeout else: with open(f"{args.input}/stdout", "r") as f: config = f.readlines()[0] @@ -315,7 +315,7 @@ def run_sample(action, timeout): parser.add_argument("--simulated", action="store_true") parser.add_argument("--maximal-only", action="store_true") parser.add_argument("--alternate", action="store_true", default=False) - parser.add_argument("--pqt", type=int, default=0) + parser.add_argument("--query_timeout", type=int, default=0) parser.add_argument("--horizon", type=int, default=0) parser.add_argument("--cutoff", type=float, default=0) parser.add_argument("--blocklist", default="") diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 02e4d124..1943f038 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -114,7 +114,7 @@ def __init__( workload_eval_mode: str, workload_eval_inverse: bool, workload_eval_reset: bool, - pqt: int, + query_timeout: int, benchbase_config: dict[str, Any], env: gym.Env[Any, Any], logger: Optional[Logger], @@ -136,7 +136,7 @@ def __init__( self.workload_eval_mode = workload_eval_mode self.workload_eval_inverse = workload_eval_inverse self.workload_eval_reset = workload_eval_reset - self.pqt = pqt + self.query_timeout = query_timeout self.benchbase_config = benchbase_config self.best_observed: dict[str, BestQueryRun] = {} self.logger = logger @@ -337,7 +337,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: actions=[r[1] for r in runs], actions_names=[r[0] for r in runs], benchbase_config=self.benchbase_config, - pqt=self.pqt, + query_timeout=self.query_timeout, reset_metrics=kwargs["options"]["query_metric_data"], 
update=False, first=False, diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index f13f1884..e6c2262e 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -33,7 +33,7 @@ def __init__( horizon: int, reward_utility: RewardUtility, pgconn: PostgresConn, - pqt: int, + query_timeout: int, benchbase_config: dict[str, Any], logger: Optional[Logger] = None, replay: bool = False, @@ -51,7 +51,7 @@ def __init__( self.benchbase_config = benchbase_config self.pgconn = pgconn - self.pqt = pqt + self.query_timeout = query_timeout self.current_state: Optional[Any] = None self.baseline_metric: Optional[float] = None @@ -163,7 +163,7 @@ def reset( # type: ignore actions=[default_action], actions_names=["GlobalDual"], benchbase_config=self.benchbase_config, - pqt=self.pqt, + query_timeout=self.query_timeout, update=False, first=True, ) @@ -262,7 +262,7 @@ def step_execute( obs_space=self.observation_space, action_space=self.action_space, benchbase_config=self.benchbase_config, - pqt=self.pqt, + query_timeout=self.query_timeout, actions=[a[1] for a in actions], actions_names=[a[0] for a in actions], update=True, diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index 1dc09e74..d7a7584c 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -75,18 +75,18 @@ def _acquire_metrics_around_query( prefix: str, connection: psycopg.Connection[Any], query: str, - pqt: float = 0.0, + query_timeout: float = 0.0, obs_space: Optional[StateSpace] = None, ) -> Tuple[float, bool, Any, Any]: _force_statement_timeout(connection, 0) if obs_space and obs_space.require_metrics(): initial_metrics = obs_space.construct_online(connection) - if pqt > 0: - _force_statement_timeout(connection, pqt * 1000) + if query_timeout > 0: + _force_statement_timeout(connection, query_timeout * 1000) qid_runtime, did_timeout, explain_data = _time_query( - logger, prefix, connection, query, pqt + logger, prefix, connection, query, query_timeout ) # Wipe the statement timeout. @@ -105,14 +105,14 @@ def execute_variations( connection: psycopg.Connection[Any], runs: list[QueryRun], query: str, - pqt: float = 0, + query_timeout: float = 0, logger: Optional[Logger] = None, sysknobs: Optional[KnobSpaceAction] = None, obs_space: Optional[StateSpace] = None, ) -> BestQueryRun: # Initial timeout. - timeout_limit = pqt + timeout_limit = query_timeout # Best run invocation. 
best_qr = BestQueryRun(None, None, True, None, None) @@ -145,7 +145,7 @@ def execute_variations( prefix=qr.prefix_qid, connection=connection, query=pqk_query, - pqt=timeout_limit, + query_timeout=timeout_limit, obs_space=obs_space, ) diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index b1bf9391..902d28a2 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -339,7 +339,7 @@ def _execute_workload( action_space: Optional[HolonSpace] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, override_workload_timeout: Optional[float] = None, - pqt: Optional[int] = None, + query_timeout: Optional[int] = None, workload_qdir: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, disable_pg_hint: bool = False, blocklist: list[str] = [], @@ -449,7 +449,7 @@ def _execute_workload( f"{qid}", pgconn.conn(), query, - pqt=time_left, + query_timeout=time_left, obs_space=None, ) @@ -483,7 +483,7 @@ def _execute_workload( if r[2] not in [rr[2] for rr in runs]: runs.append(r) - target_pqt = pqt if pqt else workload_timeout + target_pqt = query_timeout if query_timeout else workload_timeout skip_execute = False if ( reset_metrics is not None @@ -508,7 +508,7 @@ def _execute_workload( connection=pgconn.conn(), runs=runs, query=query, - pqt=min(target_pqt, workload_timeout - workload_time + 1), + query_timeout=min(target_pqt, workload_timeout - workload_time + 1), logger=self.logger, sysknobs=sysknobs, obs_space=obs_space, @@ -670,7 +670,7 @@ def execute( actions: list[HolonAction], actions_names: list[str], benchbase_config: dict[str, Any], - pqt: Optional[int] = None, + query_timeout: Optional[int] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, update: bool = True, first: bool = False, @@ -699,7 +699,7 @@ def execute( action_space=action_space, reset_metrics=reset_metrics, override_workload_timeout=self.workload_timeout, - pqt=pqt, + query_timeout=query_timeout, workload_qdir=None, disable_pg_hint=False, blocklist=[], From ae99b21d373f4dab5ff441cabb3cc244f7fd298e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 00:07:25 +0000 Subject: [PATCH 005/100] repository -> tuning_steps --- tune/protox/agent/build_trial.py | 2 +- tune/protox/agent/replay.py | 12 ++++++------ tune/protox/env/logger.py | 16 ++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index d9fda58d..23214b7a 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -136,7 +136,7 @@ def _build_utilities( hpoed_params["trace"], hpoed_params["verbose"], Path(logdir) / hpoed_params["output_log_path"], - Path(logdir) / hpoed_params["output_log_path"] / "repository", + Path(logdir) / hpoed_params["output_log_path"] / "tuning_steps", Path(logdir) / hpoed_params["output_log_path"] / "tboard", ) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 751c47e6..7f9a54eb 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -81,7 +81,7 @@ def gogo(args): start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) start_found = True else: - if "mv" in line and "repository" in line: + if "mv" in line and "tuning_steps" in line: repo = eval(line.split("Running ")[-1])[-1] last_folder = repo.split("/")[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) @@ -114,7 +114,7 @@ def gogo(args): spec.workload.reset() # Get the minimum reward. 
- runs = [Path(args.input) / "repository" / fold / "run.raw.csv" for fold in folders] + runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] runs = [pd.read_csv(run) for run in runs] rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] rewards = sorted(rewards, key=lambda x: x[0]) @@ -138,7 +138,7 @@ def gogo(args): for line in f: if "Baseline Metric" in line: num_lines += 1 - elif "mv" in line and "repository" in line: + elif "mv" in line and "tuning_steps" in line: num_lines += 1 def run_sample(action, timeout): @@ -193,11 +193,11 @@ def run_sample(action, timeout): selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] noop_index = "NOOP" in act[1][0] - elif (maximal and ("mv" in line and "repository" in line)): + elif (maximal and ("mv" in line and "tuning_steps" in line)): maximal_repo = line - elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "repository" in line)): - if "mv" in line and "repository" in line: + elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): + if "mv" in line and "tuning_steps" in line: repo = eval(line.split("Running ")[-1])[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) elif "Found new maximal state with" in line: diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 1ab8d2bb..8c339b6a 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -56,13 +56,13 @@ def __init__( trace: bool, verbose: bool, output_log_path: str, - repository_path: str, + tuning_steps_path: str, tensorboard_path: str, ) -> None: self.trace = trace self.verbose = verbose - self.repository_path = repository_path - Path(repository_path).mkdir(parents=True, exist_ok=True) + self.tuning_steps_path = tuning_steps_path + Path(tuning_steps_path).mkdir(parents=True, exist_ok=True) level = logging.INFO if not self.verbose else logging.DEBUG formatter = "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" @@ -95,21 +95,21 @@ def stash_results( time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") time = name_override if name_override else time if info_dict["results"] is not None and Path(info_dict["results"]).exists(): - local["mv"][info_dict["results"], f"{self.repository_path}/{time}"].run() + local["mv"][info_dict["results"], f"{self.tuning_steps_path}/{time}"].run() else: - Path(f"{self.repository_path}/{time}").mkdir(parents=True, exist_ok=True) + Path(f"{self.tuning_steps_path}/{time}").mkdir(parents=True, exist_ok=True) if info_dict["prior_pgconf"]: local["mv"][ - info_dict["prior_pgconf"], f"{self.repository_path}/{time}/old_pg.conf" + info_dict["prior_pgconf"], f"{self.tuning_steps_path}/{time}/old_pg.conf" ].run() if info_dict["prior_state_container"]: - with open(f"{self.repository_path}/{time}/prior_state.txt", "w") as f: + with open(f"{self.tuning_steps_path}/{time}/prior_state.txt", "w") as f: f.write(str(info_dict["prior_state_container"])) if info_dict["action_json"]: - with open(f"{self.repository_path}/{time}/action.txt", "w") as f: + with open(f"{self.tuning_steps_path}/{time}/action.txt", "w") as f: f.write(info_dict["action_json"]) def advance(self) -> None: From 08829d85ecba037d084d10fffa110b491eda2120 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 00:20:31 +0000 Subject: [PATCH 006/100] removed logdir entirely --- 
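Notes: every artifact path is now derived from the DBGymConfig workspace
accessors instead of an ad-hoc logdir handed through Ray. A minimal sketch of
the resulting usage, assuming the accessor behavior shown in the diff below:

    # All per-run outputs hang off the current task's run directory.
    artifacts_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True)
    pglog_fpath = artifacts_dpath / "pg.log"
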
dbms/postgres/cli.py | 2 +- misc/utils.py | 10 ++++++++-- tune/protox/agent/build_trial.py | 21 ++++++++++----------- tune/protox/agent/hpo.py | 3 --- tune/protox/agent/tune.py | 11 ++++++----- tune/protox/embedding/select.py | 2 +- tune/protox/env/logger.py | 16 ++++++++-------- tune/protox/env/util/pg_conn.py | 13 ++++++------- 8 files changed, 40 insertions(+), 38 deletions(-) diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index dc8a7824..69ab2460 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -126,7 +126,7 @@ def _create_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: fl # create .tgz file # you can't pass "[pgdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[pgdata].tgz" as a dir pgdata_tgz_real_fpath = dbgym_cfg.cur_task_runs_data_path( - ".", mkdir=True + mkdir=True ) / get_pgdata_tgz_name(benchmark_name, scale_factor) # we need to cd into pgdata_dpath so that the tar file does not contain folders for the whole path of pgdata_dpath subprocess_run(f"tar -czf {pgdata_tgz_real_fpath} .", cwd=pgdata_dpath) diff --git a/misc/utils.py b/misc/utils.py index d6398305..9a8788af 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -108,7 +108,7 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: lambda workspace_path, benchmark_name, scale_factor: get_symlinks_path_from_workspace_path( workspace_path ) - / f"dbgym_dbms_postgres" + / "dbgym_dbms_postgres" / "data" / get_pgdata_tgz_name(benchmark_name, scale_factor) ) @@ -121,7 +121,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: lambda workspace_path: get_symlinks_path_from_workspace_path( workspace_path ) - / f"dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin" + / "dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin" +) +default_tuning_steps_dpath = ( + lambda workspace_path, benchmark_name, workload_name, boot_enabled: get_symlinks_path_from_workspace_path( + workspace_path + ) + / "dbgym_tune_protox_agent" / "artifacts" / f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled else ''}_tuning_steps" ) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 23214b7a..c55cb2e9 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -93,9 +93,9 @@ def _get_signal(signal_folder: Union[str, Path]) -> Tuple[int, str]: raise IOError("No free ports to bind postgres to.") -def _modify_benchbase_config(logdir: str, port: int, hpoed_params: dict[str, Any]) -> None: +def _modify_benchbase_config(dbgym_cfg: DBGymConfig, port: int, hpoed_params: dict[str, Any]) -> None: if hpoed_params["benchmark_config"]["query_spec"]["oltp_workload"]: - conf_etree = ET.parse(Path(logdir) / "benchmark.xml") + conf_etree = ET.parse(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml") jdbc = f"jdbc:postgresql://localhost:{port}/benchbase?preferQueryMode=extended" conf_etree.getroot().find("url").text = jdbc # type: ignore @@ -110,7 +110,7 @@ def _modify_benchbase_config(logdir: str, port: int, hpoed_params: dict[str, Any conf_etree.getroot().find("works").find("work").find("time").text = str(oltp_config["oltp_duration"]) # type: ignore if works.find("warmup") is not None: # type: ignore conf_etree.getroot().find("works").find("work").find("warmup").text = str(oltp_config["oltp_warmup"]) # type: ignore - conf_etree.write(Path(logdir) / "benchmark.xml") + 
conf_etree.write(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml") def _gen_noise_scale( @@ -130,14 +130,14 @@ def f(p: ProtoAction, n: torch.Tensor) -> ProtoAction: def _build_utilities( - dbgym_cfg: DBGymConfig, logdir: str, pgport: int, hpoed_params: dict[str, Any] + dbgym_cfg: DBGymConfig, pgport: int, hpoed_params: dict[str, Any] ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]: logger = Logger( hpoed_params["trace"], hpoed_params["verbose"], - Path(logdir) / hpoed_params["output_log_path"], - Path(logdir) / hpoed_params["output_log_path"] / "tuning_steps", - Path(logdir) / hpoed_params["output_log_path"] / "tboard", + dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"], + dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"] / "tuning_steps", + dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"] / "tboard", ) reward_utility = RewardUtility( @@ -157,7 +157,6 @@ def _build_utilities( pristine_pgdata_snapshot_fpath=Path(hpoed_params["pgconn_info"]["pristine_pgdata_snapshot_path"]), pgdata_parent_dpath=Path(hpoed_params["pgconn_info"]["pgdata_parent_dpath"]), pgbin_path=Path(hpoed_params["pgconn_info"]["pgbin_path"]), - postgres_logs_dir=Path(logdir) / hpoed_params["output_log_path"] / "pg_logs", connect_timeout=300, logger=logger, ) @@ -504,14 +503,14 @@ def _build_agent( def build_trial( - dbgym_cfg: DBGymConfig, seed: int, logdir: str, hpoed_params: dict[str, Any] + dbgym_cfg: DBGymConfig, seed: int, hpoed_params: dict[str, Any] ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]: # The massive trial builder. port, signal = _get_signal(hpoed_params["pgconn_info"]["pgbin_path"]) - _modify_benchbase_config(logdir, port, hpoed_params) + _modify_benchbase_config(dbgym_cfg, port, hpoed_params) - logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, logdir, port, hpoed_params) + logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, port, hpoed_params) holon_space, lsc = _build_actions(dbgym_cfg, seed, hpoed_params, workload, logger) obs_space = _build_obs_space(dbgym_cfg, holon_space, lsc, hpoed_params, seed) target_reset, env = _build_env( diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index ee0fb23c..15d2f74d 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -399,13 +399,11 @@ def setup(self, hpoed_params: dict[str, Any]) -> None: ) np.random.seed(seed) torch.manual_seed(seed) - assert hasattr(self, "logdir") self.timeout = TuneTimeoutChecker(hpoed_params["duration"]) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, seed=seed, - logdir=self.logdir, hpoed_params=hpoed_params ) self.logger.get_logger(None).info("%s", hpoed_params) @@ -488,7 +486,6 @@ class TuneOpt(Trainable): def setup(self, hpoed_params: dict[str, Any]) -> None: self.trial = TuneTrial(TuneOpt.dbgym_cfg) - self.trial.logdir = self.logdir # type: ignore self.trial.setup(hpoed_params) def step(self) -> dict[Any, Any]: diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 6b87cb96..ce7effcd 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -5,7 +5,7 @@ import click import pandas as pd -from misc.utils import WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn +from misc.utils import 
WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn from tune.protox.agent.coerce_config import coerce_config from tune.protox.agent.hpo import TuneTrial, build_space @@ -63,9 +63,6 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Piggyback off the HPO magic. t = TuneTrial(dbgym_cfg) - # This is a hack. - t.logdir = Path(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True)) # type: ignore - t.logdir.mkdir(parents=True, exist_ok=True) # type: ignore t.setup(hpoed_params) start = time.time() @@ -78,5 +75,9 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: pd.DataFrame(data).to_csv(step_data_fpath, index=False) t.cleanup() + # Output the step data. - pd.DataFrame(data).to_csv(step_data_fpath, index=False) \ No newline at end of file + pd.DataFrame(data).to_csv(step_data_fpath, index=False) + + # Link the tuning steps data (more details than step data). + link_result(dbgym_cfg, ) \ No newline at end of file diff --git a/tune/protox/embedding/select.py b/tune/protox/embedding/select.py index df9c9194..26e3c8d4 100644 --- a/tune/protox/embedding/select.py +++ b/tune/protox/embedding/select.py @@ -28,7 +28,7 @@ def select_best_embeddings(dbgym_cfg: DBGymConfig, generic_args: EmbeddingTrainG data = _attach(data, raw_data, select_args.idx_limit) curated_dpath = dbgym_cfg.cur_task_runs_data_path("curated", mkdir=True) - curated_results_fpath = dbgym_cfg.cur_task_runs_data_path(".", mkdir=True) / "curated_results.csv" + curated_results_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "curated_results.csv" data.to_csv( curated_results_fpath, index=False ) diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 8c339b6a..b1ac19bf 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -56,13 +56,13 @@ def __init__( trace: bool, verbose: bool, output_log_path: str, - tuning_steps_path: str, + tuning_steps_dpath: str, tensorboard_path: str, ) -> None: self.trace = trace self.verbose = verbose - self.tuning_steps_path = tuning_steps_path - Path(tuning_steps_path).mkdir(parents=True, exist_ok=True) + self.tuning_steps_dpath = tuning_steps_dpath + Path(tuning_steps_dpath).mkdir(parents=True, exist_ok=True) level = logging.INFO if not self.verbose else logging.DEBUG formatter = "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" @@ -95,21 +95,21 @@ def stash_results( time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") time = name_override if name_override else time if info_dict["results"] is not None and Path(info_dict["results"]).exists(): - local["mv"][info_dict["results"], f"{self.tuning_steps_path}/{time}"].run() + local["mv"][info_dict["results"], f"{self.tuning_steps_dpath}/{time}"].run() else: - Path(f"{self.tuning_steps_path}/{time}").mkdir(parents=True, exist_ok=True) + Path(f"{self.tuning_steps_dpath}/{time}").mkdir(parents=True, exist_ok=True) if info_dict["prior_pgconf"]: local["mv"][ - info_dict["prior_pgconf"], f"{self.tuning_steps_path}/{time}/old_pg.conf" + info_dict["prior_pgconf"], f"{self.tuning_steps_dpath}/{time}/old_pg.conf" ].run() if info_dict["prior_state_container"]: - with open(f"{self.tuning_steps_path}/{time}/prior_state.txt", "w") as f: + with open(f"{self.tuning_steps_dpath}/{time}/prior_state.txt", "w") as f: f.write(str(info_dict["prior_state_container"])) if info_dict["action_json"]: - with 
open(f"{self.tuning_steps_path}/{time}/action.txt", "w") as f: + with open(f"{self.tuning_steps_dpath}/{time}/action.txt", "w") as f: f.write(info_dict["action_json"]) def advance(self) -> None: diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py index 4801336b..9ba1e107 100644 --- a/tune/protox/env/util/pg_conn.py +++ b/tune/protox/env/util/pg_conn.py @@ -30,16 +30,13 @@ def __init__( pristine_pgdata_snapshot_fpath: Path, pgdata_parent_dpath: Path, pgbin_path: Union[str, Path], - postgres_logs_dir: Union[str, Path], connect_timeout: int, logger: Logger, ) -> None: - Path(postgres_logs_dir).mkdir(parents=True, exist_ok=True) self.dbgym_cfg = dbgym_cfg self.pgport = pgport self.pgbin_path = pgbin_path - self.postgres_logs_dir = postgres_logs_dir self.connect_timeout = connect_timeout self.log_step = 0 self.logger = logger @@ -77,10 +74,12 @@ def disconnect(self) -> None: self._conn = None def move_log(self) -> None: - if Path(f"{self.postgres_logs_dir}/pg.log").exists(): + pglog_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log" + pglog_this_step_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg.log.{self.log_step}" + if pglog_fpath.exists(): shutil.move( - f"{self.postgres_logs_dir}/pg.log", - f"{self.postgres_logs_dir}/pg.log.{self.log_step}", + pglog_fpath, + pglog_this_step_fpath ) self.log_step += 1 @@ -164,7 +163,7 @@ def start_with_changes( "-t", "180", "-l", - f"{self.postgres_logs_dir}/pg.log", + self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log" "start", ].run(retcode=None) From cd92d820fc24e8b2e2b2e586914bf5e114184707 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 18:10:01 +0000 Subject: [PATCH 007/100] got rid of output_log_dir entirely --- scripts/pat_test.sh | 4 ++++ tune/protox/agent/build_trial.py | 4 +--- tune/protox/agent/coerce_config.py | 1 - tune/protox/agent/hpo.py | 1 - tune/protox/env/logger.py | 19 ++++++++++--------- tune/protox/env/util/pg_conn.py | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index a9a78820..bf08afe8 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -4,6 +4,10 @@ set -euxo pipefail SCALE_FACTOR=0.01 +# testing +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +exit 0 + # benchmark python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index c55cb2e9..5f138055 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -133,11 +133,9 @@ def _build_utilities( dbgym_cfg: DBGymConfig, pgport: int, hpoed_params: dict[str, Any] ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]: logger = Logger( + dbgym_cfg, hpoed_params["trace"], hpoed_params["verbose"], - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"], - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"] / "tuning_steps", - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / hpoed_params["output_log_path"] / "tboard", ) reward_utility = RewardUtility( diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py index 1736f264..22a99094 
100644 --- a/tune/protox/agent/coerce_config.py +++ b/tune/protox/agent/coerce_config.py @@ -27,7 +27,6 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpoed_params: d "duration": hpoed_params["mythril_args"]["duration"], "workload_timeout": hpoed_params["mythril_args"]["workload_timeout"], "query_timeout": hpoed_params["mythril_args"]["timeout"], - "output_log_path": "artifacts", "pgconn_info": { "pgport": 5432, "pguser": "admin", diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 15d2f74d..c8d7d963 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -253,7 +253,6 @@ def build_space( # Paths. "workload_path": str(workload_path), - "output_log_path": "artifacts/", "pgconn_info": pgconn_info, "benchmark_config": benchmark_config, "benchbase_config": benchbase_config, diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index b1ac19bf..638cb228 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -11,6 +11,8 @@ from torch.utils.tensorboard import SummaryWriter # type: ignore from typing_extensions import ParamSpec +from misc.utils import DBGymConfig + P = ParamSpec("P") T = TypeVar("T") @@ -53,24 +55,23 @@ def default(self, obj: Any) -> Any: class Logger(object): def __init__( self, + dbgym_cfg: DBGymConfig, trace: bool, verbose: bool, - output_log_path: str, - tuning_steps_dpath: str, - tensorboard_path: str, ) -> None: + self.log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) self.trace = trace self.verbose = verbose - self.tuning_steps_dpath = tuning_steps_dpath - Path(tuning_steps_dpath).mkdir(parents=True, exist_ok=True) + self.tensorboard_dpath = self.log_dpath / "tboard" + self.tuning_steps_dpath = self.log_dpath / "tuning_steps" + self.tuning_steps_dpath.mkdir(parents=True, exist_ok=True) level = logging.INFO if not self.verbose else logging.DEBUG formatter = "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" logging.basicConfig(format=formatter, level=level, force=True) # Setup the file logger. - Path(output_log_path).mkdir(parents=True, exist_ok=True) - file_logger = logging.FileHandler("{}/output.log".format(output_log_path)) + file_logger = logging.FileHandler(self.log_dpath / "output.log") file_logger.setFormatter(logging.Formatter(formatter)) file_logger.setLevel(level) logging.getLogger().addHandler(file_logger) @@ -78,8 +79,8 @@ def __init__( # Setup the writer. 
self.writer: Union[SummaryWriter, None] = None if self.trace: - Path(tensorboard_path).mkdir(parents=True, exist_ok=True) - self.writer = SummaryWriter(tensorboard_path) # type: ignore + self.tensorboard_dpath.mkdir(parents=True, exist_ok=True) + self.writer = SummaryWriter(self.tensorboard_dpath) # type: ignore self.iteration = 1 self.iteration_data: dict[str, Any] = {} diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py index 9ba1e107..f6931114 100644 --- a/tune/protox/env/util/pg_conn.py +++ b/tune/protox/env/util/pg_conn.py @@ -163,7 +163,7 @@ def start_with_changes( "-t", "180", "-l", - self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log" + self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log", "start", ].run(retcode=None) From 207097ab0480f053358a2e92dc3db3ef9cfc0600 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 18:41:35 +0000 Subject: [PATCH 008/100] ray results now in dbgym workspace --- scripts/pat_test.sh | 4 ++-- tune/protox/agent/hpo.py | 8 +++++++- tune/protox/embedding/train_all.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index bf08afe8..a09d65b0 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,7 +5,7 @@ set -euxo pipefail SCALE_FACTOR=0.01 # testing -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.001 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ exit 0 # benchmark @@ -22,5 +22,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.001 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index c8d7d963..289d5559 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -1,3 +1,4 @@ +import shutil import sys import time import json @@ -578,6 +579,10 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: sync_config=SyncConfig(), verbose=2, log_to_file=True, + # I call it hpo_ray_results because agent tuning also uses Ray and stores its results + # in tune_ray_results. By making them separate, we avoid the possibility of + # file collisions. 
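+        # (Ray should then write each trial under storage_path/<run name>,
+        # keeping results inside the dbgym workspace instead of the default
+        # ~/ray_results.)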
+ storage_path=dbgym_cfg.cur_task_runs_path("hpo_ray_results", mkdir=True), ) tuner = ray.tune.Tuner( @@ -594,4 +599,5 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: print(f"Trial {results[i]} FAILED") assert False, print("Encountered exceptions!") best_result = results.get_best_result(metric=METRIC_NAME, mode=mode) - print(f"best_result={best_result}") + best_params_fpath = Path(best_result.path) / "params.json" + print(f"best_params_fpath={best_params_fpath}") diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py index b7464eaa..b6a4144c 100644 --- a/tune/protox/embedding/train_all.py +++ b/tune/protox/embedding/train_all.py @@ -212,11 +212,11 @@ def train_all_embeddings( dtime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") run_config = RunConfig( name=f"ProtoXEmbeddingHPO_{dtime}", - storage_path=None, failure_config=FailureConfig(max_failures=0, fail_fast=True), sync_config=SyncConfig(), verbose=2, log_to_file=True, + storage_path=dbgym_cfg.cur_task_runs_path("tune_ray_results", mkdir=True), ) resources = {"cpu": 1} From 93b0988d2c46acb56acd29ee1412f6518fa5da8f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 18:46:15 +0000 Subject: [PATCH 009/100] now linking hpo-ed params in symlinks --- misc/utils.py | 5 ++++- tune/protox/agent/hpo.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index 9a8788af..c5b793cc 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -87,11 +87,14 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: / "data" / default_embedder_dname(benchmark_name, workload_name) ) +default_hpoed_agent_params_fname = ( + lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_hpoed_agent_params.json" +) default_hpoed_agent_params_path = ( lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(workspace_path) / "dbgym_tune_protox_agent" / "data" - / f"{benchmark_name}_{workload_name}_hpoed_agent_params.json" + / default_hpoed_agent_params_fname(benchmark_name, workload_name) ) workload_name_fn = ( lambda scale_factor, seed_start, seed_end, query_subset : f"workload_sf{get_scale_factor_string(scale_factor)}_{seed_start}_{seed_end}_{query_subset}" diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 289d5559..1ad78204 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -23,7 +23,7 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DBGymConfig, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath +from misc.utils import DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" @@ -600,4 +600,4 @@ def 
_tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: assert False, print("Encountered exceptions!") best_result = results.get_best_result(metric=METRIC_NAME, mode=mode) best_params_fpath = Path(best_result.path) / "params.json" - print(f"best_params_fpath={best_params_fpath}") + link_result(dbgym_cfg, best_params_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name)) From d2bb70956348dca48d8157cc1f9352b5cd66d3f3 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 19:13:37 +0000 Subject: [PATCH 010/100] now linking tuning steps --- misc/utils.py | 7 +- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 642 ++++++++++++++--------------- tune/protox/agent/tune.py | 5 +- tune/protox/embedding/train_all.py | 3 + 5 files changed, 333 insertions(+), 326 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index c5b793cc..b9506b35 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -126,11 +126,14 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: ) / "dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin" ) +default_tuning_steps_dname = ( + lambda benchmark_name, workload_name, enable_boot_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if enable_boot_during_tune else ''}_tuning_steps" +) default_tuning_steps_dpath = ( - lambda workspace_path, benchmark_name, workload_name, boot_enabled: get_symlinks_path_from_workspace_path( + lambda workspace_path, benchmark_name, workload_name, enable_boot_during_tune: get_symlinks_path_from_workspace_path( workspace_path ) - / "dbgym_tune_protox_agent" / "artifacts" / f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled else ''}_tuning_steps" + / "dbgym_tune_protox_agent" / "artifacts" / default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune) ) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index a09d65b0..724dc8c1 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,7 +5,7 @@ set -euxo pipefail SCALE_FACTOR=0.01 # testing -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.001 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 7f9a54eb..e41b94c4 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -1,25 +1,25 @@ -import datetime -import logging +# import datetime +# import logging import click -import yaml -import pandas as pd -import tqdm -import argparse -from pathlib import Path -from dateutil.parser import parse +# import yaml +# import pandas as pd +# import tqdm +# import argparse +# from pathlib import Path +# from dateutil.parser import parse -import sys +# import sys from misc.utils import DBGymConfig -sys.path.append("/home/wz2/mythril") +# sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required -from envs.spec import Spec -from tune.protox.env.pg_env import PostgresEnv +# from envs.spec import Spec +# from tune.protox.env.pg_env import PostgresEnv -class DotDict(dict): - __getattr__ = dict.get - __setattr__ = dict.__setitem__ - __delattr__ = dict.__delitem__ +# class DotDict(dict): +# __getattr__ = dict.get +# __setattr__ = dict.__setitem__ +# __delattr__ = dict.__delitem__ @click.command() @@ 
-41,308 +41,308 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en pass -def gogo(args): - maximal = args.maximal - maximal_only = args.maximal_only - threshold = args.threshold - - with open(f"{args.input}/config.yaml") as f: - mythril = yaml.safe_load(f) - mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" - mythril["mythril"]["verbose"] = True - mythril["mythril"]["postgres_path"] = args.pg_path - - with open(f"{args.input}/config.yaml2", "w") as f: - yaml.dump(mythril, stream=f, default_flow_style=False) - - if args.alternate: - horizon = args.horizon - per_query_timeout = args.query_timeout - else: - with open(f"{args.input}/stdout", "r") as f: - config = f.readlines()[0] - config = eval(config.split("HPO Configuration: ")[-1]) - horizon = config["horizon"] - - with open(f"{args.input}/stdout", "r") as f: - for line in f: - if "HPO Configuration: " in line: - hpo = eval(line.split("HPO Configuration: ")[-1].strip()) - per_query_timeout = hpo["mythril_args"]["timeout"] - - folders = [] - start_found = False - filename = "output.log" if args.alternate else "stderr" - last_evaluation = None - with open(f"{args.input}/{filename}", "r") as f: - for line in f: - if not start_found: - if "Baseline Metric" in line: - start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) - start_found = True - else: - if "mv" in line and "tuning_steps" in line: - repo = eval(line.split("Running ")[-1])[-1] - last_folder = repo.split("/")[-1] - time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) - last_evaluation = time_since_start - if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: - folders.append(last_folder) - - # Only apply threshold if time is less than. - threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) - - spec = Spec( - agent_type=None, - seed=0, - horizon=horizon, - config_path=f"{args.input}/config.yaml2", - benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", - workload_timeout=0) - - env = PostgresEnv( - spec, - horizon=horizon, - timeout=None, - reward_utility=None, - logger=None, - replay=True) - - if not args.simulated: - env.restore_pristine_snapshot() - env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) - spec.workload.reset() - - # Get the minimum reward. - runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] - runs = [pd.read_csv(run) for run in runs] - rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] - rewards = sorted(rewards, key=lambda x: x[0]) - min_reward = min([r[0] for r in rewards]) - if maximal: - target = [r[1] for r in rewards if r[0] == min_reward] - assert len(target) >= 1 - if target[0]: - # Don't use maximal if the min maximal is timed out. - # Don't threshold either. - threshold = 0 - maximal = False - # Reject maximal only. - maximal_only = False - logging.warn("Maximal disabled.") - else: - logging.info(f"Maximal found: {min_reward}") - - num_lines = 0 - with open(f"{args.input}/{filename}", "r") as f: - for line in f: - if "Baseline Metric" in line: - num_lines += 1 - elif "mv" in line and "tuning_steps" in line: - num_lines += 1 - - def run_sample(action, timeout): - samples = [] - # This should reliably check that we are loading the correct knobs... 
- ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} - for i in range(args.samples): - runtime = spec.workload._execute_workload( - connection=env.connection, - workload_timeout=timeout, - ql_knobs=ql_knobs, - env_spec=spec, - blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) - samples.append(runtime) - logging.info(f"Runtime: {runtime}") - - if runtime >= args.workload_timeout: - break - - if args.samples == 2 and runtime >= timeout: - break - elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: - break - - return samples - - run_data = [] - pbar = tqdm.tqdm(total=num_lines) - with open(f"{args.input}/{filename}", "r") as f: - current_step = 0 - - start_found = False - start_time = None - timeout = args.workload_timeout - cur_reward_max = timeout - selected_action_knobs = None - noop_index = False - maximal_repo = None - existing_indexes = [] - - for line in f: - # Keep going until we've found the start. - if not start_found: - if "Baseline Metric" in line: - start_found = True - start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) - pbar.update(1) - continue - - elif "Selected action: " in line: - act = eval(line.split("Selected action: ")[-1]) - selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] - noop_index = "NOOP" in act[1][0] - - elif (maximal and ("mv" in line and "tuning_steps" in line)): - maximal_repo = line - - elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): - if "mv" in line and "tuning_steps" in line: - repo = eval(line.split("Running ")[-1])[-1] - time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) - elif "Found new maximal state with" in line: - repo = eval(maximal_repo.split("Running ")[-1])[-1] - time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) - maximal_repo = None - - # Get the evaluation reward. - reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") - assert len(reward.columns) == 6 - has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout - reward = reward["Latency (microseconds)"].sum() / 1e6 - assert reward > 0 - - if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): - index_sqls = [] - knobs = {} - insert_knobs = False - - with open(f"{args.input}/{repo}/act_sql.txt", "r") as f: - for line in f: - line = line.strip() - if len(line) == 0: - insert_knobs = True - elif not insert_knobs: - index_sqls.append(line) - else: - k, v = line.split(" = ") - knobs[k] = float(v) - - assert len(index_sqls) > 0 - assert len(knobs) > 0 - with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: - prior_states = eval(f.read()) - all_sc = [s.strip() for s in prior_states[1]] - if not noop_index: - all_sc.extend(index_sqls) - - all_sc = [a for a in all_sc if not "USING btree ()" in a] - index_sqls = all_sc - - execute_sqls = [] - for index_sql in index_sqls: - if index_sql in existing_indexes: - continue - execute_sqls.append(index_sql) - for index_sql in existing_indexes: - if index_sql not in index_sqls: - indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0] - execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}") - - if not args.simulated: - # Reset snapshot. 
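
The execute_sqls construction above is a set reconciliation between the desired indexes and the ones already built. A condensed sketch (diff_indexes is a hypothetical name; it assumes index DDL strings of the form "CREATE INDEX <name> ON ..."):

    def diff_indexes(desired, existing):
        # Build whatever is desired but missing, then drop whatever was
        # built before but is no longer desired.
        stmts = [sql for sql in desired if sql not in existing]
        for sql in existing:
            if sql not in desired:
                name = sql.split("CREATE INDEX")[-1].split(" ON ")[0].strip()
                stmts.append(f"DROP INDEX IF EXISTS {name}")
        return stmts
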
- env.action_space.reset(connection=env.connection, workload=env.workload) - cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) - env.shift_state(cc, execute_sqls, dump_page_cache=True) - existing_indexes = index_sqls - - if not args.simulated: - # Get samples. - run_samples = samples = run_sample(knobs, timeout) - logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") - else: - run_samples = samples = [reward, reward] - - data = { - "step": current_step, - "orig_cost": reward, - "time_since_start": (time_since_start - start_time).total_seconds(), - } - samples = {f"runtime{i}": s for i, s in enumerate(samples)} - data.update(samples) - run_data.append(data) - - current_step += 1 - - if (not has_timeout) or (max(run_samples) < timeout): - # Apply a tolerance.. - # If we've timed out, only apply threshold only if we've found a strictly better config. - apply_threshold = threshold if time_since_start < threshold_limit else 0 - cur_reward_max = reward - apply_threshold - - if max(run_samples) < timeout: - timeout = max(run_samples) - - run_folder = repo.split("/")[-1] - if run_folder in folders and run_folder == folders[-1]: - break - elif maximal_only and reward == min_reward: - break - pbar.update(1) - - if len(run_data) > 0: - data = { - "step": current_step, - "orig_cost": run_data[-1]["orig_cost"], - "time_since_start": -1, - "runtime0": run_data[-1]["runtime0"], - } - run_data.append(data) - - # Output. - pd.DataFrame(run_data).to_csv(args.output, index=False) - env.close() - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog="UDO Replay") - parser.add_argument("--input", type=Path) - parser.add_argument("--benchmark", type=str) - parser.add_argument("--workload-timeout", type=int) - parser.add_argument("--samples", type=int) - parser.add_argument("--threshold", type=float) - parser.add_argument("--threshold-limit", type=float, default=0) - parser.add_argument("--maximal", action="store_true") - parser.add_argument("--simulated", action="store_true") - parser.add_argument("--maximal-only", action="store_true") - parser.add_argument("--alternate", action="store_true", default=False) - parser.add_argument("--query_timeout", type=int, default=0) - parser.add_argument("--horizon", type=int, default=0) - parser.add_argument("--cutoff", type=float, default=0) - parser.add_argument("--blocklist", default="") - parser.add_argument("--pg-path", type=str, default="/mnt/nvme0n1/wz2/noisepage") - - parser.add_argument("--output-path", type=str, default="out.csv") - args = parser.parse_args() - - while True: - pargs = DotDict(vars(args)) - output_path = args.output_path - - runs = Path(pargs.input).rglob("config.yaml") - runs = sorted([f for f in runs if not (f.parent / output_path).exists()]) - for run in tqdm.tqdm([f for f in runs], leave=False): - if args.simulated: - adjust_output = run.parent / "out_simulated.csv" - else: - adjust_output = run.parent / args.output_path - - if adjust_output.exists(): - continue - - print(f"Parsing {run.parent}") - new_args = pargs - new_args.input = run.parent - new_args.output = adjust_output - gogo(new_args) - - break \ No newline at end of file +# def gogo(args): +# maximal = args.maximal +# maximal_only = args.maximal_only +# threshold = args.threshold + +# with open(f"{args.input}/config.yaml") as f: +# mythril = yaml.safe_load(f) +# mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" +# mythril["mythril"]["verbose"] = 
True +# mythril["mythril"]["postgres_path"] = args.pg_path + +# with open(f"{args.input}/config.yaml2", "w") as f: +# yaml.dump(mythril, stream=f, default_flow_style=False) + +# if args.alternate: +# horizon = args.horizon +# per_query_timeout = args.query_timeout +# else: +# with open(f"{args.input}/stdout", "r") as f: +# config = f.readlines()[0] +# config = eval(config.split("HPO Configuration: ")[-1]) +# horizon = config["horizon"] + +# with open(f"{args.input}/stdout", "r") as f: +# for line in f: +# if "HPO Configuration: " in line: +# hpo = eval(line.split("HPO Configuration: ")[-1].strip()) +# per_query_timeout = hpo["mythril_args"]["timeout"] + +# folders = [] +# start_found = False +# filename = "output.log" if args.alternate else "stderr" +# last_evaluation = None +# with open(f"{args.input}/{filename}", "r") as f: +# for line in f: +# if not start_found: +# if "Baseline Metric" in line: +# start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) +# start_found = True +# else: +# if "mv" in line and "tuning_steps" in line: +# repo = eval(line.split("Running ")[-1])[-1] +# last_folder = repo.split("/")[-1] +# time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) +# last_evaluation = time_since_start +# if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: +# folders.append(last_folder) + +# # Only apply threshold if time is less than. +# threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) + +# spec = Spec( +# agent_type=None, +# seed=0, +# horizon=horizon, +# config_path=f"{args.input}/config.yaml2", +# benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", +# workload_timeout=0) + +# env = PostgresEnv( +# spec, +# horizon=horizon, +# timeout=None, +# reward_utility=None, +# logger=None, +# replay=True) + +# if not args.simulated: +# env.restore_pristine_snapshot() +# env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) +# spec.workload.reset() + +# # Get the minimum reward. +# runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] +# runs = [pd.read_csv(run) for run in runs] +# rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] +# rewards = sorted(rewards, key=lambda x: x[0]) +# min_reward = min([r[0] for r in rewards]) +# if maximal: +# target = [r[1] for r in rewards if r[0] == min_reward] +# assert len(target) >= 1 +# if target[0]: +# # Don't use maximal if the min maximal is timed out. +# # Don't threshold either. +# threshold = 0 +# maximal = False +# # Reject maximal only. +# maximal_only = False +# logging.warn("Maximal disabled.") +# else: +# logging.info(f"Maximal found: {min_reward}") + +# num_lines = 0 +# with open(f"{args.input}/{filename}", "r") as f: +# for line in f: +# if "Baseline Metric" in line: +# num_lines += 1 +# elif "mv" in line and "tuning_steps" in line: +# num_lines += 1 + +# def run_sample(action, timeout): +# samples = [] +# # This should reliably check that we are loading the correct knobs... 
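
The folder scan above keeps a tuning step only if it was recorded within the cutoff window, where a cutoff of zero disables the filter entirely. A small restatement of that predicate (within_cutoff is a hypothetical name):

    import datetime

    def within_cutoff(ts: datetime.datetime, start_time: datetime.datetime, cutoff_hours: float) -> bool:
        # cutoff_hours == 0 means "keep every step"; otherwise keep only
        # steps logged within the first cutoff_hours of the tuning run.
        return cutoff_hours == 0 or (ts - start_time).total_seconds() < cutoff_hours * 3600
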
+# ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} +# for i in range(args.samples): +# runtime = spec.workload._execute_workload( +# connection=env.connection, +# workload_timeout=timeout, +# ql_knobs=ql_knobs, +# env_spec=spec, +# blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) +# samples.append(runtime) +# logging.info(f"Runtime: {runtime}") + +# if runtime >= args.workload_timeout: +# break + +# if args.samples == 2 and runtime >= timeout: +# break +# elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: +# break + +# return samples + +# run_data = [] +# pbar = tqdm.tqdm(total=num_lines) +# with open(f"{args.input}/{filename}", "r") as f: +# current_step = 0 + +# start_found = False +# start_time = None +# timeout = args.workload_timeout +# cur_reward_max = timeout +# selected_action_knobs = None +# noop_index = False +# maximal_repo = None +# existing_indexes = [] + +# for line in f: +# # Keep going until we've found the start. +# if not start_found: +# if "Baseline Metric" in line: +# start_found = True +# start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) +# pbar.update(1) +# continue + +# elif "Selected action: " in line: +# act = eval(line.split("Selected action: ")[-1]) +# selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] +# noop_index = "NOOP" in act[1][0] + +# elif (maximal and ("mv" in line and "tuning_steps" in line)): +# maximal_repo = line + +# elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): +# if "mv" in line and "tuning_steps" in line: +# repo = eval(line.split("Running ")[-1])[-1] +# time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) +# elif "Found new maximal state with" in line: +# repo = eval(maximal_repo.split("Running ")[-1])[-1] +# time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) +# maximal_repo = None + +# # Get the evaluation reward. +# reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") +# assert len(reward.columns) == 6 +# has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout +# reward = reward["Latency (microseconds)"].sum() / 1e6 +# assert reward > 0 + +# if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): +# index_sqls = [] +# knobs = {} +# insert_knobs = False + +# with open(f"{args.input}/{repo}/act_sql.txt", "r") as f: +# for line in f: +# line = line.strip() +# if len(line) == 0: +# insert_knobs = True +# elif not insert_knobs: +# index_sqls.append(line) +# else: +# k, v = line.split(" = ") +# knobs[k] = float(v) + +# assert len(index_sqls) > 0 +# assert len(knobs) > 0 +# with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: +# prior_states = eval(f.read()) +# all_sc = [s.strip() for s in prior_states[1]] +# if not noop_index: +# all_sc.extend(index_sqls) + +# all_sc = [a for a in all_sc if not "USING btree ()" in a] +# index_sqls = all_sc + +# execute_sqls = [] +# for index_sql in index_sqls: +# if index_sql in existing_indexes: +# continue +# execute_sqls.append(index_sql) +# for index_sql in existing_indexes: +# if index_sql not in index_sqls: +# indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0] +# execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}") + +# if not args.simulated: +# # Reset snapshot. 
+# env.action_space.reset(connection=env.connection, workload=env.workload) +# cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) +# env.shift_state(cc, execute_sqls, dump_page_cache=True) +# existing_indexes = index_sqls + +# if not args.simulated: +# # Get samples. +# run_samples = samples = run_sample(knobs, timeout) +# logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") +# else: +# run_samples = samples = [reward, reward] + +# data = { +# "step": current_step, +# "orig_cost": reward, +# "time_since_start": (time_since_start - start_time).total_seconds(), +# } +# samples = {f"runtime{i}": s for i, s in enumerate(samples)} +# data.update(samples) +# run_data.append(data) + +# current_step += 1 + +# if (not has_timeout) or (max(run_samples) < timeout): +# # Apply a tolerance.. +# # If we've timed out, only apply threshold only if we've found a strictly better config. +# apply_threshold = threshold if time_since_start < threshold_limit else 0 +# cur_reward_max = reward - apply_threshold + +# if max(run_samples) < timeout: +# timeout = max(run_samples) + +# run_folder = repo.split("/")[-1] +# if run_folder in folders and run_folder == folders[-1]: +# break +# elif maximal_only and reward == min_reward: +# break +# pbar.update(1) + +# if len(run_data) > 0: +# data = { +# "step": current_step, +# "orig_cost": run_data[-1]["orig_cost"], +# "time_since_start": -1, +# "runtime0": run_data[-1]["runtime0"], +# } +# run_data.append(data) + +# # Output. +# pd.DataFrame(run_data).to_csv(args.output, index=False) +# env.close() + +# if __name__ == "__main__": +# parser = argparse.ArgumentParser(prog="UDO Replay") +# parser.add_argument("--input", type=Path) +# parser.add_argument("--benchmark", type=str) +# parser.add_argument("--workload-timeout", type=int) +# parser.add_argument("--samples", type=int) +# parser.add_argument("--threshold", type=float) +# parser.add_argument("--threshold-limit", type=float, default=0) +# parser.add_argument("--maximal", action="store_true") +# parser.add_argument("--simulated", action="store_true") +# parser.add_argument("--maximal-only", action="store_true") +# parser.add_argument("--alternate", action="store_true", default=False) +# parser.add_argument("--query_timeout", type=int, default=0) +# parser.add_argument("--horizon", type=int, default=0) +# parser.add_argument("--cutoff", type=float, default=0) +# parser.add_argument("--blocklist", default="") +# parser.add_argument("--pg-path", type=str, default="/mnt/nvme0n1/wz2/noisepage") + +# parser.add_argument("--output-path", type=str, default="out.csv") +# args = parser.parse_args() + +# while True: +# pargs = DotDict(vars(args)) +# output_path = args.output_path + +# runs = Path(pargs.input).rglob("config.yaml") +# runs = sorted([f for f in runs if not (f.parent / output_path).exists()]) +# for run in tqdm.tqdm([f for f in runs], leave=False): +# if args.simulated: +# adjust_output = run.parent / "out_simulated.csv" +# else: +# adjust_output = run.parent / args.output_path + +# if adjust_output.exists(): +# continue + +# print(f"Parsing {run.parent}") +# new_args = pargs +# new_args.input = run.parent +# new_args.output = adjust_output +# gogo(new_args) + +# break \ No newline at end of file diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index ce7effcd..f9a07cb0 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -5,7 +5,7 @@ import click import pandas as pd -from 
misc.utils import WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn +from misc.utils import WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname from tune.protox.agent.coerce_config import coerce_config from tune.protox.agent.hpo import TuneTrial, build_space @@ -80,4 +80,5 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: pd.DataFrame(data).to_csv(step_data_fpath, index=False) # Link the tuning steps data (more details than step data). - link_result(dbgym_cfg, ) \ No newline at end of file + tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, False) + link_result(dbgym_cfg, dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps"), custom_result_name=tuning_steps_link_dname) \ No newline at end of file diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py index b6a4144c..72cfa715 100644 --- a/tune/protox/embedding/train_all.py +++ b/tune/protox/embedding/train_all.py @@ -216,6 +216,9 @@ def train_all_embeddings( sync_config=SyncConfig(), verbose=2, log_to_file=True, + # I call it tune_ray_results because agent HPO also uses Ray and stores its results + # in hpo_ray_results. By making them separate, we avoid the possibility of + # file collisions. storage_path=dbgym_cfg.cur_task_runs_path("tune_ray_results", mkdir=True), ) From 7cd260f859485d068f04fb508784793b5dfe639d Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 21:03:51 +0000 Subject: [PATCH 011/100] replay main working --- misc/utils.py | 6 +- scripts/pat_test.sh | 3 +- tune/protox/agent/replay.py | 635 +++++++++++++++++------------------- tune/protox/agent/tune.py | 3 +- 4 files changed, 315 insertions(+), 332 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index b9506b35..05c12997 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -127,13 +127,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: / "dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin" ) default_tuning_steps_dname = ( - lambda benchmark_name, workload_name, enable_boot_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if enable_boot_during_tune else ''}_tuning_steps" + lambda benchmark_name, workload_name, boot_enabled_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_tuning_steps" ) default_tuning_steps_dpath = ( - lambda workspace_path, benchmark_name, workload_name, enable_boot_during_tune: get_symlinks_path_from_workspace_path( + lambda workspace_path, benchmark_name, workload_name, boot_enabled_during_tune: get_symlinks_path_from_workspace_path( workspace_path ) - / "dbgym_tune_protox_agent" / "artifacts" / default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune) + / "dbgym_tune_protox_agent" / "data" / default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) ) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 724dc8c1..26d07974 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -22,5 +22,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch 
--scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.001 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index e41b94c4..9af6c453 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -1,25 +1,25 @@ -# import datetime -# import logging +import datetime +import logging import click -# import yaml -# import pandas as pd -# import tqdm -# import argparse -# from pathlib import Path -# from dateutil.parser import parse +import yaml +import pandas as pd +import tqdm +import argparse +from pathlib import Path +from dateutil.parser import parse + +from misc.utils import DBGymConfig, conv_inputpath_to_realabspath, workload_name_fn, default_tuning_steps_dpath +# sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required -# import sys +from tune.protox.env.pg_env import PostgresEnv -from misc.utils import DBGymConfig -# sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required +class DotDict(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ -# from envs.spec import Spec -# from tune.protox.env.pg_env import PostgresEnv -# class DotDict(dict): -# __getattr__ = dict.get -# __setattr__ = dict.__setitem__ -# __delattr__ = dict.__delitem__ +REPLAY_DATA_FNAME = "replay_data.csv" @click.command() @@ -35,314 +35,295 @@ @click.option( "--scale-factor", default=1.0, - help=f"The scale factor used when generating the data of the benchmark.", + help="The scale factor used when generating the data of the benchmark.", +) +@click.option( + "--boot-enabled-during-tune", + is_flag=True, + help="Whether Boot was enabled during tuning.", +) +@click.option( + "--tuning-steps-dpath", + default=None, + type=Path, + help="The path to the `tuning_steps` directory to be replayed." +) +@click.option( + "--simulated", + is_flag=True, + help="Set to true to use the runtimes from the original tuning run instead of replaying the workload." 
) -def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float) -> None: - pass - - -# def gogo(args): -# maximal = args.maximal -# maximal_only = args.maximal_only -# threshold = args.threshold - -# with open(f"{args.input}/config.yaml") as f: -# mythril = yaml.safe_load(f) -# mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" -# mythril["mythril"]["verbose"] = True -# mythril["mythril"]["postgres_path"] = args.pg_path - -# with open(f"{args.input}/config.yaml2", "w") as f: -# yaml.dump(mythril, stream=f, default_flow_style=False) - -# if args.alternate: -# horizon = args.horizon -# per_query_timeout = args.query_timeout -# else: -# with open(f"{args.input}/stdout", "r") as f: -# config = f.readlines()[0] -# config = eval(config.split("HPO Configuration: ")[-1]) -# horizon = config["horizon"] - -# with open(f"{args.input}/stdout", "r") as f: -# for line in f: -# if "HPO Configuration: " in line: -# hpo = eval(line.split("HPO Configuration: ")[-1].strip()) -# per_query_timeout = hpo["mythril_args"]["timeout"] - -# folders = [] -# start_found = False -# filename = "output.log" if args.alternate else "stderr" -# last_evaluation = None -# with open(f"{args.input}/{filename}", "r") as f: -# for line in f: -# if not start_found: -# if "Baseline Metric" in line: -# start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) -# start_found = True -# else: -# if "mv" in line and "tuning_steps" in line: -# repo = eval(line.split("Running ")[-1])[-1] -# last_folder = repo.split("/")[-1] -# time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) -# last_evaluation = time_since_start -# if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: -# folders.append(last_folder) - -# # Only apply threshold if time is less than. -# threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) - -# spec = Spec( -# agent_type=None, -# seed=0, -# horizon=horizon, -# config_path=f"{args.input}/config.yaml2", -# benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", -# workload_timeout=0) - -# env = PostgresEnv( -# spec, -# horizon=horizon, -# timeout=None, -# reward_utility=None, -# logger=None, -# replay=True) - -# if not args.simulated: -# env.restore_pristine_snapshot() -# env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) -# spec.workload.reset() - -# # Get the minimum reward. -# runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] -# runs = [pd.read_csv(run) for run in runs] -# rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] -# rewards = sorted(rewards, key=lambda x: x[0]) -# min_reward = min([r[0] for r in rewards]) -# if maximal: -# target = [r[1] for r in rewards if r[0] == min_reward] -# assert len(target) >= 1 -# if target[0]: -# # Don't use maximal if the min maximal is timed out. -# # Don't threshold either. -# threshold = 0 -# maximal = False -# # Reject maximal only. 
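
The new replay() entry point derives its default input from the default_tuning_steps_dname/dpath lambdas patched into misc/utils.py earlier in this series. A sketch of what that resolution amounts to, assuming the symlinks root resolves to <workspace>/symlinks (that component is an assumption here, not taken from the diff):

    from pathlib import Path

    def guess_default_tuning_steps_dpath(workspace: Path, benchmark: str, workload: str, boot: bool) -> Path:
        # Name format from default_tuning_steps_dname:
        #   <benchmark>_<workload>[_boot]_tuning_steps
        dname = f"{benchmark}_{workload}{'_boot' if boot else ''}_tuning_steps"
        return workspace / "symlinks" / "dbgym_tune_protox_agent" / "data" / dname
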
-# maximal_only = False -# logging.warn("Maximal disabled.") -# else: -# logging.info(f"Maximal found: {min_reward}") - -# num_lines = 0 -# with open(f"{args.input}/{filename}", "r") as f: -# for line in f: -# if "Baseline Metric" in line: -# num_lines += 1 -# elif "mv" in line and "tuning_steps" in line: -# num_lines += 1 - -# def run_sample(action, timeout): -# samples = [] -# # This should reliably check that we are loading the correct knobs... -# ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} -# for i in range(args.samples): -# runtime = spec.workload._execute_workload( -# connection=env.connection, -# workload_timeout=timeout, -# ql_knobs=ql_knobs, -# env_spec=spec, -# blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) -# samples.append(runtime) -# logging.info(f"Runtime: {runtime}") - -# if runtime >= args.workload_timeout: -# break - -# if args.samples == 2 and runtime >= timeout: -# break -# elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: -# break - -# return samples - -# run_data = [] -# pbar = tqdm.tqdm(total=num_lines) -# with open(f"{args.input}/{filename}", "r") as f: -# current_step = 0 - -# start_found = False -# start_time = None -# timeout = args.workload_timeout -# cur_reward_max = timeout -# selected_action_knobs = None -# noop_index = False -# maximal_repo = None -# existing_indexes = [] - -# for line in f: -# # Keep going until we've found the start. -# if not start_found: -# if "Baseline Metric" in line: -# start_found = True -# start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) -# pbar.update(1) -# continue - -# elif "Selected action: " in line: -# act = eval(line.split("Selected action: ")[-1]) -# selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] -# noop_index = "NOOP" in act[1][0] - -# elif (maximal and ("mv" in line and "tuning_steps" in line)): -# maximal_repo = line - -# elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): -# if "mv" in line and "tuning_steps" in line: -# repo = eval(line.split("Running ")[-1])[-1] -# time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) -# elif "Found new maximal state with" in line: -# repo = eval(maximal_repo.split("Running ")[-1])[-1] -# time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) -# maximal_repo = None - -# # Get the evaluation reward. 
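
The line parsing above assumes DEBUG log lines of roughly the shape "DEBUG:<timestamp> Running ['mv', '<src>', '<...>/tuning_steps/<step>']" and takes the destination (the last argv element) as the step folder. A sketch of that extraction (split_mv_line is a hypothetical name; the line format is inferred from the parsing code, and ast.literal_eval stands in for the eval() used there):

    import ast
    from dateutil.parser import parse

    def split_mv_line(line: str):
        # Timestamp sits between "DEBUG:" and " Running"; the mv target is
        # the last element of the logged argv list.
        ts = parse(line.split("DEBUG:")[-1].split(" Running")[0])
        dest = ast.literal_eval(line.split("Running ")[-1])[-1]
        return ts, dest
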
-# reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") -# assert len(reward.columns) == 6 -# has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout -# reward = reward["Latency (microseconds)"].sum() / 1e6 -# assert reward > 0 - -# if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): -# index_sqls = [] -# knobs = {} -# insert_knobs = False - -# with open(f"{args.input}/{repo}/act_sql.txt", "r") as f: -# for line in f: -# line = line.strip() -# if len(line) == 0: -# insert_knobs = True -# elif not insert_knobs: -# index_sqls.append(line) -# else: -# k, v = line.split(" = ") -# knobs[k] = float(v) - -# assert len(index_sqls) > 0 -# assert len(knobs) > 0 -# with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: -# prior_states = eval(f.read()) -# all_sc = [s.strip() for s in prior_states[1]] -# if not noop_index: -# all_sc.extend(index_sqls) - -# all_sc = [a for a in all_sc if not "USING btree ()" in a] -# index_sqls = all_sc - -# execute_sqls = [] -# for index_sql in index_sqls: -# if index_sql in existing_indexes: -# continue -# execute_sqls.append(index_sql) -# for index_sql in existing_indexes: -# if index_sql not in index_sqls: -# indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0] -# execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}") - -# if not args.simulated: -# # Reset snapshot. -# env.action_space.reset(connection=env.connection, workload=env.workload) -# cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) -# env.shift_state(cc, execute_sqls, dump_page_cache=True) -# existing_indexes = index_sqls - -# if not args.simulated: -# # Get samples. -# run_samples = samples = run_sample(knobs, timeout) -# logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") -# else: -# run_samples = samples = [reward, reward] - -# data = { -# "step": current_step, -# "orig_cost": reward, -# "time_since_start": (time_since_start - start_time).total_seconds(), -# } -# samples = {f"runtime{i}": s for i, s in enumerate(samples)} -# data.update(samples) -# run_data.append(data) - -# current_step += 1 - -# if (not has_timeout) or (max(run_samples) < timeout): -# # Apply a tolerance.. -# # If we've timed out, only apply threshold only if we've found a strictly better config. -# apply_threshold = threshold if time_since_start < threshold_limit else 0 -# cur_reward_max = reward - apply_threshold - -# if max(run_samples) < timeout: -# timeout = max(run_samples) - -# run_folder = repo.split("/")[-1] -# if run_folder in folders and run_folder == folders[-1]: -# break -# elif maximal_only and reward == min_reward: -# break -# pbar.update(1) - -# if len(run_data) > 0: -# data = { -# "step": current_step, -# "orig_cost": run_data[-1]["orig_cost"], -# "time_since_start": -1, -# "runtime0": run_data[-1]["runtime0"], -# } -# run_data.append(data) - -# # Output. 
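
The per-step artifact parsing above implies a simple act_sql.txt layout: index DDL lines first, a blank separator line, then one "knob = value" pair per line. A self-contained sketch of that reader (parse_act_sql is a hypothetical name):

    def parse_act_sql(path: str):
        # Index DDL until the first blank line, knob assignments after it.
        index_sqls, knobs, in_knobs = [], {}, False
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    in_knobs = True
                elif not in_knobs:
                    index_sqls.append(line)
                else:
                    k, v = line.split(" = ")
                    knobs[k] = float(v)
        return index_sqls, knobs
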
-# pd.DataFrame(run_data).to_csv(args.output, index=False) -# env.close() - -# if __name__ == "__main__": -# parser = argparse.ArgumentParser(prog="UDO Replay") -# parser.add_argument("--input", type=Path) -# parser.add_argument("--benchmark", type=str) -# parser.add_argument("--workload-timeout", type=int) -# parser.add_argument("--samples", type=int) -# parser.add_argument("--threshold", type=float) -# parser.add_argument("--threshold-limit", type=float, default=0) -# parser.add_argument("--maximal", action="store_true") -# parser.add_argument("--simulated", action="store_true") -# parser.add_argument("--maximal-only", action="store_true") -# parser.add_argument("--alternate", action="store_true", default=False) -# parser.add_argument("--query_timeout", type=int, default=0) -# parser.add_argument("--horizon", type=int, default=0) -# parser.add_argument("--cutoff", type=float, default=0) -# parser.add_argument("--blocklist", default="") -# parser.add_argument("--pg-path", type=str, default="/mnt/nvme0n1/wz2/noisepage") - -# parser.add_argument("--output-path", type=str, default="out.csv") -# args = parser.parse_args() - -# while True: -# pargs = DotDict(vars(args)) -# output_path = args.output_path - -# runs = Path(pargs.input).rglob("config.yaml") -# runs = sorted([f for f in runs if not (f.parent / output_path).exists()]) -# for run in tqdm.tqdm([f for f in runs], leave=False): -# if args.simulated: -# adjust_output = run.parent / "out_simulated.csv" -# else: -# adjust_output = run.parent / args.output_path - -# if adjust_output.exists(): -# continue - -# print(f"Parsing {run.parent}") -# new_args = pargs -# new_args.input = run.parent -# new_args.output = adjust_output -# gogo(new_args) - -# break \ No newline at end of file +def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, simulated: bool) -> None: + # Set args to defaults programmatically (do this before doing anything else in the function) + workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) + if tuning_steps_dpath == None: + tuning_steps_dpath = default_tuning_steps_dpath(dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name, boot_enabled_during_tune) + + # Convert all input paths to absolute paths + tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath) + + # Replay + runs = sorted(tuning_steps_dpath.rglob("config.yaml")) + for run in tqdm.tqdm([f for f in runs], leave=False): + print(f"Parsing {run.parent}") + # gogo(new_args) + + +def gogo(args): + maximal = args.maximal + maximal_only = args.maximal_only + threshold = args.threshold + + with open(f"{args.input}/config.yaml") as f: + mythril = yaml.safe_load(f) + mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" + mythril["mythril"]["verbose"] = True + mythril["mythril"]["postgres_path"] = args.pg_path + + if args.alternate: + horizon = args.horizon + per_query_timeout = args.query_timeout + else: + with open(f"{args.input}/stdout", "r") as f: + config = f.readlines()[0] + config = eval(config.split("HPO Configuration: ")[-1]) + horizon = config["horizon"] + + with open(f"{args.input}/stdout", "r") as f: + for line in f: + if "HPO Configuration: " in line: + hpo = eval(line.split("HPO Configuration: ")[-1].strip()) + per_query_timeout = hpo["mythril_args"]["timeout"] + + folders = [] + start_found = False + filename = "output.log" if args.alternate else 
"stderr" + last_evaluation = None + with open(f"{args.input}/{filename}", "r") as f: + for line in f: + if not start_found: + if "Baseline Metric" in line: + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) + start_found = True + else: + if "mv" in line and "tuning_steps" in line: + repo = eval(line.split("Running ")[-1])[-1] + last_folder = repo.split("/")[-1] + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + last_evaluation = time_since_start + if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: + folders.append(last_folder) + + # Only apply threshold if time is less than. + threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) + + spec = Spec( + agent_type=None, + seed=0, + horizon=horizon, + config_path=f"{args.input}/config.yaml2", + benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", + workload_timeout=0) + + env = PostgresEnv( + spec, + horizon=horizon, + timeout=None, + reward_utility=None, + logger=None, + replay=True) + + if not args.simulated: + env.restore_pristine_snapshot() + env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) + spec.workload.reset() + + # Get the minimum reward. + runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] + runs = [pd.read_csv(run) for run in runs] + rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] + rewards = sorted(rewards, key=lambda x: x[0]) + min_reward = min([r[0] for r in rewards]) + if maximal: + target = [r[1] for r in rewards if r[0] == min_reward] + assert len(target) >= 1 + if target[0]: + # Don't use maximal if the min maximal is timed out. + # Don't threshold either. + threshold = 0 + maximal = False + # Reject maximal only. + maximal_only = False + logging.warn("Maximal disabled.") + else: + logging.info(f"Maximal found: {min_reward}") + + num_lines = 0 + with open(f"{args.input}/{filename}", "r") as f: + for line in f: + if "Baseline Metric" in line: + num_lines += 1 + elif "mv" in line and "tuning_steps" in line: + num_lines += 1 + + def run_sample(action, timeout): + samples = [] + # This should reliably check that we are loading the correct knobs... + ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} + for i in range(args.samples): + runtime = spec.workload._execute_workload( + connection=env.connection, + workload_timeout=timeout, + ql_knobs=ql_knobs, + env_spec=spec, + blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) + samples.append(runtime) + logging.info(f"Runtime: {runtime}") + + if runtime >= args.workload_timeout: + break + + if args.samples == 2 and runtime >= timeout: + break + elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: + break + + return samples + + run_data = [] + pbar = tqdm.tqdm(total=num_lines) + with open(f"{args.input}/{filename}", "r") as f: + current_step = 0 + + start_found = False + start_time = None + timeout = args.workload_timeout + cur_reward_max = timeout + selected_action_knobs = None + noop_index = False + maximal_repo = None + existing_indexes = [] + + for line in f: + # Keep going until we've found the start. 
+ if not start_found: + if "Baseline Metric" in line: + start_found = True + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) + pbar.update(1) + continue + + elif "Selected action: " in line: + act = eval(line.split("Selected action: ")[-1]) + selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] + noop_index = "NOOP" in act[1][0] + + elif (maximal and ("mv" in line and "tuning_steps" in line)): + maximal_repo = line + + elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): + if "mv" in line and "tuning_steps" in line: + repo = eval(line.split("Running ")[-1])[-1] + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + elif "Found new maximal state with" in line: + repo = eval(maximal_repo.split("Running ")[-1])[-1] + time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) + maximal_repo = None + + # Get the evaluation reward. + reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") + assert len(reward.columns) == 6 + has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout + reward = reward["Latency (microseconds)"].sum() / 1e6 + assert reward > 0 + + if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): + index_sqls = [] + knobs = {} + insert_knobs = False + + with open(f"{args.input}/{repo}/act_sql.txt", "r") as f: + for line in f: + line = line.strip() + if len(line) == 0: + insert_knobs = True + elif not insert_knobs: + index_sqls.append(line) + else: + k, v = line.split(" = ") + knobs[k] = float(v) + + assert len(index_sqls) > 0 + assert len(knobs) > 0 + with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: + prior_states = eval(f.read()) + all_sc = [s.strip() for s in prior_states[1]] + if not noop_index: + all_sc.extend(index_sqls) + + all_sc = [a for a in all_sc if not "USING btree ()" in a] + index_sqls = all_sc + + execute_sqls = [] + for index_sql in index_sqls: + if index_sql in existing_indexes: + continue + execute_sqls.append(index_sql) + for index_sql in existing_indexes: + if index_sql not in index_sqls: + indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0] + execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}") + + if not args.simulated: + # Reset snapshot. + env.action_space.reset(connection=env.connection, workload=env.workload) + cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) + env.shift_state(cc, execute_sqls, dump_page_cache=True) + existing_indexes = index_sqls + + if not args.simulated: + # Get samples. + run_samples = samples = run_sample(knobs, timeout) + logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") + else: + run_samples = samples = [reward, reward] + + data = { + "step": current_step, + "orig_cost": reward, + "time_since_start": (time_since_start - start_time).total_seconds(), + } + samples = {f"runtime{i}": s for i, s in enumerate(samples)} + data.update(samples) + run_data.append(data) + + current_step += 1 + + if (not has_timeout) or (max(run_samples) < timeout): + # Apply a tolerance.. + # If we've timed out, only apply threshold only if we've found a strictly better config. 
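
The acceptance bar is then tightened for subsequent configs: the tolerance applies only while we are still inside the threshold window, and the replay timeout shrinks to the best runtime observed so far. A condensed restatement of that inner update (next_bounds is a hypothetical name):

    def next_bounds(reward: float, run_samples: list, timeout: float, threshold: float, within_limit: bool):
        # Future configs must beat this reward (minus the tolerance, if the
        # threshold window is still open) and never run longer than the
        # best sample seen so far.
        cur_reward_max = reward - (threshold if within_limit else 0)
        if max(run_samples) < timeout:
            timeout = max(run_samples)
        return cur_reward_max, timeout
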
+ apply_threshold = threshold if time_since_start < threshold_limit else 0 + cur_reward_max = reward - apply_threshold + + if max(run_samples) < timeout: + timeout = max(run_samples) + + run_folder = repo.split("/")[-1] + if run_folder in folders and run_folder == folders[-1]: + break + elif maximal_only and reward == min_reward: + break + pbar.update(1) + + if len(run_data) > 0: + data = { + "step": current_step, + "orig_cost": run_data[-1]["orig_cost"], + "time_since_start": -1, + "runtime0": run_data[-1]["runtime0"], + } + run_data.append(data) + + # Output. + pd.DataFrame(run_data).to_csv(args.output, index=False) + env.close() \ No newline at end of file diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index f9a07cb0..97e52e80 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -81,4 +81,5 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Link the tuning steps data (more details than step data). tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, False) - link_result(dbgym_cfg, dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps"), custom_result_name=tuning_steps_link_dname) \ No newline at end of file + link_result(dbgym_cfg, dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps"), custom_result_name=tuning_steps_link_dname) + assert False, "b" \ No newline at end of file From aa9b98f480a839bd6a45bb6905dd2a21488e7b36 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 16 Apr 2024 21:36:39 +0000 Subject: [PATCH 012/100] wrote extract_from_task_run_fordpath --- misc/utils.py | 103 +++++++++++++++++++++++--------------- tune/protox/agent/tune.py | 2 +- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index 05c12997..efbb83e1 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -4,6 +4,7 @@ import sys from datetime import datetime from pathlib import Path +from typing import Tuple import click import yaml @@ -342,6 +343,7 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): It takes in a str | Path to match the interface of open(). This file does not work if open_fpath is a symlink, to make its interface identical to that of open(). Make sure to resolve all symlinks with conv_inputpath_to_realabspath(). + To avoid confusion, I'm enforcing this function to only work with absolute paths. See the comment of save_file() for what "saving" means If you are generating a "result" for the run, _do not_ use this. Just use the normal open(). This shouldn't be too hard to remember because this function crashes if open_fpath doesn't exist, @@ -353,7 +355,7 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): - If you open two "config" files of the same name but different paths, only the first open will be saved. - Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked. 
""" - # process/validate open_fpath + # validate open_fpath assert os.path.isabs( open_fpath ), f"open_and_save(): open_fpath ({open_fpath}) should be an absolute path" @@ -364,25 +366,55 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): assert os.path.isfile(open_fpath), f"open_fpath ({open_fpath}) is not a file" # save + print(f"type(open_fpath)={type(open_fpath)}") save_file(dbgym_cfg, open_fpath) # open return open(open_fpath, mode=mode) +def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Path) -> Tuple[Path, str, Path, str]: + """ + The task_runs/ folder is organized like task_runs/run_*/[codebase]/[org]/any/path/you/want. + This function extracts the [codebase] and [org] components + """ + print(f"type(task_run_fordpath)={type(task_run_fordpath)}") + assert not task_run_fordpath.is_symlink() + parent_dpath = os.path.dirname(task_run_fordpath) + assert not os.path.samefile( + parent_dpath, dbgym_cfg.dbgym_runs_path + ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})" + assert not os.path.samefile( + parent_dir(parent_dpath), dbgym_cfg.dbgym_runs_path + ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})" + assert not os.path.samefile( + parent_dir(parent_dir(parent_dpath)), dbgym_cfg.dbgym_runs_path + ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})" + # org_dpath is the run_*/[codebase]/[organization]/ dir that task_run_fordpath is in + org_dpath = parent_dpath + while not os.path.samefile( + parent_dir(parent_dir(parent_dir(org_dpath))), dbgym_cfg.dbgym_runs_path + ): + org_dpath = parent_dir(org_dpath) + org_dname = dir_basename(org_dpath) + codebase_dpath = parent_dir(org_dpath) + codebase_dname = dir_basename(codebase_dpath) + + return codebase_dpath, codebase_dname, org_dpath, org_dname + + # TODO(phw2): after merging agent-train, refactor some code in agent-train to use save_file() instead of open_and_save() def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: """ If an external function takes in a file/directory as input, you will not be able to call open_and_save(). In these situations, just call save_file(). + Like open_and_save(), this function only works with real absolute paths. "Saving" can mean either copying the file or creating a symlink to it We copy the file if it is a "config", meaning it just exists without having been generated We create a symlink if it is a "dependency", meaning a task.py command was run to generate it In these cases we create a symlink so we have full provenance for how the dependency was created """ - # process fpath and ensure that it's a file at the end - fpath = conv_inputpath_to_realabspath(dbgym_cfg, fpath) - fpath = os.path.realpath(fpath) # traverse symlinks + # validate fpath assert not os.path.islink(fpath), f"fpath ({fpath}) should not be a symlink" assert os.path.exists(fpath), f"fpath ({fpath}) does not exist" assert os.path.isfile(fpath), f"fpath ({fpath}) is not a file" @@ -396,31 +428,15 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: # 2. files or dirs generated by a run may be very large (up to 100s of GBs) so we don't want to copy them if is_child_path(fpath, dbgym_cfg.dbgym_runs_path): # get paths we'll need later. 
- parent_dpath = os.path.dirname(fpath) - assert not os.path.samefile( - parent_dpath, dbgym_cfg.dbgym_runs_path - ), f"fpath ({fpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})" - assert not os.path.samefile( - parent_dir(parent_dpath), dbgym_cfg.dbgym_runs_path - ), f"fpath ({fpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})" - assert not os.path.samefile( - parent_dir(parent_dir(parent_dpath)), dbgym_cfg.dbgym_runs_path - ), f"fpath ({fpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})" - # org_dpath is the run_*/[codebase]/[organization]/ dir that fpath is in - org_dpath = parent_dpath - while not os.path.samefile( - parent_dir(parent_dir(parent_dir(org_dpath))), dbgym_cfg.dbgym_runs_path - ): - org_dpath = parent_dir(org_dpath) - org_dname = dir_basename(org_dpath) - codebase_dpath = parent_dir(org_dpath) - codebase_dname = dir_basename(codebase_dpath) + print(f"type(fpath)={type(fpath)}") + _, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, fpath) this_run_save_dpath = os.path.join( dbgym_cfg.dbgym_this_run_path, codebase_dname, org_dname ) os.makedirs(this_run_save_dpath, exist_ok=True) # if the fpath file is directly in org_dpath, we symlink the file directly + parent_dpath = os.path.dirname(fpath) if os.path.samefile(parent_dpath, org_dpath): fname = os.path.basename(fpath) symlink_fpath = os.path.join(this_run_save_dpath, fname) @@ -452,35 +468,44 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: # TODO(phw2): refactor our manual symlinking in postgres/cli.py to use link_result() instead -def link_result(dbgym_cfg: DBGymConfig, result_path: Path, custom_result_name: str | None=None) -> None: +def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_name: str | None=None) -> None: """ - result_path must be a "result", meaning it was generated inside dbgym_cfg.dbgym_this_run_path - result_path itself can be a file or a dir but not a symlink - Create a symlink of the same name to result_path inside [workspace]/data/ - Will override the old symlink if there is one - This is called so that [workspace]/data/ always contains the latest generated version of a file + result_fordpath must be a "result", meaning it was generated inside dbgym_cfg.dbgym_this_run_path. + Further, result_fordpath must have been generated by this invocation to task.py. This also means that + result_fordpath itself can be a file or a dir but not a symlink. + Given a file or directory in task_runs/run_*/[codebase]/[org], this will create a symlink inside + symlinks/[codebase]/[org]/. + Will override the old symlink if there is one, so that symlinks/ always contains the latest generated + version of a file. 
""" - result_path = conv_inputpath_to_realabspath(dbgym_cfg, result_path) - assert is_child_path(result_path, dbgym_cfg.dbgym_this_run_path) - assert not os.path.islink(result_path) + result_fordpath = conv_inputpath_to_realabspath(dbgym_cfg, result_fordpath) + assert is_child_path(result_fordpath, dbgym_cfg.dbgym_this_run_path) + assert not os.path.islink(result_fordpath) if custom_result_name != None: result_name = custom_result_name else: - if os.path.isfile(result_path): - result_name = os.path.basename(result_path) - elif os.path.isdir(result_path): - result_name = dir_basename(result_path) + if os.path.isfile(result_fordpath): + result_name = os.path.basename(result_fordpath) + elif os.path.isdir(result_fordpath): + result_name = dir_basename(result_fordpath) else: - raise AssertionError("result_path must be either a file or dir") - symlink_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True) / result_name + raise AssertionError("result_fordpath must be either a file or dir") + + # Figure out the parent directory path of the symlink + codebase_dpath, codebase_dname, _, org_dname = extract_from_task_run_fordpath(dbgym_cfg, result_fordpath) + # We're only supposed to save files generated by us, which means they should be in cur_task_runs_path() + assert os.path.samefile(codebase_dpath, dbgym_cfg.cur_task_runs_path()), f"link_result should only be called on files generated by this invocation to task.py" + symlink_parent_dpath = dbgym_cfg.dbgym_symlinks_path / codebase_dname / org_dname + symlink_parent_dpath.mkdir(parents=True, exist_ok=True) # Remove the old symlink ("old" meaning created in an earlier run) if there is one # Note that in a multi-threaded setting, this might remove one created by a process in the same run, # meaning it's not "old" by our definition of "old". However, we'll always end up with a symlink # file of the current run regardless of the order of threads. + symlink_path = symlink_parent_dpath / result_name try_remove_file(symlink_path) - try_create_symlink(result_path, symlink_path) + try_create_symlink(result_fordpath, symlink_path) def try_create_symlink(src_path: Path, dst_path: Path) -> None: diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 97e52e80..89f6b7b2 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -42,6 +42,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: hpoed_agent_params_path = conv_inputpath_to_realabspath(dbgym_cfg, hpoed_agent_params_path) # Tune + print(f"type(hpoed_agent_params_path)={type(hpoed_agent_params_path)}") with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: hpoed_params = json.load(f) @@ -82,4 +83,3 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Link the tuning steps data (more details than step data). 
tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, False) link_result(dbgym_cfg, dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps"), custom_result_name=tuning_steps_link_dname) - assert False, "b" \ No newline at end of file From caf0d6c8c093f8c6505f1afc0ee5d08236eab25b Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 13:26:45 +0000 Subject: [PATCH 013/100] now finding all replay dirs --- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 4 +++- tune/protox/env/logger.py | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 26d07974..bbbbbdfa 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,7 +5,7 @@ set -euxo pipefail SCALE_FACTOR=0.01 # testing -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 9af6c453..fa3b7677 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -63,7 +63,9 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath) # Replay - runs = sorted(tuning_steps_dpath.rglob("config.yaml")) + print(f"tuning_steps_dpath={tuning_steps_dpath}") + runs = sorted(tuning_steps_dpath.rglob("run.raw.csv")) + print(f"runs={runs}") for run in tqdm.tqdm([f for f in runs], leave=False): print(f"Parsing {run.parent}") # gogo(new_args) diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 638cb228..b82c8926 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -93,6 +93,9 @@ def get_logger(self, name: Optional[str]) -> logging.Logger: def stash_results( self, info_dict: dict[str, Any], name_override: Optional[str] = None ) -> None: + ''' + Stash data about this step of tuning so that it can be replayed. 
+ ''' time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") time = name_override if name_override else time if info_dict["results"] is not None and Path(info_dict["results"]).exists(): From 64aaf159c05e9de18641b5a456be75e720e55d33 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 13:37:22 +0000 Subject: [PATCH 014/100] added all configs to replay --- misc/utils.py | 3 ++ tune/protox/agent/hpo.py | 4 +-- tune/protox/agent/replay.py | 60 ++++++++++++++++++++++++++++++++----- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index efbb83e1..59fba607 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -8,6 +8,9 @@ import click import yaml +# Default values +DEFAULT_WORKLOAD_TIMEOUT = 600 + # Relpaths of different folders in the codebase TUNE_RELPATH = Path("tune") PROTOX_RELPATH = TUNE_RELPATH / "protox" diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 1ad78204..dac9f576 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -23,7 +23,7 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname +from misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" @@ -147,7 +147,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi ) @click.option( "--workload-timeout", - default=600, + default=DEFAULT_WORKLOAD_TIMEOUT, type=int, help="The timeout (in seconds) of a workload. We run the workload once per DBMS configuration. For OLAP workloads, certain configurations may be extremely suboptimal, so we need to time out the workload.", ) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index fa3b7677..661d89b6 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -8,7 +8,7 @@ from pathlib import Path from dateutil.parser import parse -from misc.utils import DBGymConfig, conv_inputpath_to_realabspath, workload_name_fn, default_tuning_steps_dpath +from misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required from tune.protox.env.pg_env import PostgresEnv @@ -48,12 +48,58 @@ class DotDict(dict): type=Path, help="The path to the `tuning_steps` directory to be replayed." ) +@click.option( + "--workload-timeout", + default=DEFAULT_WORKLOAD_TIMEOUT, + type=int, + help="The timeout (in seconds) of a workload when replaying." 
+) +@click.option( + "--num-samples", + default=1, + type=int, + help="The number of times to run the workload for each DBMS config being evaluated." +) +@click.option( + "--threshold", + default=0, + type=float, + help="The minimum delta between the runtimes of consecutive DBMS configs to warrant a config being evaluated." +) +@click.option( + "--threshold-limit", + default=None, + type=float, + help="Only use threshold within threshold-limit hours from the start. None means \"always use threshold\"." +) +@click.option( + "--maximal", + is_flag=True, + help="If set to true, only evaluate configs that are strictly \"better\"." +) @click.option( "--simulated", is_flag=True, help="Set to true to use the runtimes from the original tuning run instead of replaying the workload." ) -def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, simulated: bool) -> None: +@click.option( + "--maximal-only", + is_flag=True, + help="If set to true, only evaluate the best config" +) +@click.option( + "--cutoff", + default=None, + type=float, + help="Only evaluate configs up to cutoff hours. None means \"evaluate all configs\"." +) +@click.option( + "--blocklist", + default=[], + type=list, + help="Ignore running queries in the blocklist." +) +def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout: bool, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) if tuning_steps_dpath == None: @@ -64,14 +110,12 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en # Replay print(f"tuning_steps_dpath={tuning_steps_dpath}") - runs = sorted(tuning_steps_dpath.rglob("run.raw.csv")) - print(f"runs={runs}") - for run in tqdm.tqdm([f for f in runs], leave=False): - print(f"Parsing {run.parent}") - # gogo(new_args) + tuning_step_dpaths = sorted(tuning_steps_dpath.rglob("run.raw.csv")) + for tuning_step_dpath in tqdm.tqdm(tuning_step_dpaths, leave=False): + replay_step(dbgym_cfg, new_args) -def gogo(args): +def replay_step(dbgym_cfg: DBGymConfig, maximal): maximal = args.maximal maximal_only = args.maximal_only threshold = args.threshold From 6ece74d4c28a4874227b00178984911bdfbd53c4 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 13:43:41 +0000 Subject: [PATCH 015/100] added replayargs and deleted front of replay_step() --- tune/protox/agent/replay.py | 67 ++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 661d89b6..e25f293c 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -8,20 +8,30 @@ from pathlib import Path from dateutil.parser import parse -from misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, workload_name_fn, default_tuning_steps_dpath +from misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure 
out if this is required from tune.protox.env.pg_env import PostgresEnv -class DotDict(dict): - __getattr__ = dict.get - __setattr__ = dict.__setitem__ - __delattr__ = dict.__delitem__ - REPLAY_DATA_FNAME = "replay_data.csv" +class ReplayArgs: + def __init__( + self, workload_timeout: bool, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list + ): + self.workload_timeout = workload_timeout + self.num_samples = num_samples + self.threshold = threshold + self.threshold_limit = threshold_limit + self.maximal = maximal + self.simulated = simulated + self.maximal_only = maximal_only + self.cutoff = cutoff + self.blocklist = blocklist + + @click.command() @click.pass_obj @click.argument("benchmark-name") @@ -108,38 +118,27 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en # Convert all input paths to absolute paths tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath) + # Group args together to reduce the # of parameters we pass into functions + replay_args = ReplayArgs(workload_timeout, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist) + # Replay print(f"tuning_steps_dpath={tuning_steps_dpath}") tuning_step_dpaths = sorted(tuning_steps_dpath.rglob("run.raw.csv")) for tuning_step_dpath in tqdm.tqdm(tuning_step_dpaths, leave=False): - replay_step(dbgym_cfg, new_args) - - -def replay_step(dbgym_cfg: DBGymConfig, maximal): - maximal = args.maximal - maximal_only = args.maximal_only - threshold = args.threshold - - with open(f"{args.input}/config.yaml") as f: - mythril = yaml.safe_load(f) - mythril["mythril"]["benchbase_config_path"] = f"{args.input}/benchmark.xml" - mythril["mythril"]["verbose"] = True - mythril["mythril"]["postgres_path"] = args.pg_path - - if args.alternate: - horizon = args.horizon - per_query_timeout = args.query_timeout - else: - with open(f"{args.input}/stdout", "r") as f: - config = f.readlines()[0] - config = eval(config.split("HPO Configuration: ")[-1]) - horizon = config["horizon"] - - with open(f"{args.input}/stdout", "r") as f: - for line in f: - if "HPO Configuration: " in line: - hpo = eval(line.split("HPO Configuration: ")[-1].strip()) - per_query_timeout = hpo["mythril_args"]["timeout"] + replay_step(dbgym_cfg, tuning_step_dpath, replay_args) + + +def replay_step(dbgym_cfg: DBGymConfig, tuning_step_dpath: Path, replay_args: ReplayArgs): + with open_and_save(dbgym_cfg, tuning_step_dpath / "stdout", "r") as f: + config = f.readlines()[0] + config = eval(config.split("HPO Configuration: ")[-1]) + horizon = config["horizon"] + + with open_and_save(dbgym_cfg, tuning_step_dpath / "stdout", "r") as f: + for line in f: + if "HPO Configuration: " in line: + hpo = eval(line.split("HPO Configuration: ")[-1].strip()) + per_query_timeout = hpo["mythril_args"]["timeout"] folders = [] start_found = False From e341956a6007dbf14a567e07ca79d8a0977c0a79 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 14:46:53 +0000 Subject: [PATCH 016/100] now copying params.json directly into data/ --- misc/utils.py | 3 --- scripts/pat_test.sh | 3 ++- tune/protox/agent/hpo.py | 12 ++++++++++-- tune/protox/agent/tune.py | 1 - 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index 59fba607..372f8c1e 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -369,7 +369,6 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): assert 
os.path.isfile(open_fpath), f"open_fpath ({open_fpath}) is not a file" # save - print(f"type(open_fpath)={type(open_fpath)}") save_file(dbgym_cfg, open_fpath) # open @@ -381,7 +380,6 @@ def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Pa The task_runs/ folder is organized like task_runs/run_*/[codebase]/[org]/any/path/you/want. This function extracts the [codebase] and [org] components """ - print(f"type(task_run_fordpath)={type(task_run_fordpath)}") assert not task_run_fordpath.is_symlink() parent_dpath = os.path.dirname(task_run_fordpath) assert not os.path.samefile( @@ -431,7 +429,6 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: # 2. files or dirs generated by a run may be very large (up to 100s of GBs) so we don't want to copy them if is_child_path(fpath, dbgym_cfg.dbgym_runs_path): # get paths we'll need later. - print(f"type(fpath)={type(fpath)}") _, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, fpath) this_run_save_dpath = os.path.join( dbgym_cfg.dbgym_this_run_path, codebase_dname, org_dname diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index bbbbbdfa..4d281389 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,7 +5,8 @@ set -euxo pipefail SCALE_FACTOR=0.01 # testing -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ +# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index dac9f576..fdf0e40d 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -598,6 +598,14 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: if results[i].error: print(f"Trial {results[i]} FAILED") assert False, print("Encountered exceptions!") + + # Save the best params.json + # Before saving, copy it into run_*/[codebase]/data/. We copy so that when we open the + # params.json file using open_and_save(), it links to the params.json file directly + # instead of to the dir TuneOpt*/. By linking to the params.json file directly, we + # know which params.json file in TuneOpt*/ was actually used for tuning. 
best_result = results.get_best_result(metric=METRIC_NAME, mode=mode) - best_params_fpath = Path(best_result.path) / "params.json" - link_result(dbgym_cfg, best_params_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name)) + best_params_generated_fpath = Path(best_result.path) / "params.json" + best_params_copy_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json" + shutil.copy(best_params_generated_fpath, best_params_copy_fpath) + link_result(dbgym_cfg, best_params_copy_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name)) diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 89f6b7b2..33db9f72 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -42,7 +42,6 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: hpoed_agent_params_path = conv_inputpath_to_realabspath(dbgym_cfg, hpoed_agent_params_path) # Tune - print(f"type(hpoed_agent_params_path)={type(hpoed_agent_params_path)}") with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: hpoed_params = json.load(f) From a531c2b43484c0f99e79540d373f2848f668a452 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 14:54:18 +0000 Subject: [PATCH 017/100] now copying params.json into tuning_steps --- scripts/pat_test.sh | 3 +-- tune/protox/agent/replay.py | 7 +++++++ tune/protox/agent/tune.py | 10 ++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 4d281389..26d07974 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,8 +5,7 @@ set -euxo pipefail SCALE_FACTOR=0.01 # testing -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --duration 0.01 --intended-pgdata-hardware ssd --pgdata-parent-dpath /mnt/nvme1n1/phw2/dbgym_tmp/ -# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index e25f293c..7798405f 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -1,3 +1,10 @@ +''' +Replaying a tuning run gives you the authoritative runtimes of that tuning run. +The original tuning run has per-query timeouts, so the runtimes may be inaccurate. The + replayed tuning run does not have per-query timeouts. +Additionally, the original tuning run may have been accelerated by Boot, whereas the + replayed tuning run is not. +''' import datetime import logging import click diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 33db9f72..d79df899 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -1,6 +1,7 @@ import json import os from pathlib import Path +import shutil import time import click import pandas as pd @@ -79,6 +80,11 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Output the step data. pd.DataFrame(data).to_csv(step_data_fpath, index=False) - # Link the tuning steps data (more details than step data). + # Link the tuning steps data (this directory allows you to replay the tuning run). + # Replaying requires the params.json file, so we also copy it here. 
+ # Since params.json is fairly small, I choose to copy the file itself instead of just + # making a symlink to it. + tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps") + shutil.copy(hpoed_agent_params_path, tuning_steps_dpath) tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, False) - link_result(dbgym_cfg, dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps"), custom_result_name=tuning_steps_link_dname) + link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname) From 4471787ba0a23027449700473d91248fc1792950 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 17:07:58 +0000 Subject: [PATCH 018/100] renamed boot_config_fpath to hpo_boot_config_fpath --- tune/protox/agent/build_trial.py | 3 ++- tune/protox/agent/hpo.py | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 8ef86981..4963f5ca 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -152,6 +152,7 @@ def _build_utilities( # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus, # we start Redis here if necessary. enable_boot = hpoed_params["enable_boot_during_hpo"] if is_hpo else hpoed_params["enable_boot_during_tune"] + boot_config_fpath = hpoed_params["hpo_boot_config_fpath"] if is_hpo else hpoed_params["tune_boot_config_fpath"] if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) @@ -162,7 +163,7 @@ def _build_utilities( pgdata_parent_dpath=Path(hpoed_params["pgconn_info"]["pgdata_parent_dpath"]), pgbin_path=Path(hpoed_params["pgconn_info"]["pgbin_path"]), enable_boot=enable_boot, - boot_config_fpath=hpoed_params["boot_config_fpath"], + boot_config_fpath=boot_config_fpath, connect_timeout=300, logger=logger, ) diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index b668eb9c..6ed3058d 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -23,14 +23,14 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_SYSKNOBS_PATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, 
max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath): + def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath): self.benchmark_name = benchmark_name self.workload_name = workload_name self.embedder_path = embedder_path @@ -49,7 +49,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi self.workload_timeout = workload_timeout self.query_timeout = query_timeout self.enable_boot_during_hpo = enable_boot_during_hpo - self.boot_config_fpath = boot_config_fpath + self.hpo_boot_config_fpath = hpo_boot_config_fpath @click.command() @@ -165,10 +165,10 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help="Whether to enable the Boot query accelerator during the HPO process. Deciding to use Boot during HPO is separate from deciding to use Boot during tuning.", ) @click.option( - "--boot-config-fpath", + "--hpo-boot-config-fpath", default=DEFAULT_BOOT_CONFIG_FPATH, type=Path, - help="The path to the file configuring Boot.", + help="The path to the file configuring Boot when running HPO. When tuning, you may use a different Boot config.", ) def hpo( dbgym_cfg, @@ -194,7 +194,7 @@ def hpo( workload_timeout, query_timeout, enable_boot_during_hpo: bool, - boot_config_fpath: Path, + hpo_boot_config_fpath: Path, ): # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) @@ -224,7 +224,7 @@ def hpo( pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath) pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) - boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath) + hpo_boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, hpo_boot_config_fpath) # Check assertions on args if intended_pgdata_hardware == "hdd": @@ -235,7 +235,7 @@ def hpo( assert False # Create args object - hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath) + hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath) _tune_hpo(dbgym_cfg, hpo_args) @@ -252,7 +252,7 @@ def build_space( duration: int=30, seed: int=0, enable_boot_during_hpo: bool=False, - boot_config_fpath: Path=None, + hpo_boot_config_fpath: Path=None, workload_timeouts: list[int]=[600], query_timeouts: list[int]=[30], boot_enabled: bool = False, @@ -265,7 +265,7 @@ def build_space( "trace": True, "seed": seed, "enable_boot_during_hpo": enable_boot_during_hpo, - "boot_config_fpath": boot_config_fpath, + "hpo_boot_config_fpath": hpo_boot_config_fpath, # 
Timeouts. "duration": duration, @@ -573,7 +573,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: duration=hpo_args.duration, seed=hpo_args.seed, enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, - boot_config_fpath=hpo_args.boot_config_fpath, + hpo_boot_config_fpath=hpo_args.hpo_boot_config_fpath, workload_timeouts=workload_timeouts, query_timeouts=query_timeouts, ) From 315a69fb368adbf7f90e22d68d50ee1e766adf7e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 17:09:18 +0000 Subject: [PATCH 019/100] added hpo config fpath config to tune --- tune/protox/agent/tune.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index c47698b5..a28a0922 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -5,7 +5,7 @@ import click import pandas as pd -from misc.utils import WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname from tune.protox.agent.coerce_config import coerce_config from tune.protox.agent.hpo import TuneTrial, build_space @@ -37,7 +37,13 @@ is_flag=True, help="Whether to enable the Boot query accelerator during the tuning process. Deciding to use Boot during tuning is separate from deciding to use Boot during HPO.", ) -def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool) -> None: +@click.option( + "--tune-boot-config-fpath", + default=DEFAULT_BOOT_CONFIG_FPATH, + type=Path, + help="The path to the file configuring Boot when tuning. This may be a different Boot config than the one used for HPO.", +) +def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, tune_boot_config_fpath: Path) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) if hpoed_agent_params_path == None: @@ -45,6 +51,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Convert all input paths to absolute paths hpoed_agent_params_path = conv_inputpath_to_realabspath(dbgym_cfg, hpoed_agent_params_path) + tune_boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, tune_boot_config_fpath) # Tune with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: @@ -60,11 +67,12 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: ), hpoed_params) # Add configs to the hpoed_params that are allowed to differ between HPO and tuning. 
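    # (Concretely: enable_boot_during_tune and tune_boot_config_fpath, set a few lines below.)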
- # In general, for configs that can differ between HPO and tuning, I chose to append - # "_during_hpo"/"_during_tune" to the end of them instead of naming them the same + # In general, for configs that can differ between HPO and tuning, I chose to name + # them "*tune*" and "*hpo*" to the end of them instead of naming them the same # and overriding the config during tuning. It's just much less confusing if we # make sure to never override any configs in hpoed_params. hpoed_params["enable_boot_during_tune"] = enable_boot_during_tune + hpoed_params["tune_boot_config_fpath"] = tune_boot_config_fpath # Piggyback off the HPO magic. t = TuneTrial(dbgym_cfg, False) From 4e9bde62c2b8f622c1128e318988e4998b29a4c7 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 17:33:20 +0000 Subject: [PATCH 020/100] fixed bugs so that hpo runs --- tune/protox/agent/build_trial.py | 78 +++++++++++++++--------------- tune/protox/agent/coerce_config.py | 48 +++++++++--------- tune/protox/agent/hpo.py | 20 ++++---- tune/protox/agent/replay.py | 28 +++++------ tune/protox/agent/tune.py | 18 +++---- 5 files changed, 95 insertions(+), 97 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 4963f5ca..e962b643 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -93,13 +93,13 @@ def _get_signal(signal_folder: Union[str, Path]) -> Tuple[int, str]: raise IOError("No free ports to bind postgres to.") -def _modify_benchbase_config(dbgym_cfg: DBGymConfig, port: int, hpoed_params: dict[str, Any]) -> None: - if hpoed_params["benchmark_config"]["query_spec"]["oltp_workload"]: +def _modify_benchbase_config(dbgym_cfg: DBGymConfig, port: int, hpo_params: dict[str, Any]) -> None: + if hpo_params["benchmark_config"]["query_spec"]["oltp_workload"]: conf_etree = ET.parse(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml") jdbc = f"jdbc:postgresql://localhost:{port}/benchbase?preferQueryMode=extended" conf_etree.getroot().find("url").text = jdbc # type: ignore - oltp_config = hpoed_params["benchbase_config"]["oltp_config"] + oltp_config = hpo_params["benchbase_config"]["oltp_config"] if conf_etree.getroot().find("scalefactor") is not None: conf_etree.getroot().find("scalefactor").text = str(oltp_config["oltp_sf"]) # type: ignore if conf_etree.getroot().find("terminals") is not None: @@ -130,38 +130,38 @@ def f(p: ProtoAction, n: torch.Tensor) -> ProtoAction: def _build_utilities( - dbgym_cfg: DBGymConfig, pgport: int, is_hpo: bool, hpoed_params: dict[str, Any] + dbgym_cfg: DBGymConfig, pgport: int, is_hpo: bool, hpo_params: dict[str, Any] ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]: logger = Logger( dbgym_cfg, - hpoed_params["trace"], - hpoed_params["verbose"], + hpo_params["trace"], + hpo_params["verbose"], ) reward_utility = RewardUtility( target=( "tps" - if hpoed_params["benchmark_config"]["query_spec"]["oltp_workload"] + if hpo_params["benchmark_config"]["query_spec"]["oltp_workload"] else "latency" ), - metric=hpoed_params["reward"], - reward_scaler=hpoed_params["reward_scaler"], + metric=hpo_params["reward"], + reward_scaler=hpo_params["reward_scaler"], logger=logger, ) # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus, # we start Redis here if necessary. 
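    # (is_hpo selects the *_during_hpo settings; tuning runs read the *_during_tune ones.)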
- enable_boot = hpoed_params["enable_boot_during_hpo"] if is_hpo else hpoed_params["enable_boot_during_tune"] - boot_config_fpath = hpoed_params["hpo_boot_config_fpath"] if is_hpo else hpoed_params["tune_boot_config_fpath"] + enable_boot = hpo_params["enable_boot_during_hpo"] if is_hpo else hpo_params["enable_boot_during_tune"] + boot_config_fpath = hpo_params["hpo_boot_config_fpath"] if is_hpo else hpo_params["tune_boot_config_fpath"] if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) pgconn = PostgresConn( dbgym_cfg=dbgym_cfg, pgport=pgport, - pristine_pgdata_snapshot_fpath=Path(hpoed_params["pgconn_info"]["pristine_pgdata_snapshot_path"]), - pgdata_parent_dpath=Path(hpoed_params["pgconn_info"]["pgdata_parent_dpath"]), - pgbin_path=Path(hpoed_params["pgconn_info"]["pgbin_path"]), + pristine_pgdata_snapshot_fpath=Path(hpo_params["pgconn_info"]["pristine_pgdata_snapshot_path"]), + pgdata_parent_dpath=Path(hpo_params["pgconn_info"]["pgdata_parent_dpath"]), + pgbin_path=Path(hpo_params["pgconn_info"]["pgbin_path"]), enable_boot=enable_boot, boot_config_fpath=boot_config_fpath, connect_timeout=300, @@ -170,13 +170,13 @@ def _build_utilities( workload = Workload( dbgym_cfg=dbgym_cfg, - tables=hpoed_params["benchmark_config"]["tables"], - attributes=hpoed_params["benchmark_config"]["attributes"], - query_spec=hpoed_params["benchmark_config"]["query_spec"], - workload_path=Path(hpoed_params["workload_path"]), + tables=hpo_params["benchmark_config"]["tables"], + attributes=hpo_params["benchmark_config"]["attributes"], + query_spec=hpo_params["benchmark_config"]["query_spec"], + workload_path=Path(hpo_params["workload_path"]), pid=None, - workload_timeout=hpoed_params["workload_timeout"], - workload_timeout_penalty=hpoed_params["workload_timeout_penalty"], + workload_timeout=hpo_params["workload_timeout"], + workload_timeout_penalty=hpo_params["workload_timeout_penalty"], logger=logger, ) @@ -305,7 +305,7 @@ def _build_obs_space( def _build_env( dbgym_cfg: DBGymConfig, - hpoed_params: dict[str, Any], + hpo_params: dict[str, Any], pgconn: PostgresConn, obs_space: StateSpace, holon_space: HolonSpace, @@ -321,28 +321,28 @@ def _build_env( observation_space=obs_space, action_space=holon_space, workload=workload, - horizon=hpoed_params["horizon"], + horizon=hpo_params["horizon"], reward_utility=reward_utility, pgconn=pgconn, - query_timeout=hpoed_params["query_timeout"], - benchbase_config=hpoed_params["benchbase_config"], + query_timeout=hpo_params["query_timeout"], + benchbase_config=hpo_params["benchbase_config"], logger=logger, replay=False, ) # Check whether to create the MQO wrapper. 
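    # (Only non-OLTP workloads can get the MQO wrapper; OLTP workloads skip this branch entirely.)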
- if not hpoed_params["benchmark_config"]["query_spec"]["oltp_workload"]: + if not hpo_params["benchmark_config"]["query_spec"]["oltp_workload"]: if ( - hpoed_params["workload_eval_mode"] != "pq" - or hpoed_params["workload_eval_inverse"] - or hpoed_params["workload_eval_reset"] + hpo_params["workload_eval_mode"] != "pq" + or hpo_params["workload_eval_inverse"] + or hpo_params["workload_eval_reset"] ): env = MQOWrapper( - workload_eval_mode=hpoed_params["workload_eval_mode"], - workload_eval_inverse=hpoed_params["workload_eval_inverse"], - workload_eval_reset=hpoed_params["workload_eval_reset"], - benchbase_config=hpoed_params["benchbase_config"], - query_timeout=hpoed_params["query_timeout"], + workload_eval_mode=hpo_params["workload_eval_mode"], + workload_eval_inverse=hpo_params["workload_eval_inverse"], + workload_eval_reset=hpo_params["workload_eval_reset"], + benchbase_config=hpo_params["benchbase_config"], + query_timeout=hpo_params["query_timeout"], env=env, logger=logger, ) @@ -510,16 +510,16 @@ def _build_agent( def build_trial( - dbgym_cfg: DBGymConfig, seed: int, is_hpo: bool, hpoed_params: dict[str, Any] + dbgym_cfg: DBGymConfig, seed: int, is_hpo: bool, hpo_params: dict[str, Any] ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]: # The massive trial builder. - port, signal = _get_signal(hpoed_params["pgconn_info"]["pgbin_path"]) - _modify_benchbase_config(dbgym_cfg, port, hpoed_params) + port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"]) + _modify_benchbase_config(dbgym_cfg, port, hpo_params) - logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpoed_params) - holon_space, lsc = _build_actions(dbgym_cfg, seed, hpoed_params, workload, logger) - obs_space = _build_obs_space(dbgym_cfg, holon_space, lsc, hpoed_params, seed) + logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpo_params) + holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger) + obs_space = _build_obs_space(dbgym_cfg, holon_space, lsc, hpo_params, seed) target_reset, env = _build_env( dbgym_cfg, hpo_params, diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py index 22a99094..f2bc6b26 100644 --- a/tune/protox/agent/coerce_config.py +++ b/tune/protox/agent/coerce_config.py @@ -4,11 +4,11 @@ from misc.utils import DBGymConfig, open_and_save -def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpoed_params: dict[str, Any]) -> dict[str, Any]: - if "space_version" not in hpoed_params: +def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dict[str, Any]) -> dict[str, Any]: + if "space_version" not in hpo_params: # This is an old version. Coerce the params file. new_config = {} - margs = hpoed_params["mythril_args"] + margs = hpo_params["mythril_args"] with open_and_save(dbgym_cfg, margs["benchmark_config"]) as f: benchmark_config = yaml.safe_load(f) @@ -17,16 +17,16 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpoed_params: d benchmark_config["benchmark"] = benchmark # Merge the query specs. 
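    # (The query-spec overrides recorded by the old run are applied on top of the freshly loaded benchmark config.)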
- mqs = hpoed_params["mythril_query_spec"] + mqs = hpo_params["mythril_query_spec"] benchmark_config["query_spec"].update(mqs) defaults = { "verbose": True, "trace": True, - "seed": hpoed_params["mythril_args"]["seed"], - "duration": hpoed_params["mythril_args"]["duration"], - "workload_timeout": hpoed_params["mythril_args"]["workload_timeout"], - "query_timeout": hpoed_params["mythril_args"]["timeout"], + "seed": hpo_params["mythril_args"]["seed"], + "duration": hpo_params["mythril_args"]["duration"], + "workload_timeout": hpo_params["mythril_args"]["workload_timeout"], + "query_timeout": hpo_params["mythril_args"]["timeout"], "pgconn_info": { "pgport": 5432, "pguser": "admin", @@ -44,41 +44,41 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpoed_params: d "oltp_warmup": margs.get("oltp_warmup", 0), }, "benchbase_path": "/home/wz2/noisepage-pilot/artifacts/benchbase/", - "benchbase_config_path": hpoed_params["mythril_args"][ + "benchbase_config_path": hpo_params["mythril_args"][ "benchbase_config_path" ], }, - "system_knobs": hpoed_params["mythril_system_knobs"], + "system_knobs": hpo_params["mythril_system_knobs"], "lsc": { - "enabled": hpoed_params["lsc_parameters"]["lsc_enabled"], - "initial": hpoed_params["lsc_parameters"]["lsc_shift_initial"], - "increment": hpoed_params["lsc_parameters"]["lsc_shift_increment"], - "max": hpoed_params["lsc_parameters"]["lsc_shift_max"], - "shift_eps_freq": hpoed_params["lsc_parameters"][ + "enabled": hpo_params["lsc_parameters"]["lsc_enabled"], + "initial": hpo_params["lsc_parameters"]["lsc_shift_initial"], + "increment": hpo_params["lsc_parameters"]["lsc_shift_increment"], + "max": hpo_params["lsc_parameters"]["lsc_shift_max"], + "shift_eps_freq": hpo_params["lsc_parameters"][ "lsc_shift_schedule_eps_freq" ], - "shift_after": hpoed_params["lsc_parameters"]["lsc_shift_after"], + "shift_after": hpo_params["lsc_parameters"]["lsc_shift_after"], }, "neighbor_parameters": { - "knob_num_nearest": hpoed_params["neighbor_parameters"][ + "knob_num_nearest": hpo_params["neighbor_parameters"][ "knob_num_nearest" ], - "knob_span": hpoed_params["neighbor_parameters"]["knob_span"], - "index_num_samples": hpoed_params["neighbor_parameters"][ + "knob_span": hpo_params["neighbor_parameters"]["knob_span"], + "index_num_samples": hpo_params["neighbor_parameters"][ "index_num_samples" ], - "index_rules": hpoed_params["neighbor_parameters"].get( + "index_rules": hpo_params["neighbor_parameters"].get( "index_subset", True ), }, - "embedder_path": hpoed_params["vae_metadata"]["embedder_path"], + "embedder_path": hpo_params["vae_metadata"]["embedder_path"], } for s in space.keys(): if s in defaults: new_config[s] = defaults[s] - elif s in hpoed_params: - new_config[s] = hpoed_params[s] + elif s in hpo_params: + new_config[s] = hpo_params[s] elif s == "space_version": continue else: @@ -86,4 +86,4 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpoed_params: d return new_config - return hpoed_params + return hpo_params diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 6ed3058d..35195733 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -23,7 +23,7 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_SYSKNOBS_PATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, 
default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_RELPATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" @@ -412,27 +412,27 @@ def __init__(self, dbgym_cfg: DBGymConfig, is_hpo: bool) -> None: self.dbgym_cfg = dbgym_cfg self.is_hpo = is_hpo - def setup(self, hpoed_params: dict[str, Any]) -> None: + def setup(self, hpo_params: dict[str, Any]) -> None: # Attach mythril directory to the search path. sys.path.append(os.path.expanduser(self.dbgym_cfg.dbgym_repo_path)) torch.set_default_dtype(torch.float32) # type: ignore seed = ( - hpoed_params["seed"] - if hpoed_params["seed"] != -1 + hpo_params["seed"] + if hpo_params["seed"] != -1 else np.random.randint(np.iinfo(np.int32).max) ) np.random.seed(seed) torch.manual_seed(seed) - self.timeout = TuneTimeoutChecker(hpoed_params["duration"]) + self.timeout = TuneTimeoutChecker(hpo_params["duration"]) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, seed=seed, - hpoed_params=hpoed_params, + hpo_params=hpo_params, is_hpo=self.is_hpo, ) - self.logger.get_logger(None).info("%s", hpoed_params) + self.logger.get_logger(None).info("%s", hpo_params) self.logger.get_logger(None).info(f"Seed: {seed}") # Attach the timeout checker and loggers. @@ -498,7 +498,7 @@ def cleanup(self) -> None: if Path(self.signal).exists(): os.remove(self.signal) -# I want to pass dbgym_cfg into TuneOpt without putting it inside `hpoed_params`. This is because it's a pain to turn DBGymConfig +# I want to pass dbgym_cfg into TuneOpt without putting it inside `hpo_params`. This is because it's a pain to turn DBGymConfig # into a nice dictionary of strings, and nothing in DBGymConfig would be relevant to someone checking the configs later # Using a function to create a class is Ray's recommended way of doing this (see # https://discuss.ray.io/t/using-static-variables-to-control-trainable-subclass-in-ray-tune/808/4) @@ -510,9 +510,9 @@ def create_tune_opt_class(dbgym_cfg_param): class TuneOpt(Trainable): dbgym_cfg = global_dbgym_cfg - def setup(self, hpoed_params: dict[str, Any]) -> None: + def setup(self, hpo_params: dict[str, Any]) -> None: self.trial = TuneTrial(TuneOpt.dbgym_cfg, True) - self.trial.setup(hpoed_params) + self.trial.setup(hpo_params) def step(self) -> dict[Any, Any]: return self.trial.step() diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 7798405f..c6ee4a87 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -6,6 +6,7 @@ replayed tuning run is not. 
''' import datetime +import json import logging import click import yaml @@ -119,6 +120,7 @@ def __init__( def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout: bool, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) + if tuning_steps_dpath == None: tuning_steps_dpath = default_tuning_steps_dpath(dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name, boot_enabled_during_tune) @@ -129,23 +131,19 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en replay_args = ReplayArgs(workload_timeout, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist) # Replay - print(f"tuning_steps_dpath={tuning_steps_dpath}") + hpo_params_fpath = tuning_step_dpath / "params.json" + + with open_and_save(dbgym_cfg, hpo_params_fpath) as f: + hpo_params = json.load(f) + tuning_step_dpaths = sorted(tuning_steps_dpath.rglob("run.raw.csv")) for tuning_step_dpath in tqdm.tqdm(tuning_step_dpaths, leave=False): - replay_step(dbgym_cfg, tuning_step_dpath, replay_args) + replay_step(dbgym_cfg, tuning_step_dpath, hpo_params, replay_args) -def replay_step(dbgym_cfg: DBGymConfig, tuning_step_dpath: Path, replay_args: ReplayArgs): - with open_and_save(dbgym_cfg, tuning_step_dpath / "stdout", "r") as f: - config = f.readlines()[0] - config = eval(config.split("HPO Configuration: ")[-1]) - horizon = config["horizon"] - - with open_and_save(dbgym_cfg, tuning_step_dpath / "stdout", "r") as f: - for line in f: - if "HPO Configuration: " in line: - hpo = eval(line.split("HPO Configuration: ")[-1].strip()) - per_query_timeout = hpo["mythril_args"]["timeout"] +def replay_step(dbgym_cfg: DBGymConfig, tuning_step_dpath: Path, hpo_params: dict, replay_args: ReplayArgs): + horizon = hpo_params["horizon"] + query_timeout = hpo_params["query_timeout"] folders = [] start_found = False @@ -193,7 +191,7 @@ def replay_step(dbgym_cfg: DBGymConfig, tuning_step_dpath: Path, replay_args: Re # Get the minimum reward. runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] runs = [pd.read_csv(run) for run in runs] - rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == per_query_timeout) for run in runs] + rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == query_timeout) for run in runs] rewards = sorted(rewards, key=lambda x: x[0]) min_reward = min([r[0] for r in rewards]) if maximal: @@ -285,7 +283,7 @@ def run_sample(action, timeout): # Get the evaluation reward. 
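    # (run.raw.csv stores per-query latencies in microseconds; summing and dividing by 1e6 gives the workload runtime in seconds.)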
reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") assert len(reward.columns) == 6 - has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == per_query_timeout + has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == query_timeout reward = reward["Latency (microseconds)"].sum() / 1e6 assert reward > 0 diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index a28a0922..139d66af 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -55,33 +55,33 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Tune with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: - hpoed_params = json.load(f) + hpo_params = json.load(f) # Coerce using a dummy space. - hpoed_params = coerce_config(dbgym_cfg, build_space( + hpo_params = coerce_config(dbgym_cfg, build_space( sysknobs={}, benchmark_config={}, workload_path=Path(), embedder_path=[], pgconn_info={} - ), hpoed_params) + ), hpo_params) - # Add configs to the hpoed_params that are allowed to differ between HPO and tuning. + # Add configs to the hpo_params that are allowed to differ between HPO and tuning. # In general, for configs that can differ between HPO and tuning, I chose to name # them "*tune*" and "*hpo*" to the end of them instead of naming them the same # and overriding the config during tuning. It's just much less confusing if we - # make sure to never override any configs in hpoed_params. - hpoed_params["enable_boot_during_tune"] = enable_boot_during_tune - hpoed_params["tune_boot_config_fpath"] = tune_boot_config_fpath + # make sure to never override any configs in hpo_params. + hpo_params["enable_boot_during_tune"] = enable_boot_during_tune + hpo_params["tune_boot_config_fpath"] = tune_boot_config_fpath # Piggyback off the HPO magic. t = TuneTrial(dbgym_cfg, False) - t.setup(hpoed_params) + t.setup(hpo_params) start = time.time() data = [] step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv" - while (time.time() - start) < hpoed_params["duration"] * 3600: + while (time.time() - start) < hpo_params["duration"] * 3600: data.append(t.step()) # Continuously write the file out. From 17dece3df0557b95765d52bc6e5802a719f01b97 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 17:52:35 +0000 Subject: [PATCH 021/100] fixed some comments --- scripts/pat_test.sh | 6 +----- tune/protox/agent/hpo.py | 3 --- tune/protox/agent/replay.py | 2 +- tune/protox/embedding/train_all.py | 5 +---- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 7d22c2c3..fc3d9444 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,10 +7,6 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
-exit 0
-
-# testing
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
 exit 0
 
@@ -30,6 +26,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa
 python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
 
 # agent
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 35195733..a5be3618 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -607,9 +607,6 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None:
         sync_config=SyncConfig(),
         verbose=2,
         log_to_file=True,
-        # I call it hpo_ray_results because agent tuning also uses Ray and stores its results
-        # in tune_ray_results. By making them separate, we avoid the possibility of
-        # file collisions.
         storage_path=dbgym_cfg.cur_task_runs_path("hpo_ray_results", mkdir=True),
     )
 
diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index c6ee4a87..bb41874b 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -131,7 +131,7 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en
     replay_args = ReplayArgs(workload_timeout, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist)
 
     # Replay
-    hpo_params_fpath = tuning_step_dpath / "params.json"
+    hpo_params_fpath = tuning_steps_dpath / "params.json"
 
     with open_and_save(dbgym_cfg, hpo_params_fpath) as f:
         hpo_params = json.load(f)
diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py
index 8e6fb0be..b8ae195c 100644
--- a/tune/protox/embedding/train_all.py
+++ b/tune/protox/embedding/train_all.py
@@ -216,10 +216,7 @@ def train_all_embeddings(
         sync_config=SyncConfig(),
         verbose=2,
         log_to_file=True,
-        # I call it tune_ray_results because agent HPO also uses Ray and stores its results
-        # in hpo_ray_results. By making them separate, we avoid the possibility of
-        # file collisions.
- storage_path=dbgym_cfg.cur_task_runs_path("tune_ray_results", mkdir=True), + storage_path=dbgym_cfg.cur_task_runs_path("embedding_ray_results", mkdir=True), ) resources = {"cpu": 1} From 2f554e2e77239fd847f877b28d105a69c12b1161 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 22:18:36 +0000 Subject: [PATCH 022/100] made it past first output.log loop --- misc/utils.py | 2 +- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 30 ++++++++++++++++++------------ tune/protox/agent/tune.py | 9 +++++---- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/misc/utils.py b/misc/utils.py index d55d7890..95237c06 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -143,7 +143,7 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: lambda workspace_path, benchmark_name, workload_name, boot_enabled_during_tune: get_symlinks_path_from_workspace_path( workspace_path ) - / "dbgym_tune_protox_agent" / "data" / default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) + / "dbgym_tune_protox_agent" / "artifacts" / default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) ) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index fc3d9444..b6f4b525 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,7 +7,7 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index bb41874b..45cc254b 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -131,41 +131,47 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en replay_args = ReplayArgs(workload_timeout, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist) # Replay + replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args) + + +def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_args: ReplayArgs): + """ + Replay a single tuning run (as in one tuning_steps/ folder). 
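+    It parses output.log to recover the tuning_steps/[time]/ folders in the order they were evaluated.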
+ """ hpo_params_fpath = tuning_steps_dpath / "params.json" with open_and_save(dbgym_cfg, hpo_params_fpath) as f: hpo_params = json.load(f) - - tuning_step_dpaths = sorted(tuning_steps_dpath.rglob("run.raw.csv")) - for tuning_step_dpath in tqdm.tqdm(tuning_step_dpaths, leave=False): - replay_step(dbgym_cfg, tuning_step_dpath, hpo_params, replay_args) - -def replay_step(dbgym_cfg: DBGymConfig, tuning_step_dpath: Path, hpo_params: dict, replay_args: ReplayArgs): horizon = hpo_params["horizon"] query_timeout = hpo_params["query_timeout"] folders = [] start_found = False - filename = "output.log" if args.alternate else "stderr" + output_log_fpath = tuning_steps_dpath / "output.log" last_evaluation = None - with open(f"{args.input}/{filename}", "r") as f: + with open_and_save(dbgym_cfg, output_log_fpath) as f: for line in f: if not start_found: if "Baseline Metric" in line: - start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0]) start_found = True else: if "mv" in line and "tuning_steps" in line: repo = eval(line.split("Running ")[-1])[-1] last_folder = repo.split("/")[-1] - time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) last_evaluation = time_since_start - if (time_since_start - start_time).total_seconds() < args.cutoff * 3600 or args.cutoff == 0: + if replay_args.cutoff == None or (time_since_start - start_time).total_seconds() < replay_args.cutoff * 3600: folders.append(last_folder) + print(f"folders={folders}") + print(f"last_evaluation={last_evaluation}") + + assert False, "done" + # Only apply threshold if time is less than. - threshold_limit = last_evaluation - datetime.timedelta(seconds=int(args.threshold_limit * 3600)) + threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) spec = Spec( agent_type=None, diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 139d66af..e8fe0343 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -93,10 +93,11 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: pd.DataFrame(data).to_csv(step_data_fpath, index=False) # Link the tuning steps data (this directory allows you to replay the tuning run). - # Replaying requires the params.json file, so we also copy it here. - # Since params.json is fairly small, I choose to copy the file itself instead of just - # making a symlink to it. + # Replaying requires output.log and params.json, so we also copy them into the tuning_steps/ directory. + # The reason I copy them in is to ensure that tuning_steps/ is a fully self-contained directory. 
tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps") shutil.copy(hpoed_agent_params_path, tuning_steps_dpath) - tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, False) + output_fpath = dbgym_cfg.cur_task_runs_artifacts_path() / "output.log" + shutil.copy(output_fpath, tuning_steps_dpath) + tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune) link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname) From f035e01df8caa49d8ad151b8a92d717755f580c4 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 22:20:24 +0000 Subject: [PATCH 023/100] now only reading folders in first loop --- tune/protox/agent/replay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 45cc254b..62774885 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -145,10 +145,11 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a horizon = hpo_params["horizon"] query_timeout = hpo_params["query_timeout"] + output_log_fpath = tuning_steps_dpath / "output.log" + # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder folders = [] start_found = False - output_log_fpath = tuning_steps_dpath / "output.log" last_evaluation = None with open_and_save(dbgym_cfg, output_log_fpath) as f: for line in f: @@ -157,7 +158,7 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0]) start_found = True else: - if "mv" in line and "tuning_steps" in line: + if "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line: repo = eval(line.split("Running ")[-1])[-1] last_folder = repo.split("/")[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) From 19310a3c8c76410d453723afc6b38cdf6424727e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 22:39:16 +0000 Subject: [PATCH 024/100] fixed threshold limit --- tune/protox/agent/build_trial.py | 34 +++++++++++++------------- tune/protox/agent/replay.py | 38 +++++++++--------------------- tune/protox/env/mqo/mqo_wrapper.py | 2 +- tune/protox/env/pg_env.py | 4 ++-- tune/protox/env/util/execute.py | 16 ++++++------- tune/protox/env/workload.py | 18 +++++++------- 6 files changed, 48 insertions(+), 64 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index e962b643..6b6a5cb9 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -274,7 +274,7 @@ def _build_actions( return hspace, lsc -def _build_obs_space( +def _build_observation_space( dbgym_cfg: DBGymConfig, action_space: HolonSpace, lsc: LSC, hpo_params: dict[str, Any], seed: int ) -> StateSpace: if hpo_params["metric_state"] == "metric": @@ -307,7 +307,7 @@ def _build_env( dbgym_cfg: DBGymConfig, hpo_params: dict[str, Any], pgconn: PostgresConn, - obs_space: StateSpace, + observation_space: StateSpace, holon_space: HolonSpace, lsc: LSC, workload: Workload, @@ -318,7 +318,7 @@ def _build_env( env = gym.make( "Postgres-v0", dbgym_cfg=dbgym_cfg, - observation_space=obs_space, + observation_space=observation_space, action_space=holon_space, workload=workload, horizon=hpo_params["horizon"], @@ -378,7 +378,7 @@ def _build_env( def _build_agent( seed: int, 
hpo_params: dict[str, Any], - obs_space: StateSpace, + observation_space: StateSpace, action_space: HolonSpace, logger: Logger, ) -> Wolp: @@ -386,10 +386,10 @@ def _build_agent( critic_action_dim = action_space.critic_dim() actor = Actor( - observation_space=obs_space, + observation_space=observation_space, action_space=action_space, net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(obs_space), + features_dim=gym.spaces.utils.flatdim(observation_space), activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), weight_init=hpo_params["weight_init"], bias_zero=hpo_params["bias_zero"], @@ -399,10 +399,10 @@ def _build_agent( ) actor_target = Actor( - observation_space=obs_space, + observation_space=observation_space, action_space=action_space, net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(obs_space), + features_dim=gym.spaces.utils.flatdim(observation_space), activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), weight_init=hpo_params["weight_init"], bias_zero=hpo_params["bias_zero"], @@ -416,10 +416,10 @@ def _build_agent( ) critic = ContinuousCritic( - observation_space=obs_space, + observation_space=observation_space, action_space=action_space, net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(obs_space), + features_dim=gym.spaces.utils.flatdim(observation_space), activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), weight_init=hpo_params["weight_init"], bias_zero=hpo_params["bias_zero"], @@ -428,10 +428,10 @@ def _build_agent( ) critic_target = ContinuousCritic( - observation_space=obs_space, + observation_space=observation_space, action_space=action_space, net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(obs_space), + features_dim=gym.spaces.utils.flatdim(observation_space), activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), weight_init=hpo_params["weight_init"], bias_zero=hpo_params["bias_zero"], @@ -445,7 +445,7 @@ def _build_agent( ) policy = WolpPolicy( - observation_space=obs_space, + observation_space=observation_space, action_space=action_space, actor=actor, actor_target=actor_target, @@ -495,7 +495,7 @@ def _build_agent( policy=policy, replay_buffer=ReplayBuffer( buffer_size=hpo_params["buffer_size"], - obs_shape=[gym.spaces.utils.flatdim(obs_space)], + obs_shape=[gym.spaces.utils.flatdim(observation_space)], action_dim=critic_action_dim, ), learning_starts=hpo_params["learning_starts"], @@ -519,12 +519,12 @@ def build_trial( logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpo_params) holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger) - obs_space = _build_obs_space(dbgym_cfg, holon_space, lsc, hpo_params, seed) + observation_space = _build_observation_space(dbgym_cfg, holon_space, lsc, hpo_params, seed) target_reset, env = _build_env( dbgym_cfg, hpo_params, pgconn, - obs_space, + observation_space, holon_space, lsc, workload, @@ -532,5 +532,5 @@ def build_trial( logger, ) - agent = _build_agent(seed, hpo_params, obs_space, holon_space, logger) + agent = _build_agent(seed, hpo_params, observation_space, holon_space, logger) return logger, target_reset, env, agent, signal diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 62774885..4b59fa80 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -19,6 +19,7 @@ from 
misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required +from tune.protox.agent.build_trial import build_trial from tune.protox.env.pg_env import PostgresEnv @@ -143,8 +144,6 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a with open_and_save(dbgym_cfg, hpo_params_fpath) as f: hpo_params = json.load(f) - horizon = hpo_params["horizon"] - query_timeout = hpo_params["query_timeout"] output_log_fpath = tuning_steps_dpath / "output.log" # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder @@ -166,31 +165,16 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a if replay_args.cutoff == None or (time_since_start - start_time).total_seconds() < replay_args.cutoff * 3600: folders.append(last_folder) - print(f"folders={folders}") - print(f"last_evaluation={last_evaluation}") + # Only apply threshold if time is less than. + threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None + # Build PostgresEnv + _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) + postgres_env = agent_env.unwrapped + print(f"postgres_env={postgres_env}, type(postgres_env)={type(postgres_env)}") assert False, "done" - # Only apply threshold if time is less than. - threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) - - spec = Spec( - agent_type=None, - seed=0, - horizon=horizon, - config_path=f"{args.input}/config.yaml2", - benchmark_config_path=f"{args.input}/{args.benchmark}.yaml", - workload_timeout=0) - - env = PostgresEnv( - spec, - horizon=horizon, - timeout=None, - reward_utility=None, - logger=None, - replay=True) - - if not args.simulated: + if not replay_args.simulated: env.restore_pristine_snapshot() env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) spec.workload.reset() @@ -198,7 +182,7 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a # Get the minimum reward. runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] runs = [pd.read_csv(run) for run in runs] - rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == query_timeout) for run in runs] + rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run in runs] rewards = sorted(rewards, key=lambda x: x[0]) min_reward = min([r[0] for r in rewards]) if maximal: @@ -290,7 +274,7 @@ def run_sample(action, timeout): # Get the evaluation reward. reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") assert len(reward.columns) == 6 - has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == query_timeout + has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] reward = reward["Latency (microseconds)"].sum() / 1e6 assert reward > 0 @@ -359,7 +343,7 @@ def run_sample(action, timeout): if (not has_timeout) or (max(run_samples) < timeout): # Apply a tolerance.. # If we've timed out, only apply threshold only if we've found a strictly better config. 
- apply_threshold = threshold if time_since_start < threshold_limit else 0 + apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0 cur_reward_max = reward - apply_threshold if max(run_samples) < timeout: diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 1943f038..3cf233e0 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -332,7 +332,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: ) = self.unwrapped.workload.execute( pgconn=self.unwrapped.pgconn, reward_utility=self.unwrapped.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, actions=[r[1] for r in runs], actions_names=[r[0] for r in runs], diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index e6c2262e..e6788430 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -158,7 +158,7 @@ def reset( # type: ignore success, metric, _, results, _, query_metric_data = self.workload.execute( pgconn=self.pgconn, reward_utility=self.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, actions=[default_action], actions_names=["GlobalDual"], @@ -259,7 +259,7 @@ def step_execute( ) = self.workload.execute( pgconn=self.pgconn, reward_utility=self.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, benchbase_config=self.benchbase_config, query_timeout=self.query_timeout, diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index d7a7584c..a9f4e4d4 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -76,11 +76,11 @@ def _acquire_metrics_around_query( connection: psycopg.Connection[Any], query: str, query_timeout: float = 0.0, - obs_space: Optional[StateSpace] = None, + observation_space: Optional[StateSpace] = None, ) -> Tuple[float, bool, Any, Any]: _force_statement_timeout(connection, 0) - if obs_space and obs_space.require_metrics(): - initial_metrics = obs_space.construct_online(connection) + if observation_space and observation_space.require_metrics(): + initial_metrics = observation_space.construct_online(connection) if query_timeout > 0: _force_statement_timeout(connection, query_timeout * 1000) @@ -91,9 +91,9 @@ def _acquire_metrics_around_query( # Wipe the statement timeout. _force_statement_timeout(connection, 0) - if obs_space and obs_space.require_metrics(): - final_metrics = obs_space.construct_online(connection) - diff = obs_space.state_delta(initial_metrics, final_metrics) + if observation_space and observation_space.require_metrics(): + final_metrics = observation_space.construct_online(connection) + diff = observation_space.state_delta(initial_metrics, final_metrics) else: diff = None @@ -108,7 +108,7 @@ def execute_variations( query_timeout: float = 0, logger: Optional[Logger] = None, sysknobs: Optional[KnobSpaceAction] = None, - obs_space: Optional[StateSpace] = None, + observation_space: Optional[StateSpace] = None, ) -> BestQueryRun: # Initial timeout. 
@@ -146,7 +146,7 @@ def execute_variations( connection=connection, query=pqk_query, query_timeout=timeout_limit, - obs_space=obs_space, + observation_space=observation_space, ) if not did_timeout: diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 902d28a2..00e289be 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -335,7 +335,7 @@ def _execute_workload( actions: list[HolonAction] = [], actions_names: list[str] = [], results: Optional[Union[str, Path]] = None, - obs_space: Optional[StateSpace] = None, + observation_space: Optional[StateSpace] = None, action_space: Optional[HolonSpace] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, override_workload_timeout: Optional[float] = None, @@ -353,7 +353,7 @@ def _execute_workload( assert len(actions) == len(actions_names) # Do we need metrics. - need_metric = False if not obs_space else obs_space.require_metrics() + need_metric = False if not observation_space else observation_space.require_metrics() sysknobs = KnobSpaceAction({}) ql_knobs = [] @@ -450,7 +450,7 @@ def _execute_workload( pgconn.conn(), query, query_timeout=time_left, - obs_space=None, + observation_space=None, ) undo_disable = ";".join( @@ -511,7 +511,7 @@ def _execute_workload( query_timeout=min(target_pqt, workload_timeout - workload_time + 1), logger=self.logger, sysknobs=sysknobs, - obs_space=obs_space, + observation_space=observation_space, ) else: assert reset_metrics @@ -572,14 +572,14 @@ def _execute_workload( f.write(json.dumps(run.explain_data)) f.write("\n\n") - if obs_space and obs_space.require_metrics(): + if observation_space and observation_space.require_metrics(): # Create the metrics. # Log the metrics data as a flattened. accum_data = cast( list[dict[str, Any]], [v.metric_data for _, v in qid_runtime_data.items()], ) - accum_stats = obs_space.merge_deltas(accum_data) + accum_stats = observation_space.merge_deltas(accum_data) with open(results_dir / "run.metrics.json", "w") as f: # Flatten it. def flatten(d: dict[str, Any]) -> dict[str, Any]: @@ -665,7 +665,7 @@ def execute( self, pgconn: PostgresConn, reward_utility: RewardUtility, - obs_space: StateSpace, + observation_space: StateSpace, action_space: HolonSpace, actions: list[HolonAction], actions_names: list[str], @@ -688,14 +688,14 @@ def execute( # Execute benchbase if specified. success = self._execute_benchbase(benchbase_config, results) # We can only create a state if we succeeded. 
- success = obs_space.check_benchbase(self.dbgym_cfg, results) + success = observation_space.check_benchbase(self.dbgym_cfg, results) else: ret = self._execute_workload( pgconn, actions=actions, actions_names=actions_names, results=results, - obs_space=obs_space, + observation_space=observation_space, action_space=action_space, reset_metrics=reset_metrics, override_workload_timeout=self.workload_timeout, From e084cc718b64a22056e776fb7803616e62f8b768 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 22:41:40 +0000 Subject: [PATCH 025/100] can now build PostgresEnv --- tune/protox/agent/replay.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 4b59fa80..31c9d4db 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -16,7 +16,7 @@ from pathlib import Path from dateutil.parser import parse -from misc.utils import DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, workload_name_fn, default_tuning_steps_dpath +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required from tune.protox.agent.build_trial import build_trial @@ -140,9 +140,13 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a Replay a single tuning run (as in one tuning_steps/ folder). """ hpo_params_fpath = tuning_steps_dpath / "params.json" - with open_and_save(dbgym_cfg, hpo_params_fpath) as f: hpo_params = json.load(f) + # Set configs to the hpo_params that are allowed to differ between HPO and tuning. + # The way we set these may be different than how they were set during the tuning run, because + # we are replaying instead of tuning. 
+ hpo_params["enable_boot_during_tune"] = False + hpo_params["tune_boot_config_fpath"] = DEFAULT_BOOT_CONFIG_FPATH output_log_fpath = tuning_steps_dpath / "output.log" From 0c3c146920f02801d7d5f0c94be04101f297c4d1 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 17 Apr 2024 22:49:21 +0000 Subject: [PATCH 026/100] now resetting and getting min reward --- tune/protox/agent/build_trial.py | 12 ++++---- tune/protox/agent/replay.py | 25 +++++++++------- tune/protox/env/mqo/mqo_wrapper.py | 4 +-- tune/protox/env/pg_env.py | 48 +++++++++++++++--------------- tune/protox/env/workload.py | 22 +++++++------- 5 files changed, 58 insertions(+), 53 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 6b6a5cb9..a270822c 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -156,7 +156,7 @@ def _build_utilities( if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) - pgconn = PostgresConn( + pg_conn = PostgresConn( dbgym_cfg=dbgym_cfg, pgport=pgport, pristine_pgdata_snapshot_fpath=Path(hpo_params["pgconn_info"]["pristine_pgdata_snapshot_path"]), @@ -180,7 +180,7 @@ def _build_utilities( logger=logger, ) - return logger, reward_utility, pgconn, workload + return logger, reward_utility, pg_conn, workload def _build_actions( @@ -306,7 +306,7 @@ def _build_observation_space( def _build_env( dbgym_cfg: DBGymConfig, hpo_params: dict[str, Any], - pgconn: PostgresConn, + pg_conn: PostgresConn, observation_space: StateSpace, holon_space: HolonSpace, lsc: LSC, @@ -323,7 +323,7 @@ def _build_env( workload=workload, horizon=hpo_params["horizon"], reward_utility=reward_utility, - pgconn=pgconn, + pg_conn=pg_conn, query_timeout=hpo_params["query_timeout"], benchbase_config=hpo_params["benchbase_config"], logger=logger, @@ -517,13 +517,13 @@ def build_trial( port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"]) _modify_benchbase_config(dbgym_cfg, port, hpo_params) - logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpo_params) + logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpo_params) holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger) observation_space = _build_observation_space(dbgym_cfg, holon_space, lsc, hpo_params, seed) target_reset, env = _build_env( dbgym_cfg, hpo_params, - pgconn, + pg_conn, observation_space, holon_space, lsc, diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 31c9d4db..d4c14a31 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -172,23 +172,28 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a # Only apply threshold if time is less than. threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None - # Build PostgresEnv + # Build PostgresEnv. _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) - postgres_env = agent_env.unwrapped - print(f"postgres_env={postgres_env}, type(postgres_env)={type(postgres_env)}") - assert False, "done" + pg_env = agent_env.unwrapped + # Reset things. if not replay_args.simulated: - env.restore_pristine_snapshot() - env.action_space.reset(**{"connection": env.connection, "workload": spec.workload}) - spec.workload.reset() + pg_env.pg_conn.restore_pristine_snapshot() # Get the minimum reward. 
- runs = [Path(args.input) / "tuning_steps" / fold / "run.raw.csv" for fold in folders] - runs = [pd.read_csv(run) for run in runs] - rewards = [(run["Latency (microseconds)"].sum() / 1e6, (run["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run in runs] + run_raw_csv_fpaths = [tuning_steps_dpath / fold / "run.raw.csv" for fold in folders] + run_raw_csvs = [pd.read_csv(run_raw_csv_fpath) for run_raw_csv_fpath in run_raw_csv_fpaths] + rewards = [(run_raw_csv["Latency (microseconds)"].sum() / 1e6, (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run_raw_csv in run_raw_csvs] rewards = sorted(rewards, key=lambda x: x[0]) min_reward = min([r[0] for r in rewards]) + + print(f"run_raw_csv_fpaths={run_raw_csv_fpaths}") + print(f"run_raw_csvs={run_raw_csvs}") + print(f"rewards={rewards}") + print(f"min_reward={min_reward}") + + assert False, "done" + if maximal: target = [r[1] for r in rewards if r[0] == min_reward] assert len(target) >= 1 diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 3cf233e0..1b33923b 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -191,7 +191,7 @@ def step( # type: ignore if self.workload_eval_mode in ["all", "all_enum", "global_dual"]: # Load the global (optimizer) knobs. qid_ams = parse_access_methods( - self.unwrapped.pgconn.conn(), self.unwrapped.workload.queries + self.unwrapped.pg_conn.conn(), self.unwrapped.workload.queries ) runs.append( ( @@ -330,7 +330,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: _, target_metric_data, ) = self.unwrapped.workload.execute( - pgconn=self.unwrapped.pgconn, + pg_conn=self.unwrapped.pg_conn, reward_utility=self.unwrapped.reward_utility, observation_space=self.observation_space, action_space=self.action_space, diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index e6788430..a37d80b8 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -32,7 +32,7 @@ def __init__( workload: Workload, horizon: int, reward_utility: RewardUtility, - pgconn: PostgresConn, + pg_conn: PostgresConn, query_timeout: int, benchbase_config: dict[str, Any], logger: Optional[Logger] = None, @@ -50,7 +50,7 @@ def __init__( self.reward_utility = reward_utility self.benchbase_config = benchbase_config - self.pgconn = pgconn + self.pg_conn = pg_conn self.query_timeout = query_timeout self.current_state: Optional[Any] = None @@ -59,13 +59,13 @@ def __init__( def _restore_last_snapshot(self) -> None: assert self.horizon > 1 and self.workload.oltp_workload - assert self.pgconn.restore_checkpointed_snapshot() + assert self.pg_conn.restore_checkpointed_snapshot() assert isinstance(self.action_space, HolonSpace) self.state_container = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) @@ -105,18 +105,18 @@ def reset( # type: ignore if self.workload.oltp_workload and self.horizon == 1: # Restore a pristine snapshot of the world if OTLP and horizon = 1 - self.pgconn.restore_pristine_snapshot() + self.pg_conn.restore_pristine_snapshot() else: # Instead of restoring a pristine snapshot, just reset the knobs. # This in effect "resets" the baseline knob settings. - self.pgconn.start_with_changes(conf_changes=[]) + self.pg_conn.start_with_changes(conf_changes=[]) # Maneuver the state into the requested state/config. 
assert isinstance(self.action_space, HolonSpace) sc = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) config_changes, sql_commands = self.action_space.generate_plan_from_config( @@ -142,7 +142,7 @@ def reset( # type: ignore else: # Restore a pristine snapshot of the world. - self.pgconn.restore_pristine_snapshot() + self.pg_conn.restore_pristine_snapshot() assert not self.replay # On the first time, run the benchmark to get the baseline. @@ -151,12 +151,12 @@ def reset( # type: ignore # Get the stock state container. sc = self.action_space.generate_state_container( - None, None, self.pgconn.conn(), self.workload.queries + None, None, self.pg_conn.conn(), self.workload.queries ) default_action = self.action_space.null_action(sc) success, metric, _, results, _, query_metric_data = self.workload.execute( - pgconn=self.pgconn, + pg_conn=self.pg_conn, reward_utility=self.reward_utility, observation_space=self.observation_space, action_space=self.action_space, @@ -174,11 +174,11 @@ def reset( # type: ignore self.state_container = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) state = self.observation_space.construct_offline( - self.pgconn.conn(), results, self.state_container + self.pg_conn.conn(), results, self.state_container ) # Set the metric workload. @@ -217,8 +217,8 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] # Get the prior state. prior_state = copy.deepcopy(self.state_container) # Save the old configuration file. - old_conf_path = f"{self.pgconn.pgdata_dpath}/postgresql.auto.conf" - conf_path = f"{self.pgconn.pgdata_dpath}/postgresql.auto.old" + old_conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.conf" + conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.old" local["cp"][old_conf_path, conf_path].run() # Figure out what we have to change to get to the new configuration. @@ -257,7 +257,7 @@ def step_execute( q_timeout, query_metric_data, ) = self.workload.execute( - pgconn=self.pgconn, + pg_conn=self.pg_conn, reward_utility=self.reward_utility, observation_space=self.observation_space, action_space=self.action_space, @@ -319,14 +319,14 @@ def step_post_execute( self.state_container = self.action_space.generate_state_container( self.state_container, action, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) # Now. The state container should be accurate. assert isinstance(self.observation_space, StateSpace) next_state = self.observation_space.construct_offline( - self.pgconn.conn(), info["results"], self.state_container + self.pg_conn.conn(), info["results"], self.state_container ) else: assert self.current_state @@ -389,7 +389,7 @@ def attempt_checkpoint(conn_str: str) -> None: f"Executing {sql} [{i+1}/{len(sql_commands)}]" ) - ret, stderr = self.pgconn.psql(sql) + ret, stderr = self.pg_conn.psql(sql) if ret == -1: if stderr: print(stderr, flush=True) @@ -399,23 +399,23 @@ def attempt_checkpoint(conn_str: str) -> None: # We've killed the index operation. or "operational" in stderr ) - attempt_checkpoint(self.pgconn.get_connstr()) + attempt_checkpoint(self.pg_conn.get_connstr()) return False assert ret == 0, print(stderr) # Now try and perform the configuration changes. 
- return self.pgconn.start_with_changes( + return self.pg_conn.start_with_changes( conf_changes=config_changes, dump_page_cache=dump_page_cache, save_checkpoint=self.workload.oltp_workload and self.horizon > 1, ) def close(self) -> None: - self.pgconn.shutdown_postgres() + self.pg_conn.shutdown_postgres() # This file may not be in in [workspace]/tmp/, so it's important to delete it - local["rm"]["-rf", self.pgconn.pgdata_dpath].run() + local["rm"]["-rf", self.pg_conn.pgdata_dpath].run() # Even though these files get deleted because [workspace]/tmp/ gets deleted, # we'll just delete them here anyways because why not - local["rm"]["-f", self.pgconn.checkpoint_pgdata_snapshot_fpath].run() - local["rm"]["-f", f"{self.pgconn.checkpoint_pgdata_snapshot_fpath}.tmp"].run() + local["rm"]["-f", self.pg_conn.checkpoint_pgdata_snapshot_fpath].run() + local["rm"]["-f", f"{self.pg_conn.checkpoint_pgdata_snapshot_fpath}.tmp"].run() diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 00e289be..bff6cee5 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -331,7 +331,7 @@ def max_indexable(self) -> int: @time_record("execute") def _execute_workload( self, - pgconn: PostgresConn, + pg_conn: PostgresConn, actions: list[HolonAction] = [], actions_names: list[str] = [], results: Optional[Union[str, Path]] = None, @@ -422,7 +422,7 @@ def _execute_workload( if sql_type != QueryType.SELECT: # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. assert sql_type != QueryType.INS_UPD_DEL - pgconn.conn().execute(query) + pg_conn.conn().execute(query) continue if disable_pg_hint: @@ -442,12 +442,12 @@ def _execute_workload( if value == 0 ] ) - pgconn.conn().execute(disable) + pg_conn.conn().execute(disable) qid_runtime, _, _, _ = _acquire_metrics_around_query( self.logger, f"{qid}", - pgconn.conn(), + pg_conn.conn(), query, query_timeout=time_left, observation_space=None, @@ -460,7 +460,7 @@ def _execute_workload( if value == 0 ] ) - pgconn.conn().execute(undo_disable) + pg_conn.conn().execute(undo_disable) else: # De-duplicate the runs. @@ -505,7 +505,7 @@ def _execute_workload( if not skip_execute: best_run: BestQueryRun = execute_variations( - connection=pgconn.conn(), + connection=pg_conn.conn(), runs=runs, query=query, query_timeout=min(target_pqt, workload_timeout - workload_time + 1), @@ -539,7 +539,7 @@ def _execute_workload( if st != QueryType.SELECT: # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. If we do have INS_UPD_DEL queries, our "undo" logic will likely have to change. assert st != QueryType.INS_UPD_DEL - pgconn.conn().execute(rq) + pg_conn.conn().execute(rq) stop_running = True break @@ -551,7 +551,7 @@ def _execute_workload( assert sql_type != QueryType.UNKNOWN if sql_type != QueryType.SELECT: assert sql_type != QueryType.INS_UPD_DEL - pgconn.conn().execute(query) + pg_conn.conn().execute(query) if results is not None: # Make the result directory. @@ -663,7 +663,7 @@ def _execute_benchbase( def execute( self, - pgconn: PostgresConn, + pg_conn: PostgresConn, reward_utility: RewardUtility, observation_space: StateSpace, action_space: HolonSpace, @@ -681,7 +681,7 @@ def execute( # Purge results directory first. 
tmp_dir = tempfile.gettempdir() - results = f"{tmp_dir}/results{pgconn.pgport}" + results = f"{tmp_dir}/results{pg_conn.pgport}" shutil.rmtree(results, ignore_errors=True) if self.benchbase: @@ -691,7 +691,7 @@ def execute( success = observation_space.check_benchbase(self.dbgym_cfg, results) else: ret = self._execute_workload( - pgconn, + pg_conn, actions=actions, actions_names=actions_names, results=results, From bac5238e4928031bf96cee81befec30a415adb59 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:01:42 +0000 Subject: [PATCH 027/100] single to double quotes --- dbms/postgres/cli.py | 4 ++-- misc/utils.py | 8 ++++---- tune/protox/agent/hpo.py | 4 ++-- tune/protox/agent/replay.py | 4 ++-- tune/protox/env/logger.py | 4 ++-- tune/protox/env/util/pg_conn.py | 12 ++++++------ 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 930e80d3..e858d812 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -1,9 +1,9 @@ -''' +""" At a high level, this file's goal is to (1) install+build postgres and (2) create pgdata. On the other hand, the goal of tune.protox.env.util.postgres is to provide helpers to manage a Postgres instance during agent tuning. util.pg provides helpers used by *both* of the above files (as well as other files). -''' +""" import logging import os import shutil diff --git a/misc/utils.py b/misc/utils.py index 95237c06..243bc8c1 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -518,10 +518,10 @@ def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_nam def try_create_symlink(src_path: Path, dst_path: Path) -> None: - ''' + """ Our functions that create symlinks might be called by multiple processes at once during HPO. Thus, this is a thread-safe way to create a symlink. - ''' + """ try: os.symlink(src_path, dst_path) except FileExistsError: @@ -530,10 +530,10 @@ def try_create_symlink(src_path: Path, dst_path: Path) -> None: def try_remove_file(path: Path) -> None: - ''' + """ Our functions that remove files might be called by multiple processes at once during HPO. Thus, this is a thread-safe way to remove a file. - ''' + """ try: os.remove(path) except FileNotFoundError: diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index a5be3618..ec61c764 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -405,10 +405,10 @@ def __call__(self) -> bool: class TuneTrial: def __init__(self, dbgym_cfg: DBGymConfig, is_hpo: bool) -> None: - ''' + """ We use this object for both HPO and tune. It behaves *slightly* differently depending on what it's used for, which is why we have an is_hpo param. - ''' + """ self.dbgym_cfg = dbgym_cfg self.is_hpo = is_hpo diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index d4c14a31..2cc9ef50 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -1,10 +1,10 @@ -''' +""" Replaying a tuning run gives you the authoritative runtimes of that tuning run. The original tuning run has per-query timeouts, so the runtimes may be inaccurate. The replayed tuning run does not have per-query timeouts. Additionally, the original tuning run may have been accelerated by Boot, whereas the replayed tuning run is not. 
-''' +""" import datetime import json import logging diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index b82c8926..ae0339d7 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -93,9 +93,9 @@ def get_logger(self, name: Optional[str]) -> logging.Logger: def stash_results( self, info_dict: dict[str, Any], name_override: Optional[str] = None ) -> None: - ''' + """ Stash data about this step of tuning so that it can be replayed. - ''' + """ time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") time = name_override if name_override else time if info_dict["results"] is not None and Path(info_dict["results"]).exists(): diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py index 8f05e9ac..aace6d37 100644 --- a/tune/protox/env/util/pg_conn.py +++ b/tune/protox/env/util/pg_conn.py @@ -1,10 +1,10 @@ -''' +""" At a high level, this file's goal is to provide helpers to manage a Postgres instance during agent tuning. On the other hand, the goal of dbms.postgres.cli is to (1) install+build postgres and (2) create pgdata. util.pg provides helpers used by *both* of the above files (as well as other files). -''' +""" import os import shutil import threading @@ -126,9 +126,9 @@ def start_with_changes( dump_page_cache: bool = False, save_checkpoint: bool = False, ) -> bool: - ''' + """ This function assumes that some snapshot has already been untarred into self.pgdata_dpath - ''' + """ # Install the new configuration changes. if conf_changes is not None: if SHARED_PRELOAD_LIBRARIES: @@ -244,7 +244,7 @@ def start_with_changes( return True def _set_up_boot(self, intelligent_cache: bool, early_stop: bool, seq_sample: bool, seq_sample_pct: int, seq_sample_seed: int, mu_hyp_opt: float, mu_hyp_time: int, mu_hyp_stdev: float): - ''' + """ Sets up Boot on the currently running Postgres instances. Uses instance vars of PostgresConn for configuration. I chose to not encode any "default values" in this function. This is so that all values @@ -252,7 +252,7 @@ def _set_up_boot(self, intelligent_cache: bool, early_stop: bool, seq_sample: bo was used in a given experiment by looking only at the config file. If we did encode "default values" in the function, we would need to know the state of the code at the time of the experiment, which is very difficult in the general case. 
- ''' + """ # If any of these commands fail, they'll throw a Python exception # Thus, if none of them throw an exception, we know they passed self.logger.get_logger(__name__).debug("Setting up boot") From 79cee7231ce1b75f198ba195f1bd6c413daafec8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:03:20 +0000 Subject: [PATCH 028/100] maximal fixed --- tune/protox/agent/replay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 2cc9ef50..95429fb2 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -192,8 +192,7 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a print(f"rewards={rewards}") print(f"min_reward={min_reward}") - assert False, "done" - + maximal = replay_args.maximal if maximal: target = [r[1] for r in rewards if r[0] == min_reward] assert len(target) >= 1 @@ -208,6 +207,8 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a else: logging.info(f"Maximal found: {min_reward}") + assert False, "done" + num_lines = 0 with open(f"{args.input}/{filename}", "r") as f: for line in f: From 86acc80193de6db71071ea925d7aaa540b7468b1 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:05:20 +0000 Subject: [PATCH 029/100] num lines --- tune/protox/agent/replay.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 95429fb2..730d4325 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -207,16 +207,17 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a else: logging.info(f"Maximal found: {min_reward}") - assert False, "done" - num_lines = 0 - with open(f"{args.input}/{filename}", "r") as f: + with open_and_save(dbgym_cfg, output_log_fpath) as f: for line in f: if "Baseline Metric" in line: num_lines += 1 - elif "mv" in line and "tuning_steps" in line: + elif "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line: num_lines += 1 + print(f"num_lines={num_lines}") + assert False, "done" + def run_sample(action, timeout): samples = [] # This should reliably check that we are loading the correct knobs... From a3038b573a87ef7aeacb8e4b61d669d79a1f0854 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:11:38 +0000 Subject: [PATCH 030/100] initial fix to run_sample() --- tune/protox/agent/replay.py | 41 ++++++++++++++++++++++--------------- tune/protox/env/workload.py | 4 ++-- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 730d4325..451ee161 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -215,47 +215,54 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a elif "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line: num_lines += 1 - print(f"num_lines={num_lines}") - assert False, "done" - def run_sample(action, timeout): samples = [] # This should reliably check that we are loading the correct knobs... 
- ql_knobs = spec.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} - for i in range(args.samples): - runtime = spec.workload._execute_workload( - connection=env.connection, - workload_timeout=timeout, - ql_knobs=ql_knobs, - env_spec=spec, - blocklist=[l for l in args.blocklist.split(",") if len(l) > 0]) + ql_knobs = pg_env.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} + for i in range(replay_args.samples): + runtime = pg_env.workload.execute_workload( + pg_conn=pg_env.pg_conn, + actions=[built_action], + action_names=["Replay"], + observation_space=None, + action_space=pg_env.action_space, + reset_metrics=None, + override_workload_timeout=hpo_params["workload_timeout"], + query_timeout=hpo_params["query_timeout"], + workload_qdir=None, + disable_pg_hint=False, + blocklist=replay_args.blocklist, + first=False, + ) samples.append(runtime) logging.info(f"Runtime: {runtime}") - if runtime >= args.workload_timeout: + if runtime >= replay_args.workload_timeout: break - if args.samples == 2 and runtime >= timeout: + if replay_args.samples == 2 and runtime >= timeout: break - elif args.samples > 2 and len(samples) >= 2 and runtime >= timeout: + elif replay_args.samples > 2 and len(samples) >= 2 and runtime >= timeout: break return samples run_data = [] pbar = tqdm.tqdm(total=num_lines) - with open(f"{args.input}/{filename}", "r") as f: + with open_and_save(dbgym_cfg, output_log_fpath) as f: current_step = 0 start_found = False start_time = None - timeout = args.workload_timeout + timeout = replay_args.workload_timeout cur_reward_max = timeout selected_action_knobs = None noop_index = False maximal_repo = None existing_indexes = [] + assert False, "done" + for line in f: # Keep going until we've found the start. if not start_found: @@ -267,7 +274,7 @@ def run_sample(action, timeout): elif "Selected action: " in line: act = eval(line.split("Selected action: ")[-1]) - selected_action_knobs = env.action_space.get_knob_space().from_jsonable(act[0])[0] + selected_action_knobs = pg_env.action_space.get_knob_space().from_jsonable(act[0])[0] noop_index = "NOOP" in act[1][0] elif (maximal and ("mv" in line and "tuning_steps" in line)): diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index bff6cee5..025ff0f3 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -329,7 +329,7 @@ def max_indexable(self) -> int: return max([len(cols) for _, cols in self.query_usages.items()]) @time_record("execute") - def _execute_workload( + def execute_workload( self, pg_conn: PostgresConn, actions: list[HolonAction] = [], @@ -690,7 +690,7 @@ def execute( # We can only create a state if we succeeded. 
success = observation_space.check_benchbase(self.dbgym_cfg, results) else: - ret = self._execute_workload( + ret = self.execute_workload( pg_conn, actions=actions, actions_names=actions_names, From 551fd67512799523eceea96e61f8a77ce49dd08b Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:13:41 +0000 Subject: [PATCH 031/100] fixed all parsing errors --- tune/protox/agent/replay.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 451ee161..83eb7e87 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -261,14 +261,12 @@ def run_sample(action, timeout): maximal_repo = None existing_indexes = [] - assert False, "done" - for line in f: # Keep going until we've found the start. if not start_found: if "Baseline Metric" in line: start_found = True - start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0]) + start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0]) pbar.update(1) continue @@ -283,10 +281,10 @@ def run_sample(action, timeout): elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): if "mv" in line and "tuning_steps" in line: repo = eval(line.split("Running ")[-1])[-1] - time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0]) + time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) elif "Found new maximal state with" in line: repo = eval(maximal_repo.split("Running ")[-1])[-1] - time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0]) + time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) maximal_repo = None # Get the evaluation reward. From 0e2486bb77c910fdf7d3a180ca9bef1866461221 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:15:17 +0000 Subject: [PATCH 032/100] run raw csv path fixed --- tune/protox/agent/replay.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 83eb7e87..9a9e0e50 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -16,7 +16,7 @@ from pathlib import Path from dateutil.parser import parse -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, workload_name_fn, default_tuning_steps_dpath +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required from tune.protox.agent.build_trial import build_trial @@ -288,7 +288,9 @@ def run_sample(action, timeout): maximal_repo = None # Get the evaluation reward. 
- reward = pd.read_csv(f"{args.input}/{repo}/run.raw.csv") + run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv" + save_file(dbgym_cfg, run_raw_csv_fpath) + reward = pd.read_csv(run_raw_csv_fpath) assert len(reward.columns) == 6 has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] reward = reward["Latency (microseconds)"].sum() / 1e6 @@ -299,7 +301,7 @@ def run_sample(action, timeout): knobs = {} insert_knobs = False - with open(f"{args.input}/{repo}/act_sql.txt", "r") as f: + with open_and_save(tuning_steps_dpath / repo / "act_sql.txt", "r") as f: for line in f: line = line.strip() if len(line) == 0: From 55faf8ecc29407cb1885bcba275da17a48f3f690 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:16:02 +0000 Subject: [PATCH 033/100] maximal_only fixed --- tune/protox/agent/replay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 9a9e0e50..5f3e8da0 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -296,12 +296,12 @@ def run_sample(action, timeout): reward = reward["Latency (microseconds)"].sum() / 1e6 assert reward > 0 - if ((not maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): + if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): index_sqls = [] knobs = {} insert_knobs = False - with open_and_save(tuning_steps_dpath / repo / "act_sql.txt", "r") as f: + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "act_sql.txt") as f: for line in f: line = line.strip() if len(line) == 0: From 92020c3a969571433c201263c1c77b460026edd0 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:25:04 +0000 Subject: [PATCH 034/100] now properly ignoring baseline --- tune/protox/agent/replay.py | 24 ++++++++++++++---------- tune/protox/env/logger.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 5f3e8da0..655b8efe 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -139,6 +139,9 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a """ Replay a single tuning run (as in one tuning_steps/ folder). 
""" + def _is_tuning_step_line(line: str) -> bool: + return "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line and "baseline" not in line + hpo_params_fpath = tuning_steps_dpath / "params.json" with open_and_save(dbgym_cfg, hpo_params_fpath) as f: hpo_params = json.load(f) @@ -161,7 +164,7 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0]) start_found = True else: - if "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line: + if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] last_folder = repo.split("/")[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) @@ -212,10 +215,10 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a for line in f: if "Baseline Metric" in line: num_lines += 1 - elif "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line: + elif _is_tuning_step_line(line): num_lines += 1 - def run_sample(action, timeout): + def _run_sample(action, timeout): samples = [] # This should reliably check that we are loading the correct knobs... ql_knobs = pg_env.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} @@ -275,11 +278,11 @@ def run_sample(action, timeout): selected_action_knobs = pg_env.action_space.get_knob_space().from_jsonable(act[0])[0] noop_index = "NOOP" in act[1][0] - elif (maximal and ("mv" in line and "tuning_steps" in line)): + elif (maximal and (_is_tuning_step_line(line))): maximal_repo = line - elif (maximal and "Found new maximal state with" in line) or (not maximal and ("mv" in line and "tuning_steps" in line)): - if "mv" in line and "tuning_steps" in line: + elif (maximal and "Found new maximal state with" in line) or (not maximal and (_is_tuning_step_line(line))): + if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) elif "Found new maximal state with" in line: @@ -299,9 +302,10 @@ def run_sample(action, timeout): if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): index_sqls = [] knobs = {} - insert_knobs = False - - with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "act_sql.txt") as f: + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.json") as f: + action_json = json.load(f) + print(f"len(action_json)={len(action_json)}") + assert False, "done" for line in f: line = line.strip() if len(line) == 0: @@ -342,7 +346,7 @@ def run_sample(action, timeout): if not args.simulated: # Get samples. - run_samples = samples = run_sample(knobs, timeout) + run_samples = samples = _run_sample(knobs, timeout) logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). 
New Samples: {samples}") else: run_samples = samples = [reward, reward] diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index ae0339d7..3cb0fbfc 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -113,7 +113,7 @@ def stash_results( f.write(str(info_dict["prior_state_container"])) if info_dict["action_json"]: - with open(f"{self.tuning_steps_dpath}/{time}/action.txt", "w") as f: + with open(f"{self.tuning_steps_dpath}/{time}/action.json", "w") as f: f.write(info_dict["action_json"]) def advance(self) -> None: From 98d549b77e3a2e333a5aeb553e6326d22ba8cb65 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 00:29:34 +0000 Subject: [PATCH 035/100] now parsing action.json --- tune/protox/agent/replay.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 655b8efe..ec423121 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -301,23 +301,20 @@ def _run_sample(action, timeout): if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): index_sqls = [] - knobs = {} + all_knobs = {} with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.json") as f: action_json = json.load(f) - print(f"len(action_json)={len(action_json)}") - assert False, "done" - for line in f: - line = line.strip() - if len(line) == 0: - insert_knobs = True - elif not insert_knobs: - index_sqls.append(line) - else: - k, v = line.split(" = ") - knobs[k] = float(v) + assert len(action_json) == 3, "action_json should be a list with system knobs, an index, and per-query knobs" + system_knobs = action_json[0] + index_sqls = action_json[1] + query_knobs = action_json[2] + all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} + + print(f"index_sqls={index_sqls}") + print(f"all_knobs={all_knobs}") assert len(index_sqls) > 0 - assert len(knobs) > 0 + assert len(all_knobs) > 0 with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: prior_states = eval(f.read()) all_sc = [s.strip() for s in prior_states[1]] @@ -346,7 +343,7 @@ def _run_sample(action, timeout): if not args.simulated: # Get samples. - run_samples = samples = _run_sample(knobs, timeout) + run_samples = samples = _run_sample(all_knobs, timeout) logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). 
New Samples: {samples}") else: run_samples = samples = [reward, reward] From 41a4ac1c55e660c4977b389bf2620ce36e5ff4c3 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 01:03:27 +0000 Subject: [PATCH 036/100] now reading prior_state.pkl correctly --- tune/protox/agent/replay.py | 16 ++++++++++------ tune/protox/env/logger.py | 12 +++++++----- tune/protox/env/mqo/mqo_wrapper.py | 2 +- tune/protox/env/pg_env.py | 4 ++-- tune/protox/env/types.py | 4 ++-- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index ec423121..567ba2d2 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -8,6 +8,7 @@ import datetime import json import logging +import pickle import click import yaml import pandas as pd @@ -302,7 +303,7 @@ def _run_sample(action, timeout): if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): index_sqls = [] all_knobs = {} - with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.json") as f: + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.json", "r") as f: action_json = json.load(f) assert len(action_json) == 3, "action_json should be a list with system knobs, an index, and per-query knobs" system_knobs = action_json[0] @@ -310,20 +311,23 @@ def _run_sample(action, timeout): query_knobs = action_json[2] all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} - print(f"index_sqls={index_sqls}") - print(f"all_knobs={all_knobs}") + print(f"index_sqls 1={index_sqls}") assert len(index_sqls) > 0 assert len(all_knobs) > 0 - with open(f"{args.input}/{repo}/prior_state.txt", "r") as f: - prior_states = eval(f.read()) - all_sc = [s.strip() for s in prior_states[1]] + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: + prior_states = pickle.load(f) + index_acts = prior_states[1] + all_sc = [index_act.sql(True, True).strip() for index_act in index_acts] if not noop_index: all_sc.extend(index_sqls) all_sc = [a for a in all_sc if not "USING btree ()" in a] index_sqls = all_sc + print(f"index_sqls 2={index_sqls}") + assert False, "done" + execute_sqls = [] for index_sql in index_sqls: if index_sql in existing_indexes: diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 3cb0fbfc..03b7eb0c 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -1,6 +1,7 @@ import inspect import json import logging +import pickle import time from datetime import datetime from pathlib import Path @@ -109,12 +110,13 @@ def stash_results( ].run() if info_dict["prior_state_container"]: - with open(f"{self.tuning_steps_dpath}/{time}/prior_state.txt", "w") as f: - f.write(str(info_dict["prior_state_container"])) + with open(self.tuning_steps_dpath / time / "prior_state.pkl", "wb") as f: + # info_dict["prior_state_container"] is a somewhat complex object so we use pickle over json + pickle.dump(info_dict["prior_state_container"], f) - if info_dict["action_json"]: - with open(f"{self.tuning_steps_dpath}/{time}/action.json", "w") as f: - f.write(info_dict["action_json"]) + if info_dict["action_json_str"]: + with open(self.tuning_steps_dpath / time / "action.json", "w") as f: + f.write(info_dict["action_json_str"]) def advance(self) -> None: if self.writer is None: diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 1b33923b..ec99c247 100644 --- 
a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -279,7 +279,7 @@ def transmute( with torch.no_grad(): # Pass the mutilated action back through. assert isinstance(self.action_space, HolonSpace) - info["action_json"] = json.dumps(self.action_space.to_jsonable([action])) + info["action_json_str"] = json.dumps(self.action_space.to_jsonable([action])) info["maximal_embed"] = self.action_space.to_latent([action]) return self.unwrapped.step_post_execute(success, action, info) diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index a37d80b8..aedc29f8 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -197,7 +197,7 @@ def reset( # type: ignore "results": results, "prior_state_container": None, "prior_pgconf": None, - "action_json": None, + "action_json_str": None, } ) self.baseline_metric = metric @@ -286,7 +286,7 @@ def step_execute( "query_metric_data": query_metric_data, "reward": reward, "results": results, - "action_json": json.dumps( + "action_json_str": json.dumps( self.action_space.to_jsonable([a[1] for a in actions]) ), } diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 0ee36f85..0bcba623 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -192,8 +192,8 @@ class EnvInfoDict(TypedDict, total=False): q_timeout: bool # Query metric data. query_metric_data: Optional[dict[str, BestQueryRun]] - # JSON of the action that was executed. - action_json: Optional[str] + # JSON string of the action that was executed. + action_json_str: Optional[str] # ProtoAction of the altered step action. maximal_embed: ProtoAction From 132fb16ddd4e8bca4e82022960300aa68d26c5c7 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 01:46:58 +0000 Subject: [PATCH 037/100] now outputting IndexAction instead of SQL string to action.txt --- tune/protox/agent/replay.py | 25 ++++++++++-------------- tune/protox/env/logger.py | 6 +++--- tune/protox/env/mqo/mqo_wrapper.py | 3 ++- tune/protox/env/pg_env.py | 7 +++---- tune/protox/env/space/holon_space.py | 3 +++ tune/protox/env/space/primitive/index.py | 5 +++++ tune/protox/env/types.py | 5 +++-- 7 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 567ba2d2..8210c718 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -191,11 +191,6 @@ def _is_tuning_step_line(line: str) -> bool: rewards = sorted(rewards, key=lambda x: x[0]) min_reward = min([r[0] for r in rewards]) - print(f"run_raw_csv_fpaths={run_raw_csv_fpaths}") - print(f"run_raw_csvs={run_raw_csvs}") - print(f"rewards={rewards}") - print(f"min_reward={min_reward}") - maximal = replay_args.maximal if maximal: target = [r[1] for r in rewards if r[0] == min_reward] @@ -301,17 +296,18 @@ def _run_sample(action, timeout): assert reward > 0 if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): - index_sqls = [] - all_knobs = {} - with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.json", "r") as f: - action_json = json.load(f) - assert len(action_json) == 3, "action_json should be a list with system knobs, an index, and per-query knobs" - system_knobs = action_json[0] - index_sqls = action_json[1] - query_knobs = action_json[2] + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: + actions_info = pickle.load(f) + assert type(actions_info) is list and len(actions_info) == 1, f"there 
should only be one action in actions_info {actions_info}" + action_info = actions_info[0] + assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" + system_knobs = action_info[0] + index_acts = action_info[1] + query_knobs = action_info[2] all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} - print(f"index_sqls 1={index_sqls}") + print(f"index_acts 1={index_acts}") + assert False, "done" assert len(index_sqls) > 0 assert len(all_knobs) > 0 @@ -326,7 +322,6 @@ def _run_sample(action, timeout): index_sqls = all_sc print(f"index_sqls 2={index_sqls}") - assert False, "done" execute_sqls = [] for index_sql in index_sqls: diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 03b7eb0c..68468afc 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -114,9 +114,9 @@ def stash_results( # info_dict["prior_state_container"] is a somewhat complex object so we use pickle over json pickle.dump(info_dict["prior_state_container"], f) - if info_dict["action_json_str"]: - with open(self.tuning_steps_dpath / time / "action.json", "w") as f: - f.write(info_dict["action_json_str"]) + if info_dict["actions_info"]: + with open(self.tuning_steps_dpath / time / "action.pkl", "wb") as f: + pickle.dump(info_dict["actions_info"], f) def advance(self) -> None: if self.writer is None: diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index ec99c247..835f21d4 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -279,7 +279,8 @@ def transmute( with torch.no_grad(): # Pass the mutilated action back through. assert isinstance(self.action_space, HolonSpace) - info["action_json_str"] = json.dumps(self.action_space.to_jsonable([action])) + actions_info = self.action_space.convert_actions_to_format_for_replay([action]) + info["actions_info"] = actions_info info["maximal_embed"] = self.action_space.to_latent([action]) return self.unwrapped.step_post_execute(success, action, info) diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index aedc29f8..9104a1f5 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -197,7 +197,7 @@ def reset( # type: ignore "results": results, "prior_state_container": None, "prior_pgconf": None, - "action_json_str": None, + "actions_info": None, } ) self.baseline_metric = metric @@ -278,6 +278,7 @@ def step_execute( metric, reward = self.reward_utility(did_error=True) results, q_timeout, query_metric_data = None, True, None + actions_info = self.action_space.convert_actions_to_format_for_replay([action[1] for action in actions]) info.update( EnvInfoDict( { @@ -286,9 +287,7 @@ def step_execute( "query_metric_data": query_metric_data, "reward": reward, "results": results, - "action_json_str": json.dumps( - self.action_space.to_jsonable([a[1] for a in actions]) - ), + "actions_info": actions_info, } ) ) diff --git a/tune/protox/env/space/holon_space.py b/tune/protox/env/space/holon_space.py index f6f25cb9..34e1b188 100644 --- a/tune/protox/env/space/holon_space.py +++ b/tune/protox/env/space/holon_space.py @@ -369,3 +369,6 @@ def generate_plan_from_config( config_changes = list(itertools.chain(*[o[0] for o in outputs])) sql_commands = list(itertools.chain(*[o[1] for o in outputs])) return config_changes, sql_commands + + def convert_actions_to_format_for_replay(self, actions: list[HolonAction]): + return [(a[0], 
self.get_index_space().to_action(a[1]), a[2]) for a in actions] \ No newline at end of file diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py index ad357be0..49e787c7 100644 --- a/tune/protox/env/space/primitive/index.py +++ b/tune/protox/env/space/primitive/index.py @@ -59,6 +59,11 @@ def construct_md( @property def idx_name(self) -> str: + """ + The idx_name of an IndexAction uniquely identifies it. If two actions represent the same index, they will + have the same idx_name. If they don't represent the same index, they will have different idx_names. + This is a more reliable way of testing equality than using sql(), because sql() has options. + """ if self._idx_name is not None: return self._idx_name diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 0bcba623..442a6a89 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -192,8 +192,9 @@ class EnvInfoDict(TypedDict, total=False): q_timeout: bool # Query metric data. query_metric_data: Optional[dict[str, BestQueryRun]] - # JSON string of the action that was executed. - action_json_str: Optional[str] + # Information about the actions that were executed this step. + # The actions are in a format usable by replay. + actions_info: Tuple["KnobSpaceAction", "IndexAction", "QuerySpaceAction"] # ProtoAction of the altered step action. maximal_embed: ProtoAction From 2646f7306813a34a1b457b66a8463c845670da05 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 01:58:35 +0000 Subject: [PATCH 038/100] done with combining index acts from action and previous --- tune/protox/agent/replay.py | 51 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 8210c718..a5373331 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -258,7 +258,7 @@ def _run_sample(action, timeout): selected_action_knobs = None noop_index = False maximal_repo = None - existing_indexes = [] + existing_index_acts = [] for line in f: # Keep going until we've found the start. 
@@ -296,49 +296,54 @@ def _run_sample(action, timeout): assert reward > 0 if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): + index_acts = [] + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: actions_info = pickle.load(f) assert type(actions_info) is list and len(actions_info) == 1, f"there should only be one action in actions_info {actions_info}" action_info = actions_info[0] assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" system_knobs = action_info[0] - index_acts = action_info[1] + index_acts.append(action_info[1]) query_knobs = action_info[2] all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} print(f"index_acts 1={index_acts}") - assert False, "done" - assert len(index_sqls) > 0 + assert len(index_acts) > 0 assert len(all_knobs) > 0 with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: prior_states = pickle.load(f) - index_acts = prior_states[1] - all_sc = [index_act.sql(True, True).strip() for index_act in index_acts] + prior_index_acts = prior_states[1] + all_sc = [index_act for index_act in prior_index_acts] if not noop_index: - all_sc.extend(index_sqls) + all_sc.extend(index_acts) - all_sc = [a for a in all_sc if not "USING btree ()" in a] - index_sqls = all_sc + all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True, True)] + index_acts = all_sc - print(f"index_sqls 2={index_sqls}") + print(f"index_acts 2={index_acts}") + assert False, "done" - execute_sqls = [] - for index_sql in index_sqls: - if index_sql in existing_indexes: + # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step + index_modifaction_sqls = [] + for index_act in index_acts: + if index_act in existing_index_acts: + assert False, "done 2" continue - execute_sqls.append(index_sql) - for index_sql in existing_indexes: - if index_sql not in index_sqls: - indexname = index_sql.split("CREATE INDEX")[-1].split(" ON ")[0] - execute_sqls.append(f"DROP INDEX IF EXISTS {indexname}") + index_modifaction_sqls.append(index_act.sql(True, True)) + for index_act in existing_index_acts: + if index_act not in index_acts: + index_modifaction_sqls.append(index_act.sql(False, True)) - if not args.simulated: - # Reset snapshot. - env.action_space.reset(connection=env.connection, workload=env.workload) + print(f"index_modifaction_sqls={index_modifaction_sqls}") + assert False, "done" + + if not replay_args.simulated: + # Apply index changes cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) - env.shift_state(cc, execute_sqls, dump_page_cache=True) - existing_indexes = index_sqls + env.shift_state(cc, index_modifaction_sqls, dump_page_cache=True) + existing_index_acts = index_acts if not args.simulated: # Get samples. 
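Note on the hunk above: it reconciles the indexes the replay step should have (index_acts) against the indexes currently applied (existing_index_acts), emitting CREATE INDEX statements for additions and DROP INDEX statements for removals. Below is a minimal standalone sketch of that set-reconciliation step, assuming a simplified stand-in for IndexAction that only provides equality, hashing, and a sql(add) method; all names here are illustrative, not the real tune.protox classes.

    class SimpleIndexAction:
        def __init__(self, name: str, tbl: str, cols: tuple):
            self.name, self.tbl, self.cols = name, tbl, cols

        def __eq__(self, other):
            # Identity is the index's function (table + columns), not its name.
            return isinstance(other, SimpleIndexAction) and (self.tbl, self.cols) == (other.tbl, other.cols)

        def __hash__(self):
            return hash((self.tbl, self.cols))

        def sql(self, add: bool) -> str:
            # CREATE when the index should exist, DROP when it should not.
            if add:
                return f"CREATE INDEX {self.name} ON {self.tbl} ({', '.join(self.cols)})"
            return f"DROP INDEX IF EXISTS {self.name}"

    def reconcile(target: set, existing: set) -> list:
        # Mirrors the two loops in the hunk above: add what's missing, drop what's stale.
        sqls = [act.sql(True) for act in target if act not in existing]
        sqls += [act.sql(False) for act in existing if act not in target]
        return sqls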
From d49bdef9d037ca1de448d0a8d2b42867f7404486 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 01:58:48 +0000 Subject: [PATCH 039/100] done with combining index acts from action and previous --- tune/protox/agent/replay.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index a5373331..46ed451f 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -308,8 +308,6 @@ def _run_sample(action, timeout): query_knobs = action_info[2] all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} - print(f"index_acts 1={index_acts}") - assert len(index_acts) > 0 assert len(all_knobs) > 0 with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: @@ -322,9 +320,6 @@ def _run_sample(action, timeout): all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True, True)] index_acts = all_sc - print(f"index_acts 2={index_acts}") - assert False, "done" - # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step index_modifaction_sqls = [] for index_act in index_acts: From 0e3777f7a286349f2cff1337ef7381201eed3425 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 01:59:52 +0000 Subject: [PATCH 040/100] done with creating index_modifaction_sqls --- tune/protox/agent/replay.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 46ed451f..a8cced34 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -317,7 +317,7 @@ def _run_sample(action, timeout): if not noop_index: all_sc.extend(index_acts) - all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True, True)] + all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True)] index_acts = all_sc # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step @@ -326,10 +326,10 @@ def _run_sample(action, timeout): if index_act in existing_index_acts: assert False, "done 2" continue - index_modifaction_sqls.append(index_act.sql(True, True)) + index_modifaction_sqls.append(index_act.sql(True)) for index_act in existing_index_acts: if index_act not in index_acts: - index_modifaction_sqls.append(index_act.sql(False, True)) + index_modifaction_sqls.append(index_act.sql(False)) print(f"index_modifaction_sqls={index_modifaction_sqls}") assert False, "done" From da8d07829028f70f4c1bad83356befc7bad93b69 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 03:11:23 +0000 Subject: [PATCH 041/100] done with shift_state --- tune/protox/agent/replay.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index a8cced34..31df5ba3 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -178,7 +178,7 @@ def _is_tuning_step_line(line: str) -> bool: # Build PostgresEnv. _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) - pg_env = agent_env.unwrapped + pg_env: PostgresEnv = agent_env.unwrapped # Reset things. 
if not replay_args.simulated: @@ -317,7 +317,7 @@ def _run_sample(action, timeout): if not noop_index: all_sc.extend(index_acts) - all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True)] + all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True, True)] index_acts = all_sc # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step @@ -326,20 +326,19 @@ def _run_sample(action, timeout): if index_act in existing_index_acts: assert False, "done 2" continue - index_modifaction_sqls.append(index_act.sql(True)) + index_modifaction_sqls.append(index_act.sql(True, True)) for index_act in existing_index_acts: if index_act not in index_acts: index_modifaction_sqls.append(index_act.sql(False)) - print(f"index_modifaction_sqls={index_modifaction_sqls}") - assert False, "done" - if not replay_args.simulated: # Apply index changes - cc, _ = env.action_space.get_knob_space().generate_plan(selected_action_knobs if selected_action_knobs else {}) - env.shift_state(cc, index_modifaction_sqls, dump_page_cache=True) + cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) + pg_env.shift_state(cc, index_modifaction_sqls, dump_page_cache=True) existing_index_acts = index_acts + assert False, "done" + if not args.simulated: # Get samples. run_samples = samples = _run_sample(all_knobs, timeout) From 41a9059d2e7c0b7248b4a55938a39ddb4fd4f9be Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 03:16:14 +0000 Subject: [PATCH 042/100] run_sample running --- tune/protox/agent/replay.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 31df5ba3..b2a40178 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -143,6 +143,10 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a def _is_tuning_step_line(line: str) -> bool: return "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line and "baseline" not in line + maximal = replay_args.maximal + maximal_only = replay_args.maximal_only + threshold = replay_args.threshold + hpo_params_fpath = tuning_steps_dpath / "params.json" with open_and_save(dbgym_cfg, hpo_params_fpath) as f: hpo_params = json.load(f) @@ -191,7 +195,6 @@ def _is_tuning_step_line(line: str) -> bool: rewards = sorted(rewards, key=lambda x: x[0]) min_reward = min([r[0] for r in rewards]) - maximal = replay_args.maximal if maximal: target = [r[1] for r in rewards if r[0] == min_reward] assert len(target) >= 1 @@ -214,15 +217,14 @@ def _is_tuning_step_line(line: str) -> bool: elif _is_tuning_step_line(line): num_lines += 1 - def _run_sample(action, timeout): + def _run_sample(action_info, timeout): samples = [] # This should reliably check that we are loading the correct knobs... 
- ql_knobs = pg_env.action_space.get_knob_space().get_query_level_knobs(action) if action is not None else {} - for i in range(replay_args.samples): + for _ in range(replay_args.num_samples): runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, - actions=[built_action], - action_names=["Replay"], + actions=[action_info], + actions_names=["Replay"], observation_space=None, action_space=pg_env.action_space, reset_metrics=None, @@ -239,9 +241,9 @@ def _run_sample(action, timeout): if runtime >= replay_args.workload_timeout: break - if replay_args.samples == 2 and runtime >= timeout: + if replay_args.num_samples == 2 and runtime >= timeout: break - elif replay_args.samples > 2 and len(samples) >= 2 and runtime >= timeout: + elif replay_args.num_samples > 2 and len(samples) >= 2 and runtime >= timeout: break return samples @@ -324,7 +326,6 @@ def _run_sample(action, timeout): index_modifaction_sqls = [] for index_act in index_acts: if index_act in existing_index_acts: - assert False, "done 2" continue index_modifaction_sqls.append(index_act.sql(True, True)) for index_act in existing_index_acts: @@ -337,11 +338,9 @@ def _run_sample(action, timeout): pg_env.shift_state(cc, index_modifaction_sqls, dump_page_cache=True) existing_index_acts = index_acts - assert False, "done" - - if not args.simulated: + if not replay_args.simulated: # Get samples. - run_samples = samples = _run_sample(all_knobs, timeout) + run_samples = samples = _run_sample(action_info, timeout) logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") else: run_samples = samples = [reward, reward] @@ -383,5 +382,5 @@ def _run_sample(action, timeout): run_data.append(data) # Output. - pd.DataFrame(run_data).to_csv(args.output, index=False) - env.close() \ No newline at end of file + pd.DataFrame(run_data).to_csv(dbgym_cfg.cur_task_runs_data_path("run_data.csv"), index=False) + pg_env.close() \ No newline at end of file From 1fcca5a98f67539e33a3a321947948bf2d9cece8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:20:56 +0000 Subject: [PATCH 043/100] removed indexes from constraints --- benchmark/tpch/tpch_constraints.sql | 47 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/benchmark/tpch/tpch_constraints.sql b/benchmark/tpch/tpch_constraints.sql index fca8c21d..81e23f20 100644 --- a/benchmark/tpch/tpch_constraints.sql +++ b/benchmark/tpch/tpch_constraints.sql @@ -7,26 +7,27 @@ ALTER TABLE orders ADD CONSTRAINT orders_o_custkey_fkey FOREIGN KEY (o_custkey) ALTER TABLE lineitem ADD CONSTRAINT lineitem_l_orderkey_fkey FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE; ALTER TABLE lineitem ADD CONSTRAINT lineitem_l_partkey_l_suppkey_fkey FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE; -CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); -CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); -CREATE INDEX n_rk ON nation (n_regionkey ASC); -CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); -CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); -CREATE INDEX s_nk ON supplier (s_nationkey ASC); -CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); -CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); -CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); -CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); -CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); -CREATE INDEX c_nk ON customer (c_nationkey ASC); 
-CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); -CREATE INDEX o_ck ON orders (o_custkey ASC); -CREATE INDEX o_od ON orders (o_orderdate ASC); -CREATE INDEX l_ok ON lineitem (l_orderkey ASC); -CREATE INDEX l_pk ON lineitem (l_partkey ASC); -CREATE INDEX l_sk ON lineitem (l_suppkey ASC); -CREATE INDEX l_sd ON lineitem (l_shipdate ASC); -CREATE INDEX l_cd ON lineitem (l_commitdate ASC); -CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); -CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); -CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC); \ No newline at end of file +-- We don't create any indexes so that there's a clean slate for tuning +-- CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); +-- CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); +-- CREATE INDEX n_rk ON nation (n_regionkey ASC); +-- CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); +-- CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); +-- CREATE INDEX s_nk ON supplier (s_nationkey ASC); +-- CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); +-- CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); +-- CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); +-- CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); +-- CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); +-- CREATE INDEX c_nk ON customer (c_nationkey ASC); +-- CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); +-- CREATE INDEX o_ck ON orders (o_custkey ASC); +-- CREATE INDEX o_od ON orders (o_orderdate ASC); +-- CREATE INDEX l_ok ON lineitem (l_orderkey ASC); +-- CREATE INDEX l_pk ON lineitem (l_partkey ASC); +-- CREATE INDEX l_sk ON lineitem (l_suppkey ASC); +-- CREATE INDEX l_sd ON lineitem (l_shipdate ASC); +-- CREATE INDEX l_cd ON lineitem (l_commitdate ASC); +-- CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); +-- CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); +-- CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC); \ No newline at end of file From 00b0c875e507688a4fe541bdbc1f1d5c2df7e01e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:21:08 +0000 Subject: [PATCH 044/100] 0.1 experiments --- experiments/protox_tpch_sf0point1/main.sh | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 experiments/protox_tpch_sf0point1/main.sh diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh new file mode 100755 index 00000000..4ae16daa --- /dev/null +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -euxo pipefail + +SCALE_FACTOR=0.1 +INTENDED_PGDATA_HARDWARE=ssd +PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ + +# space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune +exit 0 + +# benchmark +python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR +python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR + +# postgres +python3 task.py --no-startup-check dbms postgres build +python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH + +exit 0 + +# embedding +python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash +python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 + +# agent +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --duration 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR From 5cf5ef6945da803e916a0108573ac0a3d8c7d976 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:24:37 +0000 Subject: [PATCH 045/100] only stashing results for tune, and setting idx_name based on index contents now --- experiments/protox_tpch_sf10/main.sh | 3 ++ scripts/pat_test.sh | 3 ++ tune/protox/agent/hpo.py | 8 +++- tune/protox/agent/off_policy_algorithm.py | 10 ++-- tune/protox/agent/replay.py | 58 +++++++++++++---------- tune/protox/env/space/primitive/index.py | 32 +++++-------- tune/protox/env/workload.py | 2 +- 7 files changed, 67 insertions(+), 49 deletions(-) diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 62c6cf22..1b9564f1 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -7,7 +7,10 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 # benchmark diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index b6f4b525..f2fedea7 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,7 +7,10 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 # benchmark diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index ec61c764..82bf1ba2 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -464,9 +464,13 @@ def step(self) -> dict[Any, Any]: f"Baseline Metric: {baseline_metric}. Baseline Reward: {baseline_reward}" ) self.env_init = True - self.logger.stash_results(infos, name_override="baseline") + + # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get + # stashed in the same directory and potentially crash the system. + if not self.is_hpo: + self.logger.stash_results(infos, name_override="baseline") else: - self.agent.learn(self.env, total_timesteps=1) + self.agent.learn(self.env, total_timesteps=1, is_hpo=self.is_hpo) self.timeout.pause() self.logger.advance() diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index 68e5f1be..d0393a1f 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -140,6 +140,7 @@ def collect_rollouts( env: AgentEnv, train_freq: TrainFreq, replay_buffer: ReplayBuffer, + is_hpo: bool, action_noise: Optional[ActionNoise] = None, learning_starts: int = 0, ) -> RolloutReturn: @@ -182,7 +183,9 @@ def collect_rollouts( # Rescale and perform action new_obs, rewards, terms, truncs, infos = env.step(actions) dones = terms or truncs - if self.logger: + # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get + # stashed in the same directory and potentially crash the system. 
+ if self.logger and not is_hpo: self.logger.stash_results(infos) self.num_timesteps += 1 @@ -210,7 +213,7 @@ def collect_rollouts( num_collected_steps, num_collected_episodes, continue_training ) - def learn(self, env: AgentEnv, total_timesteps: int) -> None: + def learn(self, env: AgentEnv, total_timesteps: int, is_hpo: bool) -> None: assert isinstance(env, AgentEnv) total_timesteps = self._setup_learn(env, total_timesteps) @@ -218,9 +221,10 @@ def learn(self, env: AgentEnv, total_timesteps: int) -> None: rollout = self.collect_rollouts( env, train_freq=self.train_freq, + replay_buffer=self.replay_buffer, + is_hpo=is_hpo, action_noise=self.action_noise, learning_starts=self.learning_starts, - replay_buffer=self.replay_buffer, ) if rollout.continue_training is False: diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index b2a40178..ad6b08fe 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -219,7 +219,6 @@ def _is_tuning_step_line(line: str) -> bool: def _run_sample(action_info, timeout): samples = [] - # This should reliably check that we are loading the correct knobs... for _ in range(replay_args.num_samples): runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, @@ -229,7 +228,7 @@ def _run_sample(action_info, timeout): action_space=pg_env.action_space, reset_metrics=None, override_workload_timeout=hpo_params["workload_timeout"], - query_timeout=hpo_params["query_timeout"], + query_timeout=None, workload_qdir=None, disable_pg_hint=False, blocklist=replay_args.blocklist, @@ -252,15 +251,14 @@ def _run_sample(action_info, timeout): pbar = tqdm.tqdm(total=num_lines) with open_and_save(dbgym_cfg, output_log_fpath) as f: current_step = 0 - start_found = False start_time = None timeout = replay_args.workload_timeout cur_reward_max = timeout - selected_action_knobs = None noop_index = False maximal_repo = None existing_index_acts = [] + print_step = 0 for line in f: # Keep going until we've found the start. 
@@ -273,7 +271,6 @@ def _run_sample(action_info, timeout): elif "Selected action: " in line: act = eval(line.split("Selected action: ")[-1]) - selected_action_knobs = pg_env.action_space.get_knob_space().from_jsonable(act[0])[0] noop_index = "NOOP" in act[1][0] elif (maximal and (_is_tuning_step_line(line))): @@ -298,44 +295,55 @@ def _run_sample(action_info, timeout): assert reward > 0 if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): - index_acts = [] + index_acts = set() with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: actions_info = pickle.load(f) assert type(actions_info) is list and len(actions_info) == 1, f"there should only be one action in actions_info {actions_info}" action_info = actions_info[0] assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" - system_knobs = action_info[0] - index_acts.append(action_info[1]) - query_knobs = action_info[2] - all_knobs = {k: v for k, v in list(system_knobs.items()) + list(query_knobs.items())} + index_acts.add(action_info[1]) + + print(f"\n\n\nprint_step={print_step}") + print_step += 1 + print(f"existing_index_acts={existing_index_acts}") + print(f"before index_acts={index_acts}") assert len(index_acts) > 0 - assert len(all_knobs) > 0 with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: prior_states = pickle.load(f) - prior_index_acts = prior_states[1] - all_sc = [index_act for index_act in prior_index_acts] + all_sc = set(prior_states[1]) if not noop_index: - all_sc.extend(index_acts) + for index_act in index_acts: + all_sc.add(index_act) - all_sc = [a for a in all_sc if not "USING btree ()" in a.sql(True, True)] + all_sc = {a for a in all_sc if not "USING btree ()" in a.sql(True)} index_acts = all_sc + print(f"after index_acts={index_acts}") + # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step - index_modifaction_sqls = [] + index_modification_sqls = [] for index_act in index_acts: - if index_act in existing_index_acts: - continue - index_modifaction_sqls.append(index_act.sql(True, True)) - for index_act in existing_index_acts: - if index_act not in index_acts: - index_modifaction_sqls.append(index_act.sql(False)) + if index_act not in existing_index_acts: + index_modification_sqls.append(index_act.sql(True)) + for existing_index_act in existing_index_acts: + if existing_index_act not in index_acts: + index_modification_sqls.append(existing_index_act.sql(False)) + + print(f"index_modification_sqls={index_modification_sqls}") + pg_indexes_cursor = pg_env.pg_conn.conn().execute("SELECT * FROM pg_indexes WHERE schemaname = 'public';") + rows = pg_indexes_cursor.fetchall() + for row in rows: + if "UNIQUE" not in row[4]: + print(f"row={row}") + print("\n\n") if not replay_args.simulated: # Apply index changes cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) - pg_env.shift_state(cc, index_modifaction_sqls, dump_page_cache=True) + print(f"cc={cc}") + pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False) existing_index_acts = index_acts if not replay_args.simulated: @@ -382,5 +390,7 @@ def _run_sample(action_info, timeout): run_data.append(data) # Output. 
- pd.DataFrame(run_data).to_csv(dbgym_cfg.cur_task_runs_data_path("run_data.csv"), index=False) + run_data_df = pd.DataFrame(run_data) + print(f"Finished replaying with run_data_df=\n{run_data_df}\n. Data stored in {dbgym_cfg.cur_task_runs_path()}.") + run_data_df.to_csv(dbgym_cfg.cur_task_runs_data_path("run_data.csv"), index=False) pg_env.close() \ No newline at end of file diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py index 49e787c7..fe1417ba 100644 --- a/tune/protox/env/space/primitive/index.py +++ b/tune/protox/env/space/primitive/index.py @@ -6,7 +6,8 @@ class IndexAction(object): IA = TypeVar("IA", bound="IndexAction") - index_counter: ClassVar[int] = 0 + index_name_counter = 0 + index_name_map: dict["IndexAction", int] = dict() def __init__( self, @@ -26,7 +27,6 @@ def __init__( self.inc_names = inc_names self.raw_repr = raw_repr self.bias = bias - self._idx_name: Optional[str] = None @property def is_valid(self) -> bool: @@ -54,25 +54,18 @@ def construct_md( raw_repr=None, bias=0.0, ) - ia._idx_name = idx_name + assert ia.get_index_name() == idx_name return ia - @property - def idx_name(self) -> str: - """ - The idx_name of an IndexAction uniquely identifies it. If two actions represent the same index, they will - have the same idx_name. If they don't represent the same index, they will have different idx_names. - This is a more reliable way of testing equality than using sql(), because sql() has options. - """ - if self._idx_name is not None: - return self._idx_name - - IndexAction.index_counter += 1 - self._idx_name = f"index{IndexAction.index_counter}" - return self._idx_name + def get_index_name(self): + if self not in IndexAction.index_name_map: + IndexAction.index_name_map[self] = f"index{IndexAction.index_name_counter}" + IndexAction.index_name_counter += 1 + + return IndexAction.index_name_map[self] def sql(self, add: bool, allow_fail: bool = False) -> str: - idx_name = self.idx_name + idx_name = self.get_index_name() if not add: if allow_fail: return f"DROP INDEX IF EXISTS {idx_name}" @@ -102,12 +95,13 @@ def __eq__(self, other: object) -> bool: assert isinstance(other, IndexAction) ts = set(self.inc_names) os = set(other.inc_names) - return ( + is_eq = ( self.idx_type == other.idx_type and self.tbl_name == other.tbl_name and self.columns == other.columns and ts == os ) + return is_eq return False def __hash__(self) -> int: @@ -124,7 +118,7 @@ def __hash__(self) -> int: def __repr__(self, add: bool = True) -> str: return "{a} {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( a="CREATE" if add else "NOOP", - idx_name=self.idx_name, + idx_name=self.get_index_name(), tbl_name=self.tbl_name, idx_type=self.idx_type, columns=",".join(self.columns), diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 025ff0f3..c0faa432 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -379,7 +379,7 @@ def execute_workload( for action in actions ], ) - + # Figure out workload to execute. if workload_qdir is not None and workload_qdir[0] is not None: # Load actual queries to execute. 
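Note on the index_name_map change above: because IndexAction defines __eq__ and __hash__ over index identity (table, columns, included columns), any two actions describing the same index resolve to the same generated "indexN" name, while distinct indexes get fresh names. A small sketch of that class-level memoization pattern, with illustrative identifiers only (not the real IndexAction API):

    class NameRegistry:
        _counter = 0
        _name_map: dict = {}

        @classmethod
        def name_for(cls, key) -> str:
            # Equal (and equally hashed) keys always resolve to the same name;
            # an unseen key gets the next counter value.
            if key not in cls._name_map:
                cls._name_map[key] = f"index{cls._counter}"
                cls._counter += 1
            return cls._name_map[key]

    # Two equal keys share one name; a different key gets a new one.
    assert NameRegistry.name_for(("lineitem", ("l_orderkey",))) == NameRegistry.name_for(("lineitem", ("l_orderkey",)))
    assert NameRegistry.name_for(("orders", ("o_custkey",))) != NameRegistry.name_for(("lineitem", ("l_orderkey",)))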
From ca0bf853d51d7e6d0a2557be532b78ff6b633cd2 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:28:48 +0000 Subject: [PATCH 046/100] added some comments about idx_name --- tune/protox/env/space/primitive/index.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py index fe1417ba..4fe9d749 100644 --- a/tune/protox/env/space/primitive/index.py +++ b/tune/protox/env/space/primitive/index.py @@ -54,16 +54,9 @@ def construct_md( raw_repr=None, bias=0.0, ) - assert ia.get_index_name() == idx_name + assert ia.get_index_name() == idx_name, f"ia.get_index_name()={ia.get_index_name()} but idx_name={idx_name}" return ia - def get_index_name(self): - if self not in IndexAction.index_name_map: - IndexAction.index_name_map[self] = f"index{IndexAction.index_name_counter}" - IndexAction.index_name_counter += 1 - - return IndexAction.index_name_map[self] - def sql(self, add: bool, allow_fail: bool = False) -> str: idx_name = self.get_index_name() if not add: @@ -84,6 +77,15 @@ def sql(self, add: bool, allow_fail: bool = False) -> str: ), ) + # A given index name (like "index5") maps one-to-one to the function of an + # index (i.e. its table, columns, etc.). + def get_index_name(self): + if self not in IndexAction.index_name_map: + IndexAction.index_name_map[self] = f"index{IndexAction.index_name_counter}" + IndexAction.index_name_counter += 1 + + return IndexAction.index_name_map[self] + # This equality/hash mechanism is purely based off of index identity. # We ensure that all other flags are exclusive from a "validity" pre-check. # From d92707ccb15e54522aaf6ebc433669307e2a126b Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:34:01 +0000 Subject: [PATCH 047/100] now always dumping page cache --- scripts/pat_test.sh | 2 -- tune/protox/agent/replay.py | 2 +- tune/protox/env/pg_env.py | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index f2fedea7..a9518db6 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -8,9 +8,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index ad6b08fe..dc67f79f 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -343,7 +343,7 @@ def _run_sample(action_info, timeout): # Apply index changes cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) print(f"cc={cc}") - pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False) + pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=True) existing_index_acts = index_acts if not replay_args.simulated: diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 9104a1f5..6779c6ad 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -122,7 +122,7 @@ def reset( # type: ignore config_changes, sql_commands = self.action_space.generate_plan_from_config( config, sc ) - assert self.shift_state(config_changes, sql_commands) + assert self.shift_state(config_changes, sql_commands, dump_page_cache=True) # Note that we do not actually update the baseline metric/reward used by the reward # utility. This is so the reward is not stochastic with respect to the starting state. @@ -228,7 +228,7 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] action, prior_state ) # Attempt to maneuver to the new state. - success = self.shift_state(config_changes, sql_commands) + success = self.shift_state(config_changes, sql_commands, dump_page_cache=True) return success, EnvInfoDict( { "attempted_changes": (config_changes, sql_commands), From 82f9ea12d72384d8cb764cc8eaf0368b4dadf8e8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:34:48 +0000 Subject: [PATCH 048/100] removed print statements --- tune/protox/agent/replay.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index dc67f79f..c26d714a 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -258,7 +258,6 @@ def _run_sample(action_info, timeout): noop_index = False maximal_repo = None existing_index_acts = [] - print_step = 0 for line in f: # Keep going until we've found the start. 
@@ -304,11 +303,6 @@ def _run_sample(action_info, timeout): assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" index_acts.add(action_info[1]) - print(f"\n\n\nprint_step={print_step}") - print_step += 1 - print(f"existing_index_acts={existing_index_acts}") - print(f"before index_acts={index_acts}") - assert len(index_acts) > 0 with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: prior_states = pickle.load(f) @@ -320,8 +314,6 @@ def _run_sample(action_info, timeout): all_sc = {a for a in all_sc if not "USING btree ()" in a.sql(True)} index_acts = all_sc - print(f"after index_acts={index_acts}") - # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step index_modification_sqls = [] for index_act in index_acts: @@ -331,18 +323,9 @@ def _run_sample(action_info, timeout): if existing_index_act not in index_acts: index_modification_sqls.append(existing_index_act.sql(False)) - print(f"index_modification_sqls={index_modification_sqls}") - pg_indexes_cursor = pg_env.pg_conn.conn().execute("SELECT * FROM pg_indexes WHERE schemaname = 'public';") - rows = pg_indexes_cursor.fetchall() - for row in rows: - if "UNIQUE" not in row[4]: - print(f"row={row}") - print("\n\n") - if not replay_args.simulated: # Apply index changes cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) - print(f"cc={cc}") pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=True) existing_index_acts = index_acts From c2ad745b94c9d4d5e8a768085431a7883227ad2f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 19:43:02 +0000 Subject: [PATCH 049/100] duration -> trial_duration --- experiments/protox_tpch_sf0point1/main.sh | 2 +- experiments/protox_tpch_sf10/main.sh | 2 +- scripts/pat_test.sh | 2 +- tune/protox/agent/coerce_config.py | 2 +- tune/protox/agent/hpo.py | 28 +++++++++++------------ tune/protox/agent/tune.py | 2 +- tune/protox/embedding/analyze.py | 12 +++++----- tune/protox/embedding/datagen.py | 4 ++-- tune/protox/embedding/train_all.py | 4 ++-- 9 files changed, 29 insertions(+), 29 deletions(-) diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 4ae16daa..510f76cc 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -28,6 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --duration 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --trial-duration 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor 
$SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 1b9564f1..953f754d 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -26,5 +26,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --duration 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --trial-duration 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index a9518db6..6f972c9e 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -27,6 +27,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --trial-duration 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py index f2bc6b26..82f0bab5 100644 --- a/tune/protox/agent/coerce_config.py +++ b/tune/protox/agent/coerce_config.py @@ -24,7 +24,7 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic "verbose": True, "trace": True, "seed": hpo_params["mythril_args"]["seed"], - "duration": hpo_params["mythril_args"]["duration"], + "trial_duration": hpo_params["mythril_args"]["trial_duration"], "workload_timeout": hpo_params["mythril_args"]["workload_timeout"], "query_timeout": hpo_params["mythril_args"]["timeout"], "pgconn_info": { diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 82bf1ba2..50c6bdae 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -30,7 +30,7 @@ class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, 
enable_boot_during_hpo, hpo_boot_config_fpath):
+    def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, trial_duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath):
         self.benchmark_name = benchmark_name
         self.workload_name = workload_name
         self.embedder_path = embedder_path
@@ -45,7 +45,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi
         self.agent = agent
         self.max_concurrent = max_concurrent
         self.num_samples = num_samples
-        self.duration = duration
+        self.trial_duration = trial_duration
         self.workload_timeout = workload_timeout
         self.query_timeout = query_timeout
         self.enable_boot_during_hpo = enable_boot_during_hpo
@@ -145,7 +145,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi
     help=f"The # of times to sample specific hyperparameter configs from the hyperparameter search space and train agent models with.",
 )
 @click.option(
-    "--duration", default=30, type=float, help="The total number of hours to run for."
+    "--trial-duration", default=4, type=float, help="The number of hours to run each hyperparameter config trial for."
 )
 @click.option(
     "--workload-timeout",
@@ -190,7 +190,7 @@ def hpo(
     agent,
     max_concurrent,
     num_samples,
-    duration,
+    trial_duration,
     workload_timeout,
     query_timeout,
     enable_boot_during_hpo: bool,
@@ -235,7 +235,7 @@ def hpo(
         assert False

     # Create args object
-    hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath)
+    hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, trial_duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath)

     _tune_hpo(dbgym_cfg, hpo_args)

@@ -249,7 +249,7 @@ def build_space(
     embedder_path: list[Path],
     pgconn_info: dict[str, str],
     benchbase_config: dict[str, Any]={},
-    duration: int=30,
+    trial_duration: int=30,
     seed: int=0,
     enable_boot_during_hpo: bool=False,
     hpo_boot_config_fpath: Path=None,
@@ -268,7 +268,7 @@ def build_space(
         "hpo_boot_config_fpath": hpo_boot_config_fpath,

         # Timeouts.
-        "duration": duration,
+        "trial_duration": trial_duration,
         "workload_timeout": tune.choice(workload_timeouts),
         "query_timeout": tune.choice(query_timeouts),

@@ -375,9 +375,9 @@ def build_space(


 class TuneTimeoutChecker(object):
-    def __init__(self, duration: int) -> None:
-        self.limit = (duration * 3600) > 0
-        self.remain = int(duration * 3600)
+    def __init__(self, trial_duration: int) -> None:
+        self.limit = (trial_duration * 3600) > 0
+        self.remain = int(trial_duration * 3600)
         self.running = False
         self.start = 0.
@@ -425,7 +425,7 @@ def setup(self, hpo_params: dict[str, Any]) -> None: np.random.seed(seed) torch.manual_seed(seed) - self.timeout = TuneTimeoutChecker(hpo_params["duration"]) + self.timeout_checker = TuneTimeoutChecker(hpo_params["trial_duration"]) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, seed=seed, @@ -436,7 +436,7 @@ def setup(self, hpo_params: dict[str, Any]) -> None: self.logger.get_logger(None).info(f"Seed: {seed}") # Attach the timeout checker and loggers. - self.agent.set_timeout_checker(self.timeout) + self.agent.set_timeout_checker(self.timeout_checker) self.agent.set_logger(self.logger) self.env_init = False @@ -446,7 +446,7 @@ def setup(self, hpo_params: dict[str, Any]) -> None: def step(self) -> dict[Any, Any]: self.step_count += 1 # Only measure the actual tuning time. - self.timeout.resume() + self.timeout_checker.resume() episode = self.agent._episode_num it = self.agent.num_timesteps @@ -574,7 +574,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: "pgbin_path": hpo_args.pgbin_path, }, benchbase_config=benchbase_config, - duration=hpo_args.duration, + trial_duration=hpo_args.trial_duration, seed=hpo_args.seed, enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, hpo_boot_config_fpath=hpo_args.hpo_boot_config_fpath, diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index e8fe0343..d83445b9 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -81,7 +81,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: data = [] step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv" - while (time.time() - start) < hpo_params["duration"] * 3600: + while (time.time() - start) < hpo_params["trial_duration"] * 3600: data.append(t.step()) # Continuously write the file out. 
diff --git a/tune/protox/embedding/analyze.py b/tune/protox/embedding/analyze.py index 24746a9d..5341a0da 100644 --- a/tune/protox/embedding/analyze.py +++ b/tune/protox/embedding/analyze.py @@ -64,11 +64,11 @@ def analyze_all_embeddings_parts(dbgym_cfg: DBGymConfig, num_parts: int, generic start_time = time.time() for part_i in range(num_parts): _analyze_embeddings_part(dbgym_cfg, part_i, generic_args, analyze_args) - duration = time.time() - start_time + analyze_all_parts_duration = time.time() - start_time with open( dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "analyze_all_time.txt", "w" ) as f: - f.write(f"{duration}") + f.write(f"{analyze_all_parts_duration}") def _analyze_embeddings_part(dbgym_cfg: DBGymConfig, part_i: int, generic_args: EmbeddingTrainGenericArgs, analyze_args: EmbeddingAnalyzeArgs): @@ -79,15 +79,15 @@ def _analyze_embeddings_part(dbgym_cfg: DBGymConfig, part_i: int, generic_args: start_time = time.time() _create_stats_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - duration = time.time() - start_time + analyze_part_duration = time.time() - start_time with open(os.path.join(part_dpath, "stats_time.txt"), "w") as f: - f.write(f"{duration}") + f.write(f"{analyze_part_duration}") start_time = time.time() _create_ranges_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - duration = time.time() - start_time + create_range_duration = time.time() - start_time with open(os.path.join(part_dpath, "ranges_time.txt"), "w") as f: - f.write(f"{duration}") + f.write(f"{create_range_duration}") def _create_stats_for_part(dbgym_cfg: DBGymConfig, part_dpath: Path, generic_args: EmbeddingTrainGenericArgs, analyze_args: EmbeddingAnalyzeArgs): diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index a86d6b44..940a3dfd 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -257,9 +257,9 @@ def datagen( start_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) _gen_traindata_dir(dbgym_cfg, generic_args, dir_gen_args) _combine_traindata_dir_into_parquet(dbgym_cfg, generic_args, file_gen_args) - duration = time.time() - start_time + datagen_duration = time.time() - start_time with open(f"{dbgym_cfg.dbgym_this_run_path}/datagen_time.txt", "w") as f: - f.write(f"{duration}") + f.write(f"{datagen_duration}") stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py index b8ae195c..20d73292 100644 --- a/tune/protox/embedding/train_all.py +++ b/tune/protox/embedding/train_all.py @@ -250,9 +250,9 @@ def train_all_embeddings( print(f"Trial {results[i]} FAILED") assert False - duration = time.time() - start_time + train_all_embeddings_duration = time.time() - start_time with open(f"{dbgym_cfg.dbgym_this_run_path}/hpo_train_time.txt", "w") as f: - f.write(f"{duration}") + f.write(f"{train_all_embeddings_duration}") def _hpo_train( From 31f052115590cf6a0303be4d9649a715ff91489c Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 20:33:06 +0000 Subject: [PATCH 050/100] added separate CLI arg for tune duration --- experiments/protox_tpch_sf0point1/main.sh | 2 +- experiments/protox_tpch_sf10/main.sh | 2 +- scripts/pat_test.sh | 7 +++--- tune/protox/agent/coerce_config.py | 2 +- tune/protox/agent/hpo.py | 30 ++++++++++++----------- tune/protox/agent/replay.py | 4 ++- tune/protox/agent/tune.py | 11 +++++++-- tune/protox/env/pg_env.py | 4 ++- 8 files changed, 38 insertions(+), 24 deletions(-) diff --git 
a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 510f76cc..b56c6e86 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -28,6 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --trial-duration 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --tune-duration-during-hpo 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 953f754d..2c50e528 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -26,5 +26,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --trial-duration 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 6f972c9e..c9c05821 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,8 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 +# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark @@ -27,6 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --trial-duration 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py index 82f0bab5..7006c28a 100644 --- a/tune/protox/agent/coerce_config.py +++ b/tune/protox/agent/coerce_config.py @@ -24,7 +24,7 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic "verbose": True, "trace": True, "seed": hpo_params["mythril_args"]["seed"], - "trial_duration": hpo_params["mythril_args"]["trial_duration"], + "tune_duration_during_hpo": hpo_params["mythril_args"]["tune_duration_during_hpo"], "workload_timeout": hpo_params["mythril_args"]["workload_timeout"], "query_timeout": hpo_params["mythril_args"]["timeout"], "pgconn_info": { diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 50c6bdae..a9c83dff 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -30,7 +30,7 @@ class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, trial_duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath): + def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, 
enable_boot_during_hpo, hpo_boot_config_fpath): self.benchmark_name = benchmark_name self.workload_name = workload_name self.embedder_path = embedder_path @@ -45,7 +45,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi self.agent = agent self.max_concurrent = max_concurrent self.num_samples = num_samples - self.trial_duration = trial_duration + self.tune_duration_during_hpo = tune_duration_during_hpo self.workload_timeout = workload_timeout self.query_timeout = query_timeout self.enable_boot_during_hpo = enable_boot_during_hpo @@ -145,7 +145,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help=f"The # of specific hyperparameter configs to sample from the hyperparameter search space and train agent models with.", ) @click.option( - "--trial-duration", default=4, type=float, help="The number of hours to run each hyperparamer config trial for." + "--tune-duration-during-hpo", default=4, type=float, help="The number of hours to run each hyperparameter config tuning trial for." ) @click.option( - "--workload-timeout", @@ -190,7 +190,7 @@ def hpo( agent, max_concurrent, num_samples, - trial_duration, + tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo: bool, @@ -235,7 +235,7 @@ def hpo( assert False # Create args object - hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, trial_duration, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath) + hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath) _tune_hpo(dbgym_cfg, hpo_args) @@ -249,7 +249,7 @@ def build_space( embedder_path: list[Path], pgconn_info: dict[str, str], benchbase_config: dict[str, Any]={}, - trial_duration: int=30, + tune_duration_during_hpo: int=30, seed: int=0, enable_boot_during_hpo: bool=False, hpo_boot_config_fpath: Path=None, @@ -268,7 +268,7 @@ def build_space( "hpo_boot_config_fpath": hpo_boot_config_fpath, # Timeouts. - "trial_duration": trial_duration, + "tune_duration_during_hpo": tune_duration_during_hpo, "workload_timeout": tune.choice(workload_timeouts), "query_timeout": tune.choice(query_timeouts), @@ -375,9 +375,9 @@ def build_space( class TuneTimeoutChecker(object): - def __init__(self, trial_duration: int) -> None: - self.limit = (trial_duration * 3600) > 0 - self.remain = int(trial_duration * 3600) + def __init__(self, tune_duration: int) -> None: + self.limit = (tune_duration * 3600) > 0 + self.remain = int(tune_duration * 3600) self.running = False self.start = 0.
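The TuneTimeoutChecker hunk above only shows the renamed constructor, so for orientation, here is a minimal runnable sketch of how an hours-denominated timeout checker along these lines can be driven. Only the constructor fields are taken from the diff; the resume()/pause()/__call__() bodies are illustrative assumptions rather than the repository's implementation, though they mirror the `self.timeout_checker.pause()` and `if self.timeout_checker():` call sites in the hunks below.

    import time

    class TimeoutCheckerSketch:
        def __init__(self, tune_duration: float) -> None:
            self.limit = (tune_duration * 3600) > 0   # a zero-hour budget disables the check
            self.remain = int(tune_duration * 3600)   # remaining budget, in seconds
            self.running = False
            self.start = 0.

        def resume(self) -> None:
            # Start the clock on a slice of tuning work (assumed API).
            if self.limit and not self.running:
                self.start = time.time()
                self.running = True

        def pause(self) -> None:
            # Stop the clock and deduct the elapsed slice from the budget.
            if self.limit and self.running:
                self.remain -= int(time.time() - self.start)
                self.running = False

        def __call__(self) -> bool:
            # True once the budget is exhausted.
            if not self.limit:
                return False
            elapsed = (time.time() - self.start) if self.running else 0.0
            return (self.remain - elapsed) <= 0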
@@ -425,7 +425,9 @@ def setup(self, hpo_params: dict[str, Any]) -> None: np.random.seed(seed) torch.manual_seed(seed) - self.timeout_checker = TuneTimeoutChecker(hpo_params["trial_duration"]) + tune_duration = hpo_params["tune_duration_during_hpo"] if self.is_hpo else hpo_params["tune_duration_during_tune"] + + self.timeout_checker = TuneTimeoutChecker(tune_duration) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, seed=seed, @@ -472,7 +474,7 @@ def step(self) -> dict[Any, Any]: else: self.agent.learn(self.env, total_timesteps=1, is_hpo=self.is_hpo) - self.timeout.pause() + self.timeout_checker.pause() self.logger.advance() # Step telemetry that we care about. @@ -490,7 +492,7 @@ def step(self) -> dict[Any, Any]: } # If we've timed out. Note that we've timed out. - if self.timeout(): + if self.timeout_checker(): self.cleanup() data[ray.tune.result.DONE] = True @@ -574,7 +576,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: "pgbin_path": hpo_args.pgbin_path, }, benchbase_config=benchbase_config, - trial_duration=hpo_args.trial_duration, + tune_duration_during_hpo=hpo_args.tune_duration_during_hpo, seed=hpo_args.seed, enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, hpo_boot_config_fpath=hpo_args.hpo_boot_config_fpath, diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index c26d714a..9212e04f 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -326,7 +326,9 @@ def _run_sample(action_info, timeout): if not replay_args.simulated: # Apply index changes cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) - pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=True) + # Like in tuning, we don't dump the page cache when calling shift_state() to see how the workload + # performs in a warm cache scenario. + pg_env.shift_state(cc, index_modification_sqls) existing_index_acts = index_acts if not replay_args.simulated: diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index d83445b9..64dc1e3f 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -43,7 +43,13 @@ type=Path, help="The path to the file configuring Boot when tuning. This may be a different Boot config than the one used for HPO.", ) -def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, tune_boot_config_fpath: Path) -> None: +@click.option( + "--tune-duration-during-tune", + default=30, + type=float, + help="The number of hours to run the tuning agent for. This may be different than how long we ran the agent for during HPO." +) +def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, tune_boot_config_fpath: Path, tune_duration_during_tune: float) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) if hpoed_agent_params_path == None: @@ -73,6 +79,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # make sure to never override any configs in hpo_params. 
hpo_params["enable_boot_during_tune"] = enable_boot_during_tune hpo_params["tune_boot_config_fpath"] = tune_boot_config_fpath + hpo_params["tune_duration_during_tune"] = tune_duration_during_tune # Piggyback off the HPO magic. t = TuneTrial(dbgym_cfg, False) @@ -81,7 +88,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: data = [] step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv" - while (time.time() - start) < hpo_params["trial_duration"] * 3600: + while (time.time() - start) < hpo_params["tune_duration_during_tune"] * 3600: data.append(t.step()) # Continuously write the file out. diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 6779c6ad..af578879 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -228,7 +228,9 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] action, prior_state ) # Attempt to maneuver to the new state. - success = self.shift_state(config_changes, sql_commands, dump_page_cache=True) + # Don't dump the page cache in shift_state() in order to see how the workload performs in + # a warm cache scenario. + success = self.shift_state(config_changes, sql_commands) return success, EnvInfoDict( { "attempted_changes": (config_changes, sql_commands), From 9dcd36baed25a3c2a9101378a6918b1ccb7d65e6 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 18 Apr 2024 23:57:17 +0000 Subject: [PATCH 051/100] added print statements to investigate replay behavior --- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 18 ++++++++++++++++-- tune/protox/agent/tune.py | 10 +++++----- tune/protox/env/logger.py | 2 +- tune/protox/env/mqo/mqo_wrapper.py | 1 + tune/protox/env/pg_env.py | 4 +++- 6 files changed, 27 insertions(+), 10 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index c9c05821..fec041dd 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -9,7 +9,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 -# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 9212e04f..dc77d0b3 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -22,6 +22,7 @@ from tune.protox.agent.build_trial import build_trial from tune.protox.env.pg_env import PostgresEnv +from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs REPLAY_DATA_FNAME = "replay_data.csv" @@ -141,7 +142,7 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a Replay a single tuning run (as in one tuning_steps/ folder). 
""" def _is_tuning_step_line(line: str) -> bool: - return "mv" in line and "tuning_steps" in line and "postgresql.auto.old" not in line and "baseline" not in line + return "mv" in line and "tuning_steps" in line and "baseline" not in line maximal = replay_args.maximal maximal_only = replay_args.maximal_only @@ -159,6 +160,8 @@ def _is_tuning_step_line(line: str) -> bool: output_log_fpath = tuning_steps_dpath / "output.log" # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder + # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`), + # so you could just do `ls tuning_steps/` if you wanted to. folders = [] start_found = False last_evaluation = None @@ -181,6 +184,7 @@ def _is_tuning_step_line(line: str) -> bool: threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None # Build PostgresEnv. + # TODO(phw2): build it with replay = true _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) pg_env: PostgresEnv = agent_env.unwrapped @@ -220,6 +224,8 @@ def _is_tuning_step_line(line: str) -> bool: def _run_sample(action_info, timeout): samples = [] for _ in range(replay_args.num_samples): + logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") + logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n") runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, actions=[action_info], @@ -258,6 +264,8 @@ def _run_sample(action_info, timeout): noop_index = False maximal_repo = None existing_index_acts = [] + if1_count = 0 + if2_count = 0 for line in f: # Keep going until we've found the start. @@ -275,7 +283,10 @@ def _run_sample(action_info, timeout): elif (maximal and (_is_tuning_step_line(line))): maximal_repo = line - elif (maximal and "Found new maximal state with" in line) or (not maximal and (_is_tuning_step_line(line))): + elif (maximal and "Found new maximal state with" in line) or (not maximal and _is_tuning_step_line(line)): + if1_count += 1 + print(f"if1_count={if1_count}") + if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) @@ -294,6 +305,9 @@ def _run_sample(action_info, timeout): assert reward > 0 if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout): + if2_count += 1 + print(f"if2_count={if2_count}") + index_acts = set() with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 64dc1e3f..63fbe33d 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -82,26 +82,26 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: hpo_params["tune_duration_during_tune"] = tune_duration_during_tune # Piggyback off the HPO magic. 
- t = TuneTrial(dbgym_cfg, False) - t.setup(hpo_params) + tune_trial = TuneTrial(dbgym_cfg, False) + tune_trial.setup(hpo_params) start = time.time() data = [] step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv" while (time.time() - start) < hpo_params["tune_duration_during_tune"] * 3600: - data.append(t.step()) + data.append(tune_trial.step()) # Continuously write the file out. pd.DataFrame(data).to_csv(step_data_fpath, index=False) - t.cleanup() + tune_trial.cleanup() # Output the step data. pd.DataFrame(data).to_csv(step_data_fpath, index=False) # Link the tuning steps data (this directory allows you to replay the tuning run). # Replaying requires output.log and params.json, so we also copy them into the tuning_steps/ directory. - # The reason I copy them in is to ensure that tuning_steps/ is a fully self-contained directory. + # The reason I don't use symlinks for output.log or params.json is to ensure that tuning_steps/ is a fully self-contained directory. tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps") shutil.copy(hpoed_agent_params_path, tuning_steps_dpath) output_fpath = dbgym_cfg.cur_task_runs_artifacts_path() / "output.log" diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 68468afc..96e10141 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -105,7 +105,7 @@ def stash_results( Path(f"{self.tuning_steps_dpath}/{time}").mkdir(parents=True, exist_ok=True) if info_dict["prior_pgconf"]: - local["mv"][ + local["cp"][ info_dict["prior_pgconf"], f"{self.tuning_steps_dpath}/{time}/old_pg.conf" ].run() diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 835f21d4..d4dadee5 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -268,6 +268,7 @@ def transmute( ) # Execute. + self.logger.get_logger(__name__).info("MQOWrapper called step_execute()") success, info = self.unwrapped.step_execute(success, runs, info) if info["query_metric_data"]: self._update_best_observed(info["query_metric_data"]) diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index af578879..f06740f5 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -12,6 +12,7 @@ from tune.protox.env.logger import Logger, time_record from tune.protox.env.space.holon_space import HolonSpace from tune.protox.env.space.state.space import StateSpace +from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs from tune.protox.env.types import ( EnvInfoDict, HolonAction, @@ -250,7 +251,8 @@ def step_execute( assert isinstance(self.observation_space, StateSpace) assert isinstance(self.action_space, HolonSpace) # Evaluate the benchmark. 
- start_time = time.time() + self.logger.get_logger(__name__).info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(self.pg_conn.conn(), self.action_space.get_knob_space().tables, self.action_space.get_knob_space().knobs, self.workload.queries)}\n\n") + self.logger.get_logger(__name__).info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(self.pg_conn.conn(), self.action_space.get_knob_space().tables)}\n\n") ( success, metric, From 3175716231bc391fda3879571d412ba006bb6b31 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 13:15:52 +0000 Subject: [PATCH 052/100] timeout -> workload_timeout --- tune/protox/agent/replay.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index dc77d0b3..491193f3 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -221,7 +221,7 @@ def _is_tuning_step_line(line: str) -> bool: elif _is_tuning_step_line(line): num_lines += 1 - def _run_sample(action_info, timeout): + def _run_sample(action_info, workload_timeout): samples = [] for _ in range(replay_args.num_samples): logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") @@ -246,9 +246,9 @@ def _run_sample(action_info, timeout): if runtime >= replay_args.workload_timeout: break - if replay_args.num_samples == 2 and runtime >= timeout: + if replay_args.num_samples == 2 and runtime >= workload_timeout: break - elif replay_args.num_samples > 2 and len(samples) >= 2 and runtime >= timeout: + elif replay_args.num_samples > 2 and len(samples) >= 2 and runtime >= workload_timeout: break return samples @@ -259,8 +259,8 @@ def _run_sample(action_info, timeout): current_step = 0 start_found = False start_time = None - timeout = replay_args.workload_timeout - cur_reward_max = timeout + workload_timeout = replay_args.workload_timeout + cur_reward_max = workload_timeout noop_index = False maximal_repo = None existing_index_acts = [] @@ -347,8 +347,8 @@ def _run_sample(action_info, timeout): if not replay_args.simulated: # Get samples. - run_samples = samples = _run_sample(action_info, timeout) - logging.info(f"Original Runtime: {reward} (timeout {has_timeout}). New Samples: {samples}") + run_samples = samples = _run_sample(action_info, workload_timeout) + logging.info(f"Original Runtime: {reward} (workload_timeout {has_timeout}). New Samples: {samples}") else: run_samples = samples = [reward, reward] @@ -363,14 +363,14 @@ def _run_sample(action_info, timeout): current_step += 1 - if (not has_timeout) or (max(run_samples) < timeout): + if (not has_timeout) or (max(run_samples) < workload_timeout): # Apply a tolerance. # If we've timed out, only apply the threshold if we've found a strictly better config.
apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0 cur_reward_max = reward - apply_threshold - if max(run_samples) < timeout: - timeout = max(run_samples) + if max(run_samples) < workload_timeout: + workload_timeout = max(run_samples) run_folder = repo.split("/")[-1] if run_folder in folders and run_folder == folders[-1]: From 43ba9c302943abd122e5dc031df192abce378c29 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 13:42:10 +0000 Subject: [PATCH 053/100] got rid of modifying workload_timeout --- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 28 +++++++++++----------------- tune/protox/env/space/holon_space.py | 2 +- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index fec041dd..5f5a5aa3 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -8,7 +8,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 491193f3..85b26d12 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -9,6 +9,7 @@ import json import logging import pickle +from typing import List, Tuple import click import yaml import pandas as pd @@ -22,7 +23,9 @@ from tune.protox.agent.build_trial import build_trial from tune.protox.env.pg_env import PostgresEnv +from tune.protox.env.space.primitive.index import IndexAction from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs +from tune.protox.env.types import HolonAction REPLAY_DATA_FNAME = "replay_data.csv" @@ -149,7 +152,7 @@ def _is_tuning_step_line(line: str) -> bool: threshold = replay_args.threshold hpo_params_fpath = tuning_steps_dpath / "params.json" - with open_and_save(dbgym_cfg, hpo_params_fpath) as f: + with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f: hpo_params = json.load(f) # Set configs to the hpo_params that are allowed to differ between HPO and tuning. # The way we set these may be different than how they were set during the tuning run, because @@ -184,7 +187,7 @@ def _is_tuning_step_line(line: str) -> bool: threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None # Build PostgresEnv. 
- # TODO(phw2): build it with replay = true + # TODO(phw2): build PostgresEnv with replay = true _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) pg_env: PostgresEnv = agent_env.unwrapped @@ -221,11 +224,12 @@ def _is_tuning_step_line(line: str) -> bool: elif _is_tuning_step_line(line): num_lines += 1 - def _run_sample(action_info, workload_timeout): + def _run_sample(action_info: "HolonAction") -> list[float]: samples = [] for _ in range(replay_args.num_samples): logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n") + # DEBUG(phw2) assert replay_args.workload_timeout == hpo_params["workload_timeout_during_replay"] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value" runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, actions=[action_info], @@ -233,7 +237,6 @@ def _run_sample(action_info, workload_timeout): observation_space=None, action_space=pg_env.action_space, reset_metrics=None, - override_workload_timeout=hpo_params["workload_timeout"], query_timeout=None, workload_qdir=None, disable_pg_hint=False, @@ -246,11 +249,6 @@ def _run_sample(action_info, workload_timeout): if runtime >= replay_args.workload_timeout: break - if replay_args.num_samples == 2 and runtime >= workload_timeout: break - elif replay_args.num_samples > 2 and len(samples) >= 2 and runtime >= workload_timeout: break - return samples @@ -259,8 +257,7 @@ def _run_sample(action_info, workload_timeout): current_step = 0 start_found = False start_time = None - workload_timeout = replay_args.workload_timeout - cur_reward_max = workload_timeout + cur_reward_max = replay_args.workload_timeout noop_index = False maximal_repo = None existing_index_acts = [] @@ -347,8 +344,8 @@ def _run_sample(action_info, workload_timeout): if not replay_args.simulated: # Get samples. - run_samples = samples = _run_sample(action_info, workload_timeout) - logging.info(f"Original Runtime: {reward} (workload_timeout {has_timeout}). New Samples: {samples}") + run_samples = samples = _run_sample(action_info) + logging.info(f"Original Runtime: {reward} (timed out? {has_timeout}). New Samples: {samples}") else: run_samples = samples = [reward, reward] @@ -363,15 +360,12 @@ def _run_sample(action_info, workload_timeout): current_step += 1 - if (not has_timeout) or (max(run_samples) < workload_timeout): + if (not has_timeout) or (max(run_samples) < replay_args.workload_timeout): # Apply a tolerance. # If we've timed out, only apply the threshold if we've found a strictly better config.
apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0 cur_reward_max = reward - apply_threshold - if max(run_samples) < workload_timeout: - workload_timeout = max(run_samples) - run_folder = repo.split("/")[-1] if run_folder in folders and run_folder == folders[-1]: break diff --git a/tune/protox/env/space/holon_space.py b/tune/protox/env/space/holon_space.py index 34e1b188..6b80f928 100644 --- a/tune/protox/env/space/holon_space.py +++ b/tune/protox/env/space/holon_space.py @@ -370,5 +370,5 @@ def generate_plan_from_config( sql_commands = list(itertools.chain(*[o[1] for o in outputs])) return config_changes, sql_commands - def convert_actions_to_format_for_replay(self, actions: list[HolonAction]): + def convert_actions_to_format_for_replay(self, actions: list[HolonAction]) -> list: return [(a[0], self.get_index_space().to_action(a[1]), a[2]) for a in actions] \ No newline at end of file From 019b4fcc2eebccc9618782c655f7651c9b207de5 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 13:46:42 +0000 Subject: [PATCH 054/100] added tuningmode enum --- misc/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/misc/utils.py b/misc/utils.py index 243bc8c1..f19c023b 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -1,3 +1,4 @@ +from enum import Enum import os import shutil import subprocess @@ -11,6 +12,9 @@ from util.shell import subprocess_run +# Enums +TuningMode = Enum('TuningMode', ['HPO', 'TUNE', 'REPLAY']) + # Default values DEFAULT_WORKLOAD_TIMEOUT = 600 From 02049c44d1bddd3d28055986223ed491a6d0fc02 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 13:50:19 +0000 Subject: [PATCH 055/100] is_hpo -> tuning_mode --- tune/protox/agent/build_trial.py | 12 ++++++------ tune/protox/agent/hpo.py | 18 +++++++++--------- tune/protox/agent/off_policy_algorithm.py | 9 +++++---- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index a270822c..88568e49 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -17,7 +17,7 @@ ) from torch import nn -from misc.utils import DBGymConfig, open_and_save, make_redis_started, save_file +from misc.utils import DBGymConfig, TuningMode, open_and_save, make_redis_started, save_file from tune.protox.agent.agent_env import AgentEnv from tune.protox.agent.buffers import ReplayBuffer from tune.protox.agent.noise import ClampNoise @@ -130,7 +130,7 @@ def f(p: ProtoAction, n: torch.Tensor) -> ProtoAction: def _build_utilities( - dbgym_cfg: DBGymConfig, pgport: int, is_hpo: bool, hpo_params: dict[str, Any] + dbgym_cfg: DBGymConfig, pgport: int, tuning_mode: TuningMode, hpo_params: dict[str, Any] ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]: logger = Logger( dbgym_cfg, @@ -151,8 +151,8 @@ def _build_utilities( # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus, # we start Redis here if necessary. 
- enable_boot = hpo_params["enable_boot_during_hpo"] if is_hpo else hpo_params["enable_boot_during_tune"] - boot_config_fpath = hpo_params["hpo_boot_config_fpath"] if is_hpo else hpo_params["tune_boot_config_fpath"] + enable_boot = hpo_params["enable_boot_during_hpo"] if tuning_mode == TuningMode.HPO else hpo_params["enable_boot_during_tune"] + boot_config_fpath = hpo_params["hpo_boot_config_fpath"] if tuning_mode == TuningMode.HPO else hpo_params["tune_boot_config_fpath"] if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) @@ -510,14 +510,14 @@ def _build_agent( def build_trial( - dbgym_cfg: DBGymConfig, seed: int, is_hpo: bool, hpo_params: dict[str, Any] + dbgym_cfg: DBGymConfig, seed: int, tuning_mode: TuningMode, hpo_params: dict[str, Any] ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]: # The massive trial builder. port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"]) _modify_benchbase_config(dbgym_cfg, port, hpo_params) - logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, port, is_hpo, hpo_params) + logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, port, tuning_mode, hpo_params) holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger) observation_space = _build_observation_space(dbgym_cfg, holon_space, lsc, hpo_params, seed) target_reset, env = _build_env( diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index a9c83dff..ec58624d 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -23,7 +23,7 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" @@ -404,13 +404,13 @@ def __call__(self) -> bool: class TuneTrial: - def __init__(self, dbgym_cfg: DBGymConfig, is_hpo: bool) -> None: + def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode) -> None: """ - We use this object for both HPO and tune. It behaves *slightly* differently - depending on what it's used for, which is why we have an is_hpo param. + We use this object for HPO, tune, and replay. It behaves *slightly* differently + depending on what it's used for, which is why we have the tuning_mode param. """ self.dbgym_cfg = dbgym_cfg - self.is_hpo = is_hpo + self.tuning_mode = tuning_mode def setup(self, hpo_params: dict[str, Any]) -> None: # Attach mythril directory to the search path. 
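A note on the is_hpo -> tuning_mode refactor in this patch: a boolean can only distinguish two of the three call sites (HPO, tuning, replay), so ternaries like the enable_boot/boot_config_fpath selection in _build_utilities above have to lump replay in with one of the other two modes. As a hedged sketch of why the three-valued enum pays off (the dict keys match the surrounding diff; the REPLAY arm anticipates patch 057, which pins Boot off during replay):

    from enum import Enum

    TuningMode = Enum('TuningMode', ['HPO', 'TUNE', 'REPLAY'])  # as added to misc/utils.py in patch 054

    def select_enable_boot(tuning_mode: TuningMode, hpo_params: dict) -> bool:
        if tuning_mode == TuningMode.HPO:
            return hpo_params["enable_boot_during_hpo"]
        elif tuning_mode == TuningMode.TUNE:
            return hpo_params["enable_boot_during_tune"]
        else:
            # With a plain is_hpo bool, replay was forced into one of the two branches above;
            # the enum lets it be its own explicit case (disabled, per patch 057).
            return False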
@@ -425,14 +425,14 @@ def setup(self, hpo_params: dict[str, Any]) -> None: np.random.seed(seed) torch.manual_seed(seed) - tune_duration = hpo_params["tune_duration_during_hpo"] if self.is_hpo else hpo_params["tune_duration_during_tune"] + tune_duration = hpo_params["tune_duration_during_hpo"] if self.tuning_mode == TuningMode.HPO else hpo_params["tune_duration_during_tune"] self.timeout_checker = TuneTimeoutChecker(tune_duration) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, seed=seed, hpo_params=hpo_params, - is_hpo=self.is_hpo, + tuning_mode=self.tuning_mode, ) self.logger.get_logger(None).info("%s", hpo_params) self.logger.get_logger(None).info(f"Seed: {seed}") @@ -469,10 +469,10 @@ def step(self) -> dict[Any, Any]: # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get # stashed in the same directory and potentially crash the system. - if not self.is_hpo: + if not self.tuning_mode == TuningMode.HPO: self.logger.stash_results(infos, name_override="baseline") else: - self.agent.learn(self.env, total_timesteps=1, is_hpo=self.is_hpo) + self.agent.learn(self.env, total_timesteps=1, tuning_mode=self.tuning_mode) self.timeout_checker.pause() self.logger.advance() diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index d0393a1f..7cac1014 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -4,6 +4,7 @@ import numpy as np from numpy.typing import NDArray +from misc.utils import TuningMode from tune.protox.agent.agent_env import AgentEnv from tune.protox.agent.base_class import BaseAlgorithm from tune.protox.agent.buffers import ReplayBuffer @@ -140,7 +141,7 @@ def collect_rollouts( env: AgentEnv, train_freq: TrainFreq, replay_buffer: ReplayBuffer, - is_hpo: bool, + tuning_mode: TuningMode, action_noise: Optional[ActionNoise] = None, learning_starts: int = 0, ) -> RolloutReturn: @@ -185,7 +186,7 @@ def collect_rollouts( dones = terms or truncs # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get # stashed in the same directory and potentially crash the system. 
- if self.logger and not is_hpo: + if self.logger and not tuning_mode == TuningMode.HPO: self.logger.stash_results(infos) self.num_timesteps += 1 @@ -213,7 +214,7 @@ def collect_rollouts( num_collected_steps, num_collected_episodes, continue_training ) - def learn(self, env: AgentEnv, total_timesteps: int, is_hpo: bool) -> None: + def learn(self, env: AgentEnv, total_timesteps: int, tuning_mode: TuningMode) -> None: assert isinstance(env, AgentEnv) total_timesteps = self._setup_learn(env, total_timesteps) @@ -222,7 +223,7 @@ def learn(self, env: AgentEnv, total_timesteps: int, is_hpo: bool) -> None: env, train_freq=self.train_freq, replay_buffer=self.replay_buffer, - is_hpo=is_hpo, + tuning_mode=tuning_mode, action_noise=self.action_noise, learning_starts=self.learning_starts, ) From e7012e149f0c5665897e34852950b19700f1b484 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 13:53:37 +0000 Subject: [PATCH 056/100] replaced replay in pg_env with tuning_mode --- tune/protox/agent/build_trial.py | 3 ++- tune/protox/agent/hpo.py | 2 +- tune/protox/agent/replay.py | 7 ++----- tune/protox/agent/tune.py | 4 ++-- tune/protox/env/pg_env.py | 10 +++++----- tune/protox/env/types.py | 2 +- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 88568e49..132dee65 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -305,6 +305,7 @@ def _build_observation_space( def _build_env( dbgym_cfg: DBGymConfig, + tuning_mode: TuningMode, hpo_params: dict[str, Any], pg_conn: PostgresConn, observation_space: StateSpace, @@ -318,6 +319,7 @@ def _build_env( env = gym.make( "Postgres-v0", dbgym_cfg=dbgym_cfg, + tuning_mode=tuning_mode, observation_space=observation_space, action_space=holon_space, workload=workload, @@ -327,7 +329,6 @@ def _build_env( query_timeout=hpo_params["query_timeout"], benchbase_config=hpo_params["benchbase_config"], logger=logger, - replay=False, ) # Check whether to create the MQO wrapper. diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index ec58624d..c17b7eac 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -517,7 +517,7 @@ class TuneOpt(Trainable): dbgym_cfg = global_dbgym_cfg def setup(self, hpo_params: dict[str, Any]) -> None: - self.trial = TuneTrial(TuneOpt.dbgym_cfg, True) + self.trial = TuneTrial(TuneOpt.dbgym_cfg, TuningMode.HPO) self.trial.setup(hpo_params) def step(self) -> dict[Any, Any]: diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 85b26d12..42fa0441 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -9,16 +9,13 @@ import json import logging import pickle -from typing import List, Tuple import click -import yaml import pandas as pd import tqdm -import argparse from pathlib import Path from dateutil.parser import parse -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath # sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required from tune.protox.agent.build_trial import build_trial @@ -188,7 +185,7 @@ def _is_tuning_step_line(line: str) -> bool: # Build PostgresEnv. 
# TODO(phw2): build PostgresEnv with replay = true - _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], False, hpo_params) + _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], TuningMode.REPLAY, hpo_params) pg_env: PostgresEnv = agent_env.unwrapped # Reset things. diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 63fbe33d..ccde9af9 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -5,7 +5,7 @@ import click import pandas as pd -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, TuningMode, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname from tune.protox.agent.coerce_config import coerce_config from tune.protox.agent.hpo import TuneTrial, build_space @@ -82,7 +82,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: hpo_params["tune_duration_during_tune"] = tune_duration_during_tune # Piggyback off the HPO magic. - tune_trial = TuneTrial(dbgym_cfg, False) + tune_trial = TuneTrial(dbgym_cfg, TuningMode.TUNE) tune_trial.setup(hpo_params) start = time.time() diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index f06740f5..05237f06 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -8,7 +8,7 @@ import psycopg from plumbum import local -from misc.utils import DBGymConfig +from misc.utils import DBGymConfig, TuningMode from tune.protox.env.logger import Logger, time_record from tune.protox.env.space.holon_space import HolonSpace from tune.protox.env.space.state.space import StateSpace @@ -28,6 +28,7 @@ class PostgresEnv(gym.Env[Any, Any]): def __init__( self, dbgym_cfg: DBGymConfig, + tuning_mode: TuningMode, observation_space: StateSpace, action_space: HolonSpace, workload: Workload, @@ -37,12 +38,11 @@ def __init__( query_timeout: int, benchbase_config: dict[str, Any], logger: Optional[Logger] = None, - replay: bool = False, ): super().__init__() self.dbgym_cfg = dbgym_cfg - self.replay = replay + self.tuning_mode = tuning_mode self.logger = logger self.action_space = action_space self.observation_space = observation_space @@ -144,7 +144,7 @@ def reset( # type: ignore else: # Restore a pristine snapshot of the world. self.pg_conn.restore_pristine_snapshot() - assert not self.replay + assert self.tuning_mode != TuningMode.REPLAY # On the first time, run the benchmark to get the baseline. assert isinstance(self.observation_space, StateSpace) @@ -349,7 +349,7 @@ def step_post_execute( def step( # type: ignore self, action: HolonAction ) -> Tuple[Any, float, bool, bool, EnvInfoDict]: - assert not self.replay + assert self.tuning_mode != TuningMode.REPLAY success, info = self.step_before_execution(action) success, info = self.step_execute(success, [("PerQuery", action)], info) return self.step_post_execute(success, action, info) diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 442a6a89..4ae71d76 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -193,7 +193,7 @@ class EnvInfoDict(TypedDict, total=False): # Query metric data. 
query_metric_data: Optional[dict[str, BestQueryRun]] # Information about the actions that were executed this step. - # The actions are in a format usable by replay. + # The actions are in a format usable by replay. (TODO(phw2)) actions_info: Tuple["KnobSpaceAction", "IndexAction", "QuerySpaceAction"] # ProtoAction of the altered step action. maximal_embed: ProtoAction From 243411c1b8512025f448b5151a6583e0865bb53f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 14:00:47 +0000 Subject: [PATCH 057/100] changed HPO params to use enums instead of having different names --- tune/protox/agent/build_trial.py | 4 ++-- tune/protox/agent/hpo.py | 14 ++++++++++---- tune/protox/agent/replay.py | 10 +++++----- tune/protox/agent/tune.py | 16 ++++++++-------- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 132dee65..c398ace6 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -151,8 +151,8 @@ def _build_utilities( # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus, # we start Redis here if necessary. - enable_boot = hpo_params["enable_boot_during_hpo"] if tuning_mode == TuningMode.HPO else hpo_params["enable_boot_during_tune"] - boot_config_fpath = hpo_params["hpo_boot_config_fpath"] if tuning_mode == TuningMode.HPO else hpo_params["tune_boot_config_fpath"] + enable_boot = hpo_params["enable_boot"][tuning_mode] + boot_config_fpath = hpo_params["boot_config_fpath"][tuning_mode] if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index c17b7eac..949029e2 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -165,7 +165,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help="Whether to enable the Boot query accelerator during the HPO process. Deciding to use Boot during HPO is separate from deciding to use Boot during tuning.", ) @click.option( - "--hpo-boot-config-fpath", + "--boot-config-fpath-during-hpo", default=DEFAULT_BOOT_CONFIG_FPATH, type=Path, help="The path to the file configuring Boot when running HPO. When tuning, you may use a different Boot config.", @@ -264,11 +264,17 @@ def build_space( "verbose": True, "trace": True, "seed": seed, - "enable_boot_during_hpo": enable_boot_during_hpo, - "hpo_boot_config_fpath": hpo_boot_config_fpath, + "enable_boot": { + str(TuningMode.HPO): enable_boot_during_hpo, + }, + "boot_config_fpath": { + str(TuningMode.HPO): hpo_boot_config_fpath, + }, # Timeouts. - "tune_duration_during_hpo": tune_duration_during_hpo, + "tune_duration": { + str(TuningMode.HPO): tune_duration_during_hpo, + }, "workload_timeout": tune.choice(workload_timeouts), "query_timeout": tune.choice(query_timeouts), diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 42fa0441..f23ba10e 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -151,11 +151,11 @@ def _is_tuning_step_line(line: str) -> bool: hpo_params_fpath = tuning_steps_dpath / "params.json" with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f: hpo_params = json.load(f) - # Set configs to the hpo_params that are allowed to differ between HPO and tuning. - # The way we set these may be different than how they were set during the tuning run, because - # we are replaying instead of tuning. 
- hpo_params["enable_boot_during_tune"] = False - hpo_params["tune_boot_config_fpath"] = DEFAULT_BOOT_CONFIG_FPATH + # Set the hpo_params that are allowed to differ between HPO, tuning, and replay. + hpo_params["enable_boot"][str(TuningMode.REPLAY)] = False + hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None + # TODO(phw2): set tune_duration to be None to represent inf + hpo_params["tune_duration"][str(TuningMode.REPLAY)] = hpo_params["tune_duration"][str(TuningMode.TUNE)] output_log_fpath = tuning_steps_dpath / "output.log" diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index ccde9af9..681ed520 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -38,7 +38,7 @@ help="Whether to enable the Boot query accelerator during the tuning process. Deciding to use Boot during tuning is separate from deciding to use Boot during HPO.", ) @click.option( - "--tune-boot-config-fpath", + "--boot-config-fpath-during-tune", default=DEFAULT_BOOT_CONFIG_FPATH, type=Path, help="The path to the file configuring Boot when tuning. This may be a different Boot config than the one used for HPO.", @@ -49,7 +49,7 @@ type=float, help="The number of hours to run the tuning agent for. This may be different than how long we ran the agent for during HPO." ) -def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, tune_boot_config_fpath: Path, tune_duration_during_tune: float) -> None: +def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, boot_config_fpath_during_tune: Path, tune_duration_during_tune: float) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) if hpoed_agent_params_path == None: @@ -57,7 +57,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: # Convert all input paths to absolute paths hpoed_agent_params_path = conv_inputpath_to_realabspath(dbgym_cfg, hpoed_agent_params_path) - tune_boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, tune_boot_config_fpath) + boot_config_fpath_during_tune = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath_during_tune) # Tune with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: @@ -72,14 +72,14 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: pgconn_info={} ), hpo_params) - # Add configs to the hpo_params that are allowed to differ between HPO and tuning. - # In general, for configs that can differ between HPO and tuning, I chose to name + # Set the hpo_params that are allowed to differ between HPO, tuning, and replay. + # In general, for configs that can differ between HPO, tuning, and replay I chose to name # them "*tune*" and "*hpo*" to the end of them instead of naming them the same # and overriding the config during tuning. It's just much less confusing if we # make sure to never override any configs in hpo_params. 
- hpo_params["enable_boot_during_tune"] = enable_boot_during_tune - hpo_params["tune_boot_config_fpath"] = tune_boot_config_fpath - hpo_params["tune_duration_during_tune"] = tune_duration_during_tune + hpo_params["enable_boot"][str(TuningMode.TUNE)] = enable_boot_during_tune + hpo_params["boot_config_fpath"][str(TuningMode.TUNE)] = boot_config_fpath_during_tune + hpo_params["tune_duration"][str(TuningMode.TUNE)] = tune_duration_during_tune # Piggyback off the HPO magic. tune_trial = TuneTrial(dbgym_cfg, TuningMode.TUNE) From cbb87a75e3ba5a95196a8af8a6de38e423bdb410 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 14:21:37 +0000 Subject: [PATCH 058/100] hpo, tune, and replay all now not crashing --- scripts/pat_test.sh | 2 +- tune/protox/agent/build_trial.py | 11 +++++----- tune/protox/agent/coerce_config.py | 6 ++++-- tune/protox/agent/hpo.py | 26 +++++++++++++---------- tune/protox/agent/off_policy_algorithm.py | 4 ++-- tune/protox/agent/replay.py | 17 +++++++-------- tune/protox/agent/tune.py | 12 ++++++++--- 7 files changed, 45 insertions(+), 33 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 5f5a5aa3..2b09f1b9 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -8,7 +8,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index c398ace6..2bdd49a6 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -130,7 +130,7 @@ def f(p: ProtoAction, n: torch.Tensor) -> ProtoAction: def _build_utilities( - dbgym_cfg: DBGymConfig, pgport: int, tuning_mode: TuningMode, hpo_params: dict[str, Any] + dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, pgport: int, hpo_params: dict[str, Any] ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]: logger = Logger( dbgym_cfg, @@ -151,8 +151,8 @@ def _build_utilities( # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus, # we start Redis here if necessary. - enable_boot = hpo_params["enable_boot"][tuning_mode] - boot_config_fpath = hpo_params["boot_config_fpath"][tuning_mode] + enable_boot = hpo_params["enable_boot"][str(tuning_mode)] + boot_config_fpath = hpo_params["boot_config_fpath"][str(tuning_mode)] if enable_boot: make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) @@ -511,18 +511,19 @@ def _build_agent( def build_trial( - dbgym_cfg: DBGymConfig, seed: int, tuning_mode: TuningMode, hpo_params: dict[str, Any] + dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any] ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]: # The massive trial builder. 
     port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"])
     _modify_benchbase_config(dbgym_cfg, port, hpo_params)

-    logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, port, tuning_mode, hpo_params)
+    logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, tuning_mode, port, hpo_params)
     holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger)
     observation_space = _build_observation_space(dbgym_cfg, holon_space, lsc, hpo_params, seed)
     target_reset, env = _build_env(
         dbgym_cfg,
+        tuning_mode,
         hpo_params,
         pg_conn,
         observation_space,

diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py
index 7006c28a..c6886f1c 100644
--- a/tune/protox/agent/coerce_config.py
+++ b/tune/protox/agent/coerce_config.py
@@ -1,7 +1,7 @@
 from typing import Any

 import yaml
-from misc.utils import DBGymConfig, open_and_save
+from misc.utils import DBGymConfig, TuningMode, open_and_save


 def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dict[str, Any]) -> dict[str, Any]:
@@ -24,7 +24,9 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic
         "verbose": True,
         "trace": True,
         "seed": hpo_params["mythril_args"]["seed"],
-        "tune_duration_during_hpo": hpo_params["mythril_args"]["tune_duration_during_hpo"],
+        "tune_duration": {
+            str(TuningMode.HPO): hpo_params["mythril_args"]["duration"],
+        },
         "workload_timeout": hpo_params["mythril_args"]["workload_timeout"],
         "query_timeout": hpo_params["mythril_args"]["timeout"],
         "pgconn_info": {

diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 949029e2..2642d649 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -30,7 +30,7 @@


 class AgentHPOArgs:
-    def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath):
+    def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo):
         self.benchmark_name = benchmark_name
         self.workload_name = workload_name
         self.embedder_path = embedder_path
@@ -49,7 +49,7 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi
         self.workload_timeout = workload_timeout
         self.query_timeout = query_timeout
         self.enable_boot_during_hpo = enable_boot_during_hpo
-        self.hpo_boot_config_fpath = hpo_boot_config_fpath
+        self.boot_config_fpath_during_hpo = boot_config_fpath_during_hpo


 @click.command()
@@ -194,7 +194,7 @@ def hpo(
     workload_timeout,
     query_timeout,
     enable_boot_during_hpo: bool,
-    hpo_boot_config_fpath: Path,
+    boot_config_fpath_during_hpo: Path,
 ):
     # Set args to defaults programmatically (do this before doing anything else in the function)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
@@ -224,7 +224,7 @@ def hpo(
     pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath)
     pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path)
     workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path)
-    hpo_boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, hpo_boot_config_fpath)
+    boot_config_fpath_during_hpo = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath_during_hpo)

     # Check assertions on args
     if intended_pgdata_hardware == "hdd":
@@ -235,7 +235,7 @@ def hpo(
         assert False

     # Create args object
-    hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, hpo_boot_config_fpath)
+    hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo)
     _tune_hpo(dbgym_cfg, hpo_args)


@@ -252,7 +252,7 @@ def build_space(
     tune_duration_during_hpo: int=30,
     seed: int=0,
     enable_boot_during_hpo: bool=False,
-    hpo_boot_config_fpath: Path=None,
+    boot_config_fpath_during_hpo: Path=None,
     workload_timeouts: list[int]=[600],
     query_timeouts: list[int]=[30],
     boot_enabled: bool = False,
@@ -264,11 +264,15 @@ def build_space(
         "verbose": True,
         "trace": True,
         "seed": seed,
+        # For params that may differ between HPO, tune, and replay, I chose to represent them
+        # as dictionaries. I felt this was less confusing than overriding parts of the hpo_params
+        # during tune or replay. With the dictionary representation, we never override anything in
+        # hpo_params - we only ever add new fields to hpo_params.
         "enable_boot": {
             str(TuningMode.HPO): enable_boot_during_hpo,
         },
         "boot_config_fpath": {
-            str(TuningMode.HPO): hpo_boot_config_fpath,
+            str(TuningMode.HPO): boot_config_fpath_during_hpo,
         },

         # Timeouts.
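To make the dictionary representation concrete, here is a minimal sketch of how per-mode entries accumulate across phases (all values hypothetical): HPO writes its own keys, and tune/replay later add theirs without ever overwriting them.

hpo_params = {
    "enable_boot": {"TuningMode.HPO": True},
    "tune_duration": {"TuningMode.HPO": 0.01},
}
# tune.py adds its entries without touching the HPO ones:
hpo_params["enable_boot"]["TuningMode.TUNE"] = False
hpo_params["tune_duration"]["TuningMode.TUNE"] = 0.1
# replay.py does the same for TuningMode.REPLAY, so every phase's settings stay inspectable:
hpo_params["enable_boot"]["TuningMode.REPLAY"] = False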
@@ -381,7 +385,7 @@ def build_space(


 class TuneTimeoutChecker(object):
-    def __init__(self, tune_duration: int) -> None:
+    def __init__(self, tune_duration: float) -> None:
         self.limit = (tune_duration * 3600) > 0
         self.remain = int(tune_duration * 3600)
         self.running = False
@@ -431,14 +435,14 @@ def setup(self, hpo_params: dict[str, Any]) -> None:
         np.random.seed(seed)
         torch.manual_seed(seed)

-        tune_duration = hpo_params["tune_duration_during_hpo"] if self.tuning_mode == TuningMode.HPO else hpo_params["tune_duration_during_tune"]
+        tune_duration = hpo_params["tune_duration"][str(self.tuning_mode)]
         self.timeout_checker = TuneTimeoutChecker(tune_duration)
         self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial(
             self.dbgym_cfg,
+            self.tuning_mode,
             seed=seed,
             hpo_params=hpo_params,
-            tuning_mode=self.tuning_mode,
         )
         self.logger.get_logger(None).info("%s", hpo_params)
         self.logger.get_logger(None).info(f"Seed: {seed}")
@@ -585,7 +589,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None:
         tune_duration_during_hpo=hpo_args.tune_duration_during_hpo,
         seed=hpo_args.seed,
         enable_boot_during_hpo=hpo_args.enable_boot_during_hpo,
-        hpo_boot_config_fpath=hpo_args.hpo_boot_config_fpath,
+        boot_config_fpath_during_hpo=hpo_args.boot_config_fpath_during_hpo,
         workload_timeouts=workload_timeouts,
         query_timeouts=query_timeouts,
     )

diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py
index 7cac1014..249660ff 100644
--- a/tune/protox/agent/off_policy_algorithm.py
+++ b/tune/protox/agent/off_policy_algorithm.py
@@ -138,10 +138,10 @@ def _sample_action(

     def collect_rollouts(
         self,
+        tuning_mode: TuningMode,
         env: AgentEnv,
         train_freq: TrainFreq,
         replay_buffer: ReplayBuffer,
-        tuning_mode: TuningMode,
         action_noise: Optional[ActionNoise] = None,
         learning_starts: int = 0,
     ) -> RolloutReturn:
@@ -220,10 +220,10 @@ def learn(self, env: AgentEnv, total_timesteps: int, tuning_mode: TuningMode) ->
         while self.num_timesteps < total_timesteps:
             rollout = self.collect_rollouts(
+                tuning_mode,
                 env,
                 train_freq=self.train_freq,
                 replay_buffer=self.replay_buffer,
-                tuning_mode=tuning_mode,
                 action_noise=self.action_noise,
                 learning_starts=self.learning_starts,
             )

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index f23ba10e..4b5a3ad2 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -154,10 +154,6 @@ def _is_tuning_step_line(line: str) -> bool:

     # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
     hpo_params["enable_boot"][str(TuningMode.REPLAY)] = False
     hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None
-    # TODO(phw2): set tune_duration to be None to represent inf
-    hpo_params["tune_duration"][str(TuningMode.REPLAY)] = hpo_params["tune_duration"][str(TuningMode.TUNE)]
-
-    output_log_fpath = tuning_steps_dpath / "output.log"

     # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder
     # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`),
@@ -165,7 +161,8 @@ def _is_tuning_step_line(line: str) -> bool:
     folders = []
     start_found = False
     last_evaluation = None
-    with open_and_save(dbgym_cfg, output_log_fpath) as f:
+    output_log_fpath = tuning_steps_dpath / "output.log"
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         for line in f:
             if not start_found:
                 if "Baseline Metric" in line:
@@ -179,13 +176,15 @@ def _is_tuning_step_line(line: str) -> bool:
                     last_evaluation = time_since_start
                     if replay_args.cutoff == None or (time_since_start - start_time).total_seconds() < replay_args.cutoff * 3600:
                         folders.append(last_folder)
+
+    # Set tune_duration to be high so that it doesn't cut the replay off early
+    hpo_params["tune_duration"][str(TuningMode.REPLAY)] = replay_args.workload_timeout * len(folders)

     # Only apply threshold if time is less than.
     threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None

     # Build PostgresEnv.
-    # TODO(phw2): build PostgresEnv with replay = true
-    _, _, agent_env, _, _ = build_trial(dbgym_cfg, hpo_params["seed"], TuningMode.REPLAY, hpo_params)
+    _, _, agent_env, _, _ = build_trial(dbgym_cfg, TuningMode.REPLAY, hpo_params["seed"], hpo_params)
     pg_env: PostgresEnv = agent_env.unwrapped

     # Reset things.
@@ -214,7 +213,7 @@ def _is_tuning_step_line(line: str) -> bool:

     num_lines = 0
-    with open_and_save(dbgym_cfg, output_log_fpath) as f:
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         for line in f:
             if "Baseline Metric" in line:
                 num_lines += 1
@@ -250,7 +249,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:

     run_data = []
     pbar = tqdm.tqdm(total=num_lines)
-    with open_and_save(dbgym_cfg, output_log_fpath) as f:
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         current_step = 0
         start_found = False
         start_time = None

diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py
index 681ed520..ccdb57b2 100644
--- a/tune/protox/agent/tune.py
+++ b/tune/protox/agent/tune.py
@@ -45,9 +45,9 @@
 )
 @click.option(
     "--tune-duration-during-tune",
-    default=30,
+    default=None,
     type=float,
-    help="The number of hours to run the tuning agent for. This may be different than how long we ran the agent for during HPO."
+    help="The number of hours to run the tuning agent for. If you do not specify this argument, it will be the same as --tune-duration-during-hpo."
 )
 def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, boot_config_fpath_during_tune: Path, tune_duration_during_tune: float) -> None:
     # Set args to defaults programmatically (do this before doing anything else in the function)
@@ -72,11 +72,17 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
             pgconn_info={}
         ), hpo_params)

+    # Set args to defaults programmatically cont.
+    # We need to do this here instead of up above because we need hpo_params
+    tune_duration_during_tune = tune_duration_during_tune if tune_duration_during_tune != None else hpo_params["tune_duration"][str(TuningMode.HPO)]
+
     # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
     # In general, for configs that can differ between HPO, tuning, and replay I chose to append
     # "*tune*" and "*hpo*" to their names instead of naming them the same
     # and overriding the config during tuning. It's just much less confusing if we
     # make sure to never override any configs in hpo_params.
+    # Note that while we currently do not persist the hpo_params used during *tuning* back to
+    # a file, this is entirely possible to do in the future if needed.
     hpo_params["enable_boot"][str(TuningMode.TUNE)] = enable_boot_during_tune
     hpo_params["boot_config_fpath"][str(TuningMode.TUNE)] = boot_config_fpath_during_tune
     hpo_params["tune_duration"][str(TuningMode.TUNE)] = tune_duration_during_tune
@@ -88,7 +94,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
     data = []
     step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv"

-    while (time.time() - start) < hpo_params["tune_duration_during_tune"] * 3600:
+    while (time.time() - start) < tune_duration_during_tune * 3600:
         data.append(tune_trial.step())

         # Continuously write the file out.

From d88fd98b29e4bbd124676331b5ae4859a03ee3c4 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 15:42:29 +0000
Subject: [PATCH 059/100] added workload timeout during replay param

---
 experiments/protox_tpch_sf0point1/main.sh |  9 +++----
 experiments/protox_tpch_sf10/main.sh      |  2 +-
 tune/protox/agent/build_trial.py          |  5 ++--
 tune/protox/agent/coerce_config.py        |  4 ++-
 tune/protox/agent/hpo.py                  |  4 ++-
 tune/protox/agent/replay.py               | 32 ++++++++++++++---------
 tune/protox/agent/tune.py                 | 10 +++----
 tune/protox/env/logger.py                 |  2 +-
 8 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh
index b56c6e86..432aa60e 100755
--- a/experiments/protox_tpch_sf0point1/main.sh
+++ b/experiments/protox_tpch_sf0point1/main.sh
@@ -7,10 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd
 PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing.

 # uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
-python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune
+# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
+# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10
 exit 0

 # benchmark
@@ -28,6 +27,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa
 python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2

 # agent
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 10 --tune-duration-during-hpo 0.2 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR

diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh
index 2c50e528..3dad54b7 100755
--- a/experiments/protox_tpch_sf10/main.sh
+++ b/experiments/protox_tpch_sf10/main.sh
@@ -26,5 +26,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa
 python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10

 # agent
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR

diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py
index 2bdd49a6..d80f2610 100644
--- a/tune/protox/agent/build_trial.py
+++ b/tune/protox/agent/build_trial.py
@@ -152,7 +152,6 @@ def _build_utilities(
     # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus,
     # we start Redis here if necessary.
     enable_boot = hpo_params["enable_boot"][str(tuning_mode)]
-    boot_config_fpath = hpo_params["boot_config_fpath"][str(tuning_mode)]
     if enable_boot:
         make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"])
@@ -163,7 +162,7 @@ def _build_utilities(
         pgdata_parent_dpath=Path(hpo_params["pgconn_info"]["pgdata_parent_dpath"]),
         pgbin_path=Path(hpo_params["pgconn_info"]["pgbin_path"]),
         enable_boot=enable_boot,
-        boot_config_fpath=boot_config_fpath,
+        boot_config_fpath=hpo_params["boot_config_fpath"][str(tuning_mode)],
         connect_timeout=300,
         logger=logger,
     )
@@ -175,7 +174,7 @@ def _build_utilities(
         query_spec=hpo_params["benchmark_config"]["query_spec"],
         workload_path=Path(hpo_params["workload_path"]),
         pid=None,
-        workload_timeout=hpo_params["workload_timeout"],
+        workload_timeout=hpo_params["workload_timeout"][str(tuning_mode)],
         workload_timeout_penalty=hpo_params["workload_timeout_penalty"],
         logger=logger,
     )

diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py
index c6886f1c..3c19900c 100644
--- a/tune/protox/agent/coerce_config.py
+++ b/tune/protox/agent/coerce_config.py
@@ -27,7 +27,9 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic
         "tune_duration": {
             str(TuningMode.HPO): hpo_params["mythril_args"]["duration"],
         },
-        "workload_timeout": hpo_params["mythril_args"]["workload_timeout"],
+        "workload_timeout": {
+            str(TuningMode.HPO): hpo_params["mythril_args"]["workload_timeout"],
+        },
         "query_timeout": hpo_params["mythril_args"]["timeout"],
         "pgconn_info": {
             "pgport": 5432,

diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 2642d649..99c399a3 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -279,7 +279,9 @@ def build_space(
         "tune_duration": {
             str(TuningMode.HPO): tune_duration_during_hpo,
         },
-        "workload_timeout": tune.choice(workload_timeouts),
+        "workload_timeout": {
+            str(TuningMode.HPO): tune.choice(workload_timeouts),
+        },
         "query_timeout": tune.choice(query_timeouts),

         # Paths.

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index 4b5a3ad2..ae9b136d 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -30,9 +30,9 @@
 class ReplayArgs:
     def __init__(
-        self, workload_timeout: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list
+        self, workload_timeout_during_replay: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list
     ):
-        self.workload_timeout = workload_timeout
+        self.workload_timeout_during_replay = workload_timeout_during_replay
         self.num_samples = num_samples
         self.threshold = threshold
         self.threshold_limit = threshold_limit
@@ -70,10 +70,12 @@ def __init__(
     help="The path to the `tuning_steps` directory to be replayed."
 )
 @click.option(
-    "--workload-timeout",
-    default=DEFAULT_WORKLOAD_TIMEOUT,
+    "--workload-timeout-during-replay",
+    default=None,
     type=int,
-    help="The timeout (in seconds) of a workload when replaying."
+    # You can make it use the workload timeout used during tuning if you want.
+    # I just made it use the workload timeout from HPO because I don't currently persist the tuning HPO params.
+    help="The timeout (in seconds) of a workload when replaying. By default, it will be equal to the workload timeout used during HPO."
 )
 @click.option(
     "--num-samples",
     default=1,
@@ -120,7 +122,7 @@ def __init__(
     type=list,
     help="Ignore running queries in the blocklist."
 )
-def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list) -> None:
+def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list) -> None:
     # Set args to defaults programmatically (do this before doing anything else in the function)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
@@ -131,7 +133,7 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en
     tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath)

     # Group args together to reduce the # of parameters we pass into functions
-    replay_args = ReplayArgs(workload_timeout, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist)
+    replay_args = ReplayArgs(workload_timeout_during_replay, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist)

     # Replay
     replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args)
@@ -151,9 +153,15 @@ def _is_tuning_step_line(line: str) -> bool:
     hpo_params_fpath = tuning_steps_dpath / "params.json"
     with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f:
         hpo_params = json.load(f)
+
+    # Set defaults that depend on hpo_params
+    if replay_args.workload_timeout_during_replay == None:
+        replay_args.workload_timeout_during_replay = hpo_params["workload_timeout"][str(TuningMode.HPO)]
+
     # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
     hpo_params["enable_boot"][str(TuningMode.REPLAY)] = False
     hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None
+    hpo_params["workload_timeout"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay

     # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder
     # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`),
@@ -178,7 +186,7 @@ def _is_tuning_step_line(line: str) -> bool:
                         folders.append(last_folder)

     # Set tune_duration to be high so that it doesn't cut the replay off early
-    hpo_params["tune_duration"][str(TuningMode.REPLAY)] = replay_args.workload_timeout * len(folders)
+    hpo_params["tune_duration"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay * len(folders)

     # Only apply threshold if time is less than.
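To make the tune_duration bound above concrete, a small back-of-the-envelope sketch (numbers hypothetical): each of the len(folders) tuning steps replays the workload at most once, and each replay is capped at workload_timeout_during_replay seconds, so their product is a safe upper bound on replay wall time.

workload_timeout_during_replay = 10  # seconds, hypothetical
num_folders = 30                     # len(folders), hypothetical
tune_duration_replay = workload_timeout_during_replay * num_folders
print(tune_duration_replay)          # 300 s: the timeout checker can never cut the replay short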
     threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None
@@ -225,7 +233,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
         for _ in range(replay_args.num_samples):
             logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n")
             logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n")
-            # DEBUG(phw2) assert replay_args.workload_timeout == hpo_params["workload_timeout_during_replay"] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value"
+            assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value"
             runtime = pg_env.workload.execute_workload(
                 pg_conn=pg_env.pg_conn,
                 actions=[action_info],
@@ -242,7 +250,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
             samples.append(runtime)
             logging.info(f"Runtime: {runtime}")

-            if runtime >= replay_args.workload_timeout:
+            if runtime >= replay_args.workload_timeout_during_replay:
                 break

         return samples
@@ -253,7 +261,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
     current_step = 0
     start_found = False
     start_time = None
-    cur_reward_max = replay_args.workload_timeout
+    cur_reward_max = replay_args.workload_timeout_during_replay
     noop_index = False
     maximal_repo = None
     existing_index_acts = []
@@ -356,7 +364,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:

             current_step += 1

-            if (not has_timeout) or (max(run_samples) < replay_args.workload_timeout):
+            if (not has_timeout) or (max(run_samples) < replay_args.workload_timeout_during_replay):
                 # Apply a tolerance..
                 # If we've timed out, apply the threshold only if we've found a strictly better config.
                 apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0

diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py
index ccdb57b2..2e57929d 100644
--- a/tune/protox/agent/tune.py
+++ b/tune/protox/agent/tune.py
@@ -72,9 +72,9 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
             pgconn_info={}
         ), hpo_params)

-    # Set args to defaults programmatically cont.
-    # We need to do this here instead of up above because we need hpo_params
-    tune_duration_during_tune = tune_duration_during_tune if tune_duration_during_tune != None else hpo_params["tune_duration"][str(TuningMode.HPO)]
+    # Set defaults that depend on hpo_params
+    if tune_duration_during_tune == None:
+        tune_duration_during_tune = hpo_params["tune_duration"][str(TuningMode.HPO)]

     # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
     # In general, for configs that can differ between HPO, tuning, and replay I chose to append
     # "*tune*" and "*hpo*" to their names instead of naming them the same
     # and overriding the config during tuning. It's just much less confusing if we
     # make sure to never override any configs in hpo_params.
@@ -86,6 +86,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
     hpo_params["enable_boot"][str(TuningMode.TUNE)] = enable_boot_during_tune
     hpo_params["boot_config_fpath"][str(TuningMode.TUNE)] = boot_config_fpath_during_tune
     hpo_params["tune_duration"][str(TuningMode.TUNE)] = tune_duration_during_tune
+    hpo_params["workload_timeout"][str(TuningMode.TUNE)] = hpo_params["workload_timeout"][str(TuningMode.HPO)]

     # Piggyback off the HPO magic.
     tune_trial = TuneTrial(dbgym_cfg, TuningMode.TUNE)
@@ -109,8 +110,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
     # Replaying requires output.log and params.json, so we also copy them into the tuning_steps/ directory.
     # The reason I don't use symlinks for output.log or params.json is to ensure that tuning_steps/ is a fully self-contained directory.
     tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps")
+    # We copy hpoed_agent_params_path instead of moving it because hpoed_agent_params_path was generated in another task run
     shutil.copy(hpoed_agent_params_path, tuning_steps_dpath)
-    output_fpath = dbgym_cfg.cur_task_runs_artifacts_path() / "output.log"
-    shutil.copy(output_fpath, tuning_steps_dpath)
     tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune)
     link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname)

diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py
index 96e10141..95aca21a 100644
--- a/tune/protox/env/logger.py
+++ b/tune/protox/env/logger.py
@@ -72,7 +72,7 @@ def __init__(
         logging.basicConfig(format=formatter, level=level, force=True)

         # Setup the file logger.
-        file_logger = logging.FileHandler(self.log_dpath / "output.log")
+        file_logger = logging.FileHandler(self.tuning_steps_dpath / "output.log")
         file_logger.setFormatter(logging.Formatter(formatter))
         file_logger.setLevel(level)
         logging.getLogger().addHandler(file_logger)

From bc6652639e6b714d50545cb1361a0ec4c09fde2b Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 16:42:12 +0000
Subject: [PATCH 060/100] fixed race condition in multiple threads writing to
 pg.log

---
 experiments/protox_tpch_sf0point1/main.sh | 2 +-
 scripts/pat_test.sh                       | 4 ++--
 tune/protox/agent/replay.py               | 3 +++
 tune/protox/env/util/execute.py           | 8 +++++---
 tune/protox/env/util/pg_conn.py           | 8 +++++---
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh
index 432aa60e..7f15a038 100755
--- a/experiments/protox_tpch_sf0point1/main.sh
+++ b/experiments/protox_tpch_sf0point1/main.sh
@@ -9,7 +9,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing.

 # uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
 # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
 # python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 25
 exit 0

 # benchmark

diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index 2b09f1b9..a888d753 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -7,8 +7,8 @@ INTENDED_PGDATA_HARDWARE=ssd
 PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing.

 # uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
-python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune
 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
 exit 0

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index ae9b136d..d7ded74a 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -309,6 +309,9 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
                     if2_count += 1
                     print(f"if2_count={if2_count}")

+                    if if2_count >= 2:
+                        break
+
                     index_acts = set()

                     with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f:

diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py
index a9f4e4d4..ebc6e254 100644
--- a/tune/protox/env/util/execute.py
+++ b/tune/protox/env/util/execute.py
@@ -37,7 +37,7 @@ def _time_query(
     query: str,
     timeout: float,
 ) -> Tuple[float, bool, Any]:
-    has_timeout = False
+    did_timeout = False
     has_explain = "EXPLAIN" in query
     explain_data = None

@@ -45,11 +45,13 @@ def _time_query(
         start_time = time.time()
         cursor = connection.execute(query)
         qid_runtime = (time.time() - start_time) * 1e6
+        print(f"{prefix} measured qid_runtime={qid_runtime/1e6}")

         if has_explain:
             c = [c for c in cursor][0][0][0]
             assert "Execution Time" in c
             qid_runtime = float(c["Execution Time"]) * 1e3
+            print(f"{prefix} explain qid_runtime={qid_runtime/1e6}")
             explain_data = c

         if logger:
@@ -63,11 +65,11 @@ def _time_query(
             f"{prefix} exceeded evaluation timeout {timeout}"
         )
         qid_runtime = timeout * 1e6
-        has_timeout = True
+        did_timeout = True
     except Exception as e:
         assert False, print(e)
     # qid_runtime is in microseconds.
-    return qid_runtime, has_timeout, explain_data
+    return qid_runtime, did_timeout, explain_data


 def _acquire_metrics_around_query(

diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py
index aace6d37..3a4f0207 100644
--- a/tune/protox/env/util/pg_conn.py
+++ b/tune/protox/env/util/pg_conn.py
@@ -79,8 +79,8 @@ def disconnect(self) -> None:
         self._conn = None

     def move_log(self) -> None:
-        pglog_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log"
-        pglog_this_step_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg.log.{self.log_step}"
+        pglog_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log"
+        pglog_this_step_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log.{self.log_step}"
         if pglog_fpath.exists():
             shutil.move(
                 pglog_fpath,
@@ -175,7 +175,9 @@ def start_with_changes(
             "-t",
             "180",
             "-l",
-            self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "pg.log",
+            # We log to pg{self.pgport}.log instead of pg.log so that different PostgresConn objects
+            # don't all try to write to the same file.
+            self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log",
             "start",
         ].run(retcode=None)

From f09d38f2c5ff93d3c05493938e01a93f1c320142 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 17:03:10 +0000
Subject: [PATCH 061/100] now linking to params.json for manual run_*/
 traversal

---
 scripts/pat_test.sh       |  6 +++---
 tune/protox/agent/hpo.py  | 14 +++++++++-----
 tune/protox/agent/tune.py | 12 +++++++++---
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index a888d753..dfbae86e 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -7,9 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd
 PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing.

 # uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
-python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
+# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune
 exit 0

 # benchmark

diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 99c399a3..9b3fbb74 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -642,13 +642,17 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None:
             print(f"Trial {results[i]} FAILED")
         assert False, print("Encountered exceptions!")

-    # Save the best params.json
-    # Before saving, copy it into run_*/[codebase]/data/. We copy so that when we open the
-    # params.json file using open_and_save(), it links to the params.json file directly
-    # instead of to the dir TuneOpt*/. By linking to the params.json file directly, we
-    # know which params.json file in TuneOpt*/ was actually used for tuning.
+    # Save the best params.json.
     best_result = results.get_best_result(metric=METRIC_NAME, mode=mode)
     best_params_generated_fpath = Path(best_result.path) / "params.json"
+    # Before saving, copy it into run_*/[codebase]/data/. This way, save_file() called on
+    # params.json will link directly to run_*/[codebase]/data/params.json instead of to
+    # run_*/[codebase]/hpo_ray_results/TuneOpt*/.
     best_params_copy_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json"
     shutil.copy(best_params_generated_fpath, best_params_copy_fpath)
     link_result(dbgym_cfg, best_params_copy_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name))
+    # We also link from run_*/[codebase]/data/params.json to run_*/[codebase]/hpo_ray_results/TuneOpt*/**/params.json.
+    # This way, when _manually_ looking through run_*/, we can see which HPO trial was
+    # responsible for creating params.json.
+    best_params_link_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json.link"
+    os.symlink(best_params_generated_fpath, best_params_link_fpath)

diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py
index 2e57929d..2f566f17 100644
--- a/tune/protox/agent/tune.py
+++ b/tune/protox/agent/tune.py
@@ -1,4 +1,5 @@
 import json
+import os
 from pathlib import Path
 import shutil
 import time
@@ -107,10 +108,15 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
             pd.DataFrame(data).to_csv(step_data_fpath, index=False)

     # Link the tuning steps data (this directory allows you to replay the tuning run).
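The copy-plus-.link scheme above can be sketched in isolation (all paths hypothetical): the copy keeps the destination directory self-contained, while the extra symlink records where the file originally came from for manual traversal.

import os
import shutil
import tempfile
from pathlib import Path

workdir = Path(tempfile.mkdtemp())
src_run = workdir / "run_hpo"; src_run.mkdir()    # hypothetical source task run
dst_run = workdir / "run_tune"; dst_run.mkdir()   # hypothetical destination task run
(src_run / "params.json").write_text("{}")

shutil.copy(src_run / "params.json", dst_run / "params.json")      # self-contained copy
os.symlink(src_run / "params.json", dst_run / "params.json.link")  # provenance breadcrumb
print(os.readlink(dst_run / "params.json.link"))                   # points back at run_hpo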
-    # Replaying requires output.log and params.json, so we also copy them into the tuning_steps/ directory.
-    # The reason I don't use symlinks for output.log or params.json is to ensure that tuning_steps/ is a fully self-contained directory.
     tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps")
+    # Replaying requires params.json, so we also copy it into the tuning_steps/ directory.
     # We copy hpoed_agent_params_path instead of moving it because hpoed_agent_params_path was generated in another task run
-    shutil.copy(hpoed_agent_params_path, tuning_steps_dpath)
+    # We copy instead of just symlinking so that tuning_steps/ is a fully self-contained directory.
+    hpoed_agent_params_copy_fpath = tuning_steps_dpath / "params.json"
+    shutil.copy(hpoed_agent_params_path, hpoed_agent_params_copy_fpath)
     tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune)
     link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname)
+    # We also create a link to hpoed_agent_params_path. This is useful when we are _manually_ looking through
+    # run_*/ and want to see which other run_*/ was responsible for creating params.json.
+    hpoed_agent_params_link_fpath = tuning_steps_dpath / "params.json.link"
+    os.symlink(hpoed_agent_params_path, hpoed_agent_params_link_fpath)

From 867aa922b4c17603d691f2d76543ff92bfb9f5e0 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 17:08:50 +0000
Subject: [PATCH 062/100] renamed reward in replay.py

---
 tune/protox/agent/replay.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index d7ded74a..bbe40d4c 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -299,13 +299,13 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
                 # Get the evaluation reward.
                 run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv"
                 save_file(dbgym_cfg, run_raw_csv_fpath)
-                reward = pd.read_csv(run_raw_csv_fpath)
-                assert len(reward.columns) == 6
-                has_timeout = (reward["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]
-                reward = reward["Latency (microseconds)"].sum() / 1e6
-                assert reward > 0
+                run_raw_csv = pd.read_csv(run_raw_csv_fpath)
+                assert len(run_raw_csv.columns) == 6
+                has_timeout = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]
+                original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6
+                assert original_runtime > 0

-                if ((not replay_args.maximal_only and reward < cur_reward_max) or reward == min_reward) and (not maximal or not has_timeout):
+                if ((not replay_args.maximal_only and original_runtime < cur_reward_max) or original_runtime == min_reward) and (not maximal or not has_timeout):
                     if2_count += 1
                     print(f"if2_count={if2_count}")
@@ -352,13 +352,13 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
                     if not replay_args.simulated:
                         # Get samples.
                         run_samples = samples = _run_sample(action_info)
-                        logging.info(f"Original Runtime: {reward} (timed out? {has_timeout}). New Samples: {samples}")
+                        logging.info(f"Original Runtime: {original_runtime} (timed out? {has_timeout}). New Samples: {samples}")
                     else:
-                        run_samples = samples = [reward, reward]
+                        run_samples = samples = [original_runtime, original_runtime]

                     data = {
                         "step": current_step,
-                        "orig_cost": reward,
+                        "original_runtime": original_runtime,
                         "time_since_start": (time_since_start - start_time).total_seconds(),
                     }
                     samples = {f"runtime{i}": s for i, s in enumerate(samples)}
                     data.update(samples)
                     run_data.append(data)

                     current_step += 1

                     if (not has_timeout) or (max(run_samples) < replay_args.workload_timeout_during_replay):
                         # Apply a tolerance..
                         # If we've timed out, apply the threshold only if we've found a strictly better config.
                         apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0
-                        cur_reward_max = reward - apply_threshold
+                        cur_reward_max = original_runtime - apply_threshold

                 run_folder = repo.split("/")[-1]
                 if run_folder in folders and run_folder == folders[-1]:
                     break
-                elif maximal_only and reward == min_reward:
+                elif maximal_only and original_runtime == min_reward:
                     break
             pbar.update(1)

     if len(run_data) > 0:
         data = {
             "step": current_step,
-            "orig_cost": run_data[-1]["orig_cost"],
+            "original_runtime": run_data[-1]["original_runtime"],
             "time_since_start": -1,
             "runtime0": run_data[-1]["runtime0"],
         }

From 74d70ffbe32364ec41313cb503f44e48d367e0b9 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 17:11:47 +0000
Subject: [PATCH 063/100] more renaming

---
 tune/protox/agent/replay.py | 14 +++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index bbe40d4c..ca8b35b0 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -202,12 +202,12 @@ def _is_tuning_step_line(line: str) -> bool:
     # Get the minimum reward.
     run_raw_csv_fpaths = [tuning_steps_dpath / fold / "run.raw.csv" for fold in folders]
     run_raw_csvs = [pd.read_csv(run_raw_csv_fpath) for run_raw_csv_fpath in run_raw_csv_fpaths]
-    rewards = [(run_raw_csv["Latency (microseconds)"].sum() / 1e6, (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run_raw_csv in run_raw_csvs]
-    rewards = sorted(rewards, key=lambda x: x[0])
-    min_reward = min([r[0] for r in rewards])
+    original_runtime_infos = [(run_raw_csv["Latency (microseconds)"].sum() / 1e6, (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run_raw_csv in run_raw_csvs]
+    original_runtime_infos = sorted(original_runtime_infos, key=lambda x: x[0])
+    min_original_runtime = min([r[0] for r in original_runtime_infos])

     if maximal:
-        target = [r[1] for r in rewards if r[0] == min_reward]
+        target = [r[1] for r in original_runtime_infos if r[0] == min_original_runtime]
         assert len(target) >= 1
         if target[0]:
             # Don't use maximal if the min maximal is timed out.
@@ -218,7 +218,7 @@ def _is_tuning_step_line(line: str) -> bool:
             maximal_only = False
             logging.warn("Maximal disabled.")
         else:
-            logging.info(f"Maximal found: {min_reward}")
+            logging.info(f"Maximal found: {min_original_runtime}")

     num_lines = 0
     with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
@@ -305,7 +305,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
         original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6
         assert original_runtime > 0

-        if ((not replay_args.maximal_only and original_runtime < cur_reward_max) or original_runtime == min_reward) and (not maximal or not has_timeout):
+        if ((not replay_args.maximal_only and original_runtime < cur_reward_max) or original_runtime == min_original_runtime) and (not maximal or not has_timeout):
             if2_count += 1
             print(f"if2_count={if2_count}")
@@ -376,7 +376,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
             run_folder = repo.split("/")[-1]
             if run_folder in folders and run_folder == folders[-1]:
                 break
-            elif maximal_only and original_runtime == min_reward:
+            elif maximal_only and original_runtime == min_original_runtime:
                 break
             pbar.update(1)

From 0e3aa517464bf96893b55ecac240604585067c39 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 17:21:26 +0000
Subject: [PATCH 064/100] comment changes

---
 tune/protox/agent/replay.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index ca8b35b0..8e7fb173 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -199,7 +199,7 @@ def _is_tuning_step_line(line: str) -> bool:
     if not replay_args.simulated:
         pg_env.pg_conn.restore_pristine_snapshot()

-    # Get the minimum reward.
+    # Get the minimum original runtime.
     run_raw_csv_fpaths = [tuning_steps_dpath / fold / "run.raw.csv" for fold in folders]
@@ -296,7 +296,7 @@ def _run_sample(action_info: "HolonAction") -> list[float]:
                 maximal_repo = None

-        # Get the evaluation reward.
+        # Get the original runtime.
         run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv"

From fbbee8999b009baeaba498edad021f9a6173791e Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Fri, 19 Apr 2024 19:06:57 +0000
Subject: [PATCH 065/100] got rid of the 2 maximal params, 2 threshold params,
 and the 'samples' param

---
 experiments/protox_tpch_sf0point1/main.sh |   2 +-
 tune/protox/agent/replay.py               | 251 +++++++---------------
 tune/protox/env/pg_env.py                 |   4 +-
 3 files changed, 83 insertions(+), 174 deletions(-)

diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh
index 7f15a038..432aa60e 100755
--- a/experiments/protox_tpch_sf0point1/main.sh
+++ b/experiments/protox_tpch_sf0point1/main.sh
@@ -9,7 +9,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing.

 # uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
 # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
 # python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 25
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10
 exit 0

 # benchmark

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index 8e7fb173..f59ada09 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -30,15 +30,10 @@
 class ReplayArgs:
     def __init__(
-        self, workload_timeout_during_replay: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list
+        self, workload_timeout_during_replay: int, simulated: bool, cutoff: float, blocklist: list
     ):
         self.workload_timeout_during_replay = workload_timeout_during_replay
-        self.num_samples = num_samples
-        self.threshold = threshold
-        self.threshold_limit = threshold_limit
-        self.maximal = maximal
         self.simulated = simulated
-        self.maximal_only = maximal_only
         self.cutoff = cutoff
         self.blocklist = blocklist
@@ -77,39 +72,11 @@ def __init__(
     # I just made it use the workload timeout from HPO because I don't currently persist the tuning HPO params.
     help="The timeout (in seconds) of a workload when replaying. By default, it will be equal to the workload timeout used during HPO."
 )
-@click.option(
-    "--num-samples",
-    default=1,
-    type=int,
-    help="The number of times to run the workload for each DBMS config being evaluated."
-)
-@click.option(
-    "--threshold",
-    default=0,
-    type=float,
-    help="The minimum delta between the runtimes of consecutive DBMS configs to warrant a config being evaluated."
-)
-@click.option(
-    "--threshold-limit",
-    default=None,
-    type=float,
-    help="Only use threshold within threshold-limit hours from the start. None means \"always use threshold\"."
-)
-@click.option(
-    "--maximal",
-    is_flag=True,
-    help="If set to true, only evaluate configs that are strictly \"better\"."
-)
 @click.option(
     "--simulated",
     is_flag=True,
     help="Set to true to use the runtimes from the original tuning run instead of replaying the workload."
 )
-@click.option(
-    "--maximal-only",
-    is_flag=True,
-    help="If set to true, only evaluate the best config"
-)
 @click.option(
     "--cutoff",
     default=None,
     type=list,
     help="Ignore running queries in the blocklist."
 )
-def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: int, num_samples: int, threshold: float, threshold_limit: float, maximal: bool, simulated: bool, maximal_only: bool, cutoff: float, blocklist: list) -> None:
+def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: int, simulated: bool, cutoff: float, blocklist: list) -> None:
     # Set args to defaults programmatically (do this before doing anything else in the function)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
@@ -131,7 +100,7 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en
     tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath)

     # Group args together to reduce the # of parameters we pass into functions
-    replay_args = ReplayArgs(workload_timeout_during_replay, num_samples, threshold, threshold_limit, maximal, simulated, maximal_only, cutoff, blocklist)
+    replay_args = ReplayArgs(workload_timeout_during_replay, simulated, cutoff, blocklist)

     # Replay
     replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args)
@@ -146,10 +113,6 @@ def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_a
     def _is_tuning_step_line(line: str) -> bool:
         return "mv" in line and "tuning_steps" in line and "baseline" not in line

-    maximal = replay_args.maximal
-    maximal_only = replay_args.maximal_only
-    threshold = replay_args.threshold
-
     hpo_params_fpath = tuning_steps_dpath / "params.json"
     with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f:
         hpo_params = json.load(f)
@@ -163,12 +126,11 @@ def _is_tuning_step_line(line: str) -> bool:
     hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None
     hpo_params["workload_timeout"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay

-    # Go through output.log and find the tuning_steps/[time]/ folders, as well as the time of the last folder
+    # Go through output.log and find the tuning_steps/[time]/ folders
     # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`),
     # so you could just do `ls tuning_steps/` if you wanted to.
     folders = []
     start_found = False
-    last_evaluation = None
     output_log_fpath = tuning_steps_dpath / "output.log"
     with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         for line in f:
             if not start_found:
@@ -181,16 +143,12 @@ def _is_tuning_step_line(line: str) -> bool:
                     repo = eval(line.split("Running ")[-1])[-1]
                     last_folder = repo.split("/")[-1]
                     time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])
-                    last_evaluation = time_since_start
                     if replay_args.cutoff == None or (time_since_start - start_time).total_seconds() < replay_args.cutoff * 3600:
                         folders.append(last_folder)

     # Set tune_duration to be high so that it doesn't cut the replay off early
     hpo_params["tune_duration"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay * len(folders)

-    # Only apply threshold if time is less than.
-    threshold_limit = last_evaluation - datetime.timedelta(seconds=int(replay_args.threshold_limit * 3600)) if replay_args.threshold_limit != None else None
-
     # Build PostgresEnv.
     _, _, agent_env, _, _ = build_trial(dbgym_cfg, TuningMode.REPLAY, hpo_params["seed"], hpo_params)
     pg_env: PostgresEnv = agent_env.unwrapped
@@ -199,27 +157,6 @@ def _is_tuning_step_line(line: str) -> bool:
     if not replay_args.simulated:
         pg_env.pg_conn.restore_pristine_snapshot()

-    # Get the minimum original runtime.
-    run_raw_csv_fpaths = [tuning_steps_dpath / fold / "run.raw.csv" for fold in folders]
-    run_raw_csvs = [pd.read_csv(run_raw_csv_fpath) for run_raw_csv_fpath in run_raw_csv_fpaths]
-    original_runtime_infos = [(run_raw_csv["Latency (microseconds)"].sum() / 1e6, (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]) for run_raw_csv in run_raw_csvs]
-    original_runtime_infos = sorted(original_runtime_infos, key=lambda x: x[0])
-    min_original_runtime = min([r[0] for r in original_runtime_infos])
-
-    if maximal:
-        target = [r[1] for r in original_runtime_infos if r[0] == min_original_runtime]
-        assert len(target) >= 1
-        if target[0]:
-            # Don't use maximal if the min maximal is timed out.
-            # Don't threshold either.
-            threshold = 0
-            maximal = False
-            # Reject maximal only.
-            maximal_only = False
-            logging.warn("Maximal disabled.")
-        else:
-            logging.info(f"Maximal found: {min_original_runtime}")
-
     num_lines = 0
     with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         for line in f:
@@ -228,45 +165,36 @@ def _is_tuning_step_line(line: str) -> bool:
             elif _is_tuning_step_line(line):
                 num_lines += 1

-    def _run_sample(action_info: "HolonAction") -> list[float]:
-        samples = []
-        for _ in range(replay_args.num_samples):
-            logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n")
-            logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n")
-            assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value"
-            runtime = pg_env.workload.execute_workload(
-                pg_conn=pg_env.pg_conn,
-                actions=[action_info],
-                actions_names=["Replay"],
-                observation_space=None,
-                action_space=pg_env.action_space,
-                reset_metrics=None,
-                query_timeout=None,
-                workload_qdir=None,
-                disable_pg_hint=False,
-                blocklist=replay_args.blocklist,
-                first=False,
-            )
-            samples.append(runtime)
-            logging.info(f"Runtime: {runtime}")
-
-            if runtime >= replay_args.workload_timeout_during_replay:
-                break
-
-        return samples
+    # A convenience wrapper around execute_workload() which fills in the arguments properly
+    def _execute_workload_wrapper(action_info: "HolonAction") -> float:
+        logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n")
+        logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n")
+        assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value"
+        runtime = pg_env.workload.execute_workload(
+            pg_conn=pg_env.pg_conn,
+            actions=[action_info],
+            actions_names=["Replay"],
+            observation_space=None,
+            action_space=pg_env.action_space,
+            reset_metrics=None,
+            query_timeout=None,
+            workload_qdir=None,
+            disable_pg_hint=False,
+            blocklist=replay_args.blocklist,
+            first=False,
+        )
+        return runtime

     run_data = []
-    pbar = tqdm.tqdm(total=num_lines)
+    progress_bar = tqdm.tqdm(total=num_lines)
     with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
         current_step = 0

         start_found = False
         start_time = None
-        cur_reward_max = replay_args.workload_timeout_during_replay
         noop_index = False
         maximal_repo = None
         existing_index_acts = []
         if1_count = 0
-        if2_count = 0

         for line in f:
             # Keep going until we've found the start.
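For reference, the string surgery this loop performs on each output.log line can be exercised standalone; the log line below is a hypothetical example of the format the parser assumes (a DEBUG: prefix, a timestamp, then a Running [...] list whose last element is the tuning-step folder):

from dateutil.parser import parse

line = "DEBUG:2024-04-19 15:00:42 [worker] Running ['mv', '/tmp/x', 'tuning_steps/1713538842']"
repo = eval(line.split("Running ")[-1])[-1]  # 'tuning_steps/1713538842'
last_folder = repo.split("/")[-1]            # '1713538842'
time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])
print(repo, last_folder, time_since_start)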
New Samples: {samples}") - else: - run_samples = samples = [original_runtime, original_runtime] - - data = { - "step": current_step, - "original_runtime": original_runtime, - "time_since_start": (time_since_start - start_time).total_seconds(), - } - samples = {f"runtime{i}": s for i, s in enumerate(samples)} - data.update(samples) - run_data.append(data) - - current_step += 1 - - if (not has_timeout) or (max(run_samples) < replay_args.workload_timeout_during_replay): - # Apply a tolerance.. - # If we've timed out, only apply threshold only if we've found a strictly better config. - apply_threshold = threshold if threshold_limit == None or time_since_start < threshold_limit else 0 - cur_reward_max = original_runtime - apply_threshold + # Get the indexes from this action and the prior state + index_acts = set() + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: + actions_info = pickle.load(f) + assert type(actions_info) is list and len(actions_info) == 1, f"there should only be one action in actions_info {actions_info}" + action_info = actions_info[0] + assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" + index_acts.add(action_info[1]) + assert len(index_acts) > 0 + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: + prior_states = pickle.load(f) + all_sc = set(prior_states[1]) + if not noop_index: + for index_act in index_acts: + all_sc.add(index_act) + + all_sc = {a for a in all_sc if not "USING btree ()" in a.sql(True)} + index_acts = all_sc + # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step + index_modification_sqls = [] + for index_act in index_acts: + if index_act not in existing_index_acts: + index_modification_sqls.append(index_act.sql(True)) + for existing_index_act in existing_index_acts: + if existing_index_act not in index_acts: + index_modification_sqls.append(existing_index_act.sql(False)) + + # Modify Postgres to have the right indexes and system-wide knobs. `index_modification_sqls` holds the indexes + # while `cc` holds the system-wide knobs. + if not replay_args.simulated: + cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) + # Like in tuning, we don't dump the page cache when calling shift_state() to see how the workload + # performs in a warm cache scenario. + pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False) + existing_index_acts = index_acts + + # Get the runtime. + if not replay_args.simulated: + replayed_runtime = _execute_workload_wrapper(action_info) + logging.info(f"Original Runtime: {original_runtime} (timed out? {has_timeout}). Replayed Runtime: {replayed_runtime}") + else: + replayed_runtime = original_runtime + + # Add this tuning step's data to `run_data``. 
+ run_data.append({ + "step": current_step, + "original_runtime": original_runtime, + "time_since_start": (time_since_start - start_time).total_seconds(), + "replayed_runtime": replayed_runtime, + }) + current_step += 1 run_folder = repo.split("/")[-1] if run_folder in folders and run_folder == folders[-1]: break - elif maximal_only and original_runtime == min_original_runtime: - break - pbar.update(1) + progess_bar.update(1) if len(run_data) > 0: data = { "step": current_step, "original_runtime": run_data[-1]["original_runtime"], "time_since_start": -1, - "runtime0": run_data[-1]["runtime0"], + "replayed_runtime": run_data[-1]["replayed_runtime"], } run_data.append(data) diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 05237f06..d1d43102 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -123,6 +123,8 @@ def reset( # type: ignore config_changes, sql_commands = self.action_space.generate_plan_from_config( config, sc ) + # We dump the page cache here because we're resetting. We don't want stuff from + # a previous task.py invocation to affect this. assert self.shift_state(config_changes, sql_commands, dump_page_cache=True) # Note that we do not actually update the baseline metric/reward used by the reward @@ -231,7 +233,7 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] # Attempt to maneuver to the new state. # Don't dump the page cache in shift_state() in order to see how the workload performs in # a warm cache scenario. - success = self.shift_state(config_changes, sql_commands) + success = self.shift_state(config_changes, sql_commands, dump_page_cache=True) return success, EnvInfoDict( { "attempted_changes": (config_changes, sql_commands), From 106d4eaff68e50be3c9eaf4db9c499ebfaef93e1 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 19:16:02 +0000 Subject: [PATCH 066/100] got rid of extra row at bottom --- tune/protox/agent/replay.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index f59ada09..e0510ecf 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -213,6 +213,9 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: if1_count += 1 print(f"if1_count={if1_count}") + if if1_count >= 10: + break + if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) @@ -278,6 +281,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: "step": current_step, "original_runtime": original_runtime, "time_since_start": (time_since_start - start_time).total_seconds(), + "repo": repo, "replayed_runtime": replayed_runtime, }) current_step += 1 @@ -287,15 +291,6 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: break progess_bar.update(1) - if len(run_data) > 0: - data = { - "step": current_step, - "original_runtime": run_data[-1]["original_runtime"], - "time_since_start": -1, - "replayed_runtime": run_data[-1]["replayed_runtime"], - } - run_data.append(data) - # Output. run_data_df = pd.DataFrame(run_data) print(f"Finished replaying with run_data_df=\n{run_data_df}\n. 
Data stored in {dbgym_cfg.cur_task_runs_path()}.") From 7fc0beefcba7274dbf0994676a80f2d8c0677bde Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 19:24:11 +0000 Subject: [PATCH 067/100] has_timeout -> did_any_query_timeout_in_original --- tune/protox/agent/replay.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index e0510ecf..09c5c49d 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -170,7 +170,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n") assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value" - runtime = pg_env.workload.execute_workload( + replayed_runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, actions=[action_info], actions_names=["Replay"], @@ -183,7 +183,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: blocklist=replay_args.blocklist, first=False, ) - return runtime + return replayed_runtime run_data = [] progess_bar = tqdm.tqdm(total=num_lines) @@ -229,7 +229,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: save_file(dbgym_cfg, run_raw_csv_fpath) run_raw_csv = pd.read_csv(run_raw_csv_fpath) assert len(run_raw_csv.columns) == 6 - has_timeout = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] + did_any_query_timeout_in_original = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6 assert original_runtime > 0 @@ -269,10 +269,10 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False) existing_index_acts = index_acts - # Get the runtime. + # Execute the workload to get the runtime. if not replay_args.simulated: replayed_runtime = _execute_workload_wrapper(action_info) - logging.info(f"Original Runtime: {original_runtime} (timed out? {has_timeout}). Replayed Runtime: {replayed_runtime}") + logging.info(f"Original Runtime: {original_runtime} (timed out? {did_any_query_timeout_in_original}). 
Replayed Runtime: {replayed_runtime}") else: replayed_runtime = original_runtime @@ -280,8 +280,8 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: run_data.append({ "step": current_step, "original_runtime": original_runtime, + "did_any_query_timeout_in_original": did_any_query_timeout_in_original, "time_since_start": (time_since_start - start_time).total_seconds(), - "repo": repo, "replayed_runtime": replayed_runtime, }) current_step += 1 From b11d3f4c4c21ee3dff0cb462a4bff70dc72fc827 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 19:28:31 +0000 Subject: [PATCH 068/100] comment --- tune/protox/agent/off_policy_algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index 249660ff..5b2b4c3b 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -185,7 +185,7 @@ def collect_rollouts( new_obs, rewards, terms, truncs, infos = env.step(actions) dones = terms or truncs # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get - # stashed in the same directory and potentially crash the system. + # stashed in the same directory and potentially cause a race condition. if self.logger and not tuning_mode == TuningMode.HPO: self.logger.stash_results(infos) From 84bcd79626498f66c41c233b2edd7a2d25d946e8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Fri, 19 Apr 2024 21:06:50 +0000 Subject: [PATCH 069/100] refactored codebase so that all symlinks end with .link. full benchmark -> dbms -> tune run working --- benchmark/tpch/cli.py | 74 ++++++++++++++-------------- benchmark/tpch/load_info.py | 15 +++--- dbms/postgres/cli.py | 18 +++---- misc/utils.py | 82 ++++++++++++++++++-------------- scripts/pat_test.sh | 6 +-- tune/protox/agent/hpo.py | 2 +- tune/protox/agent/tune.py | 2 +- tune/protox/embedding/analyze.py | 4 +- tune/protox/embedding/select.py | 2 +- tune/protox/env/workload.py | 5 +- 10 files changed, 111 insertions(+), 99 deletions(-) diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py index 5b6d24f1..d5c8c407 100644 --- a/benchmark/tpch/cli.py +++ b/benchmark/tpch/cli.py @@ -4,7 +4,7 @@ import click -from misc.utils import DBGymConfig, get_scale_factor_string, workload_name_fn +from misc.utils import DBGymConfig, get_scale_factor_string, link_result, workload_name_fn from util.shell import subprocess_run from util.pg import * @@ -56,33 +56,36 @@ def _get_queries_dname(seed: int, scale_factor: float) -> str: def _clone(dbgym_cfg: DBGymConfig): - symlink_dir = dbgym_cfg.cur_symlinks_build_path("tpch-kit") - if symlink_dir.exists(): - benchmark_tpch_logger.info(f"Skipping clone: {symlink_dir}") + expected_symlink_dpath = dbgym_cfg.cur_symlinks_build_path(mkdir=True) / "tpch-kit.link" + if expected_symlink_dpath.exists(): + benchmark_tpch_logger.info(f"Skipping clone: {expected_symlink_dpath}") return - benchmark_tpch_logger.info(f"Cloning: {symlink_dir}") + benchmark_tpch_logger.info(f"Cloning: {expected_symlink_dpath}") real_build_path = dbgym_cfg.cur_task_runs_build_path() subprocess_run( f"./tpch_setup.sh {real_build_path}", cwd=dbgym_cfg.cur_source_path() ) - subprocess_run( - f"ln -s {real_build_path / 'tpch-kit'} {dbgym_cfg.cur_symlinks_build_path(mkdir=True)}" - ) - benchmark_tpch_logger.info(f"Cloned: {symlink_dir}") + symlink_dpath = link_result(dbgym_cfg, real_build_path / "tpch-kit") + assert os.path.samefile(expected_symlink_dpath, 
symlink_dpath) + benchmark_tpch_logger.info(f"Cloned: {expected_symlink_dpath}") -def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, scale_factor: float): - build_path = dbgym_cfg.cur_symlinks_build_path() - assert build_path.exists() +def _get_tpch_kit_dpath(dbgym_cfg: DBGymConfig) -> Path: + tpch_kit_dpath = (dbgym_cfg.cur_symlinks_build_path() / "tpch-kit.link").resolve() + assert tpch_kit_dpath.exists() and tpch_kit_dpath.is_absolute() and not tpch_kit_dpath.is_symlink() + return tpch_kit_dpath + +def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, scale_factor: float): + tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg) data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True) benchmark_tpch_logger.info( f"Generating queries: {data_path} [{seed_start}, {seed_end}]" ) for seed in range(seed_start, seed_end + 1): - symlinked_seed = data_path / _get_queries_dname(seed, scale_factor) - if symlinked_seed.exists(): + expected_queries_symlink_dpath = data_path / (_get_queries_dname(seed, scale_factor) + ".link") + if expected_queries_symlink_dpath.exists(): continue real_dir = dbgym_cfg.cur_task_runs_data_path(_get_queries_dname(seed, scale_factor), mkdir=True) @@ -90,34 +93,34 @@ def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, sc target_sql = (real_dir / f"{i}.sql").resolve() subprocess_run( f"DSS_QUERY=./queries ./qgen {i} -r {seed} -s {scale_factor} > {target_sql}", - cwd=build_path / "tpch-kit" / "dbgen", + cwd=tpch_kit_dpath / "dbgen", verbose=False, ) - subprocess_run(f"ln -s {real_dir} {data_path}", verbose=False) + queries_symlink_dpath = link_result(dbgym_cfg, real_dir) + assert os.path.samefile(queries_symlink_dpath, expected_queries_symlink_dpath) benchmark_tpch_logger.info( f"Generated queries: {data_path} [{seed_start}, {seed_end}]" ) def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float): - build_path = dbgym_cfg.cur_symlinks_build_path() - assert build_path.exists() - + tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg) data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True) - symlink_dir = data_path / f"tables_sf{get_scale_factor_string(scale_factor)}" - if symlink_dir.exists(): - benchmark_tpch_logger.info(f"Skipping generation: {symlink_dir}") + expected_tables_symlink_dpath = data_path / f"tables_sf{get_scale_factor_string(scale_factor)}.link" + if expected_tables_symlink_dpath.exists(): + benchmark_tpch_logger.info(f"Skipping generation: {expected_tables_symlink_dpath}") return - benchmark_tpch_logger.info(f"Generating: {symlink_dir}") + benchmark_tpch_logger.info(f"Generating: {expected_tables_symlink_dpath}") subprocess_run( - f"./dbgen -vf -s {scale_factor}", cwd=build_path / "tpch-kit" / "dbgen" + f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_dpath / "dbgen" ) real_dir = dbgym_cfg.cur_task_runs_data_path(f"tables_sf{get_scale_factor_string(scale_factor)}", mkdir=True) - subprocess_run(f"mv ./*.tbl {real_dir}", cwd=build_path / "tpch-kit" / "dbgen") + subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen") - subprocess_run(f"ln -s {real_dir} {data_path}") - benchmark_tpch_logger.info(f"Generated: {symlink_dir}") + tables_symlink_dpath = link_result(dbgym_cfg, real_dir) + assert os.path.samefile(tables_symlink_dpath, expected_tables_symlink_dpath) + benchmark_tpch_logger.info(f"Generated: {expected_tables_symlink_dpath}") def _generate_workload( @@ -129,9 +132,9 @@ def _generate_workload( ): symlink_data_dir = dbgym_cfg.cur_symlinks_data_path(mkdir=True) workload_name 
= workload_name_fn(scale_factor, seed_start, seed_end, query_subset) - workload_symlink_path = symlink_data_dir / workload_name + expected_workload_symlink_dpath = symlink_data_dir / (workload_name + ".link") - benchmark_tpch_logger.info(f"Generating: {workload_symlink_path}") + benchmark_tpch_logger.info(f"Generating: {expected_workload_symlink_dpath}") real_dir = dbgym_cfg.cur_task_runs_data_path( workload_name, mkdir=True ) @@ -147,13 +150,12 @@ def _generate_workload( with open(real_dir / "order.txt", "w") as f: for seed in range(seed_start, seed_end + 1): for qnum in queries: - sqlfile = symlink_data_dir / _get_queries_dname(seed, scale_factor) / f"{qnum}.sql" - assert sqlfile.exists() - output = ",".join([f"S{seed}-Q{qnum}", str(sqlfile)]) + sql_fpath = (symlink_data_dir / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql" + assert sql_fpath.exists() and not sql_fpath.is_symlink() and sql_fpath.is_absolute(), "We should only write existent real absolute paths to a file" + output = ",".join([f"S{seed}-Q{qnum}", str(sql_fpath)]) print(output, file=f) # TODO(WAN): add option to deep-copy the workload. - if workload_symlink_path.exists(): - os.remove(workload_symlink_path) - subprocess_run(f"ln -s {real_dir} {workload_symlink_path}") - benchmark_tpch_logger.info(f"Generated: {workload_symlink_path}") + workload_symlink_dpath = link_result(dbgym_cfg, real_dir) + assert workload_symlink_dpath == expected_workload_symlink_dpath + benchmark_tpch_logger.info(f"Generated: {expected_workload_symlink_dpath}") diff --git a/benchmark/tpch/load_info.py b/benchmark/tpch/load_info.py index afe4d243..8db2f0b4 100644 --- a/benchmark/tpch/load_info.py +++ b/benchmark/tpch/load_info.py @@ -1,5 +1,5 @@ from dbms.load_info_base_class import LoadInfoBaseClass -from misc.utils import get_scale_factor_string +from misc.utils import DBGymConfig, get_scale_factor_string TPCH_SCHEMA_FNAME = "tpch_schema.sql" @@ -22,7 +22,7 @@ class TpchLoadInfo(LoadInfoBaseClass): "lineitem", ] - def __init__(self, dbgym_cfg, scale_factor): + def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float): # schema and constraints schema_root_dpath = dbgym_cfg.dbgym_repo_path for component in TpchLoadInfo.CODEBASE_PATH_COMPONENTS[ @@ -39,13 +39,12 @@ def __init__(self, dbgym_cfg, scale_factor): ), f"self._constraints_fpath ({self._constraints_fpath}) does not exist" # tables - data_root_dpath = ( - dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data" - ) - tables_dpath = data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}" + data_root_dpath = dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data" + tables_symlink_dpath = data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}.link" + tables_dpath = tables_symlink_dpath.resolve() assert ( - tables_dpath.exists() - ), f"tables_dpath ({tables_dpath}) does not exist. Make sure you have generated the TPC-H data" + tables_dpath.exists() and tables_dpath.is_absolute() and not tables_dpath.is_symlink() + ), f"tables_dpath ({tables_dpath}) should be an existent real absolute path. 
Make sure you have generated the TPC-H data" self._tables_and_fpaths = [] for table in TpchLoadInfo.TABLES: table_fpath = tables_dpath / f"{table}.tbl" diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index e858d812..75b03650 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -84,11 +84,11 @@ def postgres_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: f def _get_pgbin_symlink_path(dbgym_cfg: DBGymConfig) -> Path: - return dbgym_cfg.cur_symlinks_build_path("repo", "boot", "build", "postgres", "bin") + return dbgym_cfg.cur_symlinks_build_path("repo.link", "boot", "build", "postgres", "bin") def _get_repo_symlink_path(dbgym_cfg: DBGymConfig) -> Path: - return dbgym_cfg.cur_symlinks_build_path("repo") + return dbgym_cfg.cur_symlinks_build_path("repo.link") def _build_repo(dbgym_cfg: DBGymConfig, rebuild): @@ -156,21 +156,21 @@ def _create_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: fl def _generic_pgdata_setup(dbgym_cfg: DBGymConfig): # get necessary vars - pgbin_symlink_dpath = _get_pgbin_symlink_path(dbgym_cfg) - assert pgbin_symlink_dpath.exists() + pgbin_real_dpath = _get_pgbin_symlink_path(dbgym_cfg).resolve() + assert pgbin_real_dpath.exists() dbgym_pguser = DBGYM_POSTGRES_USER dbgym_pgpass = DBGYM_POSTGRES_PASS pgport = DEFAULT_POSTGRES_PORT # Create user - save_file(dbgym_cfg, pgbin_symlink_dpath / "psql") + save_file(dbgym_cfg, pgbin_real_dpath / "psql") subprocess_run( f"./psql -c \"create user {dbgym_pguser} with superuser password '{dbgym_pgpass}'\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost", - cwd=pgbin_symlink_dpath, + cwd=pgbin_real_dpath, ) subprocess_run( f'./psql -c "grant pg_monitor to {dbgym_pguser}" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost', - cwd=pgbin_symlink_dpath, + cwd=pgbin_real_dpath, ) # Load shared preload libraries @@ -179,14 +179,14 @@ def _generic_pgdata_setup(dbgym_cfg: DBGymConfig): # You have to use TO and you can't put single quotes around the libraries (https://postgrespro.com/list/thread-id/2580120) # The method I wrote here works for both one library and multiple libraries f"./psql -c \"ALTER SYSTEM SET shared_preload_libraries TO {SHARED_PRELOAD_LIBRARIES};\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost", - cwd=pgbin_symlink_dpath, + cwd=pgbin_real_dpath, ) # Create the dbgym database. since one pgdata dir maps to one benchmark, all benchmarks will use the same database # as opposed to using databases named after the benchmark subprocess_run( f"./psql -c \"create database {DBGYM_POSTGRES_DBNAME} with owner = '{dbgym_pguser}'\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost", - cwd=pgbin_symlink_dpath, + cwd=pgbin_real_dpath, ) diff --git a/misc/utils.py b/misc/utils.py index f19c023b..bec81d97 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -71,6 +71,26 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: / f"default_{benchmark_name}_benchbase_config.xml" ) +# Generally useful functions +workload_name_fn = ( + lambda scale_factor, seed_start, seed_end, query_subset : f"workload_sf{get_scale_factor_string(scale_factor)}_{seed_start}_{seed_end}_{query_subset}" +) + +# Standard names of files/directories. These can refer to either the actual file/directory or a link to the file/directory. +# Since they can refer to either the actual or the link, they do not have ".link" in them. 
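To make the naming rule concrete before the definitions that follow, here is a small illustration (argument values are invented; the lambda mirrors the traindata_fname helper defined just below): the *_fname/*_dname helpers produce the plain name, and callers append ".link" only when they mean the symlink.

traindata_fname = lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedding_traindata.parquet"

real_name = traindata_fname("tpch", "workload_sf1")  # name of the actual file
link_name = real_name + ".link"                      # name of the symlink pointing at it
assert not real_name.endswith(".link")
assert link_name.endswith(".link") and not link_name.endswith(".link.link")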
+traindata_fname = ( + lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedding_traindata.parquet" +) +default_embedder_dname = ( + lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedder" +) +default_hpoed_agent_params_fname = ( + lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_hpoed_agent_params.json" +) +default_tuning_steps_dname = ( + lambda benchmark_name, workload_name, boot_enabled_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_tuning_steps" +) + # Paths of dependencies in the workspace. These are named "*_path" because they will be an absolute path # The reason these _cannot_ be relative paths is because relative paths are relative to the codebase root, not the workspace root # Note that it's okay to hardcode the codebase paths (like dbgym_dbms_postgres) here. In the worst case, we'll just break an @@ -79,19 +99,18 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: # ok to have to hardcode them when reading. # Details # - If a name already has the workload_name, I omit scale factor. This is because the workload_name includes the scale factor -traindata_fname = ( - lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedding_traindata.parquet" -) +# - By convention, symlinks should end with ".link". The bug that motivated this decision involved replaying a tuning run. When +# replaying a tuning run, you read the tuning_steps/ folder of the tuning run. Earlier, I created a symlink to that tuning_steps/ +# folder called run_*/dbgym_agent_protox_tune/tuning_steps. However, replay itself generates an output.log file, which goes in +# run_*/dbgym_agent_protox_tune/tuning_steps/. The bug was that my replay function was overwriting the output.log file of the +# tuning run. By naming all symlinks "*.link", we avoid the possibility of subtle bugs like this happening. 
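The bug described in the comment above can be reproduced in isolation. A self-contained sketch (all paths here are made up) of how writing through an unsuffixed directory symlink silently clobbers the original run's file:

import os
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    tmp = Path(tmp)
    tuning_steps = tmp / "run_1" / "tuning_steps"
    tuning_steps.mkdir(parents=True)
    (tuning_steps / "output.log").write_text("tuning output\n")

    replay_run = tmp / "run_2"
    replay_run.mkdir()
    link = replay_run / "tuning_steps"  # no ".link" suffix -- looks like a real dir
    os.symlink(tuning_steps, link)

    # Replay "creates" its own output.log but actually overwrites the tuning run's.
    (link / "output.log").write_text("replay output\n")
    assert (tuning_steps / "output.log").read_text() == "replay output\n"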
default_traindata_path = ( lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path( workspace_path ) / "dbgym_tune_protox_embedding" / "data" - / traindata_fname(benchmark_name, workload_name) -) -default_embedder_dname = ( - lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedder" + / (traindata_fname(benchmark_name, workload_name) + ".link") ) default_embedder_path = ( lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path( @@ -99,19 +118,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: ) / "dbgym_tune_protox_embedding" / "data" - / default_embedder_dname(benchmark_name, workload_name) -) -default_hpoed_agent_params_fname = ( - lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_hpoed_agent_params.json" + / (default_embedder_dname(benchmark_name, workload_name) + ".link") ) default_hpoed_agent_params_path = ( lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(workspace_path) / "dbgym_tune_protox_agent" / "data" - / default_hpoed_agent_params_fname(benchmark_name, workload_name) -) -workload_name_fn = ( - lambda scale_factor, seed_start, seed_end, query_subset : f"workload_sf{get_scale_factor_string(scale_factor)}_{seed_start}_{seed_end}_{query_subset}" + / (default_hpoed_agent_params_fname(benchmark_name, workload_name) + ".link") ) default_workload_path = ( lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path( @@ -119,7 +132,7 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: ) / f"dbgym_benchmark_{benchmark_name}" / "data" - / workload_name + / (workload_name + ".link") ) default_pristine_pgdata_snapshot_path = ( lambda workspace_path, benchmark_name, scale_factor: get_symlinks_path_from_workspace_path( @@ -127,7 +140,7 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: ) / "dbgym_dbms_postgres" / "data" - / get_pgdata_tgz_name(benchmark_name, scale_factor) + / (get_pgdata_tgz_name(benchmark_name, scale_factor) + ".link") ) default_pgdata_parent_dpath = ( lambda workspace_path: get_tmp_path_from_workspace_path( @@ -138,16 +151,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: lambda workspace_path: get_symlinks_path_from_workspace_path( workspace_path ) - / "dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin" -) -default_tuning_steps_dname = ( - lambda benchmark_name, workload_name, boot_enabled_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_tuning_steps" + / "dbgym_dbms_postgres" / "build" / "repo.link" / "boot"/ "build" / "postgres" / "bin" ) default_tuning_steps_dpath = ( lambda workspace_path, benchmark_name, workload_name, boot_enabled_during_tune: get_symlinks_path_from_workspace_path( workspace_path ) - / "dbgym_tune_protox_agent" / "artifacts" / default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) + / "dbgym_tune_protox_agent" / "artifacts" / (default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) + ".link") ) @@ -350,7 +360,7 @@ def is_child_path(child_path: os.PathLike, parent_dpath: os.PathLike) -> bool: ) -def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): +def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: Path, mode="r"): """ Open a file and "save" it to [workspace]/task_runs/run_*/. 
It takes in a str | Path to match the interface of open(). @@ -369,6 +379,7 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"): - Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked. """ # validate open_fpath + assert isinstance(open_fpath, Path) assert os.path.isabs( open_fpath ), f"open_and_save(): open_fpath ({open_fpath}) should be an absolute path" @@ -415,7 +426,7 @@ def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Pa # TODO(phw2): after merging agent-train, refactor some code in agent-train to use save_file() instead of open_and_save() -def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: +def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path: """ If an external function takes in a file/directory as input, you will not be able to call open_and_save(). In these situations, just call save_file(). @@ -426,6 +437,7 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: In these cases we create a symlink so we have full provenance for how the dependency was created """ # validate fpath + assert isinstance(fpath, Path) assert not os.path.islink(fpath), f"fpath ({fpath}) should not be a symlink" assert os.path.exists(fpath), f"fpath ({fpath}) does not exist" assert os.path.isfile(fpath), f"fpath ({fpath}) is not a file" @@ -440,16 +452,14 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: if is_child_path(fpath, dbgym_cfg.dbgym_runs_path): # get paths we'll need later. _, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, fpath) - this_run_save_dpath = os.path.join( - dbgym_cfg.dbgym_this_run_path, codebase_dname, org_dname - ) + this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / codebase_dname / org_dname os.makedirs(this_run_save_dpath, exist_ok=True) # if the fpath file is directly in org_dpath, we symlink the file directly parent_dpath = os.path.dirname(fpath) if os.path.samefile(parent_dpath, org_dpath): fname = os.path.basename(fpath) - symlink_fpath = os.path.join(this_run_save_dpath, fname) + symlink_fpath = this_run_save_dpath / (fname + ".link") try_create_symlink(fpath, symlink_fpath) # else, we know the fpath file is _not_ directly inside org_dpath dir # we go as far back as we can while still staying in org_dpath and symlink that "base" dir @@ -462,18 +472,16 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path: # create symlink open_base_dname = dir_basename(base_dpath) - symlink_dpath = os.path.join(this_run_save_dpath, open_base_dname) + symlink_dpath = this_run_save_dpath / (open_base_dname + ".link") try_create_symlink(base_dpath, symlink_dpath) # if it wasn't generated by a run else: # since we don't know where the file is at all, the location is "unknown" and the org is "all" - this_run_save_dpath = os.path.join( - dbgym_cfg.dbgym_this_run_path, "unknown", "all" - ) + this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / "unknown" / "all" os.makedirs(this_run_save_dpath, exist_ok=True) fname = os.path.basename(fpath) # in this case, we want to copy instead of symlinking since it might disappear in the future - copy_fpath = os.path.join(this_run_save_dpath, fname) + copy_fpath = this_run_save_dpath / fname shutil.copy(fpath, copy_fpath) @@ -497,12 +505,12 @@ def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_nam result_name = custom_result_name else: if os.path.isfile(result_fordpath): - result_name = 
os.path.basename(result_fordpath) + ".link" elif os.path.isdir(result_fordpath): - result_name = dir_basename(result_fordpath) + result_name = dir_basename(result_fordpath) + ".link" else: raise AssertionError("result_fordpath must be either a file or dir") - + # Figure out the parent directory path of the symlink codebase_dpath, codebase_dname, _, org_dname = extract_from_task_run_fordpath(dbgym_cfg, result_fordpath) # We're only supposed to save files generated by us, which means they should be in cur_task_runs_path() @@ -514,6 +522,7 @@ def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_nam # Note that in a multi-threaded setting, this might remove one created by a process in the same run, # meaning it's not "old" by our definition of "old". However, we'll always end up with a symlink # file of the current run regardless of the order of threads. + assert result_name.endswith(".link") and not result_name.endswith(".link.link"), f"result_name ({result_name}) should end with \".link\"" symlink_path = symlink_parent_dpath / result_name try_remove_file(symlink_path) try_create_symlink(result_fordpath, symlink_path) @@ -526,6 +535,7 @@ def try_create_symlink(src_path: Path, dst_path: Path) -> None: Our functions that create symlinks might be called by multiple processes at once during HPO. Thus, this is a thread-safe way to create a symlink. """ + assert dst_path.name.endswith(".link") and not dst_path.name.endswith(".link.link") try: os.symlink(src_path, dst_path) except FileExistsError: diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index dfbae86e..58c0332b 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,9 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 9b3fbb74..b31d027e 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -650,7 +650,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: # run_*/[codebase]/hpo_ray_results/TuneOpt*/.
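Taken together, try_remove_file() and try_create_symlink() make relinking idempotent. A minimal sketch of the pattern (simplified relative to the real helpers):

import os
from pathlib import Path

def relink(src_path: Path, dst_path: Path) -> None:
    assert dst_path.name.endswith(".link") and not dst_path.name.endswith(".link.link")
    try:
        os.remove(dst_path)  # drop a stale link from an earlier run
    except FileNotFoundError:
        pass
    try:
        os.symlink(src_path, dst_path)
    except FileExistsError:  # a concurrent HPO process won the race
        pass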
best_params_copy_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json" shutil.copy(best_params_generated_fpath, best_params_copy_fpath) - link_result(dbgym_cfg, best_params_copy_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name)) + link_result(dbgym_cfg, best_params_copy_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name) + ".link") # We also link from run_*/[codebase]/data/params.json to run_*/[codebase]/hpo_ray_results/TuneOpt*/**/params.json. # This way, when _manually_ looking through run_*/, we can see which HPO trial was # responsible for creating params.json. diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py index 2f566f17..1b167ce5 100644 --- a/tune/protox/agent/tune.py +++ b/tune/protox/agent/tune.py @@ -115,7 +115,7 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: hpoed_agent_params_copy_fpath = tuning_steps_dpath / "params.json" shutil.copy(hpoed_agent_params_path, hpoed_agent_params_copy_fpath) tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune) - link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname) + link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname + ".link") # We also create a link to hpoed_agent_params_path. This is useful when we are _manually_ looking through # run_*/ and want to see which other run_*/ was responsible for creating params.json hpoed_agent_params_link_fpath = tuning_steps_dpath / "params.json.link" diff --git a/tune/protox/embedding/analyze.py b/tune/protox/embedding/analyze.py index 5341a0da..cdf6666c 100644 --- a/tune/protox/embedding/analyze.py +++ b/tune/protox/embedding/analyze.py @@ -80,13 +80,13 @@ def _analyze_embeddings_part(dbgym_cfg: DBGymConfig, part_i: int, generic_args: start_time = time.time() _create_stats_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) analyze_part_duration = time.time() - start_time - with open(os.path.join(part_dpath, "stats_time.txt"), "w") as f: + with open(part_dpath / "stats_time.txt", "w") as f: f.write(f"{analyze_part_duration}") start_time = time.time() _create_ranges_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) create_range_duration = time.time() - start_time - with open(os.path.join(part_dpath, "ranges_time.txt"), "w") as f: + with open(part_dpath / "ranges_time.txt", "w") as f: f.write(f"{create_range_duration}") diff --git a/tune/protox/embedding/select.py b/tune/protox/embedding/select.py index 26e3c8d4..936bd328 100644 --- a/tune/protox/embedding/select.py +++ b/tune/protox/embedding/select.py @@ -77,7 +77,7 @@ def select_best_embeddings(dbgym_cfg: DBGymConfig, generic_args: EmbeddingTrainG ) if loop_i == 0: - link_result(dbgym_cfg, model_dpath, custom_result_name=default_embedder_dname(generic_args.benchmark_name, generic_args.workload_name)) + link_result(dbgym_cfg, model_dpath, custom_result_name=default_embedder_dname(generic_args.benchmark_name, generic_args.workload_name) + ".link") info_txt.write(f"model{idx}/embedder.pth\n") idx += 1 diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index c0faa432..19f1f3af 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -68,6 +68,7 @@ def _crunch( pid: Optional[int], query_spec: QuerySpec, ) -> None: + assert all(sql[1].exists() and not sql[1].is_symlink() and sql[1].is_absolute() for sql 
in sqls), f"sqls ({sqls}) should only contain existent real absolute paths." do_tbl_include_subsets_prune = query_spec["tbl_include_subsets_prune"] self.order = [] self.queries = QueryMap({}) @@ -256,7 +257,7 @@ def __init__( sqls = [ ( line.split(",")[0], - self.workload_path / line.split(",")[1], + Path(line.split(",")[1]), 1.0, ) for line in lines @@ -270,7 +271,7 @@ def __init__( sqls = [ ( split[0], - self.workload_path / split[1], + Path(split[1]), float(split[2]), ) for split in splits From 8ee373d1a519d16cdbe9a777e0cb7f19371ae61f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 22 Apr 2024 21:05:52 +0000 Subject: [PATCH 070/100] now writing all holon action variations to action.pkl --- experiments/protox_tpch_sf0point1/main.sh | 11 ++++-- scripts/pat_test.sh | 5 +-- tune/protox/agent/off_policy_algorithm.py | 1 + tune/protox/agent/replay.py | 46 +++++++++++++++-------- tune/protox/env/mqo/mqo_wrapper.py | 12 ++++-- tune/protox/env/pg_env.py | 15 +++++--- tune/protox/env/space/holon_space.py | 5 +-- tune/protox/env/workload.py | 10 ++--- 8 files changed, 65 insertions(+), 40 deletions(-) diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 432aa60e..ce0e71f5 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -4,11 +4,16 @@ set -euxo pipefail SCALE_FACTOR=0.1 INTENDED_PGDATA_HARDWARE=ssd -PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ +PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR +python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check dbms postgres build +python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH + +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10 exit 0 diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 58c0332b..3169b219 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -4,11 +4,10 @@ set -euxo pipefail SCALE_FACTOR=0.01 INTENDED_PGDATA_HARDWARE=ssd -PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ +PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index 5b2b4c3b..ecea5129 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -187,6 +187,7 @@ def collect_rollouts( # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get # stashed in the same directory and potentially cause a race condition. if self.logger and not tuning_mode == TuningMode.HPO: + actions_info = infos["actions_info"] self.logger.stash_results(infos) self.num_timesteps += 1 diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 09c5c49d..11d5bc99 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -10,6 +10,7 @@ import logging import pickle import click +import numpy as np import pandas as pd import tqdm from pathlib import Path @@ -20,9 +21,10 @@ from tune.protox.agent.build_trial import build_trial from tune.protox.env.pg_env import PostgresEnv +from tune.protox.env.space.holon_space import HolonSpace from tune.protox.env.space.primitive.index import IndexAction from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs -from tune.protox.env.types import HolonAction +from tune.protox.env.types import HolonAction, IndexSpaceRawSample REPLAY_DATA_FNAME = "replay_data.csv" @@ -106,6 +108,10 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args) +def check_index_space_raw_samples_equality(sample1: IndexSpaceRawSample, sample2: IndexSpaceRawSample) -> bool: + return np.array_equal(sample1[-1], sample2[-1]) and sample1[:-1] == sample2[:-1] + + def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_args: ReplayArgs): """ Replay a single tuning run (as in one tuning_steps/ folder). @@ -152,6 +158,7 @@ def _is_tuning_step_line(line: str) -> bool: # Build PostgresEnv. _, _, agent_env, _, _ = build_trial(dbgym_cfg, TuningMode.REPLAY, hpo_params["seed"], hpo_params) pg_env: PostgresEnv = agent_env.unwrapped + action_space: HolonSpace = pg_env.action_space # Reset things. 
if not replay_args.simulated: @@ -166,16 +173,17 @@ def _is_tuning_step_line(line: str) -> bool: num_lines += 1 # A convenience wrapper around execute_workload() which fills in the arguments properly - def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: - logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables, pg_env.action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") - logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), pg_env.action_space.get_knob_space().tables)}\n\n") + def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: + logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), action_space.get_knob_space().tables, action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") + logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), action_space.get_knob_space().tables)}\n\n") assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value" + all_holon_action_variations = actions_info["all_holon_action_variations"] replayed_runtime = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, - actions=[action_info], - actions_names=["Replay"], + actions=[holon_action for (_, holon_action) in all_holon_action_variations], + variation_names=[variation_name for (variation_name, _) in all_holon_action_variations], observation_space=None, - action_space=pg_env.action_space, + action_space=action_space, reset_metrics=None, query_timeout=None, workload_qdir=None, @@ -233,14 +241,22 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6 assert original_runtime > 0 - # Get the indexes from this action and the prior state - index_acts = set() + # Extract the necessary values from action.pkl with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: actions_info = pickle.load(f) - assert type(actions_info) is list and len(actions_info) == 1, f"there should only be one action in actions_info {actions_info}" - action_info = actions_info[0] - assert type(action_info) is tuple and len(action_info) == 3, f"action_info ({action_info}) should be a tuple with system knobs, an index, and per-query knobs" - index_acts.add(action_info[1]) + all_holon_action_variations = actions_info["all_holon_action_variations"] + # Extract the KnobSpaceAction and IndexAction from all_holon_action_variations. + # These two should be identical across all HolonActions, which we will assert. 
+ _, first_holon_action = all_holon_action_variations[0] + knob_space_action = first_holon_action[0] + index_space_raw_sample = first_holon_action[1] + index_action = action_space.get_index_space().to_action(index_space_raw_sample) + assert all([knob_space_action == holon_action[0] for (_, holon_action) in all_holon_action_variations]) + assert all([check_index_space_raw_samples_equality(index_space_raw_sample, holon_action[1]) for (_, holon_action) in all_holon_action_variations]) + + # Get the indexes from this action and the prior state + index_acts = set() + index_acts.add(index_action) assert len(index_acts) > 0 with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: prior_states = pickle.load(f) @@ -263,7 +279,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: # Modify Postgres to have the right indexes and system-wide knobs. `index_modification_sqls` holds the indexes # while `cc` holds the system-wide knobs. if not replay_args.simulated: - cc, _ = pg_env.action_space.get_knob_space().generate_action_plan(action_info[0], prior_states[0]) + cc, _ = action_space.get_knob_space().generate_action_plan(knob_space_action, prior_states[0]) # Like in tuning, we don't dump the page cache when calling shift_state() to see how the workload # performs in a warm cache scenario. pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False) @@ -271,7 +287,7 @@ def _execute_workload_wrapper(action_info: "HolonAction") -> list[float]: # Execute the workload to get the runtime. if not replay_args.simulated: - replayed_runtime = _execute_workload_wrapper(action_info) + replayed_runtime = _execute_workload_wrapper(actions_info) logging.info(f"Original Runtime: {original_runtime} (timed out? {did_any_query_timeout_in_original}). Replayed Runtime: {replayed_runtime}") else: replayed_runtime = original_runtime diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index d4dadee5..6a104300 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -30,6 +30,10 @@ def _mutilate_action_with_metrics( query_metric_data: Optional[dict[str, BestQueryRun]], timeout_qknobs: Optional[QuerySpaceKnobAction] = None, ) -> HolonAction: + """ + Modify action to make it the one with the best query knobs out + of all variations we tried. + """ if query_metric_data is not None: extract_q_knobs = action_space.extract_query(action) @@ -273,15 +277,15 @@ def transmute( if info["query_metric_data"]: self._update_best_observed(info["query_metric_data"]) - action = _mutilate_action_with_metrics( + best_holon_action = _mutilate_action_with_metrics( self.action_space, action, info["query_metric_data"], timeout_qknobs ) + best_observed_query_space_action = best_holon_action[2] with torch.no_grad(): # Pass the mutilated action back through. 
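The idea behind _mutilate_action_with_metrics() is, at its core, a per-query argmin over the variations that were executed; a toy version with invented runtimes:

def best_variation_per_query(metrics: dict[str, dict[str, float]]) -> dict[str, str]:
    # metrics: query id -> {variation name -> runtime in seconds}
    return {qid: min(runs, key=runs.get) for qid, runs in metrics.items()}

metrics = {"Q1": {"GlobalDual": 2.0, "PerQuery": 1.2},
           "Q2": {"GlobalDual": 0.5, "PerQuery": 0.9}}
assert best_variation_per_query(metrics) == {"Q1": "PerQuery", "Q2": "GlobalDual"}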
assert isinstance(self.action_space, HolonSpace) - actions_info = self.action_space.convert_actions_to_format_for_replay([action]) - info["actions_info"] = actions_info + info["actions_info"]["best_observed_query_space_action"] = best_observed_query_space_action info["maximal_embed"] = self.action_space.to_latent([action]) return self.unwrapped.step_post_execute(success, action, info) @@ -337,7 +341,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: observation_space=self.observation_space, action_space=self.action_space, actions=[r[1] for r in runs], - actions_names=[r[0] for r in runs], + variation_names=[r[0] for r in runs], benchbase_config=self.benchbase_config, query_timeout=self.query_timeout, reset_metrics=kwargs["options"]["query_metric_data"], diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index d1d43102..49ef0882 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -164,7 +164,7 @@ def reset( # type: ignore observation_space=self.observation_space, action_space=self.action_space, actions=[default_action], - actions_names=["GlobalDual"], + variation_names=["GlobalDual"], benchbase_config=self.benchbase_config, query_timeout=self.query_timeout, update=False, @@ -246,7 +246,7 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] def step_execute( self, setup_success: bool, - actions: list[Tuple[str, HolonAction]], + all_holon_action_variations: list[Tuple[str, HolonAction]], info: EnvInfoDict, ) -> Tuple[bool, EnvInfoDict]: if setup_success: @@ -255,6 +255,7 @@ def step_execute( # Evaluate the benchmark. self.logger.get_logger(__name__).info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(self.pg_conn.conn(), self.action_space.get_knob_space().tables, self.action_space.get_knob_space().knobs, self.workload.queries)}\n\n") self.logger.get_logger(__name__).info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(self.pg_conn.conn(), self.action_space.get_knob_space().tables)}\n\n") + self.logger.get_logger(__name__).info(f"\n\naction_names: {[a[0] for a in all_holon_action_variations]}\n\n") ( success, metric, @@ -269,8 +270,8 @@ def step_execute( action_space=self.action_space, benchbase_config=self.benchbase_config, query_timeout=self.query_timeout, - actions=[a[1] for a in actions], - actions_names=[a[0] for a in actions], + actions=[a[1] for a in all_holon_action_variations], + variation_names=[a[0] for a in all_holon_action_variations], update=True, ) else: @@ -284,7 +285,7 @@ def step_execute( metric, reward = self.reward_utility(did_error=True) results, q_timeout, query_metric_data = None, True, None - actions_info = self.action_space.convert_actions_to_format_for_replay([action[1] for action in actions]) + # Build EnvInfoDict info.update( EnvInfoDict( { @@ -293,7 +294,9 @@ def step_execute( "query_metric_data": query_metric_data, "reward": reward, "results": results, - "actions_info": actions_info, + "actions_info": { + "all_holon_action_variations": all_holon_action_variations, + }, } ) ) diff --git a/tune/protox/env/space/holon_space.py b/tune/protox/env/space/holon_space.py index 6b80f928..b0fe9538 100644 --- a/tune/protox/env/space/holon_space.py +++ b/tune/protox/env/space/holon_space.py @@ -368,7 +368,4 @@ def generate_plan_from_config( assert len(outputs) == 3 config_changes = list(itertools.chain(*[o[0] for o in outputs])) sql_commands = list(itertools.chain(*[o[1] for o in outputs])) - return config_changes, sql_commands - - def 
convert_actions_to_format_for_replay(self, actions: list[HolonAction]) -> list: - return [(a[0], self.get_index_space().to_action(a[1]), a[2]) for a in actions] \ No newline at end of file + return config_changes, sql_commands \ No newline at end of file diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 19f1f3af..c93caac5 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -334,7 +334,7 @@ def execute_workload( self, pg_conn: PostgresConn, actions: list[HolonAction] = [], - actions_names: list[str] = [], + variation_names: list[str] = [], results: Optional[Union[str, Path]] = None, observation_space: Optional[StateSpace] = None, action_space: Optional[HolonSpace] = None, @@ -351,7 +351,7 @@ def execute_workload( if not override_workload_timeout else override_workload_timeout ) - assert len(actions) == len(actions_names) + assert len(actions) == len(variation_names) # Do we need metrics. need_metric = False if not observation_space else observation_space.require_metrics() @@ -478,7 +478,7 @@ def execute_workload( } ), ) - for ql_knob, act_name in zip(ql_knobs, actions_names) + for ql_knob, act_name in zip(ql_knobs, variation_names) ] for r in zruns: if r[2] not in [rr[2] for rr in runs]: @@ -669,7 +669,7 @@ def execute( observation_space: StateSpace, action_space: HolonSpace, actions: list[HolonAction], - actions_names: list[str], + variation_names: list[str], benchbase_config: dict[str, Any], query_timeout: Optional[int] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, @@ -694,7 +694,7 @@ def execute( ret = self.execute_workload( pg_conn, actions=actions, - actions_names=actions_names, + variation_names=variation_names, results=results, observation_space=observation_space, action_space=action_space, From d46c5a9c43901e59fa35e088699621d785f4f207 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 00:33:16 +0000 Subject: [PATCH 071/100] now checking equality with the index space --- tune/protox/agent/replay.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 11d5bc99..e8fe8903 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -108,10 +108,6 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args) -def check_index_space_raw_samples_equality(sample1: IndexSpaceRawSample, sample2: IndexSpaceRawSample) -> bool: - return np.array_equal(sample1[-1], sample2[-1]) and sample1[:-1] == sample2[:-1] - - def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_args: ReplayArgs): """ Replay a single tuning run (as in one tuning_steps/ folder). 
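The hunk below replaces the hand-rolled raw-sample comparison (removed above) with a comparison of canonicalized index actions. The pitfall it avoids: tuples containing numpy arrays do not compare cleanly with ==. A self-contained illustration, where to_action is a stand-in for the real IndexSpace.to_action():

import numpy as np

raw_a = ("lineitem", ("l_orderkey",), np.array([1, 0]))
raw_b = ("lineitem", ("l_orderkey",), np.array([1, 0]))
# `raw_a == raw_b` raises "The truth value of an array ... is ambiguous" because
# tuple equality applies == elementwise and the array component yields an array.

def to_action(raw):  # stand-in: canonicalize the sample into plain hashable values
    table, cols, onehot = raw
    return (table, cols, tuple(int(x) for x in onehot))

assert to_action(raw_a) == to_action(raw_b)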
@@ -252,7 +248,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: index_space_raw_sample = first_holon_action[1] index_action = action_space.get_index_space().to_action(index_space_raw_sample) assert all([knob_space_action == holon_action[0] for (_, holon_action) in all_holon_action_variations]) - assert all([check_index_space_raw_samples_equality(index_space_raw_sample, holon_action[1]) for (_, holon_action) in all_holon_action_variations]) + assert all([index_action == action_space.get_index_space().to_action(holon_action[1]) for (_, holon_action) in all_holon_action_variations]) # Get the indexes from this action and the prior state index_acts = set() From 95be6fa569a260ca7f8953270172ec5167182338 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 01:01:07 +0000 Subject: [PATCH 072/100] added comments describing why query timeout and workload timeout aren't reverse engineer-able --- tune/protox/agent/replay.py | 21 +++++++++++++++++---- tune/protox/env/util/execute.py | 16 ++++++++-------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index e8fe8903..bd099eb4 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -228,12 +228,23 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) maximal_repo = None - # Get the original runtime. + # Get the original runtime as well as whether any individual queries and/or the full workload timed out. run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv" save_file(dbgym_cfg, run_raw_csv_fpath) run_raw_csv = pd.read_csv(run_raw_csv_fpath) assert len(run_raw_csv.columns) == 6 - did_any_query_timeout_in_original = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] + # `did_any_query_time_out_in_original` will be true when a query does not execute to completion, regardless of how it happened. Even + # if this was because there was only 1s before the workload timed out and thus the query was "unfairly" given a 1s "statement_timeout", + # we will still set `did_any_query_time_out_in_original` to true because that query didn't execute to completion. + # When setting `did_any_query_time_out_in_original`, we can't just check whether the latency in run.raw.csv == `query_timeout` because + # this doesn't handle the edge case where the "statement_timeout" setting in Postgres is set to be < `query_timeout`. This edge case + # would happen when the amount of time remaining before we hit `workload_timeout` is less than `query_timeout` and thus Proto-X sets + # "statement_timeout" to be < `query_timeout` in order to not exceed the `workload_timeout`. + did_any_query_time_out_in_original = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"] + # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` + # because Proto-X decreases `workload_timeout` over the course of the tuning run. Specifically, at the end of a tuning step, Proto-X + # sets `workload_timeout` to be equal to the runtime of the workload that just ran.
+ did_workload_time_out_in_original = False original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6 assert original_runtime > 0 @@ -284,7 +295,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # Execute the workload to get the runtime. if not replay_args.simulated: replayed_runtime = _execute_workload_wrapper(actions_info) - logging.info(f"Original Runtime: {original_runtime} (timed out? {did_any_query_timeout_in_original}). Replayed Runtime: {replayed_runtime}") + logging.info(f"Original Runtime: {original_runtime} (timed out? {did_any_query_time_out_in_original}). Replayed Runtime: {replayed_runtime}") else: replayed_runtime = original_runtime @@ -292,7 +303,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: run_data.append({ "step": current_step, "original_runtime": original_runtime, - "did_any_query_timeout_in_original": did_any_query_timeout_in_original, + "did_any_query_time_out_in_original": did_any_query_time_out_in_original, + "did_workload_time_out_in_original": did_workload_time_out_in_original, "time_since_start": (time_since_start - start_time).total_seconds(), "replayed_runtime": replayed_runtime, }) @@ -305,6 +317,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # Output. run_data_df = pd.DataFrame(run_data) + pd.set_option('display.max_columns', 10) print(f"Finished replaying with run_data_df=\n{run_data_df}\n. Data stored in {dbgym_cfg.cur_task_runs_path()}.") run_data_df.to_csv(dbgym_cfg.cur_task_runs_data_path("run_data.csv"), index=False) pg_env.close() \ No newline at end of file diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index ebc6e254..c2190243 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -37,7 +37,7 @@ def _time_query( query: str, timeout: float, ) -> Tuple[float, bool, Any]: - did_timeout = False + did_time_out = False has_explain = "EXPLAIN" in query explain_data = None @@ -65,11 +65,11 @@ def _time_query( f"{prefix} exceeded evaluation timeout {timeout}" ) qid_runtime = timeout * 1e6 - did_timeout = True + did_time_out = True except Exception as e: assert False, print(e) # qid_runtime is in microseconds. - return qid_runtime, did_timeout, explain_data + return qid_runtime, did_time_out, explain_data def _acquire_metrics_around_query( @@ -87,7 +87,7 @@ def _acquire_metrics_around_query( if query_timeout > 0: _force_statement_timeout(connection, query_timeout * 1000) - qid_runtime, did_timeout, explain_data = _time_query( + qid_runtime, did_time_out, explain_data = _time_query( logger, prefix, connection, query, query_timeout ) @@ -100,7 +100,7 @@ def _acquire_metrics_around_query( diff = None # qid_runtime is in microseconds. 
-    return qid_runtime, did_timeout, explain_data, diff
+    return qid_runtime, did_time_out, explain_data, diff
 
 
 def execute_variations(
@@ -142,7 +142,7 @@ def execute_variations(
         if logger:
             logger.get_logger(__name__).debug(f"{qr.prefix_qid} executing with {pqkk}")
 
-        runtime, did_timeout, explain_data, metric = _acquire_metrics_around_query(
+        runtime, did_time_out, explain_data, metric = _acquire_metrics_around_query(
             logger=logger,
             prefix=qr.prefix_qid,
             connection=connection,
@@ -151,7 +151,7 @@
             observation_space=observation_space,
         )
 
-        if not did_timeout:
+        if not did_time_out:
             new_timeout_limit = math.ceil(runtime / 1e3) / 1.0e3
             if new_timeout_limit < timeout_limit:
                 timeout_limit = new_timeout_limit
@@ -161,7 +161,7 @@
         best_qr = BestQueryRun(
             qr,
             runtime,
-            did_timeout,
+            did_time_out,
             explain_data,
             metric,
         )

From a909f1b27df5bd22ae5584888e23e16d5d87b1a9 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Tue, 23 Apr 2024 12:56:06 +0000
Subject: [PATCH 073/100] now reliably getting did_any_query_time_out_in_original

---
 tune/protox/agent/replay.py     | 11 ++++++-----
 tune/protox/env/util/execute.py |  2 --
 tune/protox/env/util/reward.py  |  9 ++++-----
 tune/protox/env/workload.py     |  8 ++++----
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index bd099eb4..018ea51e 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -232,15 +232,16 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]:
     run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv"
     save_file(dbgym_cfg, run_raw_csv_fpath)
     run_raw_csv = pd.read_csv(run_raw_csv_fpath)
-    assert len(run_raw_csv.columns) == 6
-    # `did_any_query_time_out_in_original` will be true when a query does not execute to completion, regardless of how it happened. Even
-    # if this was because there was only 1s before the workload timed out and thus the query was "unfairly" given a 1s "statement_timeout",
-    # we will still set `did_any_query_time_out_in_original` to true because that query did not execute to completion.
+    assert len(run_raw_csv.columns) == 7
+    # `did_any_query_time_out_in_original` will be true when *all variations* of at least one query of the original workload did not execute
+    # to completion, regardless of how it happened. Even if this was because there was only 1s before the workload timed out and thus the
+    # query was "unfairly" given a 1s "statement_timeout", we will still set `did_any_query_time_out_in_original` to true because that query
+    # did not execute to completion.
     # When setting `did_any_query_time_out_in_original`, we can't just check whether the latency in run.raw.csv == `query_timeout` because
     # this doesn't handle the edge case where the "statement_timeout" setting in Postgres is set to be < `query_timeout`. This edge case
     # would happen when the amount of time remaining before we hit `workload_timeout` is less than `query_timeout` and thus Proto-X sets
     # "statement_timeout" to be < `query_timeout` in order to not exceed the `workload_timeout`.
-    did_any_query_time_out_in_original = (run_raw_csv["Latency (microseconds)"].max() / 1e6) == hpo_params["query_timeout"]
+    did_any_query_time_out_in_original = any(run_raw_csv["Timed Out"])
    # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout`
    # because Proto-X decreases `workload_timeout` over the course of the tuning run.
Specifically, at the end of a tuning step, Proto-X # sets `workload_timeout` to be equal to the runtime of the workload that just ran. diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index c2190243..4e92300d 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -45,13 +45,11 @@ def _time_query( start_time = time.time() cursor = connection.execute(query) qid_runtime = (time.time() - start_time) * 1e6 - print(f"{prefix} measured qid_runtime={qid_runtime/1e6}") if has_explain: c = [c for c in cursor][0][0][0] assert "Execution Time" in c qid_runtime = float(c["Execution Time"]) * 1e3 - print(f"{prefix} explain qid_runtime={qid_runtime/1e6}") explain_data = c if logger: diff --git a/tune/protox/env/util/reward.py b/tune/protox/env/util/reward.py index e8db6f43..bd0c93ce 100644 --- a/tune/protox/env/util/reward.py +++ b/tune/protox/env/util/reward.py @@ -88,11 +88,10 @@ def __parse_runtime_for_metric(self, parent: Union[str, Path]) -> float: summary = [f for f in Path(parent).rglob("*.raw.csv")][0] data = pd.read_csv(summary) - assert len(data.columns) == 6 - - sum_data = data.sum() - latency: float = sum_data["Latency (microseconds)"] - return latency / 1.0e6 + assert len(data.columns) == 7 + summed_data = data.sum() + summed_latency: float = summed_data["Latency (microseconds)"] + return summed_latency / 1.0e6 def __call__( self, diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index c93caac5..df790730 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -406,7 +406,7 @@ def execute_workload( # Now let us start executing. workload_time = 0.0 time_left = workload_timeout - qid_runtime_data = {} + qid_runtime_data: dict[str, BestQueryRun] = {} stop_running = False for execute_idx, qid in enumerate(actual_order): @@ -606,7 +606,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: with open(results_dir / "run.raw.csv", "w") as f: # Write the raw query data. f.write( - "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Worker Id (start number),Phase Id (index in config file)\n" + "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Timed Out,Worker Id (start number),Phase Id (index in config file)\n" ) start = 0.0 @@ -616,7 +616,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: assert data and data.runtime and data.query_run rtime = data.runtime pfx = data.query_run.prefix - f.write(f"{i+1},{qid},{start},{rtime},0,{pfx}\n") + f.write(f"{i+1},{qid},{start},{rtime},{data.timeout},0,{pfx}\n") start += rtime / 1e6 # Write a penalty term if needed. @@ -632,7 +632,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: penalty = 3.0e6 if penalty > 0: - f.write(f"{len(self.order)},P,{time.time()},{penalty},0,PENALTY\n") + f.write(f"{len(self.order)},P,{time.time()},{penalty},True,0,PENALTY\n") # Get all the timeouts. 
timeouts = [v.timeout for _, v in qid_runtime_data.items()] From 586b9a371fe835664c765f4fc63fcb700dc7d052 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 13:18:37 +0000 Subject: [PATCH 074/100] fixed did_workload_time_out_in_original and ignoring penalty in original_runtime --- tune/protox/agent/replay.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 018ea51e..8e2c176a 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -245,8 +245,14 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` # because Proto-X decreases `workload_timeout` over the course of the tuning run. Specifically, at the end of a tuning step, Proto-X # sets `workload_timeout` to be equal to the runtime of the workload that just ran. - did_workload_time_out_in_original = False - original_runtime = run_raw_csv["Latency (microseconds)"].sum() / 1e6 + # We separate the penalty rows from the non-penalty rows to process them separately. + run_raw_csv_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] == "P"] + run_raw_csv_non_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] != "P"] + # Penalties are added when the workload times out so this is a reliable indicator of whether the workload timed out. + did_workload_time_out_in_original = len(run_raw_csv_penalty_rows) > 0 + # Penalties are meant to affect the reward of the tuning agent but they are unrelated to the actual runtime, so we ignore them when + # computing the original runtime. + original_runtime = run_raw_csv_non_penalty_rows["Latency (microseconds)"].sum() / 1e6 assert original_runtime > 0 # Extract the necessary values from action.pkl From d91cc6519f45cd5368cd40aac12f364c84375d78 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 13:19:06 +0000 Subject: [PATCH 075/100] changes to scripts --- experiments/protox_tpch_sf0point1/main.sh | 6 +++--- experiments/protox_tpch_sf10/main.sh | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 432aa60e..c87a7e57 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -7,8 +7,8 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10 exit 0 @@ -27,6 +27,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 3dad54b7..0efaf0db 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -7,8 +7,8 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 From cc11a8d82bd3db7a5ed1fd0e4beff99ba2d2bdc6 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 15:31:57 +0000 Subject: [PATCH 076/100] removing breaking after 10 iterations --- experiments/protox_tpch_sf0point1/main.sh | 2 +- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 7 ------- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index b24f1d30..4ded8dd9 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -8,7 +8,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.1 +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10 exit 0 diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 3169b219..68c24fa5 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,7 +7,7 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 8e2c176a..0d493621 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -198,7 +198,6 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: noop_index = False maximal_repo = None existing_index_acts = [] - if1_count = 0 for line in f: # Keep going until we've found the start. 
@@ -214,12 +213,6 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: noop_index = "NOOP" in act[1][0] elif _is_tuning_step_line(line): - if1_count += 1 - print(f"if1_count={if1_count}") - - if if1_count >= 10: - break - if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0]) From c200e46008e3d62ec30e0d3772c12e91f01f30a0 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 15:36:56 +0000 Subject: [PATCH 077/100] workload_time -> workload_runtime_accum --- tune/protox/agent/replay.py | 1 + tune/protox/env/workload.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 0d493621..03fc3b57 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -187,6 +187,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: blocklist=replay_args.blocklist, first=False, ) + assert type(replayed_runtime) is float, "Workload.execute_workload() can return either a float or a tuple. During replay, we must ensure that it returns a float." return replayed_runtime run_data = [] diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index df790730..c48fc4d7 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -404,7 +404,10 @@ def execute_workload( actual_queries = self.queries # Now let us start executing. - workload_time = 0.0 + # `workload_runtime_accum` is the accumulated runtime of the queries in the workload. Note that we execute multiple variations of each query, but + # we only add the runtime of the *fastest* variation of each query to `workload_runtime_accum`. If all variations timed out, we'll add whatever + # the timeout was set to to `workload_runtime_accum`. + workload_runtime_accum = 0.0 time_left = workload_timeout qid_runtime_data: dict[str, BestQueryRun] = {} stop_running = False @@ -509,7 +512,7 @@ def execute_workload( connection=pg_conn.conn(), runs=runs, query=query, - query_timeout=min(target_pqt, workload_timeout - workload_time + 1), + query_timeout=min(target_pqt, workload_timeout - workload_runtime_accum + 1), logger=self.logger, sysknobs=sysknobs, observation_space=observation_space, @@ -533,7 +536,7 @@ def execute_workload( qid_runtime = best_run.runtime time_left -= qid_runtime / 1e6 - workload_time += qid_runtime / 1e6 + workload_runtime_accum += qid_runtime / 1e6 if time_left < 0: # We need to undo any potential statements after the timed out query. for st, rq in queries[qidx+1:]: @@ -624,7 +627,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: if stop_running and self.workload_timeout_penalty > 1: # Get the penalty. 
penalty = ( - workload_timeout * self.workload_timeout_penalty - workload_time + workload_timeout * self.workload_timeout_penalty - workload_runtime_accum ) penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 elif stop_running and not first: @@ -638,7 +641,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: timeouts = [v.timeout for _, v in qid_runtime_data.items()] return True, (any(timeouts) or stop_running), qid_runtime_data - return workload_time + return workload_runtime_accum @time_record("execute") def _execute_benchbase( From 6beea71d3e8af203488224a7484afbb6afd2a1e1 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 15:38:16 +0000 Subject: [PATCH 078/100] workload_timeout -> this_execution_workload_timeout --- tune/protox/env/workload.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index c48fc4d7..26eac96a 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -346,7 +346,7 @@ def execute_workload( blocklist: list[str] = [], first: bool = False, ) -> Union[float, Tuple[bool, bool, dict[str, Any]]]: - workload_timeout = ( + this_execution_workload_timeout = ( self.workload_timeout if not override_workload_timeout else override_workload_timeout @@ -408,7 +408,7 @@ def execute_workload( # we only add the runtime of the *fastest* variation of each query to `workload_runtime_accum`. If all variations timed out, we'll add whatever # the timeout was set to to `workload_runtime_accum`. workload_runtime_accum = 0.0 - time_left = workload_timeout + time_left = this_execution_workload_timeout qid_runtime_data: dict[str, BestQueryRun] = {} stop_running = False @@ -487,7 +487,7 @@ def execute_workload( if r[2] not in [rr[2] for rr in runs]: runs.append(r) - target_pqt = query_timeout if query_timeout else workload_timeout + target_pqt = query_timeout if query_timeout else this_execution_workload_timeout skip_execute = False if ( reset_metrics is not None @@ -512,7 +512,7 @@ def execute_workload( connection=pg_conn.conn(), runs=runs, query=query, - query_timeout=min(target_pqt, workload_timeout - workload_runtime_accum + 1), + query_timeout=min(target_pqt, this_execution_workload_timeout - workload_runtime_accum + 1), logger=self.logger, sysknobs=sysknobs, observation_space=observation_space, @@ -627,7 +627,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: if stop_running and self.workload_timeout_penalty > 1: # Get the penalty. 
penalty = ( - workload_timeout * self.workload_timeout_penalty - workload_runtime_accum + this_execution_workload_timeout * self.workload_timeout_penalty - workload_runtime_accum ) penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 elif stop_running and not first: From 006cc4a62631c1c83e128210ccf95f207fefa0d3 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 15:46:07 +0000 Subject: [PATCH 079/100] removed time_left since it's redundant with workload_runtime_accum --- tune/protox/env/util/execute.py | 2 ++ tune/protox/env/workload.py | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index 4e92300d..f991c257 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -84,6 +84,8 @@ def _acquire_metrics_around_query( if query_timeout > 0: _force_statement_timeout(connection, query_timeout * 1000) + else: + assert query_timeout == 0, f"Setting query_timeout to 0 indicates \"timeout\". However, setting query_timeout ({query_timeout}) < 0 is a bug." qid_runtime, did_time_out, explain_data = _time_query( logger, prefix, connection, query, query_timeout diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 26eac96a..88b277fb 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -408,7 +408,6 @@ def execute_workload( # we only add the runtime of the *fastest* variation of each query to `workload_runtime_accum`. If all variations timed out, we'll add whatever # the timeout was set to to `workload_runtime_accum`. workload_runtime_accum = 0.0 - time_left = this_execution_workload_timeout qid_runtime_data: dict[str, BestQueryRun] = {} stop_running = False @@ -453,7 +452,7 @@ def execute_workload( f"{qid}", pg_conn.conn(), query, - query_timeout=time_left, + query_timeout=this_execution_workload_timeout - workload_runtime_accum, observation_space=None, ) @@ -535,9 +534,9 @@ def execute_workload( qid_runtime_data[qid] = best_run qid_runtime = best_run.runtime - time_left -= qid_runtime / 1e6 workload_runtime_accum += qid_runtime / 1e6 - if time_left < 0: + + if workload_runtime_accum > this_execution_workload_timeout: # We need to undo any potential statements after the timed out query. 
for st, rq in queries[qidx+1:]: if st != QueryType.SELECT: From a12348dd32ee2617d935e4a9a6bb6b494f14d1a9 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 16:34:37 +0000 Subject: [PATCH 080/100] removed disable_pg_hint code --- tune/protox/agent/replay.py | 1 - tune/protox/env/workload.py | 164 ++++++++++++++---------------------- 2 files changed, 61 insertions(+), 104 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 03fc3b57..b5ce4f00 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -183,7 +183,6 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: reset_metrics=None, query_timeout=None, workload_qdir=None, - disable_pg_hint=False, blocklist=replay_args.blocklist, first=False, ) diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 88b277fb..622ebe58 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -342,7 +342,6 @@ def execute_workload( override_workload_timeout: Optional[float] = None, query_timeout: Optional[int] = None, workload_qdir: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, - disable_pg_hint: bool = False, blocklist: list[str] = [], first: bool = False, ) -> Union[float, Tuple[bool, bool, dict[str, Any]]]: @@ -428,114 +427,74 @@ def execute_workload( pg_conn.conn().execute(query) continue - if disable_pg_hint: - assert len(ql_knobs) == 1 - ql_knob = ql_knobs[0] - qid_knobs = { - ql_knob[0].knobs[k]: ql_knob[1][k] - for k in ql_knob[1].keys() - if f"{qid}_" in k - } - - # Alter the session first. - disable = ";".join( - [ - f"SET {knob.knob_name} = OFF" - for knob, value in qid_knobs.items() - if value == 0 - ] + # De-duplicate the runs. + runs: list[QueryRun] = [] + zruns: list[QueryRun] = [ + QueryRun( + act_name, + f"{act_name}_{qid}", + QuerySpaceKnobAction( + { + ql_knob[0].knobs[k]: ql_knob[1][k] + for k in ql_knob[1].keys() + if f"{qid}_" in k + } + ), ) - pg_conn.conn().execute(disable) - - qid_runtime, _, _, _ = _acquire_metrics_around_query( - self.logger, - f"{qid}", - pg_conn.conn(), - query, - query_timeout=this_execution_workload_timeout - workload_runtime_accum, - observation_space=None, + for ql_knob, act_name in zip(ql_knobs, variation_names) + ] + for r in zruns: + if r[2] not in [rr[2] for rr in runs]: + runs.append(r) + + target_pqt = query_timeout if query_timeout else this_execution_workload_timeout + skip_execute = False + if ( + reset_metrics is not None + and qid in reset_metrics + and not reset_metrics[qid].timeout + ): + # If we have a reset metric, use it's timeout and convert to seconds. + truntime = reset_metrics[qid].runtime + assert truntime is not None + target_pqt = math.ceil(truntime / 1.0e6) + + # If we've seen the exact same query knobs before, skip it. + rmetrics = reset_metrics[qid] + skip_execute = ( + (rmetrics.query_run is not None) + and (rmetrics.query_run.qknobs is not None) + and (rmetrics.query_run.qknobs == runs[-1].qknobs) ) - undo_disable = ";".join( - [ - f"SET {knob.knob_name} = ON" - for knob, value in qid_knobs.items() - if value == 0 - ] + if not skip_execute: + best_run: BestQueryRun = execute_variations( + connection=pg_conn.conn(), + runs=runs, + query=query, + query_timeout=min(target_pqt, this_execution_workload_timeout - workload_runtime_accum + 1), + logger=self.logger, + sysknobs=sysknobs, + observation_space=observation_space, ) - pg_conn.conn().execute(undo_disable) - else: - # De-duplicate the runs. 
- runs: list[QueryRun] = [] - zruns: list[QueryRun] = [ - QueryRun( - act_name, - f"{act_name}_{qid}", - QuerySpaceKnobAction( - { - ql_knob[0].knobs[k]: ql_knob[1][k] - for k in ql_knob[1].keys() - if f"{qid}_" in k - } - ), - ) - for ql_knob, act_name in zip(ql_knobs, variation_names) - ] - for r in zruns: - if r[2] not in [rr[2] for rr in runs]: - runs.append(r) - - target_pqt = query_timeout if query_timeout else this_execution_workload_timeout - skip_execute = False - if ( - reset_metrics is not None - and qid in reset_metrics - and not reset_metrics[qid].timeout + assert reset_metrics + best_run = reset_metrics[qid] + + if reset_metrics is not None and qid in reset_metrics: + # Old one is actually better so let's use that. + rmetric = reset_metrics[qid] + if best_run.timeout or ( + best_run.runtime + and rmetric.runtime + and rmetric.runtime < best_run.runtime ): - # If we have a reset metric, use it's timeout and convert to seconds. - truntime = reset_metrics[qid].runtime - assert truntime is not None - target_pqt = math.ceil(truntime / 1.0e6) - - # If we've seen this exact before, skip it. - rmetrics = reset_metrics[qid] - skip_execute = ( - (rmetrics.query_run is not None) - and (rmetrics.query_run.qknobs is not None) - and (rmetrics.query_run.qknobs == runs[-1].qknobs) - ) + best_run = rmetric + + assert best_run.runtime + qid_runtime_data[qid] = best_run + workload_runtime_accum += best_run.runtime / 1e6 - if not skip_execute: - best_run: BestQueryRun = execute_variations( - connection=pg_conn.conn(), - runs=runs, - query=query, - query_timeout=min(target_pqt, this_execution_workload_timeout - workload_runtime_accum + 1), - logger=self.logger, - sysknobs=sysknobs, - observation_space=observation_space, - ) - else: - assert reset_metrics - best_run = reset_metrics[qid] - - if reset_metrics is not None and qid in reset_metrics: - # Old one is actually better so let's use that. - rmetric = reset_metrics[qid] - if best_run.timeout or ( - best_run.runtime - and rmetric.runtime - and rmetric.runtime < best_run.runtime - ): - best_run = rmetric - - assert best_run.runtime - qid_runtime_data[qid] = best_run - qid_runtime = best_run.runtime - - workload_runtime_accum += qid_runtime / 1e6 - if workload_runtime_accum > this_execution_workload_timeout: # We need to undo any potential statements after the timed out query. 
for st, rq in queries[qidx+1:]: @@ -704,7 +663,6 @@ def execute( override_workload_timeout=self.workload_timeout, query_timeout=query_timeout, workload_qdir=None, - disable_pg_hint=False, blocklist=[], first=first, ) From 7320e06068509b045e643d61e980411d9bd515fd Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 16:55:23 +0000 Subject: [PATCH 081/100] removed noop index dead code --- tune/protox/agent/replay.py | 10 ++-------- tune/protox/env/space/primitive/index.py | 5 ++--- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index b5ce4f00..49762a18 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -195,7 +195,6 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: current_step = 0 start_found = False start_time = None - noop_index = False maximal_repo = None existing_index_acts = [] @@ -208,10 +207,6 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: progess_bar.update(1) continue - elif "Selected action: " in line: - act = eval(line.split("Selected action: ")[-1]) - noop_index = "NOOP" in act[1][0] - elif _is_tuning_step_line(line): if _is_tuning_step_line(line): repo = eval(line.split("Running ")[-1])[-1] @@ -268,9 +263,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f: prior_states = pickle.load(f) all_sc = set(prior_states[1]) - if not noop_index: - for index_act in index_acts: - all_sc.add(index_act) + for index_act in index_acts: + all_sc.add(index_act) all_sc = {a for a in all_sc if not "USING btree ()" in a.sql(True)} index_acts = all_sc diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py index 4fe9d749..7fcc1509 100644 --- a/tune/protox/env/space/primitive/index.py +++ b/tune/protox/env/space/primitive/index.py @@ -117,9 +117,8 @@ def __hash__(self) -> int: ) return h - def __repr__(self, add: bool = True) -> str: - return "{a} {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( - a="CREATE" if add else "NOOP", + def __repr__(self) -> str: + return "CREATE {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( idx_name=self.get_index_name(), tbl_name=self.tbl_name, idx_type=self.idx_type, From e1c3f075ad38b6a8b51fa666fae53a01a65d0ac7 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 17:02:03 +0000 Subject: [PATCH 082/100] removed dead var --- tune/protox/env/workload.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 622ebe58..1d015696 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -352,9 +352,6 @@ def execute_workload( ) assert len(actions) == len(variation_names) - # Do we need metrics. 
- need_metric = False if not observation_space else observation_space.require_metrics() - sysknobs = KnobSpaceAction({}) ql_knobs = [] if len(actions) > 0: From 9c45bf75d6b06b7c0978a46b5e3309ff74fd3725 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 18:47:12 +0000 Subject: [PATCH 083/100] renamed BestQueryRun.timeout to timed_out --- tune/protox/env/mqo/mqo_wrapper.py | 22 +++++++++++----------- tune/protox/env/types.py | 2 +- tune/protox/env/workload.py | 14 +++++++------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 6a104300..25bfaf8e 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -41,7 +41,7 @@ def _mutilate_action_with_metrics( processed = set() for q, data in query_metric_data.items(): - if not data.timeout: + if not data.timed_out: assert data.query_run pqk = data.query_run.qknobs for k, v in data.query_run.qknobs.items(): @@ -147,18 +147,18 @@ def __init__( def _update_best_observed(self, query_metric_data: dict[str, BestQueryRun], force_overwrite=False) -> None: if query_metric_data is not None: - for q, data in query_metric_data.items(): - if q not in self.best_observed or force_overwrite: - self.best_observed[q] = BestQueryRun(data.query_run, data.runtime, data.timeout, None, None) + for qid, best_run in query_metric_data.items(): + if qid not in self.best_observed or force_overwrite: + self.best_observed[qid] = BestQueryRun(best_run.query_run, best_run.runtime, best_run.timed_out, None, None) if self.logger: - self.logger.get_logger(__name__).debug(f"[best_observe] {q}: {data.runtime/1e6} (force: {force_overwrite})") - elif not data.timeout: - qobs = self.best_observed[q] - assert qobs.runtime and data.runtime - if data.runtime < qobs.runtime: - self.best_observed[q] = BestQueryRun(data.query_run, data.runtime, data.timeout, None, None) + self.logger.get_logger(__name__).debug(f"[best_observe] {qid}: {best_run.runtime/1e6} (force: {force_overwrite})") + elif not best_run.timed_out: + qobs = self.best_observed[qid] + assert qobs.runtime and best_run.runtime + if best_run.runtime < qobs.runtime: + self.best_observed[qid] = BestQueryRun(best_run.query_run, best_run.runtime, best_run.timed_out, None, None) if self.logger: - self.logger.get_logger(__name__).debug(f"[best_observe] {q}: {data.runtime/1e6}") + self.logger.get_logger(__name__).debug(f"[best_observe] {qid}: {best_run.runtime/1e6}") def step( # type: ignore self, diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 4ae71d76..d9db7ffa 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -136,7 +136,7 @@ class ServerIndexMetadata(TypedDict, total=False): [ ("query_run", Optional[QueryRun]), ("runtime", Optional[float]), - ("timeout", bool), + ("timed_out", bool), ("explain_data", Optional[Any]), ("metric_data", Optional[dict[str, Any]]), ], diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 1d015696..b0a560f4 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -449,7 +449,7 @@ def execute_workload( if ( reset_metrics is not None and qid in reset_metrics - and not reset_metrics[qid].timeout + and not reset_metrics[qid].timed_out ): # If we have a reset metric, use it's timeout and convert to seconds. truntime = reset_metrics[qid].runtime @@ -481,7 +481,7 @@ def execute_workload( if reset_metrics is not None and qid in reset_metrics: # Old one is actually better so let's use that. 
rmetric = reset_metrics[qid] - if best_run.timeout or ( + if best_run.timed_out or ( best_run.runtime and rmetric.runtime and rmetric.runtime < best_run.runtime @@ -574,7 +574,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: assert data and data.runtime and data.query_run rtime = data.runtime pfx = data.query_run.prefix - f.write(f"{i+1},{qid},{start},{rtime},{data.timeout},0,{pfx}\n") + f.write(f"{i+1},{qid},{start},{rtime},{data.time_out},0,{pfx}\n") start += rtime / 1e6 # Write a penalty term if needed. @@ -593,8 +593,8 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: f.write(f"{len(self.order)},P,{time.time()},{penalty},True,0,PENALTY\n") # Get all the timeouts. - timeouts = [v.timeout for _, v in qid_runtime_data.items()] - return True, (any(timeouts) or stop_running), qid_runtime_data + did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) + return (did_any_query_time_out or stop_running), qid_runtime_data return workload_runtime_accum @@ -664,8 +664,8 @@ def execute( first=first, ) assert isinstance(ret, tuple) - success, q_timeout, query_metric_data = ret[0], ret[1], ret[2] - assert success + q_timeout, query_metric_data = ret[0], ret[1] + success = True metric, reward = None, None if reward_utility is not None: From 509f7dc7c133a94b76c99a954a8eefb0e41b379f Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 18:48:02 +0000 Subject: [PATCH 084/100] renamed stop_running to workload_timed_out --- tune/protox/env/workload.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index b0a560f4..a6a06f27 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -405,10 +405,10 @@ def execute_workload( # the timeout was set to to `workload_runtime_accum`. workload_runtime_accum = 0.0 qid_runtime_data: dict[str, BestQueryRun] = {} - stop_running = False + workload_timed_out = False for execute_idx, qid in enumerate(actual_order): - if stop_running: + if workload_timed_out: break queries = actual_queries[qid] @@ -500,7 +500,7 @@ def execute_workload( assert st != QueryType.INS_UPD_DEL pg_conn.conn().execute(rq) - stop_running = True + workload_timed_out = True break # Undo any necessary state changes. @@ -579,13 +579,13 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: # Write a penalty term if needed. penalty = 0.0 - if stop_running and self.workload_timeout_penalty > 1: + if workload_timed_out and self.workload_timeout_penalty > 1: # Get the penalty. penalty = ( this_execution_workload_timeout * self.workload_timeout_penalty - workload_runtime_accum ) penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 - elif stop_running and not first: + elif workload_timed_out and not first: # Always degrade it a little if we've timed out. penalty = 3.0e6 @@ -594,7 +594,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: # Get all the timeouts. 
did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) - return (did_any_query_time_out or stop_running), qid_runtime_data + return (did_any_query_time_out or workload_timed_out), qid_runtime_data return workload_runtime_accum From 22617e07d9c769bab9a3e32c66b5de84446be690 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 18:55:48 +0000 Subject: [PATCH 085/100] refactored execute_workload() to separately return whether the workload or the query timed out --- tune/protox/env/pg_env.py | 6 +++--- .../env/target_reset/target_reset_wrapper.py | 4 ++-- tune/protox/env/types.py | 4 ++-- tune/protox/env/workload.py | 21 +++++++++---------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 49ef0882..612e05f7 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -261,7 +261,7 @@ def step_execute( metric, reward, results, - q_timeout, + did_anything_time_out, query_metric_data, ) = self.workload.execute( pg_conn=self.pg_conn, @@ -283,14 +283,14 @@ def step_execute( success = False # Since we reached an invalid area, just set the next state to be the current state. metric, reward = self.reward_utility(did_error=True) - results, q_timeout, query_metric_data = None, True, None + results, did_anything_time_out, query_metric_data = None, True, None # Build EnvInfoDict info.update( EnvInfoDict( { "metric": metric, - "q_timeout": q_timeout, + "did_anything_time_out": did_anything_time_out, "query_metric_data": query_metric_data, "reward": reward, "results": results, diff --git a/tune/protox/env/target_reset/target_reset_wrapper.py b/tune/protox/env/target_reset/target_reset_wrapper.py index 519a5d58..800ec60a 100644 --- a/tune/protox/env/target_reset/target_reset_wrapper.py +++ b/tune/protox/env/target_reset/target_reset_wrapper.py @@ -41,12 +41,12 @@ def step( # type: ignore obs, rews, terms, truncs, infos = self.env.step(*args, **kwargs) query_metric_data = infos.get("query_metric_data", None) assert self.best_metric is not None - q_timeout = infos.get("q_timeout", False) + did_anything_time_out = infos.get("did_anything_time_out", False) metric = infos["metric"] if self.reward_utility.is_perf_better(metric, self.best_metric): self.best_metric = infos["metric"] - if not q_timeout: + if not did_anything_time_out: self.real_best_metric = self.best_metric if self.maximize_state: diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index d9db7ffa..1c4ffcfb 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -188,8 +188,8 @@ class EnvInfoDict(TypedDict, total=False): metric: float # Reward of this step. reward: float - # Whether any queries timed out during this step's evaluation. - q_timeout: bool + # Whether any queries timed out or the workload as a whole timed out. + did_anything_time_out: bool # Query metric data. query_metric_data: Optional[dict[str, BestQueryRun]] # Information about the actions that were executed this step. 
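The workload.py diff below completes this refactor. The relationship between the new flags can be restated compactly (a sketch for clarity, not code from the repo):

# execute_workload() now reports the two timeout conditions separately:
#   did_any_query_time_out - all variations of at least one query failed to
#                            finish under their statement_timeout
#   did_workload_time_out  - the accumulated runtime exceeded the workload
#                            timeout, so the remaining queries were cut off
# execute() then collapses them for consumers such as EnvInfoDict:
def compute_did_anything_time_out(did_any_query_time_out: bool,
                                  did_workload_time_out: bool) -> bool:
    return did_any_query_time_out or did_workload_time_out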
diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index a6a06f27..9b53c545 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -570,11 +570,11 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: start = 0.0 for i, qid in enumerate(self.order): if qid in qid_runtime_data: - data = qid_runtime_data[qid] - assert data and data.runtime and data.query_run - rtime = data.runtime - pfx = data.query_run.prefix - f.write(f"{i+1},{qid},{start},{rtime},{data.time_out},0,{pfx}\n") + best_run = qid_runtime_data[qid] + assert best_run and best_run.runtime and best_run.query_run + rtime = best_run.runtime + pfx = best_run.query_run.prefix + f.write(f"{i+1},{qid},{start},{rtime},{best_run.timed_out},0,{pfx}\n") start += rtime / 1e6 # Write a penalty term if needed. @@ -594,7 +594,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: # Get all the timeouts. did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) - return (did_any_query_time_out or workload_timed_out), qid_runtime_data + return did_any_query_time_out, workload_timed_out, qid_runtime_data return workload_runtime_accum @@ -649,7 +649,7 @@ def execute( # We can only create a state if we succeeded. success = observation_space.check_benchbase(self.dbgym_cfg, results) else: - ret = self.execute_workload( + did_any_query_time_out, did_workload_time_out, query_metric_data = self.execute_workload( pg_conn, actions=actions, variation_names=variation_names, @@ -663,8 +663,7 @@ def execute( blocklist=[], first=first, ) - assert isinstance(ret, tuple) - q_timeout, query_metric_data = ret[0], ret[1] + did_anything_time_out = did_any_query_time_out or did_workload_time_out success = True metric, reward = None, None @@ -675,6 +674,6 @@ def execute( if self.logger: self.logger.get_logger(__name__).info( - f"Benchmark iteration with metric {metric} (reward: {reward}) (q_timeout: {q_timeout})" + f"Benchmark iteration with metric {metric} (reward: {reward}) (did_anything_timeout: {did_anything_time_out})" ) - return success, metric, reward, results, q_timeout, query_metric_data + return success, metric, reward, results, did_anything_time_out, query_metric_data From bf5fe7377167014d37ed1eabfccadc85e684942d Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 19:11:20 +0000 Subject: [PATCH 086/100] replaced workload_runtime_accum with compute_total_workload_runtime() --- scripts/pat_test.sh | 2 +- tune/protox/env/workload.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 68c24fa5..3169b219 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,7 +7,7 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 9b53c545..7945587a 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -329,6 +329,10 @@ def column_usages(self) -> TableAttrListMap: def max_indexable(self) -> int: return max([len(cols) for _, cols in self.query_usages.items()]) + @staticmethod + def compute_total_workload_runtime(qid_runtime_data: dict[str, BestQueryRun]) -> float: + return sum(best_run.runtime for best_run in qid_runtime_data.values()) / 1.0e6 + @time_record("execute") def execute_workload( self, @@ -400,10 +404,6 @@ def execute_workload( actual_queries = self.queries # Now let us start executing. - # `workload_runtime_accum` is the accumulated runtime of the queries in the workload. Note that we execute multiple variations of each query, but - # we only add the runtime of the *fastest* variation of each query to `workload_runtime_accum`. If all variations timed out, we'll add whatever - # the timeout was set to to `workload_runtime_accum`. - workload_runtime_accum = 0.0 qid_runtime_data: dict[str, BestQueryRun] = {} workload_timed_out = False @@ -469,7 +469,7 @@ def execute_workload( connection=pg_conn.conn(), runs=runs, query=query, - query_timeout=min(target_pqt, this_execution_workload_timeout - workload_runtime_accum + 1), + query_timeout=min(target_pqt, this_execution_workload_timeout - Workload.compute_total_workload_runtime(qid_runtime_data) + 1), logger=self.logger, sysknobs=sysknobs, observation_space=observation_space, @@ -490,9 +490,8 @@ def execute_workload( assert best_run.runtime qid_runtime_data[qid] = best_run - workload_runtime_accum += best_run.runtime / 1e6 - if workload_runtime_accum > this_execution_workload_timeout: + if Workload.compute_total_workload_runtime(qid_runtime_data) > this_execution_workload_timeout: # We need to undo any potential statements after the timed out query. for st, rq in queries[qidx+1:]: if st != QueryType.SELECT: @@ -561,6 +560,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: output["flattened"] = True f.write(json.dumps(output, indent=4)) + # run.raw.csv will essentially contain the information in qid_runtime_data. However, run.raw.csv may have an extra line for the penalty. with open(results_dir / "run.raw.csv", "w") as f: # Write the raw query data. f.write( @@ -582,7 +582,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: if workload_timed_out and self.workload_timeout_penalty > 1: # Get the penalty. 
penalty = ( - this_execution_workload_timeout * self.workload_timeout_penalty - workload_runtime_accum + this_execution_workload_timeout * self.workload_timeout_penalty - Workload.compute_total_workload_runtime(qid_runtime_data) ) penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 elif workload_timed_out and not first: @@ -596,7 +596,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) return did_any_query_time_out, workload_timed_out, qid_runtime_data - return workload_runtime_accum + return Workload.compute_total_workload_runtime(qid_runtime_data) @time_record("execute") def _execute_benchbase( From 6d237ec59ffcfc55349c00d4ad299ef54463abb7 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 19:17:36 +0000 Subject: [PATCH 087/100] now seeing whether workload or query timed out in replay --- tune/protox/agent/replay.py | 35 ++++++++++++++++------------------- tune/protox/env/workload.py | 10 ++++------ 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 49762a18..ebf98e2a 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -5,26 +5,22 @@ Additionally, the original tuning run may have been accelerated by Boot, whereas the replayed tuning run is not. """ -import datetime import json import logging import pickle import click -import numpy as np import pandas as pd import tqdm from pathlib import Path from dateutil.parser import parse -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath -# sys.path.append("/home/phw2/dbgym") # TODO(phw2): figure out if this is required - +from misc.utils import DBGymConfig, TuningMode, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath from tune.protox.agent.build_trial import build_trial from tune.protox.env.pg_env import PostgresEnv from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.primitive.index import IndexAction from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs -from tune.protox.env.types import HolonAction, IndexSpaceRawSample +from tune.protox.env.types import HolonAction +from tune.protox.env.workload import Workload REPLAY_DATA_FNAME = "replay_data.csv" @@ -168,13 +164,13 @@ def _is_tuning_step_line(line: str) -> bool: elif _is_tuning_step_line(line): num_lines += 1 - # A convenience wrapper around execute_workload() which fills in the arguments properly + # A convenience wrapper around execute_workload() which fills in the arguments properly and processes the return values. 
def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), action_space.get_knob_space().tables, action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), action_space.get_knob_space().tables)}\n\n") assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value" all_holon_action_variations = actions_info["all_holon_action_variations"] - replayed_runtime = pg_env.workload.execute_workload( + did_any_query_time_out, did_workload_time_out, qid_runtime_data = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, actions=[holon_action for (_, holon_action) in all_holon_action_variations], variation_names=[variation_name for (variation_name, _) in all_holon_action_variations], @@ -186,8 +182,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: blocklist=replay_args.blocklist, first=False, ) - assert type(replayed_runtime) is float, "Workload.execute_workload() can return either a float or a tuple. During replay, we must ensure that it returns a float." - return replayed_runtime + workload_runtime = Workload.compute_total_workload_runtime(qid_runtime_data) + return did_any_query_time_out, did_workload_time_out, workload_runtime run_data = [] progess_bar = tqdm.tqdm(total=num_lines) @@ -240,8 +236,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: did_workload_time_out_in_original = len(run_raw_csv_penalty_rows) > 0 # Penalties are meant to affect the reward of the tuning agent but they are unrelated to the actual runtime, so we ignore them when # computing the original runtime. - original_runtime = run_raw_csv_non_penalty_rows["Latency (microseconds)"].sum() / 1e6 - assert original_runtime > 0 + original_workload_runtime = run_raw_csv_non_penalty_rows["Latency (microseconds)"].sum() / 1e6 + assert original_workload_runtime > 0 # Extract the necessary values from action.pkl with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: @@ -288,19 +284,20 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # Execute the workload to get the runtime. if not replay_args.simulated: - replayed_runtime = _execute_workload_wrapper(actions_info) - logging.info(f"Original Runtime: {original_runtime} (timed out? {did_any_query_time_out_in_original}). Replayed Runtime: {replayed_runtime}") + did_any_query_time_out_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info) else: - replayed_runtime = original_runtime + did_any_query_time_out_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = did_any_query_time_out_in_original, did_workload_time_out_in_original, original_workload_runtime # Add this tuning step's data to `run_data``. 
run_data.append({ "step": current_step, - "original_runtime": original_runtime, + "time_since_start": (time_since_start - start_time).total_seconds(), + "original_workload_runtime": original_workload_runtime, "did_any_query_time_out_in_original": did_any_query_time_out_in_original, "did_workload_time_out_in_original": did_workload_time_out_in_original, - "time_since_start": (time_since_start - start_time).total_seconds(), - "replayed_runtime": replayed_runtime, + "replayed_workload_runtime": replayed_workload_runtime, + "did_any_query_time_out_in_replay": did_any_query_time_out_in_replay, + "did_workload_time_out_in_replay": did_workload_time_out_in_replay, }) current_step += 1 diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 7945587a..d476ffd9 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -348,7 +348,7 @@ def execute_workload( workload_qdir: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, blocklist: list[str] = [], first: bool = False, - ) -> Union[float, Tuple[bool, bool, dict[str, Any]]]: + ) -> Tuple[bool, bool, dict[str, Any]]: this_execution_workload_timeout = ( self.workload_timeout if not override_workload_timeout @@ -592,11 +592,9 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: if penalty > 0: f.write(f"{len(self.order)},P,{time.time()},{penalty},True,0,PENALTY\n") - # Get all the timeouts. - did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) - return did_any_query_time_out, workload_timed_out, qid_runtime_data - - return Workload.compute_total_workload_runtime(qid_runtime_data) + # Get all the timeouts. + did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) + return did_any_query_time_out, workload_timed_out, qid_runtime_data @time_record("execute") def _execute_benchbase( From 5bd43c6c475fc0c808122c9748b2c95e160adb87 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Tue, 23 Apr 2024 19:55:37 +0000 Subject: [PATCH 088/100] now logging this_step_run_data before validity checks --- tune/protox/agent/replay.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index ebf98e2a..3229ef34 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -288,8 +288,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: else: did_any_query_time_out_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = did_any_query_time_out_in_original, did_workload_time_out_in_original, original_workload_runtime - # Add this tuning step's data to `run_data``. - run_data.append({ + # Perform some validity checks and then add this tuning step's data to `run_data``. + this_step_run_data = { "step": current_step, "time_since_start": (time_since_start - start_time).total_seconds(), "original_workload_runtime": original_workload_runtime, @@ -298,7 +298,13 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: "replayed_workload_runtime": replayed_workload_runtime, "did_any_query_time_out_in_replay": did_any_query_time_out_in_replay, "did_workload_time_out_in_replay": did_workload_time_out_in_replay, - }) + } + # Log before performing checks to help with debugging. 
+ logging.info(f"this_step_run_data={this_step_run_data}") + assert not (did_workload_time_out_in_original and not did_any_query_time_out_in_original), "If the original workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." + assert not (did_workload_time_out_in_replay and not did_any_query_time_out_in_replay), "If the replayed workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." + assert not (did_any_query_time_out_in_replay and not did_workload_time_out_in_replay), "During replay, individual queries should not time out unless they timed out because the whole workload timed out." + run_data.append(this_step_run_data) current_step += 1 run_folder = repo.split("/")[-1] From c6b15dd6eae500e49d01b59cc0f4558d1333420e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 24 Apr 2024 21:04:52 +0000 Subject: [PATCH 089/100] added replay_all_variations option --- scripts/pat_test.sh | 2 +- tune/protox/agent/replay.py | 27 +++++++++++++++++++++------ tune/protox/env/mqo/mqo_wrapper.py | 5 ++--- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 3169b219..50ee1735 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -8,7 +8,7 @@ PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --replay-all-variations exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 3229ef34..ee39fc64 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -28,9 +28,10 @@ class ReplayArgs: def __init__( - self, workload_timeout_during_replay: bool, simulated: bool, cutoff: float, blocklist: list + self, workload_timeout_during_replay: bool, replay_all_variations: bool, simulated: bool, cutoff: float, blocklist: list ): self.workload_timeout_during_replay = workload_timeout_during_replay + self.replay_all_variations = replay_all_variations self.simulated = simulated self.cutoff = cutoff self.blocklist = blocklist @@ -70,6 +71,11 @@ def __init__( # I just made it use the workload timeout from HPO because I don't currently persist the tuning HPO params. help="The timeout (in seconds) of a workload when replaying. By default, it will be equal to the workload timeout used during HPO." ) +@click.option( + "--replay-all-variations", + is_flag=True, + help="If true, replay all the variations of each query. If false, only replay the variation we found was best in the tuning run. Replaying all variations has two possible use cases: (1) it makes the cache warm to better replicate behavior during tuning, (2) if the best variation during tuning was determined with Boot, it might not still be the best variation." +) @click.option( "--simulated", is_flag=True, @@ -87,7 +93,7 @@ def __init__( type=list, help="Ignore running queries in the blocklist." 
) -def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: bool, simulated: bool, cutoff: float, blocklist: list) -> None: +def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: bool, replay_all_variations: bool, simulated: bool, cutoff: float, blocklist: list) -> None: # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) @@ -98,7 +104,7 @@ def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_en tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath) # Group args together to reduce the # of parameters we pass into functions - replay_args = ReplayArgs(workload_timeout_during_replay, simulated, cutoff, blocklist) + replay_args = ReplayArgs(workload_timeout_during_replay, replay_all_variations, simulated, cutoff, blocklist) # Replay replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args) @@ -169,11 +175,20 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), action_space.get_knob_space().tables, action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n") logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), action_space.get_knob_space().tables)}\n\n") assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value" - all_holon_action_variations = actions_info["all_holon_action_variations"] + + if replay_args.replay_all_variations: + all_holon_action_variations = actions_info["all_holon_action_variations"] + actions = [holon_action for (_, holon_action) in all_holon_action_variations] + variation_names = [variation_name for (variation_name, _) in all_holon_action_variations] + else: + best_observed_holon_action = actions_info["best_observed_holon_action"] + actions = [best_observed_holon_action] + variation_names = ["BestObserved"] + did_any_query_time_out, did_workload_time_out, qid_runtime_data = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, - actions=[holon_action for (_, holon_action) in all_holon_action_variations], - variation_names=[variation_name for (variation_name, _) in all_holon_action_variations], + actions=actions, + variation_names=variation_names, observation_space=None, action_space=action_space, reset_metrics=None, diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 25bfaf8e..e25e6110 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -277,15 +277,14 @@ def transmute( if info["query_metric_data"]: self._update_best_observed(info["query_metric_data"]) - best_holon_action = _mutilate_action_with_metrics( + best_observed_holon_action = _mutilate_action_with_metrics( self.action_space, action, info["query_metric_data"], timeout_qknobs ) - best_observed_query_space_action = best_holon_action[2] with torch.no_grad(): # Pass the mutilated action back through. 
assert isinstance(self.action_space, HolonSpace)
-        info["actions_info"]["best_observed_query_space_action"] = best_observed_query_space_action
+        info["actions_info"]["best_observed_holon_action"] = best_observed_holon_action
         info["maximal_embed"] = self.action_space.to_latent([action])

         return self.unwrapped.step_post_execute(success, action, info)

From d0ed37f19afa5fcc0d1fb89d2147b70adac46226 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Wed, 24 Apr 2024 22:07:34 +0000
Subject: [PATCH 090/100] added comments to _mutilate_action_with_metrics

---
 tune/protox/env/mqo/mqo_wrapper.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py
index e25e6110..26a12dfd 100644
--- a/tune/protox/env/mqo/mqo_wrapper.py
+++ b/tune/protox/env/mqo/mqo_wrapper.py
@@ -35,22 +35,29 @@ def _mutilate_action_with_metrics(
     of all variations we tried.
     """
+    # At the start of the function, the query knobs in `action` are those selected by the agent.
+
     if query_metric_data is not None:
         extract_q_knobs = action_space.extract_query(action)
         assert extract_q_knobs

         processed = set()
         for q, data in query_metric_data.items():
+            # For queries where at least one variation didn't time out, modify the query knobs in `action`
+            # to be those from the best variation.
             if not data.timed_out:
                 assert data.query_run
-                pqk = data.query_run.qknobs
                 for k, v in data.query_run.qknobs.items():
                     # Implant the best.
                     extract_q_knobs[k] = v
+            # For all queries that we ran, even if all of their variations timed out, add them to `processed`.
+            # By doing so, the next part of the function will not affect queries where all variations timed
+            # out and will leave their knobs equal to the ones selected by the agent.
             processed.add(q)

+    # If we have set `timeout_qknobs`, then use those knobs for the queries that we didn't run at all.
+    # Usually, these `timeout_qknobs` are those of the "PrevDual" variation.
     if timeout_qknobs:
-        qspace = action_space.get_query_space()
         assert timeout_qknobs

         all_qids = set([k.query_name for k in timeout_qknobs.keys()]) - processed
@@ -65,6 +72,16 @@ def _mutilate_action_with_metrics(
             extract_q_knobs[k] = v

     action = action_space.replace_query(action, extract_q_knobs)
+
+    # There are three types of queries we handle in different ways.
+    # For queries that executed where at least one variation didn't time out, we can safely use the
+    # query knobs of their best variation.
+    # For queries that executed where all their variations timed out, we don't want to use the knobs
+    # in `timeout_qknobs` since those are known to be bad. Instead, we just use the knobs selected
+    # by the agent, which may be different from the knobs of *all* variations.
+    # Finally, for queries that didn't execute, we'll assume that some arbitrary variation ("PrevDual")
+    # is probably better than the knobs set by the agent.
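To make the three cases described in the comment above concrete, the following is a minimal, self-contained sketch of the selection rule. The names (`QueryResult`, `best_knobs`, plain dicts standing in for the real `HolonSpace`/`QueryRun` machinery) are hypothetical simplifications, so treat this as an illustration of the logic rather than the module's actual implementation:

from dataclasses import dataclass
from typing import Optional

@dataclass
class QueryResult:
    timed_out: bool             # True iff *all* variations of this query timed out
    best_knobs: dict[str, int]  # knobs of the best variation (meaningful when not timed_out)

def select_knobs(
    agent_knobs: dict[str, dict[str, int]],               # qid -> knobs chosen by the agent
    results: dict[str, QueryResult],                      # qid -> outcome, only for executed queries
    timeout_qknobs: Optional[dict[str, dict[str, int]]],  # e.g. knobs of the "PrevDual" variation
) -> dict[str, dict[str, int]]:
    final = dict(agent_knobs)
    for qid, result in results.items():
        if not result.timed_out:
            # Case 1: executed and some variation finished -> take the best variation's knobs.
            final[qid] = result.best_knobs
        # Case 2: executed but every variation timed out -> keep the agent's knobs.
    if timeout_qknobs:
        for qid in set(timeout_qknobs) - set(results):
            # Case 3: never executed -> fall back to the arbitrary "PrevDual" knobs.
            final[qid] = timeout_qknobs[qid]
    return final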
+
     return action

From b64fda20ee02a6dbdfac7b97de70a2a0cd801741 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Wed, 24 Apr 2024 22:11:49 +0000
Subject: [PATCH 091/100] added comment about best observed in replay.py

---
 tune/protox/agent/replay.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py
index ee39fc64..75096667 100644
--- a/tune/protox/agent/replay.py
+++ b/tune/protox/agent/replay.py
@@ -181,6 +181,9 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]:
             actions = [holon_action for (_, holon_action) in all_holon_action_variations]
             variation_names = [variation_name for (variation_name, _) in all_holon_action_variations]
         else:
+            # Note that "best observed" is not an entirely accurate name. Specifically, if the workload times out, some queries
+            # will not have had a chance to run at all. Based on the behavior of `_mutilate_action_with_metrics()`, we select
+            # an arbitrary variation for the queries that have not executed at all.
             best_observed_holon_action = actions_info["best_observed_holon_action"]
             actions = [best_observed_holon_action]
             variation_names = ["BestObserved"]

From 4fab4f2eb8273771e891d3cbdc3551e9e0537df7 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Wed, 24 Apr 2024 22:52:35 +0000
Subject: [PATCH 092/100] changed bool of queries timed out to an actual num

---
 experiments/protox_tpch_sf0point1/main.sh |  4 +--
 scripts/pat_test.sh                       |  4 +--
 tune/protox/agent/replay.py               | 31 ++++++++++-------------
 tune/protox/env/workload.py               | 10 ++++----
 4 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh
index 4ded8dd9..b9ce26bd 100755
--- a/experiments/protox_tpch_sf0point1/main.sh
+++ b/experiments/protox_tpch_sf0point1/main.sh
@@ -8,8 +8,8 @@ PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/

 # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
 # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
-# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2
-python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --workload-timeout-during-replay 10
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
 exit 0

 # benchmark
diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index 50ee1735..68c24fa5 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -7,8 +7,8 @@ INTENDED_PGDATA_HARDWARE=ssd
 PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/

 # space for testing.
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --replay-all-variations +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index 75096667..c26f9b5c 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -188,7 +188,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: actions = [best_observed_holon_action] variation_names = ["BestObserved"] - did_any_query_time_out, did_workload_time_out, qid_runtime_data = pg_env.workload.execute_workload( + num_timed_out_queries, did_workload_time_out, qid_runtime_data = pg_env.workload.execute_workload( pg_conn=pg_env.pg_conn, actions=actions, variation_names=variation_names, @@ -201,7 +201,7 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: first=False, ) workload_runtime = Workload.compute_total_workload_runtime(qid_runtime_data) - return did_any_query_time_out, did_workload_time_out, workload_runtime + return num_timed_out_queries, did_workload_time_out, workload_runtime run_data = [] progess_bar = tqdm.tqdm(total=num_lines) @@ -235,15 +235,10 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: save_file(dbgym_cfg, run_raw_csv_fpath) run_raw_csv = pd.read_csv(run_raw_csv_fpath) assert len(run_raw_csv.columns) == 7 - # `did_any_query_time_out_in_original` will be true when *all variations* of at least one query of the original workload did not execute - # to completion, regardless of how it happened. Even if this was because there was only 1s before the workload timed out and thus the - # query was "unfairly" given a 1s "statement_timeout", we will still set `did_any_query_time_out_in_original` to true because that query - # didn't not execute to completion. - # When setting `did_any_query_time_out_in_original`, we can't just check whether the latency in run.raw.csv == `query_timeout` because - # this doesn't handle the edge case where the "statement_timeout" setting in Postgres is set to be < `query_timeout`. This edge case - # would happen when the amount of time remaining before we hit `workload_timeout` is less then `query_timeout` and thus Proto-X sets - # "statement_timeout" to be < `query_timeout` in order to not exceed the `workload_timeout`. - did_any_query_time_out_in_original = any(run_raw_csv["Timed Out"]) + # `num_timed_out_queries_in_original` counts the number of queries where *all variations* timed out. Note that the query_timeout of + # a query may be set extremely low because the workload is about to time out, so it could be viewed as "unfair" to count those queries as + # having timed out. Regardless, that's how we currently do things. + num_timed_out_queries_in_original = run_raw_csv["Timed Out"].sum() # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` # because Proto-X decreases `workload_timeout` over the course of the tuning run. 
Specifically, at the end of a tuning step, Proto-X # sets `workload_timeout` to be equal to the runtime of the workload that just ran. @@ -302,26 +297,26 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # Execute the workload to get the runtime. if not replay_args.simulated: - did_any_query_time_out_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info) + num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info) else: - did_any_query_time_out_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = did_any_query_time_out_in_original, did_workload_time_out_in_original, original_workload_runtime + num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = num_timed_out_queries_in_original, did_workload_time_out_in_original, original_workload_runtime # Perform some validity checks and then add this tuning step's data to `run_data``. this_step_run_data = { "step": current_step, "time_since_start": (time_since_start - start_time).total_seconds(), "original_workload_runtime": original_workload_runtime, - "did_any_query_time_out_in_original": did_any_query_time_out_in_original, + "num_timed_out_queries_in_original": num_timed_out_queries_in_original, "did_workload_time_out_in_original": did_workload_time_out_in_original, "replayed_workload_runtime": replayed_workload_runtime, - "did_any_query_time_out_in_replay": did_any_query_time_out_in_replay, + "num_timed_out_queries_in_replay": num_timed_out_queries_in_replay, "did_workload_time_out_in_replay": did_workload_time_out_in_replay, } # Log before performing checks to help with debugging. logging.info(f"this_step_run_data={this_step_run_data}") - assert not (did_workload_time_out_in_original and not did_any_query_time_out_in_original), "If the original workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." - assert not (did_workload_time_out_in_replay and not did_any_query_time_out_in_replay), "If the replayed workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." - assert not (did_any_query_time_out_in_replay and not did_workload_time_out_in_replay), "During replay, individual queries should not time out unless they timed out because the whole workload timed out." + assert not (did_workload_time_out_in_original and not num_timed_out_queries_in_original > 0), "If the original workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." + assert not (did_workload_time_out_in_replay and not num_timed_out_queries_in_replay > 0), "If the replayed workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." + assert not (num_timed_out_queries_in_replay > 0 and not did_workload_time_out_in_replay), "During replay, individual queries should not time out unless they timed out because the whole workload timed out." 
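To consolidate what this hunk derives from run.raw.csv, here is a compact sketch of the two workload-level statistics. It mirrors the code above (the column names come from the CSV header that `Workload.execute_workload()` writes) and adds nothing new:

import pandas as pd

def summarize_original_run(run_raw_csv_fpath: str) -> tuple[float, bool]:
    run_raw_csv = pd.read_csv(run_raw_csv_fpath)
    # Penalty rows ("P") exist only to shape the agent's reward: they are excluded from the
    # runtime, and their presence is what signals that the workload as a whole timed out.
    penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] == "P"]
    non_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] != "P"]
    original_workload_runtime = non_penalty_rows["Latency (microseconds)"].sum() / 1e6
    did_workload_time_out = len(penalty_rows) > 0
    return original_workload_runtime, did_workload_time_out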
run_data.append(this_step_run_data) current_step += 1 diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index d476ffd9..1dfc453a 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -348,7 +348,7 @@ def execute_workload( workload_qdir: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, blocklist: list[str] = [], first: bool = False, - ) -> Tuple[bool, bool, dict[str, Any]]: + ) -> Tuple[int, bool, dict[str, Any]]: this_execution_workload_timeout = ( self.workload_timeout if not override_workload_timeout @@ -593,8 +593,8 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: f.write(f"{len(self.order)},P,{time.time()},{penalty},True,0,PENALTY\n") # Get all the timeouts. - did_any_query_time_out = any([best_run.timed_out for _, best_run in qid_runtime_data.items()]) - return did_any_query_time_out, workload_timed_out, qid_runtime_data + num_timed_out_queries = sum([1 if best_run.timed_out else 0 for _, best_run in qid_runtime_data.items()]) + return num_timed_out_queries, workload_timed_out, qid_runtime_data @time_record("execute") def _execute_benchbase( @@ -647,7 +647,7 @@ def execute( # We can only create a state if we succeeded. success = observation_space.check_benchbase(self.dbgym_cfg, results) else: - did_any_query_time_out, did_workload_time_out, query_metric_data = self.execute_workload( + num_timed_out_queries, did_workload_time_out, query_metric_data = self.execute_workload( pg_conn, actions=actions, variation_names=variation_names, @@ -661,7 +661,7 @@ def execute( blocklist=[], first=first, ) - did_anything_time_out = did_any_query_time_out or did_workload_time_out + did_anything_time_out = num_timed_out_queries > 0 or did_workload_time_out success = True metric, reward = None, None From a35a576ebec34a65af736d1d20b7f2826f008e7b Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 25 Apr 2024 19:09:11 +0000 Subject: [PATCH 093/100] added info for num executed queries --- scripts/pat_test.sh | 2 +- tune/protox/agent/off_policy_algorithm.py | 1 - tune/protox/agent/replay.py | 22 +++++++++++++--------- tune/protox/env/workload.py | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 68c24fa5..3219a95f 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,7 +7,7 @@ INTENDED_PGDATA_HARDWARE=ssd PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index ecea5129..5b2b4c3b 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -187,7 +187,6 @@ def collect_rollouts( # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get # stashed in the same directory and potentially cause a race condition. 
if self.logger and not tuning_mode == TuningMode.HPO: - actions_info = infos["actions_info"] self.logger.stash_results(infos) self.num_timesteps += 1 diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py index c26f9b5c..9bf346bb 100644 --- a/tune/protox/agent/replay.py +++ b/tune/protox/agent/replay.py @@ -201,7 +201,8 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: first=False, ) workload_runtime = Workload.compute_total_workload_runtime(qid_runtime_data) - return num_timed_out_queries, did_workload_time_out, workload_runtime + num_executed_queries = len(qid_runtime_data) + return num_executed_queries, num_timed_out_queries, did_workload_time_out, workload_runtime run_data = [] progess_bar = tqdm.tqdm(total=num_lines) @@ -235,16 +236,19 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: save_file(dbgym_cfg, run_raw_csv_fpath) run_raw_csv = pd.read_csv(run_raw_csv_fpath) assert len(run_raw_csv.columns) == 7 - # `num_timed_out_queries_in_original` counts the number of queries where *all variations* timed out. Note that the query_timeout of - # a query may be set extremely low because the workload is about to time out, so it could be viewed as "unfair" to count those queries as - # having timed out. Regardless, that's how we currently do things. - num_timed_out_queries_in_original = run_raw_csv["Timed Out"].sum() # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` # because Proto-X decreases `workload_timeout` over the course of the tuning run. Specifically, at the end of a tuning step, Proto-X # sets `workload_timeout` to be equal to the runtime of the workload that just ran. # We separate the penalty rows from the non-penalty rows to process them separately. run_raw_csv_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] == "P"] run_raw_csv_non_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] != "P"] + # Get the number of executed queries. A query timing out is not the same as a query not being executed. We do this instead of getting the + # number of skipped queries since we don't have the total # of queries with the current codebase. + num_executed_queries_in_original = len(run_raw_csv_non_penalty_rows) + # `num_timed_out_queries_in_original` counts the number of queries where *all variations* timed out. Note that the query_timeout of + # a query may be set extremely low because the workload is about to time out, so it could be viewed as "unfair" to count those queries as + # having timed out. Regardless, that's how we currently do things. + num_timed_out_queries_in_original = run_raw_csv_non_penalty_rows["Timed Out"].sum() # Penalties are added when the workload times out so this is a reliable indicator of whether the workload timed out. did_workload_time_out_in_original = len(run_raw_csv_penalty_rows) > 0 # Penalties are meant to affect the reward of the tuning agent but they are unrelated to the actual runtime, so we ignore them when @@ -297,25 +301,25 @@ def _execute_workload_wrapper(actions_info: list["HolonAction"]) -> list[float]: # Execute the workload to get the runtime. 
if not replay_args.simulated: - num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info) + num_executed_queries_in_replay, num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info) else: - num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = num_timed_out_queries_in_original, did_workload_time_out_in_original, original_workload_runtime + num_executed_queries_in_replay, num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = num_executed_queries_in_original, num_timed_out_queries_in_original, did_workload_time_out_in_original, original_workload_runtime # Perform some validity checks and then add this tuning step's data to `run_data``. this_step_run_data = { "step": current_step, "time_since_start": (time_since_start - start_time).total_seconds(), "original_workload_runtime": original_workload_runtime, + "num_executed_queries_in_original": num_executed_queries_in_original, "num_timed_out_queries_in_original": num_timed_out_queries_in_original, "did_workload_time_out_in_original": did_workload_time_out_in_original, "replayed_workload_runtime": replayed_workload_runtime, + "num_executed_queries_in_replay": num_executed_queries_in_replay, "num_timed_out_queries_in_replay": num_timed_out_queries_in_replay, "did_workload_time_out_in_replay": did_workload_time_out_in_replay, } # Log before performing checks to help with debugging. logging.info(f"this_step_run_data={this_step_run_data}") - assert not (did_workload_time_out_in_original and not num_timed_out_queries_in_original > 0), "If the original workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." - assert not (did_workload_time_out_in_replay and not num_timed_out_queries_in_replay > 0), "If the replayed workload timed out, at least one of the queries should have timed out (except for the extremely rare case where the workload timed out in between two queries)." assert not (num_timed_out_queries_in_replay > 0 and not did_workload_time_out_in_replay), "During replay, individual queries should not time out unless they timed out because the whole workload timed out." run_data.append(this_step_run_data) current_step += 1 diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 1dfc453a..098b5233 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -590,7 +590,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: penalty = 3.0e6 if penalty > 0: - f.write(f"{len(self.order)},P,{time.time()},{penalty},True,0,PENALTY\n") + f.write(f"{len(self.order)},P,{time.time()},{penalty},,0,PENALTY\n") # Get all the timeouts. 
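For reference (an illustration, not part of the patch): with the "Timed Out" field now left empty, the penalty row emitted by the f.write() above looks like the following, where the index and timestamp are made-up values and 3000000.0 is the 3.0e6 penalty:

22,P,1714070000.123,3000000.0,,0,PENALTY

Because the field is empty rather than "True", penalty rows no longer inflate the timed-out-query count that replay later derives from run.raw.csv.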
num_timed_out_queries = sum([1 if best_run.timed_out else 0 for _, best_run in qid_runtime_data.items()]) From 6016334e54606f82af90c0eae0bf2bbcc35d623e Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 25 Apr 2024 20:02:55 +0000 Subject: [PATCH 094/100] reset now doesn't overwrite the results from step --- tune/protox/env/logger.py | 4 +-- tune/protox/env/mqo/mqo_wrapper.py | 4 +-- tune/protox/env/pg_env.py | 14 ++++----- tune/protox/env/space/state/metric.py | 8 ++--- tune/protox/env/space/state/space.py | 2 +- tune/protox/env/space/state/structure.py | 2 +- tune/protox/env/types.py | 2 +- tune/protox/env/util/reward.py | 12 ++++---- tune/protox/env/workload.py | 38 ++++++++++++------------ 9 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 95aca21a..c532b20e 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -99,8 +99,8 @@ def stash_results( """ time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") time = name_override if name_override else time - if info_dict["results"] is not None and Path(info_dict["results"]).exists(): - local["mv"][info_dict["results"], f"{self.tuning_steps_dpath}/{time}"].run() + if info_dict["results_dpath"] is not None and Path(info_dict["results_dpath"]).exists(): + local["mv"][info_dict["results_dpath"], f"{self.tuning_steps_dpath}/{time}"].run() else: Path(f"{self.tuning_steps_dpath}/{time}").mkdir(parents=True, exist_ok=True) diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 26a12dfd..c0b0a54d 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -348,7 +348,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: success, metric, _, - results, + results_dpath, _, target_metric_data, ) = self.unwrapped.workload.execute( @@ -380,7 +380,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: # Reward should be irrelevant. If we do accidentally use it, cause an error. # Similarly, metric should be irrelevant. Do not shift the workload timeout. - info = EnvInfoDict({"metric": None, "reward": None, "results": results}) + info = EnvInfoDict({"metric": None, "reward": None, "results_dpath": results_dpath}) # Use this to adjust the container and state but don't shift the step. state, _, _, _, info = self.unwrapped.step_post_execute( True, action, info, soft=True diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 612e05f7..a26c78f4 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -158,7 +158,7 @@ def reset( # type: ignore ) default_action = self.action_space.null_action(sc) - success, metric, _, results, _, query_metric_data = self.workload.execute( + success, metric, _, results_dpath, _, query_metric_data = self.workload.execute( pg_conn=self.pg_conn, reward_utility=self.reward_utility, observation_space=self.observation_space, @@ -181,7 +181,7 @@ def reset( # type: ignore self.workload.queries, ) state = self.observation_space.construct_offline( - self.pg_conn.conn(), results, self.state_container + self.pg_conn.conn(), results_dpath, self.state_container ) # Set the metric workload. 
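For context on the consumer side of this rename: `Logger.stash_results()` (top of this patch) simply moves the run's artifact directory into tuning_steps under a timestamped name. A rough standard-library equivalent, with `shutil.move` standing in for plumbum's `local["mv"]` (an illustrative sketch, not the repository's code):

import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional

def stash_results(tuning_steps_dpath: Path, info_dict: dict, name_override: Optional[str] = None) -> None:
    # Name the stashed directory after the current time unless an override is given.
    name = name_override if name_override else datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    results_dpath = info_dict.get("results_dpath")
    if results_dpath is not None and Path(results_dpath).exists():
        # Move the run's artifacts under tuning_steps/.
        shutil.move(str(results_dpath), str(tuning_steps_dpath / name))
    else:
        # Nothing was produced for this step; still create the directory so every step has one.
        (tuning_steps_dpath / name).mkdir(parents=True, exist_ok=True)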
@@ -197,7 +197,7 @@ def reset( # type: ignore "baseline_metric": metric, "baseline_reward": reward, "query_metric_data": query_metric_data, - "results": results, + "results_dpath": results_dpath, "prior_state_container": None, "prior_pgconf": None, "actions_info": None, @@ -260,7 +260,7 @@ def step_execute( success, metric, reward, - results, + results_dpath, did_anything_time_out, query_metric_data, ) = self.workload.execute( @@ -283,7 +283,7 @@ def step_execute( success = False # Since we reached an invalid area, just set the next state to be the current state. metric, reward = self.reward_utility(did_error=True) - results, did_anything_time_out, query_metric_data = None, True, None + results_dpath, did_anything_time_out, query_metric_data = None, True, None # Build EnvInfoDict info.update( @@ -293,7 +293,7 @@ def step_execute( "did_anything_time_out": did_anything_time_out, "query_metric_data": query_metric_data, "reward": reward, - "results": results, + "results_dpath": results_dpath, "actions_info": { "all_holon_action_variations": all_holon_action_variations, }, @@ -334,7 +334,7 @@ def step_post_execute( # Now. The state container should be accurate. assert isinstance(self.observation_space, StateSpace) next_state = self.observation_space.construct_offline( - self.pg_conn.conn(), info["results"], self.state_container + self.pg_conn.conn(), info["results_dpath"], self.state_container ) else: assert self.current_state diff --git a/tune/protox/env/space/state/metric.py b/tune/protox/env/space/state/metric.py index 948dff92..099fde14 100644 --- a/tune/protox/env/space/state/metric.py +++ b/tune/protox/env/space/state/metric.py @@ -152,10 +152,10 @@ def __init__( self.internal_spaces[metric] = Box(low=-np.inf, high=np.inf) super().__init__(self.internal_spaces, seed) - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: - assert results is not None - assert Path(results).exists() - metric_files = [f for f in Path(results).rglob("*metrics.json")] + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: + assert results_dpath is not None + assert Path(results_dpath).exists() + metric_files = [f for f in Path(results_dpath).rglob("*metrics.json")] if len(metric_files) != 2: return False diff --git a/tune/protox/env/space/state/space.py b/tune/protox/env/space/state/space.py index f7baa3bc..8119818b 100644 --- a/tune/protox/env/space/state/space.py +++ b/tune/protox/env/space/state/space.py @@ -14,7 +14,7 @@ def require_metrics(self) -> bool: pass @abstractmethod - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: pass @abstractmethod diff --git a/tune/protox/env/space/state/structure.py b/tune/protox/env/space/state/structure.py index d1a09986..df681a2d 100644 --- a/tune/protox/env/space/state/structure.py +++ b/tune/protox/env/space/state/structure.py @@ -50,7 +50,7 @@ def __init__( def require_metrics(self) -> bool: return False - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: # We don't use benchbase metrics anyways. 
return True diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 1c4ffcfb..976317ed 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -174,7 +174,7 @@ class EnvInfoDict(TypedDict, total=False): # Data generated from each run. best_query_run_data: dict[str, BestQueryRun] # Path to run artifacts. - results: Optional[Union[str, Path]] + results_dpath: Optional[Union[str, Path]] # Previous state container. prior_state_container: Optional[HolonStateContainer] diff --git a/tune/protox/env/util/reward.py b/tune/protox/env/util/reward.py index bd0c93ce..ba01b8a0 100644 --- a/tune/protox/env/util/reward.py +++ b/tune/protox/env/util/reward.py @@ -95,7 +95,7 @@ def __parse_runtime_for_metric(self, parent: Union[str, Path]) -> float: def __call__( self, - result_dir: Union[str, Path, None] = None, + results_dpath: Union[str, Path, None] = None, metric: Optional[float] = None, update: bool = True, did_error: bool = False, @@ -108,14 +108,14 @@ def __call__( # (param) (new_tps/old_tps) + (1-param) (max(min_mem, new_mem)/min_mem # # minimum memory before start trading...) - assert did_error or result_dir is not None or metric is not None + assert did_error or results_dpath is not None or metric is not None self.logger.get_logger(__name__).debug( - f"[reward_calc]: {result_dir} {metric} {update} {did_error}" + f"[reward_calc]: {results_dpath} {metric} {update} {did_error}" ) if metric is None: # Either it errored or we have a result directory to process. - assert did_error or result_dir + assert did_error or results_dpath # Extract the metric if we're running it manually. metric_fn = ( @@ -127,8 +127,8 @@ def __call__( if did_error: metric = self.worst_perf else: - assert result_dir - metric = metric_fn(result_dir) + assert results_dpath + metric = metric_fn(results_dpath) actual_r = None assert metric is not None diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index 098b5233..2b3c7e8c 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -339,7 +339,7 @@ def execute_workload( pg_conn: PostgresConn, actions: list[HolonAction] = [], variation_names: list[str] = [], - results: Optional[Union[str, Path]] = None, + results_dpath: Optional[Union[str, Path]] = None, observation_space: Optional[StateSpace] = None, action_space: Optional[HolonSpace] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, @@ -511,13 +511,13 @@ def execute_workload( assert sql_type != QueryType.INS_UPD_DEL pg_conn.conn().execute(query) - if results is not None: + if results_dpath is not None: # Make the result directory. - results_dir = Path(results) - if not results_dir.exists(): - results_dir.mkdir(parents=True, exist_ok=True) + results_dpath = Path(results_dpath) + if not results_dpath.exists(): + results_dpath.mkdir(parents=True, exist_ok=True) - with open(results_dir / "run.plans", "w") as f: + with open(results_dpath / "run.plans", "w") as f: # Output the explain data. for qid, run in qid_runtime_data.items(): if run.explain_data is not None: @@ -538,7 +538,7 @@ def execute_workload( [v.metric_data for _, v in qid_runtime_data.items()], ) accum_stats = observation_space.merge_deltas(accum_data) - with open(results_dir / "run.metrics.json", "w") as f: + with open(results_dpath / "run.metrics.json", "w") as f: # Flatten it. 
def flatten(d: dict[str, Any]) -> dict[str, Any]: flat: dict[str, Any] = {} @@ -561,7 +561,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: f.write(json.dumps(output, indent=4)) # run.raw.csv will essentially contain the information in qid_runtime_data. However, run.raw.csv may have an extra line for the penalty. - with open(results_dir / "run.raw.csv", "w") as f: + with open(results_dpath / "run.raw.csv", "w") as f: # Write the raw query data. f.write( "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Timed Out,Worker Id (start number),Phase Id (index in config file)\n" @@ -598,7 +598,7 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: @time_record("execute") def _execute_benchbase( - self, benchbase_config: dict[str, Any], results: Union[str, Path] + self, benchbase_config: dict[str, Any], results_dpath: Union[str, Path] ) -> bool: bb_path = benchbase_config["benchbase_path"] with local.cwd(bb_path): @@ -610,7 +610,7 @@ def _execute_benchbase( "-c", benchbase_config["benchbase_config_path"], "-d", - results, + results_dpath, "--execute=true", ].run(retcode=None) @@ -636,22 +636,22 @@ def execute( if self.logger: self.logger.get_logger(__name__).info("Starting to run benchmark...") - # Purge results directory first. - tmp_dir = tempfile.gettempdir() - results = f"{tmp_dir}/results{pg_conn.pgport}" - shutil.rmtree(results, ignore_errors=True) + # Generate a unique temporary directory to store results in. + results_dpath = Path(tempfile.mkdtemp()) + print(results_dpath.is_dir(), results_dpath.exists(), not any(results_dpath.iterdir())) + assert results_dpath.is_dir() and results_dpath.exists() and not any(results_dpath.iterdir()), "results_dpath should be existent and empty since mkdtemp should guarantee a unique dir." if self.benchbase: # Execute benchbase if specified. - success = self._execute_benchbase(benchbase_config, results) + success = self._execute_benchbase(benchbase_config, results_dpath) # We can only create a state if we succeeded. 
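The switch from a fixed /tmp/results{pgport} directory to `tempfile.mkdtemp()` is what keeps a reset() from clobbering the artifacts of the step that preceded it: every execution now gets a fresh, uniquely named directory instead of sharing one per port. A small illustrative contrast (not repository code):

import tempfile
from pathlib import Path

# Old scheme: all runs against the same Postgres port share one directory, so the caller had
# to rmtree() it first, and a reset() could overwrite results produced by the previous step.
old_results = Path(tempfile.gettempdir()) / "results5432"  # the port number is an example

# New scheme: mkdtemp() creates and returns a brand-new, empty, uniquely named directory,
# so back-to-back reset()/step() calls (and concurrent runs) can never collide.
new_results = Path(tempfile.mkdtemp())
assert new_results.is_dir() and not any(new_results.iterdir())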
- success = observation_space.check_benchbase(self.dbgym_cfg, results) + success = observation_space.check_benchbase(self.dbgym_cfg, results_dpath) else: num_timed_out_queries, did_workload_time_out, query_metric_data = self.execute_workload( pg_conn, actions=actions, variation_names=variation_names, - results=results, + results_dpath=results_dpath, observation_space=observation_space, action_space=action_space, reset_metrics=reset_metrics, @@ -667,11 +667,11 @@ def execute( metric, reward = None, None if reward_utility is not None: metric, reward = reward_utility( - result_dir=results, update=update, did_error=not success + results_dpath=results_dpath, update=update, did_error=not success ) if self.logger: self.logger.get_logger(__name__).info( f"Benchmark iteration with metric {metric} (reward: {reward}) (did_anything_timeout: {did_anything_time_out})" ) - return success, metric, reward, results, did_anything_time_out, query_metric_data + return success, metric, reward, results_dpath, did_anything_time_out, query_metric_data From 47363150ac90561be9114a7a21660c456ec6118a Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 25 Apr 2024 20:20:14 +0000 Subject: [PATCH 095/100] wrote load_per_machine_envvars.sh --- experiments/load_per_machine_envvars.sh | 11 +++++++++++ experiments/protox_tpch_sf0point1/main.sh | 3 ++- experiments/protox_tpch_sf10/main.sh | 2 +- scripts/pat_test.sh | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 experiments/load_per_machine_envvars.sh diff --git a/experiments/load_per_machine_envvars.sh b/experiments/load_per_machine_envvars.sh new file mode 100644 index 00000000..905c6c01 --- /dev/null +++ b/experiments/load_per_machine_envvars.sh @@ -0,0 +1,11 @@ +#!/bin/bash +host=$(hostname) + +if [ "$host" == "dev4" ]; then + export PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ +elif [ "$host" == "dev6" ]; then + export PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ +else + echo "Did not recognize host \"$host\"" + exit 1 +fi \ No newline at end of file diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index b9ce26bd..0a32e7f3 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -4,7 +4,8 @@ set -euxo pipefail SCALE_FACTOR=0.1 INTENDED_PGDATA_HARDWARE=ssd -PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ +. ./experiments/load_per_machine_envvars.sh +echo $PGDATA_PARENT_DPATH # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) # python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 0efaf0db..35facab0 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -4,7 +4,7 @@ set -euxo pipefail SCALE_FACTOR=10 INTENDED_PGDATA_HARDWARE=ssd -PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ +. ./experiments/load_per_machine_envvars.sh # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 3219a95f..12f14347 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -4,7 +4,7 @@ set -euxo pipefail SCALE_FACTOR=0.01 INTENDED_PGDATA_HARDWARE=ssd -PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ +. ./experiments/load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 From 9849a99053fca71c1e3d47633d6b6e2ae4bcbbd7 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 25 Apr 2024 20:32:53 +0000 Subject: [PATCH 096/100] added build_space_good_for_boot option --- experiments/protox_tpch_sf0point1/main.sh | 2 +- experiments/protox_tpch_sf10/main.sh | 2 +- scripts/pat_test.sh | 7 +++--- tune/protox/agent/hpo.py | 28 +++++++++++++++++++---- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 0a32e7f3..5a111a4f 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -28,6 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 35facab0..71f00f67 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -26,5 +26,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot 
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 12f14347..2b7d554e 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,8 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 +# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark @@ -27,6 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index b31d027e..0d2c7fb8 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -30,7 +30,7 @@ class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo): + def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot): self.benchmark_name = benchmark_name self.workload_name = workload_name self.embedder_path = embedder_path @@ -50,6 +50,7 @@ def __init__(self, benchmark_name, workload_name, 
embedder_path, benchmark_confi self.query_timeout = query_timeout self.enable_boot_during_hpo = enable_boot_during_hpo self.boot_config_fpath_during_hpo = boot_config_fpath_during_hpo + self.build_space_good_for_boot = build_space_good_for_boot @click.command() @@ -170,6 +171,23 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi type=Path, help="The path to the file configuring Boot when running HPO. When tuning, you may use a different Boot config.", ) +# Building a space good for Boot is subtly different from whether we enable Boot during HPO. +# There are certain options that qualitatively do not perform well with Boot (e.g. metrics state +# because Boot extrapolates the query runtime but not metrics). This param controls whether we +# use those options or not. +# I chose the word "good" instead of "compatible" because metrics state does not _crash_ if you +# use Boot but it just doesn't seem like it would perform well. +# One workflow where these two variables are different is where we don't enable Boot during HPO +# but do want to enable Boot during tuning. +# However, whether we're building a space good for Boot is also different from whether we enable +# Boot during tuning. We often want to compare one tuning run with Boot against one without +# Boot, in which case we'd build a space good for Boot and then run it once with Boot and once +# without Boot. +@click.option( + "--build-space-good-for-boot", + is_flag=True, + help="Whether to avoid certain options that are known to not perform well when Boot is enabled. See the codebase for why this is subtly different from --enable-boot-during-hpo.", +) def hpo( dbgym_cfg, benchmark_name, @@ -195,6 +213,7 @@ def hpo( query_timeout, enable_boot_during_hpo: bool, boot_config_fpath_during_hpo: Path, + build_space_good_for_boot: bool, ): # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) @@ -235,7 +254,7 @@ def hpo( assert False # Create args object - hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo) + hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot) _tune_hpo(dbgym_cfg, hpo_args) @@ -253,9 +272,9 @@ def build_space( seed: int=0, enable_boot_during_hpo: bool=False, boot_config_fpath_during_hpo: Path=None, + build_space_good_for_boot: bool = False, workload_timeouts: list[int]=[600], query_timeouts: list[int]=[30], - boot_enabled: bool = False, ) -> dict[str, Any]: return { @@ -311,7 +330,7 @@ def build_space( "normalize_reward": tune.choice([False, True]), # State. 
- "metric_state": tune.choice(([] if boot_enabled else ["metric"]) + ["structure", "structure_normalize"]), + "metric_state": tune.choice(([] if build_space_good_for_boot else ["metric"]) + ["structure", "structure_normalize"]), "maximize_state": not benchmark_config.get("oltp_workload", False), # Whether to normalize state or not. "normalize_state": tune.sample_from(lambda spc: False if spc["config"]["metric_state"] == "structure_normalize" else True), @@ -592,6 +611,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: seed=hpo_args.seed, enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, boot_config_fpath_during_hpo=hpo_args.boot_config_fpath_during_hpo, + build_space_good_for_boot=hpo_args.build_space_good_for_boot, workload_timeouts=workload_timeouts, query_timeouts=query_timeouts, ) From d2fb27502ea99efde1dc9c79ed1c777c5f867289 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sun, 28 Apr 2024 17:57:22 +0000 Subject: [PATCH 097/100] resolved some PR comments --- experiments/protox_tpch_sf10/main.sh | 9 +++++---- scripts/pat_test.sh | 6 +++--- tune/protox/env/mqo/mqo_wrapper.py | 2 +- tune/protox/env/pg_env.py | 9 ++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 71f00f67..2627c942 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -7,10 +7,11 @@ INTENDED_PGDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune +python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 +# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 +# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 # benchmark diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index 2b7d554e..b7de59f7 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -7,9 +7,9 @@ INTENDED_PGDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. 
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
-# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
-# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
+# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
 exit 0
 
 # benchmark
diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py
index c0b0a54d..61f1d277 100644
--- a/tune/protox/env/mqo/mqo_wrapper.py
+++ b/tune/protox/env/mqo/mqo_wrapper.py
@@ -302,7 +302,7 @@ def transmute(
         # Pass the mutilated action back through.
         assert isinstance(self.action_space, HolonSpace)
         info["actions_info"]["best_observed_holon_action"] = best_observed_holon_action
-        info["maximal_embed"] = self.action_space.to_latent([action])
+        info["maximal_embed"] = self.action_space.to_latent([best_observed_holon_action])
 
         return self.unwrapped.step_post_execute(success, action, info)
 
diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py
index a26c78f4..62fa92b8 100644
--- a/tune/protox/env/pg_env.py
+++ b/tune/protox/env/pg_env.py
@@ -123,9 +123,9 @@ def reset(  # type: ignore
             config_changes, sql_commands = self.action_space.generate_plan_from_config(
                 config, sc
             )
-            # We dump the page cache here because we're resetting. We don't want stuff from
-            # a previous task.py invocation to affect this.
-            assert self.shift_state(config_changes, sql_commands, dump_page_cache=True)
+            # Don't dump the page cache because we want to keep it warm to see the performance of
+            # workloads under a warm cache.
+            assert self.shift_state(config_changes, sql_commands, dump_page_cache=False)
 
             # Note that we do not actually update the baseline metric/reward used by the reward
             # utility. This is so the reward is not stochastic with respect to the starting state.
@@ -233,7 +233,7 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict]
         # Attempt to maneuver to the new state.
         # Don't dump the page cache in shift_state() in order to see how the workload performs in
         # a warm cache scenario.
-        success = self.shift_state(config_changes, sql_commands, dump_page_cache=True)
+        success = self.shift_state(config_changes, sql_commands, dump_page_cache=False)
         return success, EnvInfoDict(
             {
                 "attempted_changes": (config_changes, sql_commands),
@@ -365,7 +365,6 @@ def shift_state(
         config_changes: list[str],
         sql_commands: list[str],
         dump_page_cache: bool = False,
-        ignore_error: bool = False,
     ) -> bool:
         def attempt_checkpoint(conn_str: str) -> None:
            # CHECKPOINT to prevent the DBMS from entering a super slow shutdown
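A gloss on dump_page_cache above: dropping the OS page cache forces the next workload run to start cold, while leaving the cache intact measures warm-cache performance, which is what these commits switch tuning to target. The sketch below shows what such a flag typically gates; it is a hypothetical helper (requires root), not the actual shift_state() internals:

    import subprocess

    def maybe_dump_page_cache(dump_page_cache: bool) -> None:
        if dump_page_cache:
            # Flush dirty pages, then drop the clean page cache so the
            # next workload run starts with a cold cache.
            subprocess.run(["sync"], check=True)
            subprocess.run(["sudo", "sh", "-c", "echo 3 > /proc/sys/vm/drop_caches"], check=True)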
From af33bc7266ea77706759557165c9a51189b4177d Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Mon, 27 May 2024 00:08:16 +0000
Subject: [PATCH 098/100] added comment about tune

---
 tune/protox/agent/tune.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py
index 1b167ce5..a7aaef15 100644
--- a/tune/protox/agent/tune.py
+++ b/tune/protox/agent/tune.py
@@ -51,6 +51,7 @@
     help="The number of hours to run the tuning agent for. If you do not specify this argument, it will be the same as --tune-duration-during-hpo."
 )
 def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, boot_config_fpath_during_tune: Path, tune_duration_during_tune: float) -> None:
+    '''IMPORTANT: The "tune" here is the one in "tune a DBMS". This is *different* from the "tune" in ray.tune.TuneConfig, which means to "tune hyperparameters".'''
     # Set args to defaults programmatically (do this before doing anything else in the function)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
     if hpoed_agent_params_path == None:

From 474d7ee7ee758c9c1b1d25eaab88e80f245a353d Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Mon, 27 May 2024 00:48:52 +0000
Subject: [PATCH 099/100] different tune trials during hpo now name their tuning_steps dir differently

---
 scripts/pat_test.sh       |  2 +-
 tune/protox/agent/hpo.py  | 17 ++++++++++++-----
 tune/protox/agent/tune.py |  2 +-
 tune/protox/env/logger.py | 13 ++++++-------
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index b7de59f7..afab9108 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -7,7 +7,7 @@ INTENDED_PGDATA_HARDWARE=ssd
 . ./experiments/load_per_machine_envvars.sh
 
 # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
 python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
 exit 0
 
 # benchmark
diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 0d2c7fb8..40cd1f95 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -435,7 +435,7 @@ def __call__(self) -> bool:
 
 
 class TuneTrial:
-    def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode) -> None:
+    def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, ray_trial_id: str | None=None) -> None:
         """
         We use this object for HPO, tune, and replay. It behaves *slightly* differently
         depending on what it's used for, which is why we have the tuning_mode param.
@@ -443,6 +443,12 @@
         self.dbgym_cfg = dbgym_cfg
         self.tuning_mode = tuning_mode
 
+        if self.tuning_mode == TuningMode.HPO:
+            assert ray_trial_id != None, "If we're doing HPO, we will create multiple TuneTrial() objects. We thus need to differentiate them somehow."
+        else:
+            assert ray_trial_id == None, "If we're not doing HPO, we (currently) will create only one TuneTrial() object. For clarity, we set ray_trial_id to None since ray_trial_id should not be used in this case."
+        self.ray_trial_id = ray_trial_id
+
     def setup(self, hpo_params: dict[str, Any]) -> None:
         # Attach mythril directory to the search path.
         sys.path.append(os.path.expanduser(self.dbgym_cfg.dbgym_repo_path))
@@ -498,9 +504,10 @@ def step(self) -> dict[Any, Any]:
             )
             self.env_init = True
 
-            # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get
-            # stashed in the same directory and potentially crash the system.
-            if not self.tuning_mode == TuningMode.HPO:
+            # During HPO, we need to make sure different trials don't create folders that overwrite each other.
+            if self.tuning_mode == TuningMode.HPO:
+                self.logger.stash_results(infos, name_override=f"baseline_{self.ray_trial_id}")
+            else:
+                self.logger.stash_results(infos, name_override="baseline")
         else:
             self.agent.learn(self.env, total_timesteps=1, tuning_mode=self.tuning_mode)
@@ -548,7 +555,7 @@ class TuneOpt(Trainable):
     dbgym_cfg = global_dbgym_cfg
 
     def setup(self, hpo_params: dict[str, Any]) -> None:
-        self.trial = TuneTrial(TuneOpt.dbgym_cfg, TuningMode.HPO)
+        self.trial = TuneTrial(TuneOpt.dbgym_cfg, TuningMode.HPO, ray_trial_id=self.trial_id)
         self.trial.setup(hpo_params)
 
     def step(self) -> dict[Any, Any]:
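For context on ray_trial_id=self.trial_id above: Ray's Trainable exposes a trial_id property that is unique per trial, which is what makes it a safe folder-name disambiguator across concurrent HPO trials. A minimal sketch (class name and id format illustrative; assumes ray[tune] is installed):

    from ray import tune

    class ExampleTrainable(tune.Trainable):
        def setup(self, config: dict) -> None:
            # self.trial_id is unique per trial, e.g. "f2a1b_00003", so any
            # folder name derived from it cannot collide with another trial's.
            self.baseline_dirname = f"baseline_{self.trial_id}"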
""" - time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - time = name_override if name_override else time + dname = name_override if name_override else datetime.now().strftime("%Y-%m-%d_%H-%M-%S") if info_dict["results_dpath"] is not None and Path(info_dict["results_dpath"]).exists(): - local["mv"][info_dict["results_dpath"], f"{self.tuning_steps_dpath}/{time}"].run() + local["mv"][info_dict["results_dpath"], f"{self.tuning_steps_dpath}/{dname}"].run() else: - Path(f"{self.tuning_steps_dpath}/{time}").mkdir(parents=True, exist_ok=True) + Path(f"{self.tuning_steps_dpath}/{dname}").mkdir(parents=True, exist_ok=True) if info_dict["prior_pgconf"]: local["cp"][ - info_dict["prior_pgconf"], f"{self.tuning_steps_dpath}/{time}/old_pg.conf" + info_dict["prior_pgconf"], f"{self.tuning_steps_dpath}/{dname}/old_pg.conf" ].run() if info_dict["prior_state_container"]: - with open(self.tuning_steps_dpath / time / "prior_state.pkl", "wb") as f: + with open(self.tuning_steps_dpath / dname / "prior_state.pkl", "wb") as f: # info_dict["prior_state_container"] is a somewhat complex object so we use pickle over json pickle.dump(info_dict["prior_state_container"], f) if info_dict["actions_info"]: - with open(self.tuning_steps_dpath / time / "action.pkl", "wb") as f: + with open(self.tuning_steps_dpath / dname / "action.pkl", "wb") as f: pickle.dump(info_dict["actions_info"], f) def advance(self) -> None: From a6e00b9b04519a9650e6563827be7aa84a23e0c9 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 30 May 2024 03:42:05 +0000 Subject: [PATCH 100/100] now logging during HPO for both baseline and tuning steps --- tune/protox/agent/build_trial.py | 8 +++++--- tune/protox/agent/hpo.py | 12 +++++------- tune/protox/agent/off_policy_algorithm.py | 7 +++++-- tune/protox/agent/wolp/wolp.py | 2 ++ tune/protox/env/logger.py | 7 ++++++- 5 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index d80f2610..58e1aeb7 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -5,7 +5,7 @@ import socket import xml.etree.ElementTree as ET from pathlib import Path -from typing import Any, Callable, Tuple, Union +from typing import Any, Callable, Optional, Tuple, Union import gymnasium as gym import numpy as np @@ -381,6 +381,7 @@ def _build_agent( observation_space: StateSpace, action_space: HolonSpace, logger: Logger, + ray_trial_id: Optional[str], ) -> Wolp: action_dim = noise_action_dim = action_space.latent_dim() critic_action_dim = action_space.critic_dim() @@ -498,6 +499,7 @@ def _build_agent( obs_shape=[gym.spaces.utils.flatdim(observation_space)], action_dim=critic_action_dim, ), + ray_trial_id=ray_trial_id, learning_starts=hpo_params["learning_starts"], batch_size=hpo_params["batch_size"], train_freq=(hpo_params["train_freq_frequency"], hpo_params["train_freq_unit"]), @@ -510,7 +512,7 @@ def _build_agent( def build_trial( - dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any] + dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any], ray_trial_id: Optional[str]=None ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]: # The massive trial builder. 
From a6e00b9b04519a9650e6563827be7aa84a23e0c9 Mon Sep 17 00:00:00 2001
From: Patrick Wang
Date: Thu, 30 May 2024 03:42:05 +0000
Subject: [PATCH 100/100] now logging during HPO for both baseline and tuning steps

---
 tune/protox/agent/build_trial.py          |  8 +++++---
 tune/protox/agent/hpo.py                  | 12 +++++-------
 tune/protox/agent/off_policy_algorithm.py |  7 +++++--
 tune/protox/agent/wolp/wolp.py            |  2 ++
 tune/protox/env/logger.py                 |  7 ++++++-
 5 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py
index d80f2610..58e1aeb7 100644
--- a/tune/protox/agent/build_trial.py
+++ b/tune/protox/agent/build_trial.py
@@ -5,7 +5,7 @@
 import socket
 import xml.etree.ElementTree as ET
 from pathlib import Path
-from typing import Any, Callable, Tuple, Union
+from typing import Any, Callable, Optional, Tuple, Union
 
 import gymnasium as gym
 import numpy as np
@@ -381,6 +381,7 @@ def _build_agent(
     observation_space: StateSpace,
     action_space: HolonSpace,
     logger: Logger,
+    ray_trial_id: Optional[str],
 ) -> Wolp:
     action_dim = noise_action_dim = action_space.latent_dim()
     critic_action_dim = action_space.critic_dim()
@@ -498,6 +499,7 @@
             obs_shape=[gym.spaces.utils.flatdim(observation_space)],
             action_dim=critic_action_dim,
         ),
+        ray_trial_id=ray_trial_id,
         learning_starts=hpo_params["learning_starts"],
         batch_size=hpo_params["batch_size"],
         train_freq=(hpo_params["train_freq_frequency"], hpo_params["train_freq_unit"]),
@@ -510,7 +512,7 @@
 
 
 def build_trial(
-    dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any]
+    dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any], ray_trial_id: Optional[str]=None
 ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]:
     # The massive trial builder.
@@ -533,5 +535,5 @@ def build_trial(
         logger,
     )
 
-    agent = _build_agent(seed, hpo_params, observation_space, holon_space, logger)
+    agent = _build_agent(seed, hpo_params, observation_space, holon_space, logger, ray_trial_id)
     return logger, target_reset, env, agent, signal
diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py
index 40cd1f95..60498514 100644
--- a/tune/protox/agent/hpo.py
+++ b/tune/protox/agent/hpo.py
@@ -10,7 +10,7 @@
 import os
 import pandas as pd
 from datetime import datetime
-from typing import Any, Union
+from typing import Any, Optional, Union
 import random
 import click
 import ssd_checker
@@ -435,7 +435,7 @@ def __call__(self) -> bool:
 
 
 class TuneTrial:
-    def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, ray_trial_id: str | None=None) -> None:
+    def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, ray_trial_id: Optional[str]=None) -> None:
         """
         We use this object for HPO, tune, and replay. It behaves *slightly* differently
         depending on what it's used for, which is why we have the tuning_mode param.
@@ -470,6 +470,7 @@ def setup(self, hpo_params: dict[str, Any]) -> None:
             self.tuning_mode,
             seed=seed,
             hpo_params=hpo_params,
+            ray_trial_id=self.ray_trial_id,
         )
         self.logger.get_logger(None).info("%s", hpo_params)
         self.logger.get_logger(None).info(f"Seed: {seed}")
@@ -504,11 +505,8 @@ def step(self) -> dict[Any, Any]:
             )
             self.env_init = True
 
-            # During HPO, we need to make sure different trials don't create folders that overwrite each other.
-            if self.tuning_mode == TuningMode.HPO:
-                self.logger.stash_results(infos, name_override=f"baseline_{self.ray_trial_id}")
-            else:
-                self.logger.stash_results(infos, name_override="baseline")
+            assert self.ray_trial_id != None if self.tuning_mode == TuningMode.HPO else True, "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names."
+            self.logger.stash_results(infos, name_override="baseline", ray_trial_id=self.ray_trial_id)
         else:
             self.agent.learn(self.env, total_timesteps=1, tuning_mode=self.tuning_mode)
diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py
index 5b2b4c3b..dd39d7ba 100644
--- a/tune/protox/agent/off_policy_algorithm.py
+++ b/tune/protox/agent/off_policy_algorithm.py
@@ -47,10 +47,12 @@ def __init__(
         gradient_steps: int = 1,
         action_noise: Optional[ActionNoise] = None,
         seed: Optional[int] = None,
+        ray_trial_id: Optional[str] = None,
     ):
         super().__init__(seed=seed)
         self.policy = policy
         self.replay_buffer = replay_buffer
+        self.ray_trial_id = ray_trial_id
 
         self.batch_size = batch_size
         self.learning_starts = learning_starts
@@ -186,8 +188,9 @@ def collect_rollouts(
                 dones = terms or truncs
-                # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get
-                # stashed in the same directory and potentially cause a race condition.
-                if self.logger and not tuning_mode == TuningMode.HPO:
-                    self.logger.stash_results(infos)
+                # We now stash results during HPO as well; ray_trial_id keeps the folder names of
+                # concurrent HPO trials distinct, so they no longer race on the same directory.
+                if self.logger:
+                    assert self.ray_trial_id != None if tuning_mode == TuningMode.HPO else True, "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names."
+                    self.logger.stash_results(infos, ray_trial_id=self.ray_trial_id)
 
                 self.num_timesteps += 1
                 num_collected_steps += 1
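A note on the conditional assert used twice above: assert x != None if cond else True parses as assert ((x != None) if cond else True), so the assertion is vacuously true outside HPO. An equivalent form that reads more directly (illustrative helper, not code from this patch):

    from typing import Optional

    def check_ray_trial_id(ray_trial_id: Optional[str], is_hpo: bool) -> None:
        # Same behavior as: assert ray_trial_id != None if is_hpo else True
        if is_hpo:
            assert ray_trial_id is not None, "HPO needs a ray_trial_id to keep stash folders distinct"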
+ self.logger.stash_results(infos, ray_trial_id=self.ray_trial_id) self.num_timesteps += 1 num_collected_steps += 1 diff --git a/tune/protox/agent/wolp/wolp.py b/tune/protox/agent/wolp/wolp.py index 7929d779..ba519258 100644 --- a/tune/protox/agent/wolp/wolp.py +++ b/tune/protox/agent/wolp/wolp.py @@ -53,6 +53,7 @@ def __init__( target_action_noise: Optional[ActionNoise] = None, seed: Optional[int] = None, neighbor_parameters: Dict[str, Any] = {}, + ray_trial_id: Optional[str] = None, ): super().__init__( policy, @@ -63,6 +64,7 @@ def __init__( gradient_steps, action_noise=action_noise, seed=seed, + ray_trial_id=ray_trial_id, ) self.target_action_noise = target_action_noise diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 7bbfc37a..12176780 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -92,12 +92,17 @@ def get_logger(self, name: Optional[str]) -> logging.Logger: return logging.getLogger(name) def stash_results( - self, info_dict: dict[str, Any], name_override: Optional[str] = None + self, info_dict: dict[str, Any], name_override: Optional[str] = None, ray_trial_id: Optional[str] = None, ) -> None: """ Stash data about this step of tuning so that it can be replayed. """ dname = name_override if name_override else datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + if ray_trial_id != None: + # Orthogonal to whether name_override is used, ray_trial_id disambiguates between folders created + # by different HPO trials so that the folders don't overwrite each other. + dname += f"_{ray_trial_id}" + if info_dict["results_dpath"] is not None and Path(info_dict["results_dpath"]).exists(): local["mv"][info_dict["results_dpath"], f"{self.tuning_steps_dpath}/{dname}"].run() else: