diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py
index 5b6d24f1..d5c8c407 100644
--- a/benchmark/tpch/cli.py
+++ b/benchmark/tpch/cli.py
@@ -4,7 +4,7 @@
 
 import click
 
-from misc.utils import DBGymConfig, get_scale_factor_string, workload_name_fn
+from misc.utils import DBGymConfig, get_scale_factor_string, link_result, workload_name_fn
 from util.shell import subprocess_run
 from util.pg import *
 
@@ -56,33 +56,36 @@ def _get_queries_dname(seed: int, scale_factor: float) -> str:
 
 
 def _clone(dbgym_cfg: DBGymConfig):
-    symlink_dir = dbgym_cfg.cur_symlinks_build_path("tpch-kit")
-    if symlink_dir.exists():
-        benchmark_tpch_logger.info(f"Skipping clone: {symlink_dir}")
+    expected_symlink_dpath = dbgym_cfg.cur_symlinks_build_path(mkdir=True) / "tpch-kit.link"
+    if expected_symlink_dpath.exists():
+        benchmark_tpch_logger.info(f"Skipping clone: {expected_symlink_dpath}")
         return
-    benchmark_tpch_logger.info(f"Cloning: {symlink_dir}")
+    benchmark_tpch_logger.info(f"Cloning: {expected_symlink_dpath}")
     real_build_path = dbgym_cfg.cur_task_runs_build_path()
     subprocess_run(
         f"./tpch_setup.sh {real_build_path}", cwd=dbgym_cfg.cur_source_path()
     )
-    subprocess_run(
-        f"ln -s {real_build_path / 'tpch-kit'} {dbgym_cfg.cur_symlinks_build_path(mkdir=True)}"
-    )
-    benchmark_tpch_logger.info(f"Cloned: {symlink_dir}")
+    symlink_dpath = link_result(dbgym_cfg, real_build_path / "tpch-kit")
+    assert os.path.samefile(expected_symlink_dpath, symlink_dpath)
+    benchmark_tpch_logger.info(f"Cloned: {expected_symlink_dpath}")
 
 
-def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, scale_factor: float):
-    build_path = dbgym_cfg.cur_symlinks_build_path()
-    assert build_path.exists()
+def _get_tpch_kit_dpath(dbgym_cfg: DBGymConfig) -> Path:
+    tpch_kit_dpath = (dbgym_cfg.cur_symlinks_build_path() / "tpch-kit.link").resolve()
+    assert tpch_kit_dpath.exists() and tpch_kit_dpath.is_absolute() and not tpch_kit_dpath.is_symlink()
+    return tpch_kit_dpath
+
+
+def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, scale_factor: float):
+    tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg)
     data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
     benchmark_tpch_logger.info(
         f"Generating queries: {data_path} [{seed_start}, {seed_end}]"
     )
     for seed in range(seed_start, seed_end + 1):
-        symlinked_seed = data_path / _get_queries_dname(seed, scale_factor)
-        if symlinked_seed.exists():
+        expected_queries_symlink_dpath = data_path / (_get_queries_dname(seed, scale_factor) + ".link")
+        if expected_queries_symlink_dpath.exists():
             continue
 
         real_dir = dbgym_cfg.cur_task_runs_data_path(_get_queries_dname(seed, scale_factor), mkdir=True)
@@ -90,34 +93,34 @@ def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, sc
             target_sql = (real_dir / f"{i}.sql").resolve()
             subprocess_run(
                 f"DSS_QUERY=./queries ./qgen {i} -r {seed} -s {scale_factor} > {target_sql}",
-                cwd=build_path / "tpch-kit" / "dbgen",
+                cwd=tpch_kit_dpath / "dbgen",
                 verbose=False,
             )
-        subprocess_run(f"ln -s {real_dir} {data_path}", verbose=False)
+        queries_symlink_dpath = link_result(dbgym_cfg, real_dir)
+        assert os.path.samefile(queries_symlink_dpath, expected_queries_symlink_dpath)
     benchmark_tpch_logger.info(
         f"Generated queries: {data_path} [{seed_start}, {seed_end}]"
     )
 
 
 def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float):
-    build_path = dbgym_cfg.cur_symlinks_build_path()
-    assert build_path.exists()
-
+    tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg)
     data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
-    symlink_dir = data_path / f"tables_sf{get_scale_factor_string(scale_factor)}"
-    if symlink_dir.exists():
-        benchmark_tpch_logger.info(f"Skipping generation: {symlink_dir}")
+    expected_tables_symlink_dpath = data_path / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
+    if expected_tables_symlink_dpath.exists():
+        benchmark_tpch_logger.info(f"Skipping generation: {expected_tables_symlink_dpath}")
         return
-    benchmark_tpch_logger.info(f"Generating: {symlink_dir}")
+    benchmark_tpch_logger.info(f"Generating: {expected_tables_symlink_dpath}")
     subprocess_run(
-        f"./dbgen -vf -s {scale_factor}", cwd=build_path / "tpch-kit" / "dbgen"
+        f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_dpath / "dbgen"
     )
     real_dir = dbgym_cfg.cur_task_runs_data_path(f"tables_sf{get_scale_factor_string(scale_factor)}", mkdir=True)
-    subprocess_run(f"mv ./*.tbl {real_dir}", cwd=build_path / "tpch-kit" / "dbgen")
+    subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")
 
-    subprocess_run(f"ln -s {real_dir} {data_path}")
-    benchmark_tpch_logger.info(f"Generated: {symlink_dir}")
+    tables_symlink_dpath = link_result(dbgym_cfg, real_dir)
+    assert os.path.samefile(tables_symlink_dpath, expected_tables_symlink_dpath)
+    benchmark_tpch_logger.info(f"Generated: {expected_tables_symlink_dpath}")
 
 
 def _generate_workload(
@@ -129,9 +132,9 @@ def _generate_workload(
 ):
     symlink_data_dir = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
-    workload_symlink_path = symlink_data_dir / workload_name
+    expected_workload_symlink_dpath = symlink_data_dir / (workload_name + ".link")
 
-    benchmark_tpch_logger.info(f"Generating: {workload_symlink_path}")
+    benchmark_tpch_logger.info(f"Generating: {expected_workload_symlink_dpath}")
     real_dir = dbgym_cfg.cur_task_runs_data_path(
         workload_name, mkdir=True
     )
@@ -147,13 +150,12 @@ def _generate_workload(
     with open(real_dir / "order.txt", "w") as f:
         for seed in range(seed_start, seed_end + 1):
             for qnum in queries:
-                sqlfile = symlink_data_dir / _get_queries_dname(seed, scale_factor) / f"{qnum}.sql"
-                assert sqlfile.exists()
-                output = ",".join([f"S{seed}-Q{qnum}", str(sqlfile)])
+                sql_fpath = (symlink_data_dir / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql"
+                assert sql_fpath.exists() and not sql_fpath.is_symlink() and sql_fpath.is_absolute(), "We should only write existent real absolute paths to a file"
+                output = ",".join([f"S{seed}-Q{qnum}", str(sql_fpath)])
                 print(output, file=f)
 
     # TODO(WAN): add option to deep-copy the workload.
 
-    if workload_symlink_path.exists():
-        os.remove(workload_symlink_path)
-    subprocess_run(f"ln -s {real_dir} {workload_symlink_path}")
-    benchmark_tpch_logger.info(f"Generated: {workload_symlink_path}")
+    workload_symlink_dpath = link_result(dbgym_cfg, real_dir)
+    assert workload_symlink_dpath == expected_workload_symlink_dpath
+    benchmark_tpch_logger.info(f"Generated: {expected_workload_symlink_dpath}")
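Note: every generation step in this file now follows the same idiom: compute the expected "*.link" path up front, let link_result() create the actual symlink, then assert that the two agree. A minimal sketch of the idiom (generate_and_link and the elided generation step are hypothetical; link_result() is the helper defined in misc/utils.py later in this diff):

    import os
    from pathlib import Path

    def generate_and_link(dbgym_cfg, dname: str) -> Path:
        # Compute the expected symlink location before doing any work, so a
        # misnamed or stale link is caught by the samefile assert below.
        expected_symlink_dpath = dbgym_cfg.cur_symlinks_data_path(mkdir=True) / (dname + ".link")
        if expected_symlink_dpath.exists():
            return expected_symlink_dpath  # already generated by an earlier run
        real_dpath = dbgym_cfg.cur_task_runs_data_path(dname, mkdir=True)
        # ... generate files into real_dpath ...
        symlink_dpath = link_result(dbgym_cfg, real_dpath)  # creates <dname>.link under symlinks/
        assert os.path.samefile(expected_symlink_dpath, symlink_dpath)
        return symlink_dpath
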
diff --git a/benchmark/tpch/load_info.py b/benchmark/tpch/load_info.py
index afe4d243..8db2f0b4 100644
--- a/benchmark/tpch/load_info.py
+++ b/benchmark/tpch/load_info.py
@@ -1,5 +1,5 @@
 from dbms.load_info_base_class import LoadInfoBaseClass
-from misc.utils import get_scale_factor_string
+from misc.utils import DBGymConfig, get_scale_factor_string
 
 
 TPCH_SCHEMA_FNAME = "tpch_schema.sql"
@@ -22,7 +22,7 @@ class TpchLoadInfo(LoadInfoBaseClass):
         "lineitem",
     ]
 
-    def __init__(self, dbgym_cfg, scale_factor):
+    def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float):
         # schema and constraints
         schema_root_dpath = dbgym_cfg.dbgym_repo_path
         for component in TpchLoadInfo.CODEBASE_PATH_COMPONENTS[
@@ -39,13 +39,12 @@ def __init__(self, dbgym_cfg, scale_factor):
         ), f"self._constraints_fpath ({self._constraints_fpath}) does not exist"
 
         # tables
-        data_root_dpath = (
-            dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data"
-        )
-        tables_dpath = data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}"
+        data_root_dpath = dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data"
+        tables_symlink_dpath = data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
+        tables_dpath = tables_symlink_dpath.resolve()
         assert (
-            tables_dpath.exists()
-        ), f"tables_dpath ({tables_dpath}) does not exist. Make sure you have generated the TPC-H data"
+            tables_dpath.exists() and tables_dpath.is_absolute() and not tables_dpath.is_symlink()
+        ), f"tables_dpath ({tables_dpath}) should be an existent real absolute path. Make sure you have generated the TPC-H data"
         self._tables_and_fpaths = []
         for table in TpchLoadInfo.TABLES:
             table_fpath = tables_dpath / f"{table}.tbl"
diff --git a/benchmark/tpch/tpch_constraints.sql b/benchmark/tpch/tpch_constraints.sql
index fca8c21d..81e23f20 100644
--- a/benchmark/tpch/tpch_constraints.sql
+++ b/benchmark/tpch/tpch_constraints.sql
@@ -7,26 +7,27 @@ ALTER TABLE orders ADD CONSTRAINT orders_o_custkey_fkey FOREIGN KEY (o_custkey) 
 ALTER TABLE lineitem ADD CONSTRAINT lineitem_l_orderkey_fkey FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE;
 ALTER TABLE lineitem ADD CONSTRAINT lineitem_l_partkey_l_suppkey_fkey FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE;
 
-CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);
-CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
-CREATE INDEX n_rk ON nation (n_regionkey ASC);
-CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);
-CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
-CREATE INDEX s_nk ON supplier (s_nationkey ASC);
-CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
-CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
-CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
-CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);
-CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
-CREATE INDEX c_nk ON customer (c_nationkey ASC);
-CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
-CREATE INDEX o_ck ON orders (o_custkey ASC);
-CREATE INDEX o_od ON orders (o_orderdate ASC);
-CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
-CREATE INDEX l_pk ON lineitem (l_partkey ASC);
-CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
-CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
-CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
-CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
-CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
-CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);
\ No newline at end of file
+-- We don't create any indexes so that there's a clean slate for tuning
+-- CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);
+-- CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
+-- CREATE INDEX n_rk ON nation (n_regionkey ASC);
+-- CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);
+-- CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
+-- CREATE INDEX s_nk ON supplier (s_nationkey ASC);
+-- CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
+-- CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
+-- CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
+-- CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);
+-- CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
+-- CREATE INDEX c_nk ON customer (c_nationkey ASC);
+-- CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
+-- CREATE INDEX o_ck ON orders (o_custkey ASC);
+-- CREATE INDEX o_od ON orders (o_orderdate ASC);
+-- CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
+-- CREATE INDEX l_pk ON lineitem (l_partkey ASC);
+-- CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
+-- CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
+-- CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
+-- CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
+-- CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
+-- CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);
\ No newline at end of file
diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py
index 3c6ecd3d..75b03650 100644
--- a/dbms/postgres/cli.py
+++ b/dbms/postgres/cli.py
@@ -1,9 +1,9 @@
-'''
+"""
 At a high level, this file's goal is to (1) install+build postgres and (2) create pgdata.
 On the other hand, the goal of tune.protox.env.util.postgres is to provide helpers to
 manage a Postgres instance during agent tuning.
 util.pg provides helpers used by *both* of the above files (as well as other files).
-'''
+"""
 import logging
 import os
 import shutil
@@ -84,11 +84,11 @@ def postgres_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: f
 
 
 def _get_pgbin_symlink_path(dbgym_cfg: DBGymConfig) -> Path:
-    return dbgym_cfg.cur_symlinks_build_path("repo", "boot", "build", "postgres", "bin")
+    return dbgym_cfg.cur_symlinks_build_path("repo.link", "boot", "build", "postgres", "bin")
 
 
 def _get_repo_symlink_path(dbgym_cfg: DBGymConfig) -> Path:
-    return dbgym_cfg.cur_symlinks_build_path("repo")
+    return dbgym_cfg.cur_symlinks_build_path("repo.link")
 
 
 def _build_repo(dbgym_cfg: DBGymConfig, rebuild):
@@ -143,7 +143,7 @@ def _create_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: fl
     # Create .tgz file.
     # Note that you can't pass "[pgdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[pgdata].tgz" as a dir.
     pgdata_tgz_real_fpath = dbgym_cfg.cur_task_runs_data_path(
-        ".", mkdir=True
+        mkdir=True
     ) / get_pgdata_tgz_name(benchmark_name, scale_factor)
     # We need to cd into pgdata_dpath so that the tar file does not contain folders for the whole path of pgdata_dpath.
     subprocess_run(f"tar -czf {pgdata_tgz_real_fpath} .", cwd=pgdata_dpath)
@@ -156,21 +156,21 @@ def _create_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: fl
 
 def _generic_pgdata_setup(dbgym_cfg: DBGymConfig):
     # get necessary vars
-    pgbin_symlink_dpath = _get_pgbin_symlink_path(dbgym_cfg)
-    assert pgbin_symlink_dpath.exists()
+    pgbin_real_dpath = _get_pgbin_symlink_path(dbgym_cfg).resolve()
+    assert pgbin_real_dpath.exists()
    dbgym_pguser = DBGYM_POSTGRES_USER
     dbgym_pgpass = DBGYM_POSTGRES_PASS
     pgport = DEFAULT_POSTGRES_PORT
 
     # Create user
-    save_file(dbgym_cfg, pgbin_symlink_dpath / "psql")
+    save_file(dbgym_cfg, pgbin_real_dpath / "psql")
     subprocess_run(
         f"./psql -c \"create user {dbgym_pguser} with superuser password '{dbgym_pgpass}'\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost",
-        cwd=pgbin_symlink_dpath,
+        cwd=pgbin_real_dpath,
     )
     subprocess_run(
         f'./psql -c "grant pg_monitor to {dbgym_pguser}" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost',
-        cwd=pgbin_symlink_dpath,
+        cwd=pgbin_real_dpath,
     )
 
     # Load shared preload libraries
@@ -179,14 +179,14 @@ def _generic_pgdata_setup(dbgym_cfg: DBGymConfig):
         # You have to use TO and you can't put single quotes around the libraries (https://postgrespro.com/list/thread-id/2580120)
         # The method I wrote here works for both one library and multiple libraries
         f"./psql -c \"ALTER SYSTEM SET shared_preload_libraries TO {SHARED_PRELOAD_LIBRARIES};\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost",
-        cwd=pgbin_symlink_dpath,
+        cwd=pgbin_real_dpath,
     )
 
     # Create the dbgym database. since one pgdata dir maps to one benchmark, all benchmarks will use the same database
     # as opposed to using databases named after the benchmark
     subprocess_run(
         f"./psql -c \"create database {DBGYM_POSTGRES_DBNAME} with owner = '{dbgym_pguser}'\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost",
-        cwd=pgbin_symlink_dpath,
+        cwd=pgbin_real_dpath,
     )
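The comment in _create_pgdata() above explains why tar runs with cwd=pgdata_dpath: the archive should store paths relative to pgdata_dpath rather than the absolute path leading to it. A sketch of the same effect in pure Python (archive_dir_contents is hypothetical), for readers who prefer tarfile over shelling out:

    import tarfile
    from pathlib import Path

    def archive_dir_contents(src_dpath: Path, tgz_fpath: Path) -> None:
        # Equivalent of `tar -czf <tgz_fpath> .` run with cwd=src_dpath: each entry is
        # stored relative to src_dpath, so extracting never recreates the absolute path.
        with tarfile.open(tgz_fpath, "w:gz") as tar:
            for entry in src_dpath.iterdir():
                tar.add(entry, arcname=entry.name)
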
diff --git a/experiments/load_per_machine_envvars.sh b/experiments/load_per_machine_envvars.sh
new file mode 100644
index 00000000..905c6c01
--- /dev/null
+++ b/experiments/load_per_machine_envvars.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+host=$(hostname)
+
+if [ "$host" == "dev4" ]; then
+  export PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/
+elif [ "$host" == "dev6" ]; then
+  export PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/
+else
+  echo "Did not recognize host \"$host\""
+  exit 1
+fi
\ No newline at end of file
diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh
new file mode 100755
index 00000000..5a111a4f
--- /dev/null
+++ b/experiments/protox_tpch_sf0point1/main.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+SCALE_FACTOR=0.1
+INTENDED_PGDATA_HARDWARE=ssd
+. ./experiments/load_per_machine_envvars.sh
+echo $PGDATA_PARENT_DPATH
+
+# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
+# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
+exit 0
+
+# benchmark
+python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR
+python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR
+
+# postgres
+python3 task.py --no-startup-check dbms postgres build
+python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
+
+exit 0
+
+# embedding
+python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash
+python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
+
+# agent
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh
index 62c6cf22..2627c942 100755
--- a/experiments/protox_tpch_sf10/main.sh
+++ b/experiments/protox_tpch_sf10/main.sh
@@ -4,10 +4,14 @@ set -euxo pipefail
 
 SCALE_FACTOR=10
 INTENDED_PGDATA_HARDWARE=ssd
-PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/
+. ./experiments/load_per_machine_envvars.sh
 
 # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
+# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4
+# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4
+# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
+# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune
 exit 0
 
 # benchmark
@@ -23,5 +27,5 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa
 python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10
 
 # agent
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --duration 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
diff --git a/misc/utils.py b/misc/utils.py
index e2560692..bec81d97 100644
--- a/misc/utils.py
+++ b/misc/utils.py
@@ -1,15 +1,23 @@
+from enum import Enum
 import os
 import shutil
 import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import Tuple
 
 import click
 import yaml
 import redis
 from util.shell import subprocess_run
 
+# Enums
+TuningMode = Enum('TuningMode', ['HPO', 'TUNE', 'REPLAY'])
+
+# Default values
+DEFAULT_WORKLOAD_TIMEOUT = 600
+
 # Relative paths of different folders in the codebase
 DBMS_PATH = Path("dbms")
 POSTGRES_PATH = DBMS_PATH / "postgres"
@@ -63,6 +71,26 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str:
     / f"default_{benchmark_name}_benchbase_config.xml"
 )
 
+# Generally useful functions
+workload_name_fn = (
+    lambda scale_factor, seed_start, seed_end, query_subset : f"workload_sf{get_scale_factor_string(scale_factor)}_{seed_start}_{seed_end}_{query_subset}"
+)
+
+# Standard names of files/directories. These can refer to either the actual file/directory or a link to the file/directory.
+# Since they can refer to either the actual or the link, they do not have ".link" in them.
+traindata_fname = (
+    lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedding_traindata.parquet"
+)
+default_embedder_dname = (
+    lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedder"
+)
+default_hpoed_agent_params_fname = (
+    lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_hpoed_agent_params.json"
+)
+default_tuning_steps_dname = (
+    lambda benchmark_name, workload_name, boot_enabled_during_tune: f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_tuning_steps"
+)
+
 # Paths of dependencies in the workspace. These are named "*_path" because they will be an absolute path
 # The reason these _cannot_ be relative paths is because relative paths are relative to the codebase root, not the workspace root
 # Note that it's okay to hardcode the codebase paths (like dbgym_dbms_postgres) here. In the worst case, we'll just break an
@@ -71,19 +99,18 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str:
 # ok to have to hardcode them when reading.
 # Details
 # - If a name already has the workload_name, I omit scale factor. This is because the workload_name includes the scale factor
-traindata_fname = (
-    lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedding_traindata.parquet"
-)
+# - By convention, symlinks should end with ".link". The bug that motivated this decision involved replaying a tuning run. When
+#   replaying a tuning run, you read the tuning_steps/ folder of the tuning run. Earlier, I created a symlink to that tuning_steps/
+#   folder called run_*/dbgym_agent_protox_tune/tuning_steps. However, replay itself generates an output.log file, which goes in
+#   run_*/dbgym_agent_protox_tune/tuning_steps/. The bug was that my replay function was overwriting the output.log file of the
+#   tuning run. By naming all symlinks "*.link", we avoid the possibility of subtle bugs like this happening.
 default_traindata_path = (
     lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(
         workspace_path
     )
     / "dbgym_tune_protox_embedding"
     / "data"
-    / traindata_fname(benchmark_name, workload_name)
-)
-default_embedder_dname = (
-    lambda benchmark_name, workload_name: f"{benchmark_name}_{workload_name}_embedder"
+    / (traindata_fname(benchmark_name, workload_name) + ".link")
 )
 default_embedder_path = (
     lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(
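To make the ".link" convention concrete, here is an illustrative sketch (the paths are made up) of the collision described in the comment above and how the suffix avoids it:

    from pathlib import Path

    run_dpath = Path("task_runs/run_42/dbgym_agent_protox_tune")
    tuning_steps_link = run_dpath / "tuning_steps.link"  # symlink pointing at an older run's tuning_steps/
    tuning_steps_dpath = run_dpath / "tuning_steps"      # real dir where this run writes output.log

    # Because the two names differ, writing output.log under tuning_steps_dpath can
    # never traverse the symlink and clobber the older run's tuning_steps/ contents.
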
@@ -91,16 +118,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str:
         workspace_path
     )
     / "dbgym_tune_protox_embedding"
     / "data"
-    / default_embedder_dname(benchmark_name, workload_name)
+    / (default_embedder_dname(benchmark_name, workload_name) + ".link")
 )
 default_hpoed_agent_params_path = (
     lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(workspace_path)
     / "dbgym_tune_protox_agent"
     / "data"
-    / f"{benchmark_name}_{workload_name}_hpoed_agent_params.json"
-)
-workload_name_fn = (
-    lambda scale_factor, seed_start, seed_end, query_subset : f"workload_sf{get_scale_factor_string(scale_factor)}_{seed_start}_{seed_end}_{query_subset}"
+    / (default_hpoed_agent_params_fname(benchmark_name, workload_name) + ".link")
 )
 default_workload_path = (
     lambda workspace_path, benchmark_name, workload_name: get_symlinks_path_from_workspace_path(
@@ -108,15 +132,15 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str:
         workspace_path
     )
     / f"dbgym_benchmark_{benchmark_name}"
     / "data"
-    / workload_name
+    / (workload_name + ".link")
 )
 default_pristine_pgdata_snapshot_path = (
     lambda workspace_path, benchmark_name, scale_factor: get_symlinks_path_from_workspace_path(
         workspace_path
     )
-    / f"dbgym_dbms_postgres"
+    / "dbgym_dbms_postgres"
     / "data"
-    / get_pgdata_tgz_name(benchmark_name, scale_factor)
+    / (get_pgdata_tgz_name(benchmark_name, scale_factor) + ".link")
 )
 default_pgdata_parent_dpath = (
     lambda workspace_path: get_tmp_path_from_workspace_path(
@@ -127,7 +151,13 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str:
     lambda workspace_path: get_symlinks_path_from_workspace_path(
         workspace_path
     )
-    / f"dbgym_dbms_postgres" / "build" / "repo" / "boot"/ "build" / "postgres" / "bin"
+    / "dbgym_dbms_postgres" / "build" / "repo.link" / "boot"/ "build" / "postgres" / "bin"
+)
+default_tuning_steps_dpath = (
+    lambda workspace_path, benchmark_name, workload_name, boot_enabled_during_tune: get_symlinks_path_from_workspace_path(
+        workspace_path
+    )
+    / "dbgym_tune_protox_agent" / "artifacts" / (default_tuning_steps_dname(benchmark_name, workload_name, boot_enabled_during_tune) + ".link")
 )
 
 
@@ -330,12 +360,13 @@ def is_child_path(child_path: os.PathLike, parent_dpath: os.PathLike) -> bool:
     )
 
 
-def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"):
+def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: Path, mode="r"):
     """
     Open a file and "save" it to [workspace]/task_runs/run_*/.
     It takes in a str | Path to match the interface of open().
     This file does not work if open_fpath is a symlink, to make its interface identical to that of open().
         Make sure to resolve all symlinks with conv_inputpath_to_realabspath().
+    To avoid confusion, I'm enforcing this function to only work with absolute paths.
     See the comment of save_file() for what "saving" means
     If you are generating a "result" for the run, _do not_ use this. Just use the normal open().
         This shouldn't be too hard to remember because this function crashes if open_fpath doesn't exist,
@@ -347,7 +378,8 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"):
     - If you open two "config" files of the same name but different paths, only the first open will be saved.
     - Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked.
     """
-    # process/validate open_fpath
+    # validate open_fpath
+    assert isinstance(open_fpath, Path)
     assert os.path.isabs(
         open_fpath
     ), f"open_and_save(): open_fpath ({open_fpath}) should be an absolute path"
@@ -364,19 +396,48 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: os.PathLike, mode="r"):
     return open(open_fpath, mode=mode)
 
 
+def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Path) -> Tuple[Path, str, Path, str]:
+    """
+    The task_runs/ folder is organized like task_runs/run_*/[codebase]/[org]/any/path/you/want.
+    This function extracts the [codebase] and [org] components
+    """
+    assert not task_run_fordpath.is_symlink()
+    parent_dpath = os.path.dirname(task_run_fordpath)
+    assert not os.path.samefile(
+        parent_dpath, dbgym_cfg.dbgym_runs_path
+    ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})"
+    assert not os.path.samefile(
+        parent_dir(parent_dpath), dbgym_cfg.dbgym_runs_path
+    ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
+    assert not os.path.samefile(
+        parent_dir(parent_dir(parent_dpath)), dbgym_cfg.dbgym_runs_path
+    ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
+    # org_dpath is the run_*/[codebase]/[organization]/ dir that task_run_fordpath is in
+    org_dpath = parent_dpath
+    while not os.path.samefile(
+        parent_dir(parent_dir(parent_dir(org_dpath))), dbgym_cfg.dbgym_runs_path
+    ):
+        org_dpath = parent_dir(org_dpath)
+    org_dname = dir_basename(org_dpath)
+    codebase_dpath = parent_dir(org_dpath)
+    codebase_dname = dir_basename(codebase_dpath)
+
+    return codebase_dpath, codebase_dname, org_dpath, org_dname
+
+
 # TODO(phw2): after merging agent-train, refactor some code in agent-train to use save_file() instead of open_and_save()
-def save_file(dbgym_cfg: DBGymConfig, fpath: os.PathLike) -> Path:
+def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
     """
     If an external function takes in a file/directory as input, you will not be able to call open_and_save().
         In these situations, just call save_file().
+    Like open_and_save(), this function only works with real absolute paths.
     "Saving" can mean either copying the file or creating a symlink to it
     We copy the file if it is a "config", meaning it just exists without having been generated
     We create a symlink if it is a "dependency", meaning a task.py command was run to generate it
         In these cases we create a symlink so we have full provenance for how the dependency was created
     """
-    # process fpath and ensure that it's a file at the end
-    fpath = conv_inputpath_to_realabspath(dbgym_cfg, fpath)
-    fpath = os.path.realpath(fpath)  # traverse symlinks
+    # validate fpath
+    assert isinstance(fpath, Path)
     assert not os.path.islink(fpath), f"fpath ({fpath}) should not be a symlink"
     assert os.path.exists(fpath), f"fpath ({fpath}) does not exist"
     assert os.path.isfile(fpath), f"fpath ({fpath}) is not a file"
@@ -390,34 +451,15 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
     # 2. files or dirs generated by a run may be very large (up to 100s of GBs) so we don't want to copy them
     if is_child_path(fpath, dbgym_cfg.dbgym_runs_path):
         # get paths we'll need later.
-        parent_dpath = os.path.dirname(fpath)
-        assert not os.path.samefile(
-            parent_dpath, dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})"
-        assert not os.path.samefile(
-            parent_dir(parent_dpath), dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
-        assert not os.path.samefile(
-            parent_dir(parent_dir(parent_dpath)), dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
-        # org_dpath is the run_*/[codebase]/[organization]/ dir that fpath is in
-        org_dpath = parent_dpath
-        while not os.path.samefile(
-            parent_dir(parent_dir(parent_dir(org_dpath))), dbgym_cfg.dbgym_runs_path
-        ):
-            org_dpath = parent_dir(org_dpath)
-        org_dname = dir_basename(org_dpath)
-        codebase_dpath = parent_dir(org_dpath)
-        codebase_dname = dir_basename(codebase_dpath)
-        this_run_save_dpath = os.path.join(
-            dbgym_cfg.dbgym_this_run_path, codebase_dname, org_dname
-        )
+        _, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, fpath)
+        this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / codebase_dname / org_dname
         os.makedirs(this_run_save_dpath, exist_ok=True)
 
         # if the fpath file is directly in org_dpath, we symlink the file directly
+        parent_dpath = os.path.dirname(fpath)
         if os.path.samefile(parent_dpath, org_dpath):
             fname = os.path.basename(fpath)
-            symlink_fpath = os.path.join(this_run_save_dpath, fname)
+            symlink_fpath = this_run_save_dpath / (fname + ".link")
             try_create_symlink(fpath, symlink_fpath)
         # else, we know the fpath file is _not_ directly inside org_dpath dir
         # we go as far back as we can while still staying in org_dpath and symlink that "base" dir
@@ -430,61 +472,70 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
             # create symlink
             open_base_dname = dir_basename(base_dpath)
-            symlink_dpath = os.path.join(this_run_save_dpath, open_base_dname)
+            symlink_dpath = this_run_save_dpath / (open_base_dname + ".link")
             try_create_symlink(base_dpath, symlink_dpath)
     # if it wasn't generated by a run
     else:
         # since we don't know where the file is at all, the location is "unknown" and the org is "all"
-        this_run_save_dpath = os.path.join(
-            dbgym_cfg.dbgym_this_run_path, "unknown", "all"
-        )
+        this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / "unknown" / "all"
         os.makedirs(this_run_save_dpath, exist_ok=True)
         fname = os.path.basename(fpath)
         # in this case, we want to copy instead of symlinking since it might disappear in the future
-        copy_fpath = os.path.join(this_run_save_dpath, fname)
+        copy_fpath = this_run_save_dpath / fname
         shutil.copy(fpath, copy_fpath)
 
 
 # TODO(phw2): refactor our manual symlinking in postgres/cli.py to use link_result() instead
-def link_result(dbgym_cfg: DBGymConfig, result_path: Path, custom_result_name: str | None=None) -> Path:
+def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_name: str | None=None) -> Path:
     """
-    result_path must be a "result", meaning it was generated inside dbgym_cfg.dbgym_this_run_path
-    result_path itself can be a file or a dir but not a symlink
-    Returns the symlink path.
-    Create a symlink of the same name to result_path inside [workspace]/data/
-    Will override the old symlink if there is one
-    This is called so that [workspace]/data/ always contains the latest generated version of a file
+    result_fordpath must be a "result", meaning it was generated inside dbgym_cfg.dbgym_this_run_path.
+    Further, result_fordpath must have been generated by this invocation to task.py. This also means that
+    result_fordpath itself can be a file or a dir but not a symlink.
+    Given a file or directory in task_runs/run_*/[codebase]/[org], this will create a symlink inside
+    symlinks/[codebase]/[org]/.
+    Will override the old symlink if there is one, so that symlinks/ always contains the latest generated
+    version of a file.
+    This function will return the path to the symlink that was created.
     """
-    result_path = conv_inputpath_to_realabspath(dbgym_cfg, result_path)
-    assert is_child_path(result_path, dbgym_cfg.dbgym_this_run_path)
-    assert not os.path.islink(result_path)
+    result_fordpath = conv_inputpath_to_realabspath(dbgym_cfg, result_fordpath)
+    assert is_child_path(result_fordpath, dbgym_cfg.dbgym_this_run_path)
+    assert not os.path.islink(result_fordpath)
 
     if custom_result_name != None:
         result_name = custom_result_name
     else:
-        if os.path.isfile(result_path):
-            result_name = os.path.basename(result_path)
-        elif os.path.isdir(result_path):
-            result_name = dir_basename(result_path)
+        if os.path.isfile(result_fordpath):
+            result_name = os.path.basename(result_fordpath) + ".link"
+        elif os.path.isdir(result_fordpath):
+            result_name = dir_basename(result_fordpath) + ".link"
         else:
-            raise AssertionError("result_path must be either a file or dir")
-    symlink_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True) / result_name
+            raise AssertionError("result_fordpath must be either a file or dir")
+
+    # Figure out the parent directory path of the symlink
+    codebase_dpath, codebase_dname, _, org_dname = extract_from_task_run_fordpath(dbgym_cfg, result_fordpath)
+    # We're only supposed to save files generated by us, which means they should be in cur_task_runs_path()
+    assert os.path.samefile(codebase_dpath, dbgym_cfg.cur_task_runs_path()), f"link_result should only be called on files generated by this invocation to task.py"
+    symlink_parent_dpath = dbgym_cfg.dbgym_symlinks_path / codebase_dname / org_dname
+    symlink_parent_dpath.mkdir(parents=True, exist_ok=True)
 
     # Remove the old symlink ("old" meaning created in an earlier run) if there is one
     # Note that in a multi-threaded setting, this might remove one created by a process in the same run,
     # meaning it's not "old" by our definition of "old". However, we'll always end up with a symlink
     # file of the current run regardless of the order of threads.
+    assert result_name.endswith(".link") and not result_name.endswith(".link.link"), f"result_name ({result_name}) should end with \".link\""
+    symlink_path = symlink_parent_dpath / result_name
     try_remove_file(symlink_path)
-    try_create_symlink(result_path, symlink_path)
+    try_create_symlink(result_fordpath, symlink_path)
 
     return symlink_path
 
 
 def try_create_symlink(src_path: Path, dst_path: Path) -> None:
-    '''
+    """
     Our functions that create symlinks might be called by multiple processes at once
     during HPO. Thus, this is a thread-safe way to create a symlink.
-    '''
+    """
+    assert dst_path.name.endswith(".link") and not dst_path.name.endswith(".link.link")
     try:
         os.symlink(src_path, dst_path)
     except FileExistsError:
@@ -493,10 +544,10 @@ def try_create_symlink(src_path: Path, dst_path: Path) -> None:
 
 
 def try_remove_file(path: Path) -> None:
-    '''
+    """
     Our functions that remove files might be called by multiple processes at once
     during HPO. Thus, this is a thread-safe way to remove a file.
-    '''
+    """
     try:
         os.remove(path)
     except FileNotFoundError:
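A hedged usage sketch of the two helpers above (the concrete paths are illustrative): extract_from_task_run_fordpath() recovers the [codebase]/[org] components from a task_runs path, and link_result() mirrors them under symlinks/:

    from pathlib import Path

    # task_runs/run_*/[codebase]/[org]/... is the layout both helpers assume.
    result_fpath = Path("/workspace/task_runs/run_0/dbgym_tune_protox_agent/data/params.json")

    # Returns (codebase_dpath, "dbgym_tune_protox_agent", org_dpath, "data").
    codebase_dpath, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, result_fpath)

    # Creates /workspace/symlinks/dbgym_tune_protox_agent/data/params.json.link -> result_fpath.
    symlink_fpath = link_result(dbgym_cfg, result_fpath)
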
diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index aa1b7a50..afab9108 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -4,10 +4,12 @@ set -euxo pipefail
 
 SCALE_FACTOR=0.01
 INTENDED_PGDATA_HARDWARE=ssd
-PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/
+. ./experiments/load_per_machine_envvars.sh
 
 # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
+python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
 exit 0
 
 # benchmark
@@ -26,5 +28,6 @@ python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-fa
 python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
 
 # agent
-python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --duration 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --enable-boot-during-hpo
+python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
 python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
+python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py
index 91441daf..58e1aeb7 100644
--- a/tune/protox/agent/build_trial.py
+++ b/tune/protox/agent/build_trial.py
@@ -5,7 +5,7 @@
 import socket
 import xml.etree.ElementTree as ET
 from pathlib import Path
-from typing import Any, Callable, Tuple, Union
+from typing import Any, Callable, Optional, Tuple, Union
 
 import gymnasium as gym
 import numpy as np
@@ -17,7 +17,7 @@
 )
 from torch import nn
 
-from misc.utils import DBGymConfig, open_and_save, make_redis_started, save_file
+from misc.utils import DBGymConfig, TuningMode, open_and_save, make_redis_started, save_file
 from tune.protox.agent.agent_env import AgentEnv
 from tune.protox.agent.buffers import ReplayBuffer
 from tune.protox.agent.noise import ClampNoise
@@ -93,9 +93,9 @@ def _get_signal(signal_folder: Union[str, Path]) -> Tuple[int, str]:
     raise IOError("No free ports to bind postgres to.")
 
 
-def _modify_benchbase_config(logdir: str, port: int, hpo_params: dict[str, Any]) -> None:
+def _modify_benchbase_config(dbgym_cfg: DBGymConfig, port: int, hpo_params: dict[str, Any]) -> None:
     if hpo_params["benchmark_config"]["query_spec"]["oltp_workload"]:
-        conf_etree = ET.parse(Path(logdir) / "benchmark.xml")
+        conf_etree = ET.parse(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml")
         jdbc = f"jdbc:postgresql://localhost:{port}/benchbase?preferQueryMode=extended"
         conf_etree.getroot().find("url").text = jdbc  # type: ignore
@@ -110,7 +110,7 @@ def _modify_benchbase_config(logdir: str, port: int, hpo_params: dict[str, Any])
             conf_etree.getroot().find("works").find("work").find("time").text = str(oltp_config["oltp_duration"])  # type: ignore
         if works.find("warmup") is not None:  # type: ignore
             conf_etree.getroot().find("works").find("work").find("warmup").text = str(oltp_config["oltp_warmup"])  # type: ignore
-        conf_etree.write(Path(logdir) / "benchmark.xml")
+        conf_etree.write(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml")
 
 
 def _gen_noise_scale(
@@ -130,14 +130,12 @@ def f(p: ProtoAction, n: torch.Tensor) -> ProtoAction:
 
 
 def _build_utilities(
-    dbgym_cfg: DBGymConfig, logdir: str, pgport: int, is_hpo: bool, hpo_params: dict[str, Any]
+    dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, pgport: int, hpo_params: dict[str, Any]
 ) -> Tuple[Logger, RewardUtility, PostgresConn, Workload]:
     logger = Logger(
+        dbgym_cfg,
         hpo_params["trace"],
         hpo_params["verbose"],
-        Path(logdir),
-        Path(logdir) / "repository",
-        Path(logdir) / "tboard",
     )
 
     reward_utility = RewardUtility(
@@ -153,19 +151,18 @@ def _build_utilities(
 
     # If we're using Boot, PostgresConn.start_with_changes() assumes that Redis is running. Thus,
     # we start Redis here if necessary.
-    enable_boot = hpo_params["enable_boot_during_hpo"] if is_hpo else hpo_params["enable_boot_during_tune"]
+    enable_boot = hpo_params["enable_boot"][str(tuning_mode)]
     if enable_boot:
         make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"])
 
-    pgconn = PostgresConn(
+    pg_conn = PostgresConn(
         dbgym_cfg=dbgym_cfg,
         pgport=pgport,
         pristine_pgdata_snapshot_fpath=Path(hpo_params["pgconn_info"]["pristine_pgdata_snapshot_path"]),
         pgdata_parent_dpath=Path(hpo_params["pgconn_info"]["pgdata_parent_dpath"]),
         pgbin_path=Path(hpo_params["pgconn_info"]["pgbin_path"]),
-        postgres_logs_dir=Path(logdir) / "pg_logs",
         enable_boot=enable_boot,
-        boot_config_fpath=hpo_params["boot_config_fpath"],
+        boot_config_fpath=hpo_params["boot_config_fpath"][str(tuning_mode)],
         connect_timeout=300,
         logger=logger,
     )
@@ -177,12 +174,12 @@ def _build_utilities(
         query_spec=hpo_params["benchmark_config"]["query_spec"],
         workload_path=Path(hpo_params["workload_path"]),
         pid=None,
-        workload_timeout=hpo_params["workload_timeout"],
+        workload_timeout=hpo_params["workload_timeout"][str(tuning_mode)],
         workload_timeout_penalty=hpo_params["workload_timeout_penalty"],
         logger=logger,
     )
 
-    return logger, reward_utility, pgconn, workload
+    return logger, reward_utility, pg_conn, workload
 
 
 def _build_actions(
@@ -276,7 +273,7 @@ def _build_actions(
     return hspace, lsc
 
 
-def _build_obs_space(
+def _build_observation_space(
     dbgym_cfg: DBGymConfig, action_space: HolonSpace, lsc: LSC, hpo_params: dict[str, Any], seed: int
 ) -> StateSpace:
     if hpo_params["metric_state"] == "metric":
@@ -307,9 +304,10 @@ def _build_obs_space(
 
 def _build_env(
     dbgym_cfg: DBGymConfig,
+    tuning_mode: TuningMode,
     hpo_params: dict[str, Any],
-    pgconn: PostgresConn,
-    obs_space: StateSpace,
+    pg_conn: PostgresConn,
+    observation_space: StateSpace,
     holon_space: HolonSpace,
     lsc: LSC,
     workload: Workload,
@@ -320,16 +318,16 @@ def _build_env(
     env = gym.make(
         "Postgres-v0",
         dbgym_cfg=dbgym_cfg,
-        observation_space=obs_space,
+        tuning_mode=tuning_mode,
+        observation_space=observation_space,
         action_space=holon_space,
         workload=workload,
         horizon=hpo_params["horizon"],
         reward_utility=reward_utility,
-        pgconn=pgconn,
-        pqt=hpo_params["query_timeout"],
+        pg_conn=pg_conn,
+        query_timeout=hpo_params["query_timeout"],
         benchbase_config=hpo_params["benchbase_config"],
         logger=logger,
-        replay=False,
     )
 
     # Check whether to create the MQO wrapper.
@@ -344,7 +342,7 @@ def _build_env(
             workload_eval_inverse=hpo_params["workload_eval_inverse"],
             workload_eval_reset=hpo_params["workload_eval_reset"],
             benchbase_config=hpo_params["benchbase_config"],
-            pqt=hpo_params["query_timeout"],
+            query_timeout=hpo_params["query_timeout"],
             env=env,
             logger=logger,
         )
@@ -380,18 +378,19 @@ def _build_env(
 def _build_agent(
     seed: int,
     hpo_params: dict[str, Any],
-    obs_space: StateSpace,
+    observation_space: StateSpace,
     action_space: HolonSpace,
     logger: Logger,
+    ray_trial_id: Optional[str],
 ) -> Wolp:
     action_dim = noise_action_dim = action_space.latent_dim()
     critic_action_dim = action_space.critic_dim()
 
     actor = Actor(
-        observation_space=obs_space,
+        observation_space=observation_space,
         action_space=action_space,
         net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")],
-        features_dim=gym.spaces.utils.flatdim(obs_space),
+        features_dim=gym.spaces.utils.flatdim(observation_space),
         activation_fn=_parse_activation_fn(hpo_params["activation_fn"]),
         weight_init=hpo_params["weight_init"],
         bias_zero=hpo_params["bias_zero"],
@@ -401,10 +400,10 @@ def _build_agent(
     )
 
     actor_target = Actor(
-        observation_space=obs_space,
+        observation_space=observation_space,
         action_space=action_space,
         net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")],
-        features_dim=gym.spaces.utils.flatdim(obs_space),
+        features_dim=gym.spaces.utils.flatdim(observation_space),
         activation_fn=_parse_activation_fn(hpo_params["activation_fn"]),
         weight_init=hpo_params["weight_init"],
         bias_zero=hpo_params["bias_zero"],
@@ -418,10 +417,10 @@ def _build_agent(
     )
 
     critic = ContinuousCritic(
-        observation_space=obs_space,
+        observation_space=observation_space,
         action_space=action_space,
         net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")],
-        features_dim=gym.spaces.utils.flatdim(obs_space),
+        features_dim=gym.spaces.utils.flatdim(observation_space),
         activation_fn=_parse_activation_fn(hpo_params["activation_fn"]),
         weight_init=hpo_params["weight_init"],
         bias_zero=hpo_params["bias_zero"],
@@ -430,10 +429,10 @@ def _build_agent(
     )
 
     critic_target = ContinuousCritic(
-        observation_space=obs_space,
+        observation_space=observation_space,
         action_space=action_space,
         net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")],
-        features_dim=gym.spaces.utils.flatdim(obs_space),
+        features_dim=gym.spaces.utils.flatdim(observation_space),
         activation_fn=_parse_activation_fn(hpo_params["activation_fn"]),
         weight_init=hpo_params["weight_init"],
         bias_zero=hpo_params["bias_zero"],
@@ -447,7 +446,7 @@ def _build_agent(
     )
 
     policy = WolpPolicy(
-        observation_space=obs_space,
+        observation_space=observation_space,
         action_space=action_space,
         actor=actor,
         actor_target=actor_target,
@@ -497,9 +496,10 @@ def _build_agent(
         policy=policy,
         replay_buffer=ReplayBuffer(
             buffer_size=hpo_params["buffer_size"],
-            obs_shape=[gym.spaces.utils.flatdim(obs_space)],
+            obs_shape=[gym.spaces.utils.flatdim(observation_space)],
             action_dim=critic_action_dim,
         ),
+        ray_trial_id=ray_trial_id,
        learning_starts=hpo_params["learning_starts"],
         batch_size=hpo_params["batch_size"],
         train_freq=(hpo_params["train_freq_frequency"], hpo_params["train_freq_unit"]),
@@ -512,21 +512,22 @@ def _build_agent(
 
 
 def build_trial(
-    dbgym_cfg: DBGymConfig, seed: int, logdir: str, is_hpo: bool, hpo_params: dict[str, Any]
+    dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, seed: int, hpo_params: dict[str, Any], ray_trial_id: Optional[str]=None
 ) -> Tuple[Logger, TargetResetWrapper, AgentEnv, Wolp, str]:
     # The massive trial builder.
     port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"])
-    _modify_benchbase_config(logdir, port, hpo_params)
+    _modify_benchbase_config(dbgym_cfg, port, hpo_params)
 
-    logger, reward_utility, pgconn, workload = _build_utilities(dbgym_cfg, logdir, port, is_hpo, hpo_params)
+    logger, reward_utility, pg_conn, workload = _build_utilities(dbgym_cfg, tuning_mode, port, hpo_params)
     holon_space, lsc = _build_actions(dbgym_cfg, seed, hpo_params, workload, logger)
-    obs_space = _build_obs_space(dbgym_cfg, holon_space, lsc, hpo_params, seed)
+    observation_space = _build_observation_space(dbgym_cfg, holon_space, lsc, hpo_params, seed)
     target_reset, env = _build_env(
         dbgym_cfg,
+        tuning_mode,
         hpo_params,
-        pgconn,
-        obs_space,
+        pg_conn,
+        observation_space,
         holon_space,
         lsc,
         workload,
@@ -534,5 +535,5 @@ def build_trial(
         logger,
     )
 
-    agent = _build_agent(seed, hpo_params, obs_space, holon_space, logger)
+    agent = _build_agent(seed, hpo_params, observation_space, holon_space, logger, ray_trial_id)
     return logger, target_reset, env, agent, signal
diff --git a/tune/protox/agent/cli.py b/tune/protox/agent/cli.py
index 968d2f12..a78814a0 100644
--- a/tune/protox/agent/cli.py
+++ b/tune/protox/agent/cli.py
@@ -3,6 +3,7 @@
 from misc.utils import DBGymConfig
 from tune.protox.agent.hpo import hpo
 from tune.protox.agent.tune import tune
+from tune.protox.agent.replay import replay
 
 
 @click.group("agent")
@@ -13,3 +14,4 @@ def agent_group(dbgym_cfg: DBGymConfig):
 
 agent_group.add_command(hpo)
 agent_group.add_command(tune)
+agent_group.add_command(replay)
diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py
index f2bc6b26..3c19900c 100644
--- a/tune/protox/agent/coerce_config.py
+++ b/tune/protox/agent/coerce_config.py
@@ -1,7 +1,7 @@
 from typing import Any
 import yaml
 
-from misc.utils import DBGymConfig, open_and_save
+from misc.utils import DBGymConfig, TuningMode, open_and_save
 
 
 def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dict[str, Any]) -> dict[str, Any]:
@@ -24,8 +24,12 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic
         "verbose": True,
         "trace": True,
         "seed": hpo_params["mythril_args"]["seed"],
-        "duration": hpo_params["mythril_args"]["duration"],
-        "workload_timeout": hpo_params["mythril_args"]["workload_timeout"],
+        "tune_duration": {
+            str(TuningMode.HPO): hpo_params["mythril_args"]["duration"],
+        },
+        "workload_timeout": {
+            str(TuningMode.HPO): hpo_params["mythril_args"]["workload_timeout"],
+        },
         "query_timeout": hpo_params["mythril_args"]["timeout"],
         "pgconn_info": {
             "pgport": 5432,
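The dictionary representation introduced here means tune and replay never overwrite HPO's values; each mode reads its own entry. A minimal sketch of the access pattern (the values are illustrative; the TUNE entry would presumably be added later by the tune command, which is not part of this hunk):

    from misc.utils import TuningMode

    hpo_params = {
        "workload_timeout": {
            str(TuningMode.HPO): 600,
            str(TuningMode.TUNE): 600,  # hypothetical value added later by tune
        },
    }
    # Each phase indexes by its own mode instead of mutating a shared scalar.
    workload_timeout = hpo_params["workload_timeout"][str(TuningMode.HPO)]
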
default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname METRIC_NAME = "Best Metric" class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath): + def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot): self.benchmark_name = benchmark_name self.workload_name = workload_name self.embedder_path = embedder_path @@ -44,11 +45,12 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi self.agent = agent self.max_concurrent = max_concurrent self.num_samples = num_samples - self.duration = duration + self.tune_duration_during_hpo = tune_duration_during_hpo self.workload_timeout = workload_timeout self.query_timeout = query_timeout self.enable_boot_during_hpo = enable_boot_during_hpo - self.boot_config_fpath = boot_config_fpath + self.boot_config_fpath_during_hpo = boot_config_fpath_during_hpo + self.build_space_good_for_boot = build_space_good_for_boot @click.command() @@ -144,11 +146,11 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help=f"The # of times to specific hyperparameter configs to sample from the hyperparameter search space and train agent models with.", ) @click.option( - "--duration", default=30, type=float, help="The total number of hours to run for." + "--tune-duration-during-hpo", default=4, type=float, help="The number of hours to run each hyperparamer config tuning trial for." ) @click.option( "--workload-timeout", - default=600, + default=DEFAULT_WORKLOAD_TIMEOUT, type=int, help="The timeout (in seconds) of a workload. We run the workload once per DBMS configuration. For OLAP workloads, certain configurations may be extremely suboptimal, so we need to time out the workload.", ) @@ -164,10 +166,27 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help="Whether to enable the Boot query accelerator during the HPO process. Deciding to use Boot during HPO is separate from deciding to use Boot during tuning.", ) @click.option( - "--boot-config-fpath", + "--boot-config-fpath-during-hpo", default=DEFAULT_BOOT_CONFIG_FPATH, type=Path, - help="The path to the file configuring Boot.", + help="The path to the file configuring Boot when running HPO. When tuning, you may use a different Boot config.", +) +# Building a space good for Boot is subtly different from whether we enable Boot during HPO. 
+# There are certain options that qualitatively do not perform well with Boot (e.g. metrics state +# because Boot extrapolates the query runtime but not metrics). This param controls whether we +# use those options or not. +# I chose the word "good" instead of "compatible" because metrics state does not _crash_ if you +# use Boot but it just doesn't seem like it would perform well. +# One workflow where these two variables are different is where we don't enable Boot during HPO +# but do want to enable Boot during tuning. +# However, whether we're building a space good for Boot is also different from whether we enable +# Boot during tuning. We often want to compare one tuning run with Boot against one without +# Boot, in which case we'd build a space good for Boot and then run it once with Boot and once +# without Boot. +@click.option( + "--build-space-good-for-boot", + is_flag=True, + help="Whether to avoid certain options that are known to not perform well when Boot is enabled. See the codebase for why this is subtly different from --enable-boot-during-hpo.", ) def hpo( dbgym_cfg, @@ -189,11 +208,12 @@ def hpo( agent, max_concurrent, num_samples, - duration, + tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo: bool, - boot_config_fpath: Path, + boot_config_fpath_during_hpo: Path, + build_space_good_for_boot: bool, ): # Set args to defaults programmatically (do this before doing anything else in the function) workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset) @@ -223,7 +243,7 @@ def hpo( pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath) pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) - boot_config_fpath = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath) + boot_config_fpath_during_hpo = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath_during_hpo) # Check assertions on args if intended_pgdata_hardware == "hdd": @@ -234,7 +254,7 @@ def hpo( assert False # Create args object - hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, duration, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath) + hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot) _tune_hpo(dbgym_cfg, hpo_args) @@ -248,13 +268,13 @@ def build_space( embedder_path: list[Path], pgconn_info: dict[str, str], benchbase_config: dict[str, Any]={}, - duration: int=30, + tune_duration_during_hpo: int=30, seed: int=0, enable_boot_during_hpo: bool=False, - boot_config_fpath: Path=None, + boot_config_fpath_during_hpo: Path=None, + build_space_good_for_boot: bool = False, workload_timeouts: list[int]=[600], query_timeouts: list[int]=[30], - boot_enabled: bool = False, ) -> dict[str, Any]: return { @@ -263,12 +283,24 @@ def build_space( "verbose": True, "trace": True, "seed": seed, - "enable_boot_during_hpo": enable_boot_during_hpo, - "boot_config_fpath": boot_config_fpath, + # For params 
that may differ between HPO, tune, and replay, I chose to represent them
+        # as dictionaries. I felt this was less confusing than overriding parts of the hpo_params
+        # during tune or replay. With the dictionary representation, we never override anything in
+        # hpo_params - we only ever add new fields to hpo_params.
+        "enable_boot": {
+            str(TuningMode.HPO): enable_boot_during_hpo,
+        },
+        "boot_config_fpath": {
+            str(TuningMode.HPO): boot_config_fpath_during_hpo,
+        },

         # Timeouts.
-        "duration": duration,
-        "workload_timeout": tune.choice(workload_timeouts),
+        "tune_duration": {
+            str(TuningMode.HPO): tune_duration_during_hpo,
+        },
+        "workload_timeout": {
+            str(TuningMode.HPO): tune.choice(workload_timeouts),
+        },
         "query_timeout": tune.choice(query_timeouts),

         # Paths.
@@ -298,7 +330,7 @@ def build_space(
         "normalize_reward": tune.choice([False, True]),

         # State.
-        "metric_state": tune.choice(([] if boot_enabled else ["metric"]) + ["structure", "structure_normalize"]),
+        "metric_state": tune.choice(([] if build_space_good_for_boot else ["metric"]) + ["structure", "structure_normalize"]),
         "maximize_state": not benchmark_config.get("oltp_workload", False),
         # Whether to normalize state or not.
         "normalize_state": tune.sample_from(lambda spc: False if spc["config"]["metric_state"] == "structure_normalize" else True),
@@ -374,9 +406,9 @@ def build_space(

 class TuneTimeoutChecker(object):
-    def __init__(self, duration: int) -> None:
-        self.limit = (duration * 3600) > 0
-        self.remain = int(duration * 3600)
+    def __init__(self, tune_duration: float) -> None:
+        self.limit = (tune_duration * 3600) > 0
+        self.remain = int(tune_duration * 3600)
         self.running = False
         self.start = 0.

@@ -403,13 +435,19 @@ def __call__(self) -> bool:

 class TuneTrial:
-    def __init__(self, dbgym_cfg: DBGymConfig, is_hpo: bool) -> None:
-        '''
-        We use this object for both HPO and tune. It behaves *slightly* differently
-        depending on what it's used for, which is why we have an is_hpo param.
-        '''
+    def __init__(self, dbgym_cfg: DBGymConfig, tuning_mode: TuningMode, ray_trial_id: Optional[str]=None) -> None:
+        """
+        We use this object for HPO, tune, and replay. It behaves *slightly* differently
+        depending on what it's used for, which is why we have the tuning_mode param.
+        """
         self.dbgym_cfg = dbgym_cfg
-        self.is_hpo = is_hpo
+        self.tuning_mode = tuning_mode
+
+        if self.tuning_mode == TuningMode.HPO:
+            assert ray_trial_id != None, "If we're doing HPO, we will create multiple TuneTrial() objects. We thus need to differentiate them somehow."
+        else:
+            assert ray_trial_id == None, "If we're not doing HPO, we (currently) will create only one TuneTrial() object. For clarity, we set ray_trial_id to None since ray_trial_id should not be used in this case."
+        self.ray_trial_id = ray_trial_id

     def setup(self, hpo_params: dict[str, Any]) -> None:
         # Attach mythril directory to the search path.
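To see the dictionary representation described above in isolation, here is a minimal sketch with invented values (the stand-in TuningMode enum below mirrors the one this diff imports from misc.utils):

from enum import Enum

class TuningMode(Enum):  # stand-in for misc.utils.TuningMode; illustrative only
    HPO = "hpo"
    TUNE = "tune"
    REPLAY = "replay"

hpo_params = {
    "enable_boot":   {str(TuningMode.HPO): False},
    "tune_duration": {str(TuningMode.HPO): 4.0},
}
# Later phases only ever add their own entries; the HPO values are never overridden.
hpo_params["enable_boot"][str(TuningMode.TUNE)] = True
hpo_params["tune_duration"][str(TuningMode.REPLAY)] = 8.0
# Every read is mode-qualified, so there is no ambiguity about which phase a value belongs to.
assert hpo_params["enable_boot"][str(TuningMode.HPO)] is False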
@@ -423,21 +461,22 @@ def setup(self, hpo_params: dict[str, Any]) -> None: ) np.random.seed(seed) torch.manual_seed(seed) - assert hasattr(self, "logdir") - self.timeout = TuneTimeoutChecker(hpo_params["duration"]) + tune_duration = hpo_params["tune_duration"][str(self.tuning_mode)] + + self.timeout_checker = TuneTimeoutChecker(tune_duration) self.logger, self.target_reset, self.env, self.agent, self.signal = build_trial( self.dbgym_cfg, + self.tuning_mode, seed=seed, - logdir=self.logdir, - is_hpo=self.is_hpo, - hpo_params=hpo_params + hpo_params=hpo_params, + ray_trial_id=self.ray_trial_id, ) self.logger.get_logger(None).info("%s", hpo_params) self.logger.get_logger(None).info(f"Seed: {seed}") # Attach the timeout checker and loggers. - self.agent.set_timeout_checker(self.timeout) + self.agent.set_timeout_checker(self.timeout_checker) self.agent.set_logger(self.logger) self.env_init = False @@ -447,7 +486,7 @@ def setup(self, hpo_params: dict[str, Any]) -> None: def step(self) -> dict[Any, Any]: self.step_count += 1 # Only measure the actual tuning time. - self.timeout.resume() + self.timeout_checker.resume() episode = self.agent._episode_num it = self.agent.num_timesteps @@ -465,11 +504,13 @@ def step(self) -> dict[Any, Any]: f"Baseline Metric: {baseline_metric}. Baseline Reward: {baseline_reward}" ) self.env_init = True - self.logger.stash_results(infos, name_override="baseline") + + assert self.ray_trial_id != None if self.tuning_mode == TuningMode.HPO else True, "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names." + self.logger.stash_results(infos, name_override="baseline", ray_trial_id=self.ray_trial_id) else: - self.agent.learn(self.env, total_timesteps=1) + self.agent.learn(self.env, total_timesteps=1, tuning_mode=self.tuning_mode) - self.timeout.pause() + self.timeout_checker.pause() self.logger.advance() # Step telemetry that we care about. @@ -487,7 +528,7 @@ def step(self) -> dict[Any, Any]: } # If we've timed out. Note that we've timed out. 
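How TuneTimeoutChecker is meant to be driven, as a sketch (semantics inferred from the resume()/pause()/__call__ usage in this hunk and the "Only measure the actual tuning time" comment; values are toy):

checker = TuneTimeoutChecker(tune_duration=0.5)  # half-hour budget
checker.resume()   # start counting tuning time
# ... one tuning step executes ...
checker.pause()    # stop counting while we do bookkeeping
if checker():      # True once the accumulated tuning time exceeds the budget
    pass           # the trial cleans up and reports itself as done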
- if self.timeout(): + if self.timeout_checker(): self.cleanup() data[ray.tune.result.DONE] = True @@ -512,8 +553,7 @@ class TuneOpt(Trainable): dbgym_cfg = global_dbgym_cfg def setup(self, hpo_params: dict[str, Any]) -> None: - self.trial = TuneTrial(TuneOpt.dbgym_cfg, True) - self.trial.logdir = self.logdir # type: ignore + self.trial = TuneTrial(TuneOpt.dbgym_cfg, TuningMode.HPO, ray_trial_id=self.trial_id) self.trial.setup(hpo_params) def step(self) -> dict[Any, Any]: @@ -572,10 +612,11 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: "pgbin_path": hpo_args.pgbin_path, }, benchbase_config=benchbase_config, - duration=hpo_args.duration, + tune_duration_during_hpo=hpo_args.tune_duration_during_hpo, seed=hpo_args.seed, enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, - boot_config_fpath=hpo_args.boot_config_fpath, + boot_config_fpath_during_hpo=hpo_args.boot_config_fpath_during_hpo, + build_space_good_for_boot=hpo_args.build_space_good_for_boot, workload_timeouts=workload_timeouts, query_timeouts=query_timeouts, ) @@ -609,6 +650,7 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: sync_config=SyncConfig(), verbose=2, log_to_file=True, + storage_path=dbgym_cfg.cur_task_runs_path("hpo_ray_results", mkdir=True), ) tuner = ray.tune.Tuner( @@ -624,5 +666,18 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: if results[i].error: print(f"Trial {results[i]} FAILED") assert False, print("Encountered exceptions!") + + # Save the best params.json. best_result = results.get_best_result(metric=METRIC_NAME, mode=mode) - print(f"best_result={best_result}") + best_params_generated_fpath = Path(best_result.path) / "params.json" + # Before saving, copy it into run_*/[codebase]/data/. This way, save_file() called on + # params.json will link directly to run_*/[codebase]/data/params.json instead of to + # run_*/[codebase]/hpo_ray_results/TuneOpt*/. + best_params_copy_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json" + shutil.copy(best_params_generated_fpath, best_params_copy_fpath) + link_result(dbgym_cfg, best_params_copy_fpath, custom_result_name=default_hpoed_agent_params_fname(hpo_args.benchmark_name, hpo_args.workload_name) + ".link") + # We also link from run_*/[codebase]/data/params.json to run_*/[codebase]/hpo_ray_results/TuneOpt*/**/params.json. + # This way, when _manually_ looking through run_*/, we can see which HPO trial was + # responsible for creating params.json. 
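The linking code that follows produces roughly this layout (paths illustrative; the exact symlink name comes from default_hpoed_agent_params_fname()):

run_*/[codebase]/data/params.json            <- real copy; save_file() resolves to this
run_*/[codebase]/data/params.json.link       <- symlink back into run_*/[codebase]/hpo_ray_results/TuneOpt*/ for provenance
symlinks/.../[hpoed_agent_params_fname].link -> run_*/[codebase]/data/params.json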
+ best_params_link_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json.link" + os.symlink(best_params_generated_fpath, best_params_link_fpath) diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py index 68e5f1be..dd39d7ba 100644 --- a/tune/protox/agent/off_policy_algorithm.py +++ b/tune/protox/agent/off_policy_algorithm.py @@ -4,6 +4,7 @@ import numpy as np from numpy.typing import NDArray +from misc.utils import TuningMode from tune.protox.agent.agent_env import AgentEnv from tune.protox.agent.base_class import BaseAlgorithm from tune.protox.agent.buffers import ReplayBuffer @@ -46,10 +47,12 @@ def __init__( gradient_steps: int = 1, action_noise: Optional[ActionNoise] = None, seed: Optional[int] = None, + ray_trial_id: Optional[str] = None, ): super().__init__(seed=seed) self.policy = policy self.replay_buffer = replay_buffer + self.ray_trial_id = ray_trial_id self.batch_size = batch_size self.learning_starts = learning_starts @@ -137,6 +140,7 @@ def _sample_action( def collect_rollouts( self, + tuning_mode: TuningMode, env: AgentEnv, train_freq: TrainFreq, replay_buffer: ReplayBuffer, @@ -182,8 +186,11 @@ def collect_rollouts( # Rescale and perform action new_obs, rewards, terms, truncs, infos = env.step(actions) dones = terms or truncs + # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get + # stashed in the same directory and potentially cause a race condition. if self.logger: - self.logger.stash_results(infos) + assert self.ray_trial_id != None if tuning_mode == TuningMode.HPO else True, "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names." + self.logger.stash_results(infos, ray_trial_id=self.ray_trial_id) self.num_timesteps += 1 num_collected_steps += 1 @@ -210,17 +217,18 @@ def collect_rollouts( num_collected_steps, num_collected_episodes, continue_training ) - def learn(self, env: AgentEnv, total_timesteps: int) -> None: + def learn(self, env: AgentEnv, total_timesteps: int, tuning_mode: TuningMode) -> None: assert isinstance(env, AgentEnv) total_timesteps = self._setup_learn(env, total_timesteps) while self.num_timesteps < total_timesteps: rollout = self.collect_rollouts( + tuning_mode, env, train_freq=self.train_freq, + replay_buffer=self.replay_buffer, action_noise=self.action_noise, learning_starts=self.learning_starts, - replay_buffer=self.replay_buffer, ) if rollout.continue_training is False: diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py new file mode 100644 index 00000000..9bf346bb --- /dev/null +++ b/tune/protox/agent/replay.py @@ -0,0 +1,337 @@ +""" +Replaying a tuning run gives you the authoritative runtimes of that tuning run. +The original tuning run has per-query timeouts, so the runtimes may be inaccurate. The + replayed tuning run does not have per-query timeouts. +Additionally, the original tuning run may have been accelerated by Boot, whereas the + replayed tuning run is not. 
+""" +import json +import logging +import pickle +import click +import pandas as pd +import tqdm +from pathlib import Path +from dateutil.parser import parse + +from misc.utils import DBGymConfig, TuningMode, conv_inputpath_to_realabspath, open_and_save, save_file, workload_name_fn, default_tuning_steps_dpath +from tune.protox.agent.build_trial import build_trial +from tune.protox.env.pg_env import PostgresEnv +from tune.protox.env.space.holon_space import HolonSpace +from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs +from tune.protox.env.types import HolonAction +from tune.protox.env.workload import Workload + + +REPLAY_DATA_FNAME = "replay_data.csv" + + +class ReplayArgs: + def __init__( + self, workload_timeout_during_replay: bool, replay_all_variations: bool, simulated: bool, cutoff: float, blocklist: list + ): + self.workload_timeout_during_replay = workload_timeout_during_replay + self.replay_all_variations = replay_all_variations + self.simulated = simulated + self.cutoff = cutoff + self.blocklist = blocklist + + +@click.command() +@click.pass_obj +@click.argument("benchmark-name") +@click.option("--seed-start", type=int, default=15721, help="A workload consists of queries from multiple seeds. This is the starting seed (inclusive).") +@click.option("--seed-end", type=int, default=15721, help="A workload consists of queries from multiple seeds. This is the ending seed (inclusive).") +@click.option( + "--query-subset", + type=click.Choice(["all", "even", "odd"]), + default="all", +) +@click.option( + "--scale-factor", + default=1.0, + help="The scale factor used when generating the data of the benchmark.", +) +@click.option( + "--boot-enabled-during-tune", + is_flag=True, + help="Whether Boot was enabled during tuning.", +) +@click.option( + "--tuning-steps-dpath", + default=None, + type=Path, + help="The path to the `tuning_steps` directory to be replayed." +) +@click.option( + "--workload-timeout-during-replay", + default=None, + type=int, + # You can make it use the workload timeout used during tuning if you want. + # I just made it use the workload timeout from HPO because I don't currently persist the tuning HPO params. + help="The timeout (in seconds) of a workload when replaying. By default, it will be equal to the workload timeout used during HPO." +) +@click.option( + "--replay-all-variations", + is_flag=True, + help="If true, replay all the variations of each query. If false, only replay the variation we found was best in the tuning run. Replaying all variations has two possible use cases: (1) it makes the cache warm to better replicate behavior during tuning, (2) if the best variation during tuning was determined with Boot, it might not still be the best variation." +) +@click.option( + "--simulated", + is_flag=True, + help="Set to true to use the runtimes from the original tuning run instead of replaying the workload." +) +@click.option( + "--cutoff", + default=None, + type=float, + help="Only evaluate configs up to cutoff hours. None means \"evaluate all configs\"." +) +@click.option( + "--blocklist", + default=[], + type=list, + help="Ignore running queries in the blocklist." 
+)
+def replay(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, boot_enabled_during_tune: bool, tuning_steps_dpath: Path, workload_timeout_during_replay: int, replay_all_variations: bool, simulated: bool, cutoff: float, blocklist: list) -> None:
+    # Set args to defaults programmatically (do this before doing anything else in the function)
+    workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
+
+    if tuning_steps_dpath == None:
+        tuning_steps_dpath = default_tuning_steps_dpath(dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name, boot_enabled_during_tune)
+
+    # Convert all input paths to absolute paths
+    tuning_steps_dpath = conv_inputpath_to_realabspath(dbgym_cfg, tuning_steps_dpath)
+
+    # Group args together to reduce the # of parameters we pass into functions
+    replay_args = ReplayArgs(workload_timeout_during_replay, replay_all_variations, simulated, cutoff, blocklist)
+
+    # Replay
+    replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args)
+
+
+def replay_tuning_run(dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_args: ReplayArgs):
+    """
+    Replay a single tuning run (as in one tuning_steps/ folder).
+    """
+    def _is_tuning_step_line(line: str) -> bool:
+        return "mv" in line and "tuning_steps" in line and "baseline" not in line
+
+    hpo_params_fpath = tuning_steps_dpath / "params.json"
+    with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f:
+        hpo_params = json.load(f)
+
+    # Set defaults that depend on hpo_params
+    if replay_args.workload_timeout_during_replay == None:
+        replay_args.workload_timeout_during_replay = hpo_params["workload_timeout"][str(TuningMode.HPO)]
+
+    # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
+    hpo_params["enable_boot"][str(TuningMode.REPLAY)] = False
+    hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None
+    hpo_params["workload_timeout"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay
+
+    # Go through output.log and find the tuning_steps/[time]/ folders
+    # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`),
+    # so you could just do `ls tuning_steps/` if you wanted to.
+    folders = []
+    start_found = False
+    output_log_fpath = tuning_steps_dpath / "output.log"
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
+        for line in f:
+            if not start_found:
+                if "Baseline Metric" in line:
+                    start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0])
+                    start_found = True
+            else:
+                if _is_tuning_step_line(line):
+                    repo = eval(line.split("Running ")[-1])[-1]
+                    last_folder = repo.split("/")[-1]
+                    time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])
+                    if replay_args.cutoff == None or (time_since_start - start_time).total_seconds() < replay_args.cutoff * 3600:
+                        folders.append(last_folder)
+
+    # Set tune_duration to be high so that it doesn't cut the replay off early
+    hpo_params["tune_duration"][str(TuningMode.REPLAY)] = replay_args.workload_timeout_during_replay * len(folders)
+
+    # Build PostgresEnv.
+    _, _, agent_env, _, _ = build_trial(dbgym_cfg, TuningMode.REPLAY, hpo_params["seed"], hpo_params)
+    pg_env: PostgresEnv = agent_env.unwrapped
+    action_space: HolonSpace = pg_env.action_space
+
+    # Reset things.
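For reference, an output.log line that _is_tuning_step_line() matches has roughly this shape (the line below is invented; the exact format is determined by the Logger):

from dateutil.parser import parse

line = "DEBUG:2024-01-01 12:35:10 [agent_env.py:123] Running ['mv', '/tmp/results', '/workspace/task_runs/run_0/tuning_steps/2024-01-01_12-35-10']"
repo = eval(line.split("Running ")[-1])[-1]  # last element of the list literal: the tuning_steps/ destination
time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])  # parses "2024-01-01 12:35:10 "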
+    if not replay_args.simulated:
+        pg_env.pg_conn.restore_pristine_snapshot()
+
+    num_lines = 0
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
+        for line in f:
+            if "Baseline Metric" in line:
+                num_lines += 1
+            elif _is_tuning_step_line(line):
+                num_lines += 1
+
+    # A convenience wrapper around execute_workload() which fills in the arguments properly and processes the return values.
+    def _execute_workload_wrapper(actions_info: dict) -> tuple[int, int, bool, float]:
+        logging.info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), action_space.get_knob_space().tables, action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n")
+        logging.info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), action_space.get_knob_space().tables)}\n\n")
+        assert replay_args.workload_timeout_during_replay == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] == pg_env.workload.workload_timeout, "All these different sources of workload_timeout during replay should show the same value"
+
+        if replay_args.replay_all_variations:
+            all_holon_action_variations = actions_info["all_holon_action_variations"]
+            actions = [holon_action for (_, holon_action) in all_holon_action_variations]
+            variation_names = [variation_name for (variation_name, _) in all_holon_action_variations]
+        else:
+            # Note that "best observed" is not an entirely accurate name. Specifically, if the workload times out, some queries
+            # will not have had a chance to run at all. Based on the behavior of `_mutilate_action_with_metrics()`, we select
+            # an arbitrary variation for the queries that have not executed at all.
+            best_observed_holon_action = actions_info["best_observed_holon_action"]
+            actions = [best_observed_holon_action]
+            variation_names = ["BestObserved"]
+
+        num_timed_out_queries, did_workload_time_out, qid_runtime_data = pg_env.workload.execute_workload(
+            pg_conn=pg_env.pg_conn,
+            actions=actions,
+            variation_names=variation_names,
+            observation_space=None,
+            action_space=action_space,
+            reset_metrics=None,
+            query_timeout=None,
+            workload_qdir=None,
+            blocklist=replay_args.blocklist,
+            first=False,
+        )
+        workload_runtime = Workload.compute_total_workload_runtime(qid_runtime_data)
+        num_executed_queries = len(qid_runtime_data)
+        return num_executed_queries, num_timed_out_queries, did_workload_time_out, workload_runtime
+
+    run_data = []
+    progress_bar = tqdm.tqdm(total=num_lines)
+    with open_and_save(dbgym_cfg, output_log_fpath, "r") as f:
+        current_step = 0
+        start_found = False
+        start_time = None
+        maximal_repo = None
+        existing_index_acts = []
+
+        for line in f:
+            # Keep going until we've found the start.
+            if not start_found:
+                if "Baseline Metric" in line:
+                    start_found = True
+                    start_time = parse(line.split("INFO:")[-1].split(" Baseline Metric")[0].split("[")[0])
+                    progress_bar.update(1)
+                continue
+
+            elif _is_tuning_step_line(line):
+                if _is_tuning_step_line(line):
+                    repo = eval(line.split("Running ")[-1])[-1]
+                    time_since_start = parse(line.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])
+                elif "Found new maximal state with" in line:
+                    repo = eval(maximal_repo.split("Running ")[-1])[-1]
+                    time_since_start = parse(maximal_repo.split("DEBUG:")[-1].split(" Running")[0].split("[")[0])
+                    maximal_repo = None
+
+                # Get the original runtime as well as whether any individual queries and/or the full workload timed out.
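For reference, the run.raw.csv processing below relies on three of the file's seven columns; a sketch of their shape (values invented):

# Transaction Name | ... | Latency (microseconds) | ... | Timed Out
#   "Q1"           | ... | 1523000                | ... | False
#   "P"            | ... | 600000000              | ... | True    <- penalty row appended when the workload times out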
+ run_raw_csv_fpath = tuning_steps_dpath / repo / "run.raw.csv" + save_file(dbgym_cfg, run_raw_csv_fpath) + run_raw_csv = pd.read_csv(run_raw_csv_fpath) + assert len(run_raw_csv.columns) == 7 + # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` + # because Proto-X decreases `workload_timeout` over the course of the tuning run. Specifically, at the end of a tuning step, Proto-X + # sets `workload_timeout` to be equal to the runtime of the workload that just ran. + # We separate the penalty rows from the non-penalty rows to process them separately. + run_raw_csv_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] == "P"] + run_raw_csv_non_penalty_rows = run_raw_csv[run_raw_csv["Transaction Name"] != "P"] + # Get the number of executed queries. A query timing out is not the same as a query not being executed. We do this instead of getting the + # number of skipped queries since we don't have the total # of queries with the current codebase. + num_executed_queries_in_original = len(run_raw_csv_non_penalty_rows) + # `num_timed_out_queries_in_original` counts the number of queries where *all variations* timed out. Note that the query_timeout of + # a query may be set extremely low because the workload is about to time out, so it could be viewed as "unfair" to count those queries as + # having timed out. Regardless, that's how we currently do things. + num_timed_out_queries_in_original = run_raw_csv_non_penalty_rows["Timed Out"].sum() + # Penalties are added when the workload times out so this is a reliable indicator of whether the workload timed out. + did_workload_time_out_in_original = len(run_raw_csv_penalty_rows) > 0 + # Penalties are meant to affect the reward of the tuning agent but they are unrelated to the actual runtime, so we ignore them when + # computing the original runtime. + original_workload_runtime = run_raw_csv_non_penalty_rows["Latency (microseconds)"].sum() / 1e6 + assert original_workload_runtime > 0 + + # Extract the necessary values from action.pkl + with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "action.pkl", "rb") as f: + actions_info = pickle.load(f) + all_holon_action_variations = actions_info["all_holon_action_variations"] + # Extract the KnobSpaceAction and IndexAction from all_holon_action_variations. + # These two should be identical across all HolonActions, which we will assert. 
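The action.pkl unpacked next holds a dictionary of this shape (keys as used in this diff; the HolonAction payloads are elided):

actions_info = {
    # Every (variation_name, HolonAction) pair attempted at this tuning step.
    "all_holon_action_variations": [("PerQuery", ...), ("GlobalDual", ...)],
    # The action with the best-performing query knobs implanted (added by MQOWrapper).
    "best_observed_holon_action": ...,
}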
+            _, first_holon_action = all_holon_action_variations[0]
+            knob_space_action = first_holon_action[0]
+            index_space_raw_sample = first_holon_action[1]
+            index_action = action_space.get_index_space().to_action(index_space_raw_sample)
+            assert all([knob_space_action == holon_action[0] for (_, holon_action) in all_holon_action_variations])
+            assert all([index_action == action_space.get_index_space().to_action(holon_action[1]) for (_, holon_action) in all_holon_action_variations])
+
+            # Get the indexes from this action and the prior state
+            index_acts = set()
+            index_acts.add(index_action)
+            assert len(index_acts) > 0
+            with open_and_save(dbgym_cfg, tuning_steps_dpath / repo / "prior_state.pkl", "rb") as f:
+                prior_states = pickle.load(f)
+            all_sc = set(prior_states[1])
+            for index_act in index_acts:
+                all_sc.add(index_act)
+
+            all_sc = {a for a in all_sc if "USING btree ()" not in a.sql(True)}
+            index_acts = all_sc
+            # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step
+            index_modification_sqls = []
+            for index_act in index_acts:
+                if index_act not in existing_index_acts:
+                    index_modification_sqls.append(index_act.sql(True))
+            for existing_index_act in existing_index_acts:
+                if existing_index_act not in index_acts:
+                    index_modification_sqls.append(existing_index_act.sql(False))
+
+            # Modify Postgres to have the right indexes and system-wide knobs. `index_modification_sqls` holds the indexes
+            # while `cc` holds the system-wide knobs.
+            if not replay_args.simulated:
+                cc, _ = action_space.get_knob_space().generate_action_plan(knob_space_action, prior_states[0])
+                # Like in tuning, we don't dump the page cache when calling shift_state() to see how the workload
+                # performs in a warm cache scenario.
+                pg_env.shift_state(cc, index_modification_sqls, dump_page_cache=False)
+            existing_index_acts = index_acts
+
+            # Execute the workload to get the runtime.
+            if not replay_args.simulated:
+                num_executed_queries_in_replay, num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = _execute_workload_wrapper(actions_info)
+            else:
+                num_executed_queries_in_replay, num_timed_out_queries_in_replay, did_workload_time_out_in_replay, replayed_workload_runtime = num_executed_queries_in_original, num_timed_out_queries_in_original, did_workload_time_out_in_original, original_workload_runtime
+
+            # Perform some validity checks and then add this tuning step's data to `run_data`.
+            this_step_run_data = {
+                "step": current_step,
+                "time_since_start": (time_since_start - start_time).total_seconds(),
+                "original_workload_runtime": original_workload_runtime,
+                "num_executed_queries_in_original": num_executed_queries_in_original,
+                "num_timed_out_queries_in_original": num_timed_out_queries_in_original,
+                "did_workload_time_out_in_original": did_workload_time_out_in_original,
+                "replayed_workload_runtime": replayed_workload_runtime,
+                "num_executed_queries_in_replay": num_executed_queries_in_replay,
+                "num_timed_out_queries_in_replay": num_timed_out_queries_in_replay,
+                "did_workload_time_out_in_replay": did_workload_time_out_in_replay,
+            }
+            # Log before performing checks to help with debugging.
+            logging.info(f"this_step_run_data={this_step_run_data}")
+            assert not (num_timed_out_queries_in_replay > 0 and not did_workload_time_out_in_replay), "During replay, individual queries should not time out unless they timed out because the whole workload timed out."
+            run_data.append(this_step_run_data)
+            current_step += 1
+
+            run_folder = repo.split("/")[-1]
+            if run_folder in folders and run_folder == folders[-1]:
+                break
+            progress_bar.update(1)
+
+    # Output.
+    run_data_df = pd.DataFrame(run_data)
+    pd.set_option('display.max_columns', 10)
+    print(f"Finished replaying with run_data_df=\n{run_data_df}\n. Data stored in {dbgym_cfg.cur_task_runs_path()}.")
+    run_data_df.to_csv(dbgym_cfg.cur_task_runs_data_path(REPLAY_DATA_FNAME), index=False)
+    pg_env.close()
\ No newline at end of file
diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py
index fecac110..c25eaf62 100644
--- a/tune/protox/agent/tune.py
+++ b/tune/protox/agent/tune.py
@@ -1,11 +1,12 @@
 import json
 import os
 from pathlib import Path
+import shutil
 import time

 import click
 import pandas as pd

-from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, conv_inputpath_to_realabspath, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn
+from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, WORKSPACE_PATH_PLACEHOLDER, DBGymConfig, TuningMode, conv_inputpath_to_realabspath, link_result, open_and_save, default_hpoed_agent_params_path, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, workload_name_fn, default_tuning_steps_dname
 from tune.protox.agent.coerce_config import coerce_config
 from tune.protox.agent.hpo import TuneTrial, build_space

@@ -37,7 +38,20 @@
     is_flag=True,
     help="Whether to enable the Boot query accelerator during the tuning process. Deciding to use Boot during tuning is separate from deciding to use Boot during HPO.",
 )
-def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool) -> None:
+@click.option(
+    "--boot-config-fpath-during-tune",
+    default=DEFAULT_BOOT_CONFIG_FPATH,
+    type=Path,
+    help="The path to the file configuring Boot when tuning. This may be a different Boot config than the one used for HPO.",
+)
+@click.option(
+    "--tune-duration-during-tune",
+    default=None,
+    type=float,
+    help="The number of hours to run the tuning agent for. If you do not specify this argument, it will be the same as --tune-duration-during-hpo."
+)
+def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end: int, query_subset: str, scale_factor: float, hpoed_agent_params_path: Path, enable_boot_during_tune: bool, boot_config_fpath_during_tune: Path, tune_duration_during_tune: float) -> None:
+    """IMPORTANT: The "tune" here is the one in "tune a DBMS". This is *different* from the "tune" in ray.tune.TuneConfig, which means to "tune hyperparameters"."""
     # Set args to defaults programmatically (do this before doing anything else in the function)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
     if hpoed_agent_params_path == None:
@@ -45,43 +59,65 @@ def tune(dbgym_cfg: DBGymConfig, benchmark_name: str, seed_start: int, seed_end:
     # Convert all input paths to absolute paths
     hpoed_agent_params_path = conv_inputpath_to_realabspath(dbgym_cfg, hpoed_agent_params_path)
+    boot_config_fpath_during_tune = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath_during_tune)

     # Tune
     with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f:
-        hpoed_params = json.load(f)
+        hpo_params = json.load(f)

     # Coerce using a dummy space.
-    hpoed_params = coerce_config(dbgym_cfg, build_space(
+    hpo_params = coerce_config(dbgym_cfg, build_space(
         sysknobs={},
         benchmark_config={},
         workload_path=Path(),
         embedder_path=[],
         pgconn_info={}
-    ), hpoed_params)
+    ), hpo_params)
+
+    # Set defaults that depend on hpo_params
+    if tune_duration_during_tune == None:
+        tune_duration_during_tune = hpo_params["tune_duration"][str(TuningMode.HPO)]

-    # Add configs to the hpoed_params that are allowed to differ between HPO and tuning.
-    # In general, for configs that can differ between HPO and tuning, I chose to append
-    # "_during_hpo"/"_during_tune" to the end of them instead of naming them the same
+    # Set the hpo_params that are allowed to differ between HPO, tuning, and replay.
+    # In general, for configs that can differ between HPO, tuning, and replay, I chose to put
+    # "*tune*" and "*hpo*" in their names instead of naming them the same
     # and overriding the config during tuning. It's just much less confusing if we
-    # make sure to never override any configs in hpoed_params.
-    hpoed_params["enable_boot_during_tune"] = enable_boot_during_tune
+    # make sure to never override any configs in hpo_params.
+    # Note that while we currently do not persist the hpo_params used during *tuning* back to
+    # a file, this is entirely possible to do in the future if needed.
+    hpo_params["enable_boot"][str(TuningMode.TUNE)] = enable_boot_during_tune
+    hpo_params["boot_config_fpath"][str(TuningMode.TUNE)] = boot_config_fpath_during_tune
+    hpo_params["tune_duration"][str(TuningMode.TUNE)] = tune_duration_during_tune
+    hpo_params["workload_timeout"][str(TuningMode.TUNE)] = hpo_params["workload_timeout"][str(TuningMode.HPO)]

     # Piggyback off the HPO magic.
-    t = TuneTrial(dbgym_cfg, False)
-    # This is a hack.
-    t.logdir = Path(dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True)) # type: ignore
-    t.logdir.mkdir(parents=True, exist_ok=True) # type: ignore
-    t.setup(hpoed_params)
+    tune_trial = TuneTrial(dbgym_cfg, TuningMode.TUNE)
+    tune_trial.setup(hpo_params)
     start = time.time()

     data = []
     step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv"

-    while (time.time() - start) < hpoed_params["duration"] * 3600:
-        data.append(t.step())
+    while (time.time() - start) < tune_duration_during_tune * 3600:
+        data.append(tune_trial.step())

         # Continuously write the file out.
         pd.DataFrame(data).to_csv(step_data_fpath, index=False)

-    t.cleanup()
+    tune_trial.cleanup()
+
     # Output the step data.
-    pd.DataFrame(data).to_csv(step_data_fpath, index=False)
\ No newline at end of file
+    pd.DataFrame(data).to_csv(step_data_fpath, index=False)
+
+    # Link the tuning steps data (this directory allows you to replay the tuning run).
+    tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps")
+    # Replaying requires params.json, so we also copy it into the tuning_steps/ directory.
+    # We copy hpoed_agent_params_path instead of moving it because hpoed_agent_params_path was generated in another task run.
+    # We copy instead of just symlinking so that tuning_steps/ is a fully self-contained directory.
+    hpoed_agent_params_copy_fpath = tuning_steps_dpath / "params.json"
+    shutil.copy(hpoed_agent_params_path, hpoed_agent_params_copy_fpath)
+    tuning_steps_link_dname = default_tuning_steps_dname(benchmark_name, workload_name, enable_boot_during_tune)
+    link_result(dbgym_cfg, tuning_steps_dpath, custom_result_name=tuning_steps_link_dname + ".link")
+    # We also create a link to hpoed_agent_params_path.
This is useful when we are _manually_ looking through + # run_*/ and want to see which other run_*/ was responsible for creating params.json + hpoed_agent_params_link_fpath = tuning_steps_dpath / "params.json.link" + os.symlink(hpoed_agent_params_path, hpoed_agent_params_link_fpath) diff --git a/tune/protox/agent/wolp/wolp.py b/tune/protox/agent/wolp/wolp.py index 7929d779..ba519258 100644 --- a/tune/protox/agent/wolp/wolp.py +++ b/tune/protox/agent/wolp/wolp.py @@ -53,6 +53,7 @@ def __init__( target_action_noise: Optional[ActionNoise] = None, seed: Optional[int] = None, neighbor_parameters: Dict[str, Any] = {}, + ray_trial_id: Optional[str] = None, ): super().__init__( policy, @@ -63,6 +64,7 @@ def __init__( gradient_steps, action_noise=action_noise, seed=seed, + ray_trial_id=ray_trial_id, ) self.target_action_noise = target_action_noise diff --git a/tune/protox/embedding/analyze.py b/tune/protox/embedding/analyze.py index 24746a9d..cdf6666c 100644 --- a/tune/protox/embedding/analyze.py +++ b/tune/protox/embedding/analyze.py @@ -64,11 +64,11 @@ def analyze_all_embeddings_parts(dbgym_cfg: DBGymConfig, num_parts: int, generic start_time = time.time() for part_i in range(num_parts): _analyze_embeddings_part(dbgym_cfg, part_i, generic_args, analyze_args) - duration = time.time() - start_time + analyze_all_parts_duration = time.time() - start_time with open( dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "analyze_all_time.txt", "w" ) as f: - f.write(f"{duration}") + f.write(f"{analyze_all_parts_duration}") def _analyze_embeddings_part(dbgym_cfg: DBGymConfig, part_i: int, generic_args: EmbeddingTrainGenericArgs, analyze_args: EmbeddingAnalyzeArgs): @@ -79,15 +79,15 @@ def _analyze_embeddings_part(dbgym_cfg: DBGymConfig, part_i: int, generic_args: start_time = time.time() _create_stats_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - duration = time.time() - start_time - with open(os.path.join(part_dpath, "stats_time.txt"), "w") as f: - f.write(f"{duration}") + analyze_part_duration = time.time() - start_time + with open(part_dpath / "stats_time.txt", "w") as f: + f.write(f"{analyze_part_duration}") start_time = time.time() _create_ranges_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - duration = time.time() - start_time - with open(os.path.join(part_dpath, "ranges_time.txt"), "w") as f: - f.write(f"{duration}") + create_range_duration = time.time() - start_time + with open(part_dpath / "ranges_time.txt", "w") as f: + f.write(f"{create_range_duration}") def _create_stats_for_part(dbgym_cfg: DBGymConfig, part_dpath: Path, generic_args: EmbeddingTrainGenericArgs, analyze_args: EmbeddingAnalyzeArgs): diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index a86d6b44..940a3dfd 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -257,9 +257,9 @@ def datagen( start_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) _gen_traindata_dir(dbgym_cfg, generic_args, dir_gen_args) _combine_traindata_dir_into_parquet(dbgym_cfg, generic_args, file_gen_args) - duration = time.time() - start_time + datagen_duration = time.time() - start_time with open(f"{dbgym_cfg.dbgym_this_run_path}/datagen_time.txt", "w") as f: - f.write(f"{duration}") + f.write(f"{datagen_duration}") stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) diff --git a/tune/protox/embedding/select.py b/tune/protox/embedding/select.py index df9c9194..936bd328 100644 --- a/tune/protox/embedding/select.py +++ b/tune/protox/embedding/select.py @@ -28,7 +28,7 @@ 
def select_best_embeddings(dbgym_cfg: DBGymConfig, generic_args: EmbeddingTrainG data = _attach(data, raw_data, select_args.idx_limit) curated_dpath = dbgym_cfg.cur_task_runs_data_path("curated", mkdir=True) - curated_results_fpath = dbgym_cfg.cur_task_runs_data_path(".", mkdir=True) / "curated_results.csv" + curated_results_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "curated_results.csv" data.to_csv( curated_results_fpath, index=False ) @@ -77,7 +77,7 @@ def select_best_embeddings(dbgym_cfg: DBGymConfig, generic_args: EmbeddingTrainG ) if loop_i == 0: - link_result(dbgym_cfg, model_dpath, custom_result_name=default_embedder_dname(generic_args.benchmark_name, generic_args.workload_name)) + link_result(dbgym_cfg, model_dpath, custom_result_name=default_embedder_dname(generic_args.benchmark_name, generic_args.workload_name) + ".link") info_txt.write(f"model{idx}/embedder.pth\n") idx += 1 diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py index 270e8159..20d73292 100644 --- a/tune/protox/embedding/train_all.py +++ b/tune/protox/embedding/train_all.py @@ -212,11 +212,11 @@ def train_all_embeddings( dtime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") run_config = RunConfig( name=f"ProtoXEmbeddingHPO_{dtime}", - storage_path=None, failure_config=FailureConfig(max_failures=0, fail_fast=True), sync_config=SyncConfig(), verbose=2, log_to_file=True, + storage_path=dbgym_cfg.cur_task_runs_path("embedding_ray_results", mkdir=True), ) resources = {"cpu": 1} @@ -250,9 +250,9 @@ def train_all_embeddings( print(f"Trial {results[i]} FAILED") assert False - duration = time.time() - start_time + train_all_embeddings_duration = time.time() - start_time with open(f"{dbgym_cfg.dbgym_this_run_path}/hpo_train_time.txt", "w") as f: - f.write(f"{duration}") + f.write(f"{train_all_embeddings_duration}") def _hpo_train( diff --git a/tune/protox/env/logger.py b/tune/protox/env/logger.py index 1ab8d2bb..12176780 100644 --- a/tune/protox/env/logger.py +++ b/tune/protox/env/logger.py @@ -1,6 +1,7 @@ import inspect import json import logging +import pickle import time from datetime import datetime from pathlib import Path @@ -11,6 +12,8 @@ from torch.utils.tensorboard import SummaryWriter # type: ignore from typing_extensions import ParamSpec +from misc.utils import DBGymConfig + P = ParamSpec("P") T = TypeVar("T") @@ -53,24 +56,23 @@ def default(self, obj: Any) -> Any: class Logger(object): def __init__( self, + dbgym_cfg: DBGymConfig, trace: bool, verbose: bool, - output_log_path: str, - repository_path: str, - tensorboard_path: str, ) -> None: + self.log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) self.trace = trace self.verbose = verbose - self.repository_path = repository_path - Path(repository_path).mkdir(parents=True, exist_ok=True) + self.tensorboard_dpath = self.log_dpath / "tboard" + self.tuning_steps_dpath = self.log_dpath / "tuning_steps" + self.tuning_steps_dpath.mkdir(parents=True, exist_ok=True) level = logging.INFO if not self.verbose else logging.DEBUG formatter = "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" logging.basicConfig(format=formatter, level=level, force=True) # Setup the file logger. 
- Path(output_log_path).mkdir(parents=True, exist_ok=True) - file_logger = logging.FileHandler("{}/output.log".format(output_log_path)) + file_logger = logging.FileHandler(self.tuning_steps_dpath / "output.log") file_logger.setFormatter(logging.Formatter(formatter)) file_logger.setLevel(level) logging.getLogger().addHandler(file_logger) @@ -78,8 +80,8 @@ def __init__( # Setup the writer. self.writer: Union[SummaryWriter, None] = None if self.trace: - Path(tensorboard_path).mkdir(parents=True, exist_ok=True) - self.writer = SummaryWriter(tensorboard_path) # type: ignore + self.tensorboard_dpath.mkdir(parents=True, exist_ok=True) + self.writer = SummaryWriter(self.tensorboard_dpath) # type: ignore self.iteration = 1 self.iteration_data: dict[str, Any] = {} @@ -90,27 +92,35 @@ def get_logger(self, name: Optional[str]) -> logging.Logger: return logging.getLogger(name) def stash_results( - self, info_dict: dict[str, Any], name_override: Optional[str] = None + self, info_dict: dict[str, Any], name_override: Optional[str] = None, ray_trial_id: Optional[str] = None, ) -> None: - time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - time = name_override if name_override else time - if info_dict["results"] is not None and Path(info_dict["results"]).exists(): - local["mv"][info_dict["results"], f"{self.repository_path}/{time}"].run() + """ + Stash data about this step of tuning so that it can be replayed. + """ + dname = name_override if name_override else datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + if ray_trial_id != None: + # Orthogonal to whether name_override is used, ray_trial_id disambiguates between folders created + # by different HPO trials so that the folders don't overwrite each other. + dname += f"_{ray_trial_id}" + + if info_dict["results_dpath"] is not None and Path(info_dict["results_dpath"]).exists(): + local["mv"][info_dict["results_dpath"], f"{self.tuning_steps_dpath}/{dname}"].run() else: - Path(f"{self.repository_path}/{time}").mkdir(parents=True, exist_ok=True) + Path(f"{self.tuning_steps_dpath}/{dname}").mkdir(parents=True, exist_ok=True) if info_dict["prior_pgconf"]: - local["mv"][ - info_dict["prior_pgconf"], f"{self.repository_path}/{time}/old_pg.conf" + local["cp"][ + info_dict["prior_pgconf"], f"{self.tuning_steps_dpath}/{dname}/old_pg.conf" ].run() if info_dict["prior_state_container"]: - with open(f"{self.repository_path}/{time}/prior_state.txt", "w") as f: - f.write(str(info_dict["prior_state_container"])) + with open(self.tuning_steps_dpath / dname / "prior_state.pkl", "wb") as f: + # info_dict["prior_state_container"] is a somewhat complex object so we use pickle over json + pickle.dump(info_dict["prior_state_container"], f) - if info_dict["action_json"]: - with open(f"{self.repository_path}/{time}/action.txt", "w") as f: - f.write(info_dict["action_json"]) + if info_dict["actions_info"]: + with open(self.tuning_steps_dpath / dname / "action.pkl", "wb") as f: + pickle.dump(info_dict["actions_info"], f) def advance(self) -> None: if self.writer is None: diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py index 02e4d124..61f1d277 100644 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ b/tune/protox/env/mqo/mqo_wrapper.py @@ -30,6 +30,12 @@ def _mutilate_action_with_metrics( query_metric_data: Optional[dict[str, BestQueryRun]], timeout_qknobs: Optional[QuerySpaceKnobAction] = None, ) -> HolonAction: + """ + Modify action to make it the one with the best query knobs out + of all variations we tried. 
+ """ + + # At the start of the function, the query knobs in `action` are those selected by the agent. if query_metric_data is not None: extract_q_knobs = action_space.extract_query(action) @@ -37,16 +43,21 @@ def _mutilate_action_with_metrics( processed = set() for q, data in query_metric_data.items(): - if not data.timeout: + # For queries where at least one variation didn't time out, modify the query knobs in `action` + # to be that from the best variation. + if not data.timed_out: assert data.query_run - pqk = data.query_run.qknobs for k, v in data.query_run.qknobs.items(): # Implant the best. extract_q_knobs[k] = v + # For all queries that we ran, even if all their variations time out, add them to `processed`. + # By doing so, the next part of the function will not affect queries where all variations timed + # out and will leave their knobs equal to the ones selected by the agent. processed.add(q) + # If we have set `timeout_qknobs`, then use those knobs for the queries that we didn't run at all. + # Usually, these `timeout_qknobs` are those of the "PrevDual" variation. if timeout_qknobs: - qspace = action_space.get_query_space() assert timeout_qknobs all_qids = set([k.query_name for k in timeout_qknobs.keys()]) - processed @@ -61,6 +72,16 @@ def _mutilate_action_with_metrics( extract_q_knobs[k] = v action = action_space.replace_query(action, extract_q_knobs) + + # There are three types of queries we handle in different ways. + # For queries that executed where at least one variation didn't time out, we can safely use the + # query knobs of their best variation. + # For queries that executed where all their variations timed out, we don't want to use the knobs + # in `timeout_qknobs` since those are known to be bad. Instead, we just use the knobs selected by + # by the agent, which may be different from the knobs of *all* variations. + # Finally, for queries that didn't execute, we'll assume that some arbitrary variation ("PrevDual") + # is probably better than the knobs set by the agent. 
+ return action @@ -114,7 +135,7 @@ def __init__( workload_eval_mode: str, workload_eval_inverse: bool, workload_eval_reset: bool, - pqt: int, + query_timeout: int, benchbase_config: dict[str, Any], env: gym.Env[Any, Any], logger: Optional[Logger], @@ -136,25 +157,25 @@ def __init__( self.workload_eval_mode = workload_eval_mode self.workload_eval_inverse = workload_eval_inverse self.workload_eval_reset = workload_eval_reset - self.pqt = pqt + self.query_timeout = query_timeout self.benchbase_config = benchbase_config self.best_observed: dict[str, BestQueryRun] = {} self.logger = logger def _update_best_observed(self, query_metric_data: dict[str, BestQueryRun], force_overwrite=False) -> None: if query_metric_data is not None: - for q, data in query_metric_data.items(): - if q not in self.best_observed or force_overwrite: - self.best_observed[q] = BestQueryRun(data.query_run, data.runtime, data.timeout, None, None) + for qid, best_run in query_metric_data.items(): + if qid not in self.best_observed or force_overwrite: + self.best_observed[qid] = BestQueryRun(best_run.query_run, best_run.runtime, best_run.timed_out, None, None) if self.logger: - self.logger.get_logger(__name__).debug(f"[best_observe] {q}: {data.runtime/1e6} (force: {force_overwrite})") - elif not data.timeout: - qobs = self.best_observed[q] - assert qobs.runtime and data.runtime - if data.runtime < qobs.runtime: - self.best_observed[q] = BestQueryRun(data.query_run, data.runtime, data.timeout, None, None) + self.logger.get_logger(__name__).debug(f"[best_observe] {qid}: {best_run.runtime/1e6} (force: {force_overwrite})") + elif not best_run.timed_out: + qobs = self.best_observed[qid] + assert qobs.runtime and best_run.runtime + if best_run.runtime < qobs.runtime: + self.best_observed[qid] = BestQueryRun(best_run.query_run, best_run.runtime, best_run.timed_out, None, None) if self.logger: - self.logger.get_logger(__name__).debug(f"[best_observe] {q}: {data.runtime/1e6}") + self.logger.get_logger(__name__).debug(f"[best_observe] {qid}: {best_run.runtime/1e6}") def step( # type: ignore self, @@ -191,7 +212,7 @@ def step( # type: ignore if self.workload_eval_mode in ["all", "all_enum", "global_dual"]: # Load the global (optimizer) knobs. qid_ams = parse_access_methods( - self.unwrapped.pgconn.conn(), self.unwrapped.workload.queries + self.unwrapped.pg_conn.conn(), self.unwrapped.workload.queries ) runs.append( ( @@ -268,19 +289,20 @@ def transmute( ) # Execute. + self.logger.get_logger(__name__).info("MQOWrapper called step_execute()") success, info = self.unwrapped.step_execute(success, runs, info) if info["query_metric_data"]: self._update_best_observed(info["query_metric_data"]) - action = _mutilate_action_with_metrics( + best_observed_holon_action = _mutilate_action_with_metrics( self.action_space, action, info["query_metric_data"], timeout_qknobs ) with torch.no_grad(): # Pass the mutilated action back through. 
assert isinstance(self.action_space, HolonSpace) - info["action_json"] = json.dumps(self.action_space.to_jsonable([action])) - info["maximal_embed"] = self.action_space.to_latent([action]) + info["actions_info"]["best_observed_holon_action"] = best_observed_holon_action + info["maximal_embed"] = self.action_space.to_latent([best_observed_holon_action]) return self.unwrapped.step_post_execute(success, action, info) @@ -326,18 +348,18 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: success, metric, _, - results, + results_dpath, _, target_metric_data, ) = self.unwrapped.workload.execute( - pgconn=self.unwrapped.pgconn, + pg_conn=self.unwrapped.pg_conn, reward_utility=self.unwrapped.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, actions=[r[1] for r in runs], - actions_names=[r[0] for r in runs], + variation_names=[r[0] for r in runs], benchbase_config=self.benchbase_config, - pqt=self.pqt, + query_timeout=self.query_timeout, reset_metrics=kwargs["options"]["query_metric_data"], update=False, first=False, @@ -358,7 +380,7 @@ def reset(self, *args: Any, **kwargs: Any) -> Tuple[Any, EnvInfoDict]: # type: # Reward should be irrelevant. If we do accidentally use it, cause an error. # Similarly, metric should be irrelevant. Do not shift the workload timeout. - info = EnvInfoDict({"metric": None, "reward": None, "results": results}) + info = EnvInfoDict({"metric": None, "reward": None, "results_dpath": results_dpath}) # Use this to adjust the container and state but don't shift the step. state, _, _, _, info = self.unwrapped.step_post_execute( True, action, info, soft=True diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index f13f1884..62fa92b8 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -8,10 +8,11 @@ import psycopg from plumbum import local -from misc.utils import DBGymConfig +from misc.utils import DBGymConfig, TuningMode from tune.protox.env.logger import Logger, time_record from tune.protox.env.space.holon_space import HolonSpace from tune.protox.env.space.state.space import StateSpace +from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs from tune.protox.env.types import ( EnvInfoDict, HolonAction, @@ -27,21 +28,21 @@ class PostgresEnv(gym.Env[Any, Any]): def __init__( self, dbgym_cfg: DBGymConfig, + tuning_mode: TuningMode, observation_space: StateSpace, action_space: HolonSpace, workload: Workload, horizon: int, reward_utility: RewardUtility, - pgconn: PostgresConn, - pqt: int, + pg_conn: PostgresConn, + query_timeout: int, benchbase_config: dict[str, Any], logger: Optional[Logger] = None, - replay: bool = False, ): super().__init__() self.dbgym_cfg = dbgym_cfg - self.replay = replay + self.tuning_mode = tuning_mode self.logger = logger self.action_space = action_space self.observation_space = observation_space @@ -50,8 +51,8 @@ def __init__( self.reward_utility = reward_utility self.benchbase_config = benchbase_config - self.pgconn = pgconn - self.pqt = pqt + self.pg_conn = pg_conn + self.query_timeout = query_timeout self.current_state: Optional[Any] = None self.baseline_metric: Optional[float] = None @@ -59,13 +60,13 @@ def __init__( def _restore_last_snapshot(self) -> None: assert self.horizon > 1 and self.workload.oltp_workload - assert self.pgconn.restore_checkpointed_snapshot() + assert self.pg_conn.restore_checkpointed_snapshot() assert isinstance(self.action_space, HolonSpace) 
self.state_container = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) @@ -105,24 +106,26 @@ def reset( # type: ignore if self.workload.oltp_workload and self.horizon == 1: # Restore a pristine snapshot of the world if OTLP and horizon = 1 - self.pgconn.restore_pristine_snapshot() + self.pg_conn.restore_pristine_snapshot() else: # Instead of restoring a pristine snapshot, just reset the knobs. # This in effect "resets" the baseline knob settings. - self.pgconn.start_with_changes(conf_changes=[]) + self.pg_conn.start_with_changes(conf_changes=[]) # Maneuver the state into the requested state/config. assert isinstance(self.action_space, HolonSpace) sc = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) config_changes, sql_commands = self.action_space.generate_plan_from_config( config, sc ) - assert self.shift_state(config_changes, sql_commands) + # Don't dump the page cache because we want to keep it warm to see the performance of + # workloads under a warm cache. + assert self.shift_state(config_changes, sql_commands, dump_page_cache=False) # Note that we do not actually update the baseline metric/reward used by the reward # utility. This is so the reward is not stochastic with respect to the starting state. @@ -142,8 +145,8 @@ def reset( # type: ignore else: # Restore a pristine snapshot of the world. - self.pgconn.restore_pristine_snapshot() - assert not self.replay + self.pg_conn.restore_pristine_snapshot() + assert self.tuning_mode != TuningMode.REPLAY # On the first time, run the benchmark to get the baseline. assert isinstance(self.observation_space, StateSpace) @@ -151,19 +154,19 @@ def reset( # type: ignore # Get the stock state container. sc = self.action_space.generate_state_container( - None, None, self.pgconn.conn(), self.workload.queries + None, None, self.pg_conn.conn(), self.workload.queries ) default_action = self.action_space.null_action(sc) - success, metric, _, results, _, query_metric_data = self.workload.execute( - pgconn=self.pgconn, + success, metric, _, results_dpath, _, query_metric_data = self.workload.execute( + pg_conn=self.pg_conn, reward_utility=self.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, actions=[default_action], - actions_names=["GlobalDual"], + variation_names=["GlobalDual"], benchbase_config=self.benchbase_config, - pqt=self.pqt, + query_timeout=self.query_timeout, update=False, first=True, ) @@ -174,11 +177,11 @@ def reset( # type: ignore self.state_container = self.action_space.generate_state_container( self.state_container, None, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) state = self.observation_space.construct_offline( - self.pgconn.conn(), results, self.state_container + self.pg_conn.conn(), results_dpath, self.state_container ) # Set the metric workload. @@ -194,10 +197,10 @@ def reset( # type: ignore "baseline_metric": metric, "baseline_reward": reward, "query_metric_data": query_metric_data, - "results": results, + "results_dpath": results_dpath, "prior_state_container": None, "prior_pgconf": None, - "action_json": None, + "actions_info": None, } ) self.baseline_metric = metric @@ -217,8 +220,8 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] # Get the prior state. 
prior_state = copy.deepcopy(self.state_container) # Save the old configuration file. - old_conf_path = f"{self.pgconn.pgdata_dpath}/postgresql.auto.conf" - conf_path = f"{self.pgconn.pgdata_dpath}/postgresql.auto.old" + old_conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.conf" + conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.old" local["cp"][old_conf_path, conf_path].run() # Figure out what we have to change to get to the new configuration. @@ -228,7 +231,9 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] action, prior_state ) # Attempt to maneuver to the new state. - success = self.shift_state(config_changes, sql_commands) + # Don't dump the page cache in shift_state() in order to see how the workload performs in + # a warm cache scenario. + success = self.shift_state(config_changes, sql_commands, dump_page_cache=False) return success, EnvInfoDict( { "attempted_changes": (config_changes, sql_commands), @@ -241,30 +246,32 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] def step_execute( self, setup_success: bool, - actions: list[Tuple[str, HolonAction]], + all_holon_action_variations: list[Tuple[str, HolonAction]], info: EnvInfoDict, ) -> Tuple[bool, EnvInfoDict]: if setup_success: assert isinstance(self.observation_space, StateSpace) assert isinstance(self.action_space, HolonSpace) # Evaluate the benchmark. - start_time = time.time() + self.logger.get_logger(__name__).info(f"\n\nfetch_server_knobs(): {fetch_server_knobs(self.pg_conn.conn(), self.action_space.get_knob_space().tables, self.action_space.get_knob_space().knobs, self.workload.queries)}\n\n") + self.logger.get_logger(__name__).info(f"\n\nfetch_server_indexes(): {fetch_server_indexes(self.pg_conn.conn(), self.action_space.get_knob_space().tables)}\n\n") + self.logger.get_logger(__name__).info(f"\n\naction_names: {[a[0] for a in all_holon_action_variations]}\n\n") ( success, metric, reward, - results, - q_timeout, + results_dpath, + did_anything_time_out, query_metric_data, ) = self.workload.execute( - pgconn=self.pgconn, + pg_conn=self.pg_conn, reward_utility=self.reward_utility, - obs_space=self.observation_space, + observation_space=self.observation_space, action_space=self.action_space, benchbase_config=self.benchbase_config, - pqt=self.pqt, - actions=[a[1] for a in actions], - actions_names=[a[0] for a in actions], + query_timeout=self.query_timeout, + actions=[a[1] for a in all_holon_action_variations], + variation_names=[a[0] for a in all_holon_action_variations], update=True, ) else: @@ -276,19 +283,20 @@ def step_execute( success = False # Since we reached an invalid area, just set the next state to be the current state. 
metric, reward = self.reward_utility(did_error=True) - results, q_timeout, query_metric_data = None, True, None + results_dpath, did_anything_time_out, query_metric_data = None, True, None + # Build EnvInfoDict info.update( EnvInfoDict( { "metric": metric, - "q_timeout": q_timeout, + "did_anything_time_out": did_anything_time_out, "query_metric_data": query_metric_data, "reward": reward, - "results": results, - "action_json": json.dumps( - self.action_space.to_jsonable([a[1] for a in actions]) - ), + "results_dpath": results_dpath, + "actions_info": { + "all_holon_action_variations": all_holon_action_variations, + }, } ) ) @@ -319,14 +327,14 @@ def step_post_execute( self.state_container = self.action_space.generate_state_container( self.state_container, action, - self.pgconn.conn(), + self.pg_conn.conn(), self.workload.queries, ) # Now. The state container should be accurate. assert isinstance(self.observation_space, StateSpace) next_state = self.observation_space.construct_offline( - self.pgconn.conn(), info["results"], self.state_container + self.pg_conn.conn(), info["results_dpath"], self.state_container ) else: assert self.current_state @@ -346,7 +354,7 @@ def step_post_execute( def step( # type: ignore self, action: HolonAction ) -> Tuple[Any, float, bool, bool, EnvInfoDict]: - assert not self.replay + assert self.tuning_mode != TuningMode.REPLAY success, info = self.step_before_execution(action) success, info = self.step_execute(success, [("PerQuery", action)], info) return self.step_post_execute(success, action, info) @@ -357,7 +365,6 @@ def shift_state( config_changes: list[str], sql_commands: list[str], dump_page_cache: bool = False, - ignore_error: bool = False, ) -> bool: def attempt_checkpoint(conn_str: str) -> None: # CHECKPOINT to prevent the DBMS from entering a super slow shutdown @@ -389,7 +396,7 @@ def attempt_checkpoint(conn_str: str) -> None: f"Executing {sql} [{i+1}/{len(sql_commands)}]" ) - ret, stderr = self.pgconn.psql(sql) + ret, stderr = self.pg_conn.psql(sql) if ret == -1: if stderr: print(stderr, flush=True) @@ -399,23 +406,23 @@ def attempt_checkpoint(conn_str: str) -> None: # We've killed the index operation. or "operational" in stderr ) - attempt_checkpoint(self.pgconn.get_connstr()) + attempt_checkpoint(self.pg_conn.get_connstr()) return False assert ret == 0, print(stderr) # Now try and perform the configuration changes. 
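# What shift_state() amounts to end to end, as a self-contained sketch (pg_conn here is a
# stand-in with simplified, hypothetical method signatures; the real method also handles
# checkpointing and psql error recovery, as shown in this hunk):

def shift_state_sketch(pg_conn, config_changes: list, sql_commands: list,
                       dump_page_cache: bool = False) -> bool:
    # Phase 1: apply structural changes (e.g. CREATE INDEX / DROP INDEX) through psql.
    for sql in sql_commands:
        ret, _stderr = pg_conn.psql(sql)
        if ret != 0:
            return False
    # Phase 2: restart Postgres with the new knob values. Passing dump_page_cache=False
    # keeps the OS page cache warm across the restart, which is why both call sites in
    # this diff pass it explicitly.
    return pg_conn.start_with_changes(conf_changes=config_changes,
                                      dump_page_cache=dump_page_cache)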
- return self.pgconn.start_with_changes( + return self.pg_conn.start_with_changes( conf_changes=config_changes, dump_page_cache=dump_page_cache, save_checkpoint=self.workload.oltp_workload and self.horizon > 1, ) def close(self) -> None: - self.pgconn.shutdown_postgres() + self.pg_conn.shutdown_postgres() # This file may not be in [workspace]/tmp/, so it's important to delete it - local["rm"]["-rf", self.pgconn.pgdata_dpath].run() + local["rm"]["-rf", self.pg_conn.pgdata_dpath].run() # Even though these files get deleted because [workspace]/tmp/ gets deleted, # we'll just delete them here anyway to be safe - local["rm"]["-f", self.pgconn.checkpoint_pgdata_snapshot_fpath].run() - local["rm"]["-f", f"{self.pgconn.checkpoint_pgdata_snapshot_fpath}.tmp"].run() + local["rm"]["-f", self.pg_conn.checkpoint_pgdata_snapshot_fpath].run() + local["rm"]["-f", f"{self.pg_conn.checkpoint_pgdata_snapshot_fpath}.tmp"].run() diff --git a/tune/protox/env/space/holon_space.py b/tune/protox/env/space/holon_space.py index f6f25cb9..b0fe9538 100644 --- a/tune/protox/env/space/holon_space.py +++ b/tune/protox/env/space/holon_space.py @@ -368,4 +368,4 @@ def generate_plan_from_config( assert len(outputs) == 3 config_changes = list(itertools.chain(*[o[0] for o in outputs])) sql_commands = list(itertools.chain(*[o[1] for o in outputs])) - return config_changes, sql_commands + return config_changes, sql_commands \ No newline at end of file diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py index ad357be0..7fcc1509 100644 --- a/tune/protox/env/space/primitive/index.py +++ b/tune/protox/env/space/primitive/index.py @@ -6,7 +6,8 @@ class IndexAction(object): IA = TypeVar("IA", bound="IndexAction") - index_counter: ClassVar[int] = 0 + index_name_counter: ClassVar[int] = 0 + index_name_map: ClassVar[dict["IndexAction", str]] = dict() def __init__( self, @@ -26,7 +27,6 @@ def __init__( self.inc_names = inc_names self.raw_repr = raw_repr self.bias = bias - self._idx_name: Optional[str] = None @property def is_valid(self) -> bool: @@ -54,20 +54,11 @@ def construct_md( raw_repr=None, bias=0.0, ) - ia._idx_name = idx_name + assert ia.get_index_name() == idx_name, f"ia.get_index_name()={ia.get_index_name()} but idx_name={idx_name}" return ia - @property - def idx_name(self) -> str: - if self._idx_name is not None: - return self._idx_name - - IndexAction.index_counter += 1 - self._idx_name = f"index{IndexAction.index_counter}" - return self._idx_name - def sql(self, add: bool, allow_fail: bool = False) -> str: - idx_name = self.idx_name + idx_name = self.get_index_name() if not add: if allow_fail: return f"DROP INDEX IF EXISTS {idx_name}" @@ -86,6 +77,15 @@ def sql(self, add: bool, allow_fail: bool = False) -> str: ), ) + # A given index name (like "index5") maps one-to-one to the function of an + # index (i.e. its table, columns, etc.). + def get_index_name(self) -> str: + if self not in IndexAction.index_name_map: + IndexAction.index_name_map[self] = f"index{IndexAction.index_name_counter}" + IndexAction.index_name_counter += 1 + + return IndexAction.index_name_map[self] + # This equality/hash mechanism is purely based off of index identity. # We ensure that all other flags are exclusive from a "validity" pre-check.
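# A self-contained sketch of the naming scheme get_index_name() implements (hypothetical
# class, not the repo's): because equality and hashing key only on the index's logical
# identity, equal actions share one memoized name.

class _NamedIndex:
    _counter = 0
    _names: dict["_NamedIndex", str] = {}

    def __init__(self, tbl: str, cols: tuple):
        self.tbl, self.cols = tbl, cols

    def __eq__(self, other: object) -> bool:
        return isinstance(other, _NamedIndex) and (self.tbl, self.cols) == (other.tbl, other.cols)

    def __hash__(self) -> int:
        return hash((self.tbl, self.cols))

    def name(self) -> str:
        # First equal instance to ask mints the name; later ones reuse it.
        if self not in _NamedIndex._names:
            _NamedIndex._names[self] = f"index{_NamedIndex._counter}"
            _NamedIndex._counter += 1
        return _NamedIndex._names[self]

# Reconstructing an equal action yields the same name, which is what lets construct_md()
# assert that the recomputed name matches the stored one.
assert _NamedIndex("t", ("a",)).name() == _NamedIndex("t", ("a",)).name()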
# @@ -97,12 +97,13 @@ def __eq__(self, other: object) -> bool: assert isinstance(other, IndexAction) ts = set(self.inc_names) os = set(other.inc_names) - return ( + is_eq = ( self.idx_type == other.idx_type and self.tbl_name == other.tbl_name and self.columns == other.columns and ts == os ) + return is_eq return False def __hash__(self) -> int: @@ -116,10 +117,9 @@ def __hash__(self) -> int: ) return h - def __repr__(self, add: bool = True) -> str: - return "{a} {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( - a="CREATE" if add else "NOOP", - idx_name=self.idx_name, + def __repr__(self) -> str: + return "CREATE {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( + idx_name=self.get_index_name(), tbl_name=self.tbl_name, idx_type=self.idx_type, columns=",".join(self.columns), diff --git a/tune/protox/env/space/state/metric.py b/tune/protox/env/space/state/metric.py index 948dff92..099fde14 100644 --- a/tune/protox/env/space/state/metric.py +++ b/tune/protox/env/space/state/metric.py @@ -152,10 +152,10 @@ def __init__( self.internal_spaces[metric] = Box(low=-np.inf, high=np.inf) super().__init__(self.internal_spaces, seed) - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: - assert results is not None - assert Path(results).exists() - metric_files = [f for f in Path(results).rglob("*metrics.json")] + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: + assert results_dpath is not None + assert Path(results_dpath).exists() + metric_files = [f for f in Path(results_dpath).rglob("*metrics.json")] if len(metric_files) != 2: return False diff --git a/tune/protox/env/space/state/space.py b/tune/protox/env/space/state/space.py index f7baa3bc..8119818b 100644 --- a/tune/protox/env/space/state/space.py +++ b/tune/protox/env/space/state/space.py @@ -14,7 +14,7 @@ def require_metrics(self) -> bool: pass @abstractmethod - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: pass @abstractmethod diff --git a/tune/protox/env/space/state/structure.py b/tune/protox/env/space/state/structure.py index d1a09986..df681a2d 100644 --- a/tune/protox/env/space/state/structure.py +++ b/tune/protox/env/space/state/structure.py @@ -50,7 +50,7 @@ def __init__( def require_metrics(self) -> bool: return False - def check_benchbase(self, dbgym_cfg: DBGymConfig, results: Union[str, Path]) -> bool: + def check_benchbase(self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path]) -> bool: # We don't use benchbase metrics anyways. 
return True diff --git a/tune/protox/env/target_reset/target_reset_wrapper.py b/tune/protox/env/target_reset/target_reset_wrapper.py index 519a5d58..800ec60a 100644 --- a/tune/protox/env/target_reset/target_reset_wrapper.py +++ b/tune/protox/env/target_reset/target_reset_wrapper.py @@ -41,12 +41,12 @@ def step( # type: ignore obs, rews, terms, truncs, infos = self.env.step(*args, **kwargs) query_metric_data = infos.get("query_metric_data", None) assert self.best_metric is not None - q_timeout = infos.get("q_timeout", False) + did_anything_time_out = infos.get("did_anything_time_out", False) metric = infos["metric"] if self.reward_utility.is_perf_better(metric, self.best_metric): self.best_metric = infos["metric"] - if not q_timeout: + if not did_anything_time_out: self.real_best_metric = self.best_metric if self.maximize_state: diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py index 0ee36f85..976317ed 100644 --- a/tune/protox/env/types.py +++ b/tune/protox/env/types.py @@ -136,7 +136,7 @@ class ServerIndexMetadata(TypedDict, total=False): [ ("query_run", Optional[QueryRun]), ("runtime", Optional[float]), - ("timeout", bool), + ("timed_out", bool), ("explain_data", Optional[Any]), ("metric_data", Optional[dict[str, Any]]), ], @@ -174,7 +174,7 @@ class EnvInfoDict(TypedDict, total=False): # Data generated from each run. best_query_run_data: dict[str, BestQueryRun] # Path to run artifacts. - results: Optional[Union[str, Path]] + results_dpath: Optional[Union[str, Path]] # Previous state container. prior_state_container: Optional[HolonStateContainer] @@ -188,12 +188,13 @@ class EnvInfoDict(TypedDict, total=False): metric: float # Reward of this step. reward: float - # Whether any queries timed out during this step's evaluation. - q_timeout: bool + # Whether any queries timed out or the workload as a whole timed out. + did_anything_time_out: bool # Query metric data. query_metric_data: Optional[dict[str, BestQueryRun]] - # JSON of the action that was executed. - action_json: Optional[str] + # Information about the actions that were executed this step. + # The actions are in a format usable by replay. (TODO(phw2)) + actions_info: Tuple["KnobSpaceAction", "IndexAction", "QuerySpaceAction"] # ProtoAction of the altered step action. maximal_embed: ProtoAction diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py index 1dc09e74..f991c257 100644 --- a/tune/protox/env/util/execute.py +++ b/tune/protox/env/util/execute.py @@ -37,7 +37,7 @@ def _time_query( query: str, timeout: float, ) -> Tuple[float, bool, Any]: - has_timeout = False + did_time_out = False has_explain = "EXPLAIN" in query explain_data = None @@ -63,11 +63,11 @@ def _time_query( f"{prefix} exceeded evaluation timeout {timeout}" ) qid_runtime = timeout * 1e6 - has_timeout = True + did_time_out = True except Exception as e: assert False, print(e) # qid_runtime is in microseconds. 
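# Unit bookkeeping worth making explicit, since this file mixes three scales (values
# below are illustrative, not from a real run):
runtime_us = 2_500_000.0            # _time_query returns microseconds
runtime_s = runtime_us / 1.0e6      # seconds, as compared against workload budgets
statement_timeout_ms = 30.0 * 1000  # seconds -> milliseconds for _force_statement_timeout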
- return qid_runtime, has_timeout, explain_data + return qid_runtime, did_time_out, explain_data def _acquire_metrics_around_query( @@ -75,44 +75,46 @@ def _acquire_metrics_around_query( prefix: str, connection: psycopg.Connection[Any], query: str, - pqt: float = 0.0, - obs_space: Optional[StateSpace] = None, + query_timeout: float = 0.0, + observation_space: Optional[StateSpace] = None, ) -> Tuple[float, bool, Any, Any]: _force_statement_timeout(connection, 0) - if obs_space and obs_space.require_metrics(): - initial_metrics = obs_space.construct_online(connection) + if observation_space and observation_space.require_metrics(): + initial_metrics = observation_space.construct_online(connection) - if pqt > 0: - _force_statement_timeout(connection, pqt * 1000) + if query_timeout > 0: + _force_statement_timeout(connection, query_timeout * 1000) + else: + assert query_timeout == 0, f"Setting query_timeout to 0 indicates \"no timeout\". However, setting query_timeout ({query_timeout}) < 0 is a bug." - qid_runtime, did_timeout, explain_data = _time_query( - logger, prefix, connection, query, pqt + qid_runtime, did_time_out, explain_data = _time_query( + logger, prefix, connection, query, query_timeout ) # Wipe the statement timeout. _force_statement_timeout(connection, 0) - if obs_space and obs_space.require_metrics(): - final_metrics = obs_space.construct_online(connection) - diff = obs_space.state_delta(initial_metrics, final_metrics) + if observation_space and observation_space.require_metrics(): + final_metrics = observation_space.construct_online(connection) + diff = observation_space.state_delta(initial_metrics, final_metrics) else: diff = None # qid_runtime is in microseconds. - return qid_runtime, did_timeout, explain_data, diff + return qid_runtime, did_time_out, explain_data, diff def execute_variations( connection: psycopg.Connection[Any], runs: list[QueryRun], query: str, - pqt: float = 0, + query_timeout: float = 0, logger: Optional[Logger] = None, sysknobs: Optional[KnobSpaceAction] = None, - obs_space: Optional[StateSpace] = None, + observation_space: Optional[StateSpace] = None, ) -> BestQueryRun: # Initial timeout. - timeout_limit = pqt + timeout_limit = query_timeout # Best run invocation. best_qr = BestQueryRun(None, None, True, None, None) @@ -140,16 +142,16 @@ def execute_variations( if logger: logger.get_logger(__name__).debug(f"{qr.prefix_qid} executing with {pqkk}") - runtime, did_timeout, explain_data, metric = _acquire_metrics_around_query( + runtime, did_time_out, explain_data, metric = _acquire_metrics_around_query( logger=logger, prefix=qr.prefix_qid, connection=connection, query=pqk_query, - pqt=timeout_limit, - obs_space=obs_space, + query_timeout=timeout_limit, + observation_space=observation_space, ) - if not did_timeout: + if not did_time_out: new_timeout_limit = math.ceil(runtime / 1e3) / 1.0e3 if new_timeout_limit < timeout_limit: timeout_limit = new_timeout_limit @@ -159,7 +161,7 @@ def execute_variations( best_qr = BestQueryRun( qr, runtime, - did_timeout, + did_time_out, explain_data, metric, ) diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py index 496a5232..3a4f0207 100644 --- a/tune/protox/env/util/pg_conn.py +++ b/tune/protox/env/util/pg_conn.py @@ -1,10 +1,10 @@ -''' +""" At a high level, this file's goal is to provide helpers to manage a Postgres instance during agent tuning. On the other hand, the goal of dbms.postgres.cli is to (1) install+build postgres and (2) create pgdata.
util.pg provides helpers used by *both* of the above files (as well as other files). -''' +""" import os import shutil import threading @@ -31,18 +31,15 @@ def __init__( pristine_pgdata_snapshot_fpath: Path, pgdata_parent_dpath: Path, pgbin_path: Union[str, Path], - postgres_logs_dir: Union[str, Path], connect_timeout: int, enable_boot: bool, boot_config_fpath: Path, logger: Logger, ) -> None: - Path(postgres_logs_dir).mkdir(parents=True, exist_ok=True) self.dbgym_cfg = dbgym_cfg self.pgport = pgport self.pgbin_path = pgbin_path - self.postgres_logs_dir = postgres_logs_dir self.connect_timeout = connect_timeout self.enable_boot = enable_boot self.boot_config_fpath = boot_config_fpath @@ -82,10 +79,12 @@ def disconnect(self) -> None: self._conn = None def move_log(self) -> None: - if Path(f"{self.postgres_logs_dir}/pg.log").exists(): + pglog_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log" + pglog_this_step_fpath = self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log.{self.log_step}" + if pglog_fpath.exists(): shutil.move( - f"{self.postgres_logs_dir}/pg.log", - f"{self.postgres_logs_dir}/pg.log.{self.log_step}", + pglog_fpath, + pglog_this_step_fpath ) self.log_step += 1 @@ -127,9 +126,9 @@ def start_with_changes( dump_page_cache: bool = False, save_checkpoint: bool = False, ) -> bool: - ''' + """ This function assumes that some snapshot has already been untarred into self.pgdata_dpath - ''' + """ # Install the new configuration changes. if conf_changes is not None: if SHARED_PRELOAD_LIBRARIES: @@ -176,7 +175,9 @@ def start_with_changes( "-t", "180", "-l", - f"{self.postgres_logs_dir}/pg.log", + # We log to pg{self.pgport}.log instead of pg.log so that different PostgresConn objects + # don't all try to write to the same file. + self.dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / f"pg{self.pgport}.log", "start", ].run(retcode=None) @@ -245,7 +246,7 @@ def start_with_changes( return True def _set_up_boot(self, intelligent_cache: bool, early_stop: bool, seq_sample: bool, seq_sample_pct: int, seq_sample_seed: int, mu_hyp_opt: float, mu_hyp_time: int, mu_hyp_stdev: float): - ''' + """ Sets up Boot on the currently running Postgres instance. Uses instance vars of PostgresConn for configuration. I chose not to encode any "default values" in this function. This is so that one can tell exactly which values were used in a given experiment by looking only at the config file. If we did encode "default values" in the function, we would need to know the state of the code at the time of the experiment, which is very difficult in the general case.
- ''' + """ # If any of these commands fail, they'll throw a Python exception # Thus, if none of them throw an exception, we know they passed self.logger.get_logger(__name__).debug("Setting up boot") diff --git a/tune/protox/env/util/reward.py b/tune/protox/env/util/reward.py index e8db6f43..ba01b8a0 100644 --- a/tune/protox/env/util/reward.py +++ b/tune/protox/env/util/reward.py @@ -88,15 +88,14 @@ def __parse_runtime_for_metric(self, parent: Union[str, Path]) -> float: summary = [f for f in Path(parent).rglob("*.raw.csv")][0] data = pd.read_csv(summary) - assert len(data.columns) == 6 - - sum_data = data.sum() - latency: float = sum_data["Latency (microseconds)"] - return latency / 1.0e6 + assert len(data.columns) == 7 + summed_data = data.sum() + summed_latency: float = summed_data["Latency (microseconds)"] + return summed_latency / 1.0e6 def __call__( self, - result_dir: Union[str, Path, None] = None, + results_dpath: Union[str, Path, None] = None, metric: Optional[float] = None, update: bool = True, did_error: bool = False, @@ -109,14 +108,14 @@ def __call__( # param * (new_tps / old_tps) + (1 - param) * (max(min_mem, new_mem) / min_mem) # # (min_mem: the minimum memory before we start trading...) - assert did_error or result_dir is not None or metric is not None + assert did_error or results_dpath is not None or metric is not None self.logger.get_logger(__name__).debug( - f"[reward_calc]: {result_dir} {metric} {update} {did_error}" + f"[reward_calc]: {results_dpath} {metric} {update} {did_error}" ) if metric is None: # Either it errored or we have a result directory to process. - assert did_error or result_dir + assert did_error or results_dpath # Extract the metric if we're running it manually. metric_fn = ( @@ -128,8 +127,8 @@ def __call__( if did_error: metric = self.worst_perf else: - assert result_dir - metric = metric_fn(result_dir) + assert results_dpath + metric = metric_fn(results_dpath) actual_r = None assert metric is not None diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py index b1bf9391..2b3c7e8c 100644 --- a/tune/protox/env/workload.py +++ b/tune/protox/env/workload.py @@ -68,6 +68,7 @@ def _crunch( pid: Optional[int], query_spec: QuerySpec, ) -> None: + assert all(sql[1].exists() and not sql[1].is_symlink() and sql[1].is_absolute() for sql in sqls), f"sqls ({sqls}) should contain only existing, absolute, non-symlink paths."
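# The assert above pins the invariant for workload SQL paths: existing, absolute, and
# fully resolved (no symlinks). A normalization helper that establishes it (illustrative;
# the repo builds these paths elsewhere):
from pathlib import Path

def to_real_abs_path(p: str) -> Path:
    real = Path(p).resolve()  # makes the path absolute and resolves symlinks
    assert real.exists() and real.is_absolute() and not real.is_symlink()
    return real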
do_tbl_include_subsets_prune = query_spec["tbl_include_subsets_prune"] self.order = [] self.queries = QueryMap({}) @@ -256,7 +257,7 @@ def __init__( sqls = [ ( line.split(",")[0], - self.workload_path / line.split(",")[1], + Path(line.split(",")[1]), 1.0, ) for line in lines @@ -270,7 +271,7 @@ def __init__( sqls = [ ( split[0], - self.workload_path / split[1], + Path(split[1]), float(split[2]), ) for split in splits @@ -328,32 +329,32 @@ def column_usages(self) -> TableAttrListMap: def max_indexable(self) -> int: return max([len(cols) for _, cols in self.query_usages.items()]) + @staticmethod + def compute_total_workload_runtime(qid_runtime_data: dict[str, BestQueryRun]) -> float: + return sum(best_run.runtime for best_run in qid_runtime_data.values()) / 1.0e6 + @time_record("execute") - def _execute_workload( + def execute_workload( self, - pgconn: PostgresConn, + pg_conn: PostgresConn, actions: list[HolonAction] = [], - actions_names: list[str] = [], - results: Optional[Union[str, Path]] = None, - obs_space: Optional[StateSpace] = None, + variation_names: list[str] = [], + results_dpath: Optional[Union[str, Path]] = None, + observation_space: Optional[StateSpace] = None, action_space: Optional[HolonSpace] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, override_workload_timeout: Optional[float] = None, - pqt: Optional[int] = None, + query_timeout: Optional[int] = None, workload_qdir: Optional[Tuple[Union[str, Path], Union[str, Path]]] = None, - disable_pg_hint: bool = False, blocklist: list[str] = [], first: bool = False, - ) -> Union[float, Tuple[bool, bool, dict[str, Any]]]: - workload_timeout = ( + ) -> Tuple[int, bool, dict[str, Any]]: + this_execution_workload_timeout = ( self.workload_timeout if not override_workload_timeout else override_workload_timeout ) - assert len(actions) == len(actions_names) - - # Do we need metrics. - need_metric = False if not obs_space else obs_space.require_metrics() + assert len(actions) == len(variation_names) sysknobs = KnobSpaceAction({}) ql_knobs = [] @@ -379,7 +380,7 @@ def _execute_workload( for action in actions ], ) - + # Figure out workload to execute. if workload_qdir is not None and workload_qdir[0] is not None: # Load actual queries to execute. @@ -403,13 +404,11 @@ def _execute_workload( actual_queries = self.queries # Now let us start executing. - workload_time = 0.0 - time_left = workload_timeout - qid_runtime_data = {} - stop_running = False + qid_runtime_data: dict[str, BestQueryRun] = {} + workload_timed_out = False for execute_idx, qid in enumerate(actual_order): - if stop_running: + if workload_timed_out: break queries = actual_queries[qid] @@ -422,126 +421,85 @@ def _execute_workload( if sql_type != QueryType.SELECT: # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. assert sql_type != QueryType.INS_UPD_DEL - pgconn.conn().execute(query) + pg_conn.conn().execute(query) continue - if disable_pg_hint: - assert len(ql_knobs) == 1 - ql_knob = ql_knobs[0] - qid_knobs = { - ql_knob[0].knobs[k]: ql_knob[1][k] - for k in ql_knob[1].keys() - if f"{qid}_" in k - } - - # Alter the session first. - disable = ";".join( - [ - f"SET {knob.knob_name} = OFF" - for knob, value in qid_knobs.items() - if value == 0 - ] + # De-duplicate the runs. 
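# In isolation, the de-duplication below keeps only the first QueryRun per distinct
# per-query knob binding, since identical qknobs would measure the same plan twice.
# Plain tuples stand in for QueryRun(prefix, prefix_qid, qknobs):
zruns_sketch = [("a", "a_q1", {"k": 0}), ("b", "b_q1", {"k": 0}), ("c", "c_q1", {"k": 1})]
runs_sketch: list = []
for r_ in zruns_sketch:
    if r_[2] not in [rr[2] for rr in runs_sketch]:
        runs_sketch.append(r_)
assert [r_[0] for r_ in runs_sketch] == ["a", "c"]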
+ runs: list[QueryRun] = [] + zruns: list[QueryRun] = [ + QueryRun( + act_name, + f"{act_name}_{qid}", + QuerySpaceKnobAction( + { + ql_knob[0].knobs[k]: ql_knob[1][k] + for k in ql_knob[1].keys() + if f"{qid}_" in k + } + ), ) - pgconn.conn().execute(disable) - - qid_runtime, _, _, _ = _acquire_metrics_around_query( - self.logger, - f"{qid}", - pgconn.conn(), - query, - pqt=time_left, - obs_space=None, + for ql_knob, act_name in zip(ql_knobs, variation_names) + ] + for r in zruns: + if r[2] not in [rr[2] for rr in runs]: + runs.append(r) + + target_pqt = query_timeout if query_timeout else this_execution_workload_timeout + skip_execute = False + if ( + reset_metrics is not None + and qid in reset_metrics + and not reset_metrics[qid].timed_out + ): + # If we have a reset metric, use its timeout and convert to seconds. + truntime = reset_metrics[qid].runtime + assert truntime is not None + target_pqt = math.ceil(truntime / 1.0e6) + + # If we've seen the exact same query knobs before, skip it. + rmetrics = reset_metrics[qid] + skip_execute = ( + (rmetrics.query_run is not None) + and (rmetrics.query_run.qknobs is not None) + and (rmetrics.query_run.qknobs == runs[-1].qknobs) ) - undo_disable = ";".join( - [ - f"SET {knob.knob_name} = ON" - for knob, value in qid_knobs.items() - if value == 0 - ] + if not skip_execute: + best_run: BestQueryRun = execute_variations( + connection=pg_conn.conn(), + runs=runs, + query=query, + query_timeout=min(target_pqt, this_execution_workload_timeout - Workload.compute_total_workload_runtime(qid_runtime_data) + 1), + logger=self.logger, + sysknobs=sysknobs, + observation_space=observation_space, ) - pgconn.conn().execute(undo_disable) - else: - # De-duplicate the runs. - runs: list[QueryRun] = [] - zruns: list[QueryRun] = [ - QueryRun( - act_name, - f"{act_name}_{qid}", - QuerySpaceKnobAction( - { - ql_knob[0].knobs[k]: ql_knob[1][k] - for k in ql_knob[1].keys() - if f"{qid}_" in k - } - ), - ) - for ql_knob, act_name in zip(ql_knobs, actions_names) - ] - for r in zruns: - if r[2] not in [rr[2] for rr in runs]: - runs.append(r) - - target_pqt = pqt if pqt else workload_timeout - skip_execute = False - if ( - reset_metrics is not None - and qid in reset_metrics - and not reset_metrics[qid].timeout + assert reset_metrics + best_run = reset_metrics[qid] + + if reset_metrics is not None and qid in reset_metrics: + # Old one is actually better so let's use that.
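# The comparison below, stated compactly: prefer the reset-time measurement whenever the
# new run timed out, or when both finished and the old runtime is strictly smaller.
# A minimal stand-in for BestQueryRun (illustrative, not the repo's type):
from dataclasses import dataclass
from typing import Optional

@dataclass
class _Run:
    runtime: Optional[float]  # microseconds
    timed_out: bool

def pick_best(new: _Run, old: _Run) -> _Run:
    # A timed-out new run, or a strictly faster old run, keeps the old measurement.
    if new.timed_out or (new.runtime and old.runtime and old.runtime < new.runtime):
        return old
    return new

assert pick_best(_Run(None, True), _Run(2e6, False)) == _Run(2e6, False)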
- rmetric = reset_metrics[qid] - if best_run.timeout or ( - best_run.runtime - and rmetric.runtime - and rmetric.runtime < best_run.runtime - ): - best_run = rmetric - - assert best_run.runtime - qid_runtime_data[qid] = best_run - qid_runtime = best_run.runtime - - time_left -= qid_runtime / 1e6 - workload_time += qid_runtime / 1e6 - if time_left < 0: + assert best_run.runtime + qid_runtime_data[qid] = best_run + + if Workload.compute_total_workload_runtime(qid_runtime_data) > this_execution_workload_timeout: # We need to undo any potential statements after the timed out query. for st, rq in queries[qidx+1:]: if st != QueryType.SELECT: # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. If we do have INS_UPD_DEL queries, our "undo" logic will likely have to change. assert st != QueryType.INS_UPD_DEL - pgconn.conn().execute(rq) + pg_conn.conn().execute(rq) - stop_running = True + workload_timed_out = True break # Undo any necessary state changes. @@ -551,15 +509,15 @@ def _execute_workload( assert sql_type != QueryType.UNKNOWN if sql_type != QueryType.SELECT: assert sql_type != QueryType.INS_UPD_DEL - pgconn.conn().execute(query) + pg_conn.conn().execute(query) - if results is not None: + if results_dpath is not None: # Make the result directory. - results_dir = Path(results) - if not results_dir.exists(): - results_dir.mkdir(parents=True, exist_ok=True) + results_dpath = Path(results_dpath) + if not results_dpath.exists(): + results_dpath.mkdir(parents=True, exist_ok=True) - with open(results_dir / "run.plans", "w") as f: + with open(results_dpath / "run.plans", "w") as f: # Output the explain data. for qid, run in qid_runtime_data.items(): if run.explain_data is not None: @@ -572,15 +530,15 @@ def _execute_workload( f.write(json.dumps(run.explain_data)) f.write("\n\n") - if obs_space and obs_space.require_metrics(): + if observation_space and observation_space.require_metrics(): # Create the metrics. # Log the metrics data as a flattened. accum_data = cast( list[dict[str, Any]], [v.metric_data for _, v in qid_runtime_data.items()], ) - accum_stats = obs_space.merge_deltas(accum_data) - with open(results_dir / "run.metrics.json", "w") as f: + accum_stats = observation_space.merge_deltas(accum_data) + with open(results_dpath / "run.metrics.json", "w") as f: # Flatten it. def flatten(d: dict[str, Any]) -> dict[str, Any]: flat: dict[str, Any] = {} @@ -602,46 +560,45 @@ def flatten(d: dict[str, Any]) -> dict[str, Any]: output["flattened"] = True f.write(json.dumps(output, indent=4)) - with open(results_dir / "run.raw.csv", "w") as f: + # run.raw.csv will essentially contain the information in qid_runtime_data. However, run.raw.csv may have an extra line for the penalty. + with open(results_dpath / "run.raw.csv", "w") as f: # Write the raw query data. 
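# (Shape of the rows written here, with the new "Timed Out" column between latency and
# worker id; one data row and one penalty row, values illustrative:
#     1,q1,0.0,1523.0,False,0,GlobalDual
#     22,P,1700000000.0,3000000.0,,0,PENALTY
# The penalty row leaves "Timed Out" empty, hence the double comma.)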
f.write( - "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Worker Id (start number),Phase Id (index in config file)\n" + "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Timed Out,Worker Id (start number),Phase Id (index in config file)\n" ) start = 0.0 for i, qid in enumerate(self.order): if qid in qid_runtime_data: - data = qid_runtime_data[qid] - assert data and data.runtime and data.query_run - rtime = data.runtime - pfx = data.query_run.prefix - f.write(f"{i+1},{qid},{start},{rtime},0,{pfx}\n") + best_run = qid_runtime_data[qid] + assert best_run and best_run.runtime and best_run.query_run + rtime = best_run.runtime + pfx = best_run.query_run.prefix + f.write(f"{i+1},{qid},{start},{rtime},{best_run.timed_out},0,{pfx}\n") start += rtime / 1e6 # Write a penalty term if needed. penalty = 0.0 - if stop_running and self.workload_timeout_penalty > 1: + if workload_timed_out and self.workload_timeout_penalty > 1: # Get the penalty. penalty = ( - workload_timeout * self.workload_timeout_penalty - workload_time + this_execution_workload_timeout * self.workload_timeout_penalty - Workload.compute_total_workload_runtime(qid_runtime_data) ) penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 - elif stop_running and not first: + elif workload_timed_out and not first: # Always degrade it a little if we've timed out. penalty = 3.0e6 if penalty > 0: - f.write(f"{len(self.order)},P,{time.time()},{penalty},0,PENALTY\n") - - # Get all the timeouts. - timeouts = [v.timeout for _, v in qid_runtime_data.items()] - return True, (any(timeouts) or stop_running), qid_runtime_data + f.write(f"{len(self.order)},P,{time.time()},{penalty},,0,PENALTY\n") - return workload_time + # Get all the timeouts. + num_timed_out_queries = sum([1 if best_run.timed_out else 0 for _, best_run in qid_runtime_data.items()]) + return num_timed_out_queries, workload_timed_out, qid_runtime_data @time_record("execute") def _execute_benchbase( - self, benchbase_config: dict[str, Any], results: Union[str, Path] + self, benchbase_config: dict[str, Any], results_dpath: Union[str, Path] ) -> bool: bb_path = benchbase_config["benchbase_path"] with local.cwd(bb_path): @@ -653,7 +610,7 @@ def _execute_benchbase( "-c", benchbase_config["benchbase_config_path"], "-d", - results, + results_dpath, "--execute=true", ].run(retcode=None) @@ -663,14 +620,14 @@ def _execute_benchbase( def execute( self, - pgconn: PostgresConn, + pg_conn: PostgresConn, reward_utility: RewardUtility, - obs_space: StateSpace, + observation_space: StateSpace, action_space: HolonSpace, actions: list[HolonAction], - actions_names: list[str], + variation_names: list[str], benchbase_config: dict[str, Any], - pqt: Optional[int] = None, + query_timeout: Optional[int] = None, reset_metrics: Optional[dict[str, BestQueryRun]] = None, update: bool = True, first: bool = False, @@ -679,44 +636,42 @@ def execute( if self.logger: self.logger.get_logger(__name__).info("Starting to run benchmark...") - # Purge results directory first. - tmp_dir = tempfile.gettempdir() - results = f"{tmp_dir}/results{pgconn.pgport}" - shutil.rmtree(results, ignore_errors=True) + # Generate a unique temporary directory to store results in. 
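# (tempfile.mkdtemp() returns a newly created, empty, uniquely named directory with
# owner-only permissions, which is what the assert below relies on; no rmtree of a
# shared, reused path is needed anymore.)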
+ results_dpath = Path(tempfile.mkdtemp()) + assert results_dpath.is_dir() and not any(results_dpath.iterdir()), "results_dpath should exist and be empty since mkdtemp guarantees a unique new dir." if self.benchbase: # Execute benchbase if specified. - success = self._execute_benchbase(benchbase_config, results) + success = self._execute_benchbase(benchbase_config, results_dpath) # We can only create a state if we succeeded. - success = obs_space.check_benchbase(self.dbgym_cfg, results) + success = observation_space.check_benchbase(self.dbgym_cfg, results_dpath) else: - ret = self._execute_workload( - pgconn, + num_timed_out_queries, did_workload_time_out, query_metric_data = self.execute_workload( + pg_conn, actions=actions, - actions_names=actions_names, - results=results, - obs_space=obs_space, + variation_names=variation_names, + results_dpath=results_dpath, + observation_space=observation_space, action_space=action_space, reset_metrics=reset_metrics, override_workload_timeout=self.workload_timeout, - pqt=pqt, + query_timeout=query_timeout, workload_qdir=None, - disable_pg_hint=False, blocklist=[], first=first, ) - assert isinstance(ret, tuple) - success, q_timeout, query_metric_data = ret[0], ret[1], ret[2] - assert success + did_anything_time_out = num_timed_out_queries > 0 or did_workload_time_out + success = True metric, reward = None, None if reward_utility is not None: metric, reward = reward_utility( - result_dir=results, update=update, did_error=not success + results_dpath=results_dpath, update=update, did_error=not success ) if self.logger: self.logger.get_logger(__name__).info( - f"Benchmark iteration with metric {metric} (reward: {reward}) (q_timeout: {q_timeout})" + f"Benchmark iteration with metric {metric} (reward: {reward}) (did_anything_time_out: {did_anything_time_out})" ) - return success, metric, reward, results, q_timeout, query_metric_data + return success, metric, reward, results_dpath, did_anything_time_out, query_metric_data
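For reference, a caller-side sketch of the renamed execute() contract, mirroring the reset() call site earlier in this diff (argument names are from this diff; the surrounding setup is elided and illustrative):

    success, metric, reward, results_dpath, did_anything_time_out, query_metric_data = (
        workload.execute(
            pg_conn=pg_conn,
            reward_utility=reward_utility,
            observation_space=observation_space,
            action_space=action_space,
            actions=[default_action],
            variation_names=["GlobalDual"],
            benchbase_config=benchbase_config,
            query_timeout=query_timeout,
        )
    )
    # did_anything_time_out folds per-query timeouts and the whole-workload timeout into
    # one flag: num_timed_out_queries > 0 or did_workload_time_out.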