feat: indirect dependency evaluation
Otto-AA committed Aug 9, 2024
1 parent f7b7bb5 commit a2aa3ba
Showing 5 changed files with 168 additions and 4 deletions.
requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -2,6 +2,6 @@ psycopg[binary]
tqdm==4.66.1
typing_extensions==4.10.0
requests==2.31.0
-tod_attack_miner @ git+https://github.com/TOD-theses/tod_attack_miner@ce28501324039f4d6fab9ccf00514c826d5fe383
+tod_attack_miner @ git+https://github.com/TOD-theses/tod_attack_miner@d32fba53bd873b5247c8548a37decf9966a3179a
tod_checker @ git+https://github.com/TOD-theses/tod_checker@3e254e0be2c34f0963e575dea54699bf7937f210
t_race_stats @ git+https://github.com/TOD-theses/t_race_stats@7cbbc3375d52de347933a155e11de679fbeabbe4
t_race/commands/check.py (128 changes: 128 additions & 0 deletions)
@@ -1,4 +1,5 @@
from argparse import ArgumentParser, Namespace
from collections import defaultdict
import csv
from dataclasses import dataclass
from importlib.metadata import version
@@ -69,6 +70,17 @@ def init_parser_check(parser: ArgumentParser):
choices=("approximation", "overall"),
default=DEFAULTS.TOD_METHOD,
)
parser.add_argument(
"--check-indirect-dependencies",
action="store_true",
help="Check a CSV of indirect dependencies",
)
parser.add_argument(
"--check-indirect-dependencies-csv",
type=Path,
default=DEFAULTS.INDIRECT_DEPENDENCIES_CSV_PATH,
help="Path where the check results for indirect dependencies should be stored",
)
parser.add_argument(
"--check-props-for-all",
action="store_true",
@@ -110,9 +122,25 @@ def check_command(args: Namespace, time_tracker: TimeTracker):
args.base_dir / args.properties_details_jsonl
)
tod_method = args.tod_method
should_check_indirect_dependencies: bool = args.check_indirect_dependencies
indirect_dependencies_path: Path = (
args.base_dir / args.check_indirect_dependencies_csv
)

checker = create_checker(args.provider)

if should_check_indirect_dependencies:
check_indirect_dependencies(
checker,
transactions_csv_path,
tod_check_results_file_path,
tod_check_details_file_path,
indirect_dependencies_path,
args.max_workers,
time_tracker,
)
return

with time_tracker.task(("check",)):
check(
checker,
@@ -299,6 +327,98 @@ def check_properties(
details_file.write(json.dumps(details_obj) + "\n")


def check_indirect_dependencies(
checker_param: TodChecker,
tod_candidates_path: Path,
tod_check_results_path: Path,
tod_check_details_path: Path,
indirect_dependencies_results_path: Path,
max_workers: int,
time_tracker: TimeTracker,
):
global checker
checker = checker_param
indirect_dependencies = load_indirect_dependencies(tod_candidates_path)
transaction_pairs = set(
(tx_a, tx_x) for _, _, (tx_a, tx_x, _) in indirect_dependencies
)
transaction_pairs.update(
set((tx_x, tx_b) for _, _, (_, tx_x, tx_b) in indirect_dependencies)
)
tod_candidates = set((tx_a, tx_b) for tx_a, tx_b, _ in indirect_dependencies)

blocks = set()
with time_tracker.task(("check", "download transactions")):
for tx in tqdm(set(flatten(transaction_pairs)), desc="Fetch transactions"):
blocks.add(checker.download_data_for_transaction(tx))
with time_tracker.task(("check", "fetch state changes")):
for block in tqdm(blocks, desc="Fetch state changes"):
checker.download_data_for_block(block)

with time_tracker.task(("check", "check")):
with open(tod_check_results_path, "w", newline="") as csv_file, open(
tod_check_details_path, "w"
) as details_file:
writer = csv.DictWriter(csv_file, ["tx_a", "tx_b", "result"])
writer.writeheader()
with ThreadPool(max_workers) as p:
process_inputs = [
CheckArgs((tx_a, tx_b), "overall")
for tx_a, tx_b in transaction_pairs
]
for result in tqdm(
p.imap_unordered(check_candidate, process_inputs, chunksize=10),
total=len(process_inputs),
desc="Check TOD",
):
time_tracker.save_time_ms(
("check", "check", result.id), result.elapsed_ms
)
tx_a, tx_b = result.id.split("_")
writer.writerow(
{
"tx_a": tx_a,
"tx_b": tx_b,
"result": result.result,
}
)
details: dict = {
"tx_a": tx_a,
"tx_b": tx_b,
"details": None,
"failure": None,
}
if result.details:
details["details"] = result.details.as_dict()
if result.result not in ("TOD", "not TOD"):
details["failure"] = result.result
details_file.write(json.dumps(details) + "\n")

tods = load_tod_transactions(tod_check_results_path)
dependent_paths: dict[tuple[str, str], list[str]] = defaultdict(list)

for tx_a, tx_b, path in indirect_dependencies:
tx_x = path[1]
if (tx_a, tx_x) in tods and (tx_x, tx_b) in tods:
dependent_paths[(tx_a, tx_b)].append(tx_x)

with open(indirect_dependencies_results_path, "w", newline="") as csv_file:
writer = csv.DictWriter(
csv_file, ["tx_a", "tx_b", "indirect_dependency", "witnesses"]
)
writer.writeheader()
for tx_a, tx_b in tod_candidates:
paths = dependent_paths[(tx_a, tx_b)]
writer.writerow(
{
"tx_a": tx_a,
"tx_b": tx_b,
"indirect_dependency": len(paths) > 0,
"witnesses": "|".join(paths),
}
)


def create_checker(provider: str):
rpc = RPC(provider, OverridesFormatter("old Erigon"))
state_changes_fetcher = StateChangesFetcher(rpc)
@@ -453,5 +573,13 @@ def load_tod_transactions(csv_path: Path) -> Sequence[tuple[str, str]]:
return [(row["tx_a"], row["tx_b"]) for row in reader if row["result"] == "TOD"]


def load_indirect_dependencies(
csv_path: Path,
) -> Sequence[tuple[str, str, Sequence[str]]]:
with open(csv_path, "r", newline="") as f:
reader = csv.DictReader(f)
return [(row["tx_a"], row["tx_b"], row["path"].split("|")) for row in reader]


def flatten(nested_list: Iterable[Iterable]) -> list:
return [element for sublist in nested_list for element in sublist]
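
The reduction at the end of check_indirect_dependencies is the heart of this file: a candidate pair (tx_a, tx_b) counts as indirectly dependent when some intermediary tx_x on a mined path is TOD with both endpoints. A standalone sketch of that loop, using made-up placeholder hashes (0xA, 0xX, 0xB are not real transactions):

    from collections import defaultdict

    # Pairs the checker judged "TOD"; here both hops of the path A -> X -> B.
    tods = {("0xA", "0xX"), ("0xX", "0xB")}
    # One candidate with one mined path; with max_depth=1, paths are [tx_a, tx_x, tx_b].
    indirect_dependencies = [("0xA", "0xB", ["0xA", "0xX", "0xB"])]

    dependent_paths: dict[tuple[str, str], list[str]] = defaultdict(list)
    for tx_a, tx_b, path in indirect_dependencies:
        tx_x = path[1]  # the intermediary transaction
        if (tx_a, tx_x) in tods and (tx_x, tx_b) in tods:
            dependent_paths[(tx_a, tx_b)].append(tx_x)

    assert dependent_paths[("0xA", "0xB")] == ["0xX"]

In the results CSV, such a pair gets indirect_dependency set to True and tx_x listed in the "|"-joined witnesses column.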
t_race/commands/defaults.py (1 change: 1 addition & 0 deletions)
@@ -8,6 +8,7 @@ class DEFAULTS:
TOD_MINING_EVALUATION_CSV_PATH = Path("tod_candidates_evaluation.csv")
TOD_CHECK_CSV_PATH = Path("tod_check.csv")
TOD_CHECK_JSONL_PATH = Path("tod_check_details.jsonl")
INDIRECT_DEPENDENCIES_CSV_PATH = Path("tod_check_indirect_dependencies.csv")
TOD_PROPERTIES_CSV_PATH = Path("tod_properties.csv")
TOD_PROPERTIES_JSONL_PATH = Path("tod_properties_details.jsonl")
TOD_MINER_STATS_PATH = Path("mining_stats.json")
t_race/commands/mine.py (40 changes: 37 additions & 3 deletions)
@@ -14,6 +14,7 @@
from tod_attack_miner.db.filters import (
get_filters_except_duplicate_limits,
get_filters_duplicate_limits,
get_filters_up_to_indirect_dependencies,
)

from t_race.commands.check import load_tod_candidates
@@ -95,6 +96,11 @@ def init_parser_mine(parser: ArgumentParser):
default=DEFAULTS.TOD_MINING_EVALUATION_CSV_PATH,
help="See --evaluate-candidates-csv",
)
parser.add_argument(
"--extract-indirect-dependencies",
action="store_true",
help="For the evaluation candidates, extract the indirect dependencies and stop further mining",
)
parser.add_argument("--postgres-user", type=str, default="postgres")
parser.add_argument("--postgres-password", type=str, default="password")
parser.add_argument("--postgres-host", type=str, default="localhost")
@@ -107,6 +113,7 @@ def mine_command(args: Namespace, time_tracker: TimeTracker):
output_stats_path = args.base_dir / args.output_stats_path
evaluation_candidates_csv: Path | None = args.evaluate_candidates_csv
evaluation_csv = args.base_dir / args.evaluation_csv
extract_indirect_dependencies: bool = args.extract_indirect_dependencies

assert (
not evaluation_candidates_csv or evaluation_candidates_csv.exists()
@@ -127,6 +134,7 @@ def mine_command(args: Namespace, time_tracker: TimeTracker):
args.quick_stats,
evaluation_candidates_csv,
evaluation_csv,
extract_indirect_dependencies,
time_tracker,
)

@@ -142,6 +150,7 @@ def mine(
quick_stats: bool,
evaluate_candidates_csv_path: Path | None,
evaluation_csv_path: Path,
extract_indirect_dependencies: bool,
time_tracker: TimeTracker,
):
with psycopg.connect(conn_str) as conn:
@@ -174,9 +183,17 @@ def mine(
evaluation_candidates = load_tod_candidates(
evaluate_candidates_csv_path
)
-            results = miner.evaluate_candidates(filters, evaluation_candidates)
-            print(f"Saving evaluation results to {evaluation_csv_path}")
-            save_evaluation_results(evaluation_csv_path, results)
+            if extract_indirect_dependencies:
+                filters = get_filters_up_to_indirect_dependencies(window_size)
+                results = miner.get_indirect_dependencies(
+                    filters, evaluation_candidates, max_depth=1
+                )
+                print(f"Saving indirect dependencies to {evaluation_csv_path}")
+                save_indirect_dependencies(evaluation_csv_path, results)
+            else:
+                results = miner.evaluate_candidates(filters, evaluation_candidates)
+                print(f"Saving evaluation results to {evaluation_csv_path}")
+                save_evaluation_results(evaluation_csv_path, results)
else:
miner.filter_candidates(filters)
print(f"Reduced to {miner.count_candidates()} TOD candidates")
@@ -226,3 +243,20 @@ def save_evaluation_results(
for c in results
]
csv_writer.writerows(rows)


def save_indirect_dependencies(
results_csv_path: Path, results: Iterable[tuple[str, str, str]]
):
    with open(results_csv_path, "w", newline="") as f:
csv_writer = csv.DictWriter(f, ["tx_a", "tx_b", "path"])
csv_writer.writeheader()
rows = [
{
"tx_a": tx_a,
"tx_b": tx_b,
"path": path,
}
for tx_a, tx_b, path in results
]
csv_writer.writerows(rows)
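
Note the file-format coupling across the two commands: save_indirect_dependencies here writes the path column that load_indirect_dependencies in check.py later splits on "|". A minimal round-trip sketch, assuming the miner returns path as a "|"-joined string of hashes including both endpoints (the hashes and the file name below are placeholders):

    import csv
    from pathlib import Path

    rows = [("0xA", "0xB", "0xA|0xX|0xB")]  # (tx_a, tx_b, "|"-joined path)

    out = Path("tod_candidates_evaluation.csv")
    with open(out, "w", newline="") as f:
        writer = csv.DictWriter(f, ["tx_a", "tx_b", "path"])
        writer.writeheader()
        writer.writerows({"tx_a": a, "tx_b": b, "path": p} for a, b, p in rows)

    # What check.py's load_indirect_dependencies will reconstruct:
    with open(out, newline="") as f:
        parsed = [(r["tx_a"], r["tx_b"], r["path"].split("|")) for r in csv.DictReader(f)]

    assert parsed == [("0xA", "0xB", ["0xA", "0xX", "0xB"])]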
t_race/commands/run.py (1 change: 1 addition & 0 deletions)
@@ -82,6 +82,7 @@ def run_mining(args: Namespace, time_tracker: TimeTracker):
not args.extensive_stats,
None,
Path(),
False,
time_tracker,
)

