diff --git a/requirements.txt b/requirements.txt index 2277f70..a9ffb39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ psycopg[binary] tqdm==4.66.1 typing_extensions==4.10.0 requests==2.31.0 -tod_attack_miner @ git+https://github.com/TOD-theses/tod_attack_miner@ce28501324039f4d6fab9ccf00514c826d5fe383 +tod_attack_miner @ git+https://github.com/TOD-theses/tod_attack_miner@d32fba53bd873b5247c8548a37decf9966a3179a tod_checker @ git+https://github.com/TOD-theses/tod_checker@3e254e0be2c34f0963e575dea54699bf7937f210 t_race_stats @ git+https://github.com/TOD-theses/t_race_stats@7cbbc3375d52de347933a155e11de679fbeabbe4 \ No newline at end of file diff --git a/t_race/commands/check.py b/t_race/commands/check.py index f45888e..353ccce 100644 --- a/t_race/commands/check.py +++ b/t_race/commands/check.py @@ -1,4 +1,5 @@ from argparse import ArgumentParser, Namespace +from collections import defaultdict import csv from dataclasses import dataclass from importlib.metadata import version @@ -69,6 +70,17 @@ def init_parser_check(parser: ArgumentParser): choices=("approximation", "overall"), default=DEFAULTS.TOD_METHOD, ) + parser.add_argument( + "--check-indirect-dependencies", + action="store_true", + help="Check a CSV of indirect dependencies", + ) + parser.add_argument( + "--check-indirect-dependencies-csv", + type=Path, + default=DEFAULTS.INDIRECT_DEPENDENCIES_CSV_PATH, + help="Path where the check results for indirect dependencies should be stored", + ) parser.add_argument( "--check-props-for-all", action="store_true", @@ -110,9 +122,25 @@ def check_command(args: Namespace, time_tracker: TimeTracker): args.base_dir / args.properties_details_jsonl ) tod_method = args.tod_method + should_check_indirect_dependencies: bool = args.check_indirect_dependencies + indirect_dependencies_path: Path = ( + args.base_dir / args.check_indirect_dependencies_csv + ) checker = create_checker(args.provider) + if should_check_indirect_dependencies: + 
check_indirect_dependencies( + checker, + transactions_csv_path, + tod_check_results_file_path, + tod_check_details_file_path, + indirect_dependencies_path, + args.max_workers, + time_tracker, + ) + return + with time_tracker.task(("check",)): check( checker, @@ -299,6 +327,98 @@ def check_properties( details_file.write(json.dumps(details_obj) + "\n") +def check_indirect_dependencies( + checker_param: TodChecker, + tod_candidates_path: Path, + tod_check_results_path: Path, + tod_check_details_path: Path, + indirect_dependencies_results_path: Path, + max_workers: int, + time_tracker: TimeTracker, +): + global checker + checker = checker_param + indirect_dependencies = load_indirect_dependencies(tod_candidates_path) + transaction_pairs = set( + (tx_a, tx_x) for _, _, (tx_a, tx_x, _) in indirect_dependencies + ) + transaction_pairs.update( + set((tx_x, tx_b) for _, _, (_, tx_x, tx_b) in indirect_dependencies) + ) + tod_candidates = set((tx_a, tx_b) for tx_a, tx_b, _ in indirect_dependencies) + + blocks = set() + with time_tracker.task(("check", "download transactions")): + for tx in tqdm(set(flatten(transaction_pairs)), desc="Fetch transactions"): + blocks.add(checker.download_data_for_transaction(tx)) + with time_tracker.task(("check", "fetch state changes")): + for block in tqdm(blocks, desc="Fetch state changes"): + checker.download_data_for_block(block) + + with time_tracker.task(("check", "check")): + with open(tod_check_results_path, "w", newline="") as csv_file, open( + tod_check_details_path, "w" + ) as details_file: + writer = csv.DictWriter(csv_file, ["tx_a", "tx_b", "result"]) + writer.writeheader() + with ThreadPool(max_workers) as p: + process_inputs = [ + CheckArgs((tx_a, tx_b), "overall") + for tx_a, tx_b in transaction_pairs + ] + for result in tqdm( + p.imap_unordered(check_candidate, process_inputs, chunksize=10), + total=len(process_inputs), + desc="Check TOD", + ): + time_tracker.save_time_ms( + ("check", "check", result.id), result.elapsed_ms + ) 
+ tx_a, tx_b = result.id.split("_") + writer.writerow( + { + "tx_a": tx_a, + "tx_b": tx_b, + "result": result.result, + } + ) + details: dict = { + "tx_a": tx_a, + "tx_b": tx_b, + "details": None, + "failure": None, + } + if result.details: + details["details"] = result.details.as_dict() + if result.result not in ("TOD", "not TOD"): + details["failure"] = result.result + details_file.write(json.dumps(details) + "\n") + + tods = load_tod_transactions(tod_check_results_path) + dependent_paths: dict[tuple[str, str], list[str]] = defaultdict(list) + + for tx_a, tx_b, path in indirect_dependencies: + tx_x = path[1] + if (tx_a, tx_x) in tods and (tx_x, tx_b) in tods: + dependent_paths[(tx_a, tx_b)].append(tx_x) + + with open(indirect_dependencies_results_path, "w", newline="") as csv_file: + writer = csv.DictWriter( + csv_file, ["tx_a", "tx_b", "indirect_dependency", "witnesses"] + ) + writer.writeheader() + for tx_a, tx_b in tod_candidates: + paths = dependent_paths[(tx_a, tx_b)] + writer.writerow( + { + "tx_a": tx_a, + "tx_b": tx_b, + "indirect_dependency": len(paths) > 0, + "witnesses": "|".join(paths), + } + ) + + def create_checker(provider: str): rpc = RPC(provider, OverridesFormatter("old Erigon")) state_changes_fetcher = StateChangesFetcher(rpc) @@ -453,5 +573,13 @@ def load_tod_transactions(csv_path: Path) -> Sequence[tuple[str, str]]: return [(row["tx_a"], row["tx_b"]) for row in reader if row["result"] == "TOD"] +def load_indirect_dependencies( + csv_path: Path, +) -> Sequence[tuple[str, str, Sequence[str]]]: + with open(csv_path, "r", newline="") as f: + reader = csv.DictReader(f) + return [(row["tx_a"], row["tx_b"], row["path"].split("|")) for row in reader] + + def flatten(nested_list: Iterable[Iterable]) -> list: return [element for sublist in nested_list for element in sublist] diff --git a/t_race/commands/defaults.py b/t_race/commands/defaults.py index 58af401..666a3d3 100644 --- a/t_race/commands/defaults.py +++ b/t_race/commands/defaults.py @@ -8,6 
+8,7 @@ class DEFAULTS: TOD_MINING_EVALUATION_CSV_PATH = Path("tod_candidates_evaluation.csv") TOD_CHECK_CSV_PATH = Path("tod_check.csv") TOD_CHECK_JSONL_PATH = Path("tod_check_details.jsonl") + INDIRECT_DEPENDENCIES_CSV_PATH = Path("tod_check_indirect_dependencies.csv") TOD_PROPERTIES_CSV_PATH = Path("tod_properties.csv") TOD_PROPERTIES_JSONL_PATH = Path("tod_properties_details.jsonl") TOD_MINER_STATS_PATH = Path("mining_stats.json") diff --git a/t_race/commands/mine.py b/t_race/commands/mine.py index 2137017..1466262 100644 --- a/t_race/commands/mine.py +++ b/t_race/commands/mine.py @@ -14,6 +14,7 @@ from tod_attack_miner.db.filters import ( get_filters_except_duplicate_limits, get_filters_duplicate_limits, + get_filters_up_to_indirect_dependencies, ) from t_race.commands.check import load_tod_candidates @@ -95,6 +96,11 @@ def init_parser_mine(parser: ArgumentParser): default=DEFAULTS.TOD_MINING_EVALUATION_CSV_PATH, help="See --evaluate-candidates-csv", ) + parser.add_argument( + "--extract-indirect-dependencies", + action="store_true", + help="For the evaluation candidates, extract the indirect dependencies and stop further mining", + ) parser.add_argument("--postgres-user", type=str, default="postgres") parser.add_argument("--postgres-password", type=str, default="password") parser.add_argument("--postgres-host", type=str, default="localhost") @@ -107,6 +113,7 @@ def mine_command(args: Namespace, time_tracker: TimeTracker): output_stats_path = args.base_dir / args.output_stats_path evaluation_candidates_csv: Path | None = args.evaluate_candidates_csv evaluation_csv = args.base_dir / args.evaluation_csv + extract_indirect_dependencies: bool = args.extract_indirect_dependencies assert ( not evaluation_candidates_csv or evaluation_candidates_csv.exists() @@ -127,6 +134,7 @@ def mine_command(args: Namespace, time_tracker: TimeTracker): args.quick_stats, evaluation_candidates_csv, evaluation_csv, + extract_indirect_dependencies, time_tracker, ) @@ -142,6 +150,7 @@ 
def mine( quick_stats: bool, evaluate_candidates_csv_path: Path | None, evaluation_csv_path: Path, + extract_indirect_dependencies: bool, time_tracker: TimeTracker, ): with psycopg.connect(conn_str) as conn: @@ -174,9 +183,17 @@ def mine( evaluation_candidates = load_tod_candidates( evaluate_candidates_csv_path ) - results = miner.evaluate_candidates(filters, evaluation_candidates) - print(f"Saving evaluation results to {evaluation_csv_path}") - save_evaluation_results(evaluation_csv_path, results) + if extract_indirect_dependencies: + filters = get_filters_up_to_indirect_dependencies(window_size) + results = miner.get_indirect_dependencies( + filters, evaluation_candidates, max_depth=1 + ) + print(f"Saving indirect dependencies to {evaluation_csv_path}") + save_indirect_dependencies(evaluation_csv_path, results) + else: + results = miner.evaluate_candidates(filters, evaluation_candidates) + print(f"Saving evaluation results to {evaluation_csv_path}") + save_evaluation_results(evaluation_csv_path, results) else: miner.filter_candidates(filters) print(f"Reduced to {miner.count_candidates()} TOD candidates") @@ -226,3 +243,20 @@ for c in results ] csv_writer.writerows(rows) + + +def save_indirect_dependencies( + results_csv_path: Path, results: Iterable[tuple[str, str, str]] +): + with open(results_csv_path, "w", newline="") as f: + csv_writer = csv.DictWriter(f, ["tx_a", "tx_b", "path"]) + csv_writer.writeheader() + rows = [ + { + "tx_a": tx_a, + "tx_b": tx_b, + "path": path, + } + for tx_a, tx_b, path in results + ] + csv_writer.writerows(rows) diff --git a/t_race/commands/run.py b/t_race/commands/run.py index 699e9e0..8bc14a9 100644 --- a/t_race/commands/run.py +++ b/t_race/commands/run.py @@ -82,6 +82,7 @@ def run_mining(args: Namespace, time_tracker: TimeTracker): not args.extensive_stats, None, Path(), + False, time_tracker, )