Commit: merge main
FrancoGiachetta committed Dec 13, 2024
2 parents (6c01c6d, 452df65), commit 0f4545a
Showing 10 changed files with 307 additions and 104 deletions.
1 change: 1 addition & 0 deletions .gitignore
```diff
@@ -1,5 +1,6 @@
 debug/
 target/
+**.pyc
 
 .env
 .envrc
```
22 changes: 11 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default.

14 changes: 11 additions & 3 deletions README.md
````diff
@@ -126,17 +126,25 @@ In the `plotting` directory, you can find python scripts to plot relevant information
 Make sure to erase the `compiled_programs` directory, then run:
 
 ```bash
-cargo run --features benchmark,structured_logging bench-block-range 724000 724000 mainnet 1 | tee native-logs
-cargo run --features benchmark,structured_logging,only_cairo_vm bench-block-range 724000 724000 mainnet 1 | tee vm-logs
+./scripts/benchmark_tx.sh <tx> <net> <block> <laps>
 ```
 
+This generates four files:
+- `{native,vm}-data-$tx-$net.json`: Contains the execution time of each contract call
+- `{native,vm}-logs-$tx-$net.json`: Contains the output of running the benchmark
+
+If you want to benchmark a full block, you could run:
+```bash
+./scripts/benchmark_block.sh <block-start> <block-end> <net> <laps>
+```
+
 Once you have done this, you can use the plotting scripts:
 
+- `python ./plotting/plot_execution_time.py native-data vm-data`: Plots the execution time of Native vs VM, by contract class.
 - `python ./plotting/plot_compilation_memory.py native-logs`: Size of the compiled native libraries, by contract class.
 - `python ./plotting/plot_compilation_memory_corr.py native-logs vm-logs`: Size of the compiled native libraries, by the associated Casm contract size.
 - `python ./plotting/plot_compilation_memory_trend.py native-logs vm-logs`: Size of the compiled native and casm contracts, by the sierra contract size.
 - `python ./plotting/plot_compilation_time.py native-logs`: Native compilation time, by contract class
 - `python ./plotting/plot_compilation_time_trend.py native-logs vm-logs`: Native and Casm compilation time, by the sierra contract size.
-- `python ./plotting/plot_execution_time.py native-logs vm-logs`: Plots the execution time of Native vs VM, by contract class.
 - `python ./plotting/plot_compilation_time_finer.py native-logs`: Native compilation time, with fine-grained stage separation, by contract class.
````
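The data files that `benchmark_tx.sh` leaves behind are what the plotting scripts consume. As a rough sketch (not part of the repository), this is how one of the `{native,vm}-data-$tx-$net.json` files could be inspected by hand; the concrete file name and the per-call record layout (`class_hash`, `selector`, and a `time` object split into `secs` and `nanos`) are assumptions inferred from what `plot_execution_time.py` reads.

```python
# Hypothetical inspection of a generated data file; the file name and the
# record layout are assumptions, not part of the repository.
import json

# e.g. a file produced by: ./scripts/benchmark_tx.sh 0x1234 mainnet 724000 1
with open("native-data-0x1234-mainnet.json") as f:
    calls = json.load(f)  # assumed: a JSON list of per-call records

for call in calls:
    # fold the secs/nanos pair into a single nanosecond value
    nanos = call["time"]["secs"] * 1_000_000_000 + call["time"]["nanos"]
    print(call["class_hash"], call["selector"], f"{nanos} ns")
```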

166 changes: 133 additions & 33 deletions plotting/plot_execution_time.py
```diff
@@ -1,55 +1,155 @@
 from argparse import ArgumentParser
 
-argument_parser = ArgumentParser('Stress Test Plotter')
-argument_parser.add_argument("native_logs_path")
-argument_parser.add_argument("vm_logs_path")
-arguments = argument_parser.parse_args()
-
 import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns
+import io
+from utils import format_hash
+
+parser = ArgumentParser("Stress Test Plotter")
+parser.add_argument("native_data")
+parser.add_argument("vm_data")
+parser.add_argument("-s", "--speedup", action="store_true")
+args = parser.parse_args()
+
+pd.set_option("display.max_columns", None)
+pd.set_option("display.max_rows", None)
 
 
-datasetNative = pd.read_json(arguments.native_logs_path, lines=True, typ="series")
-datasetVM = pd.read_json(arguments.vm_logs_path, lines=True, typ="series")
+def load_dataset(path, f):
+    return pd.read_json(path).apply(f, axis=1).dropna().apply(pd.Series)
 
-def canonicalize_execution_time_by_contract_class(event):
-    # skip caching logs
-    if find_span(event, "caching block range") != None:
-        return None
 
-    # keep contract execution finished logs
-    if "contract execution finished" not in event["fields"]["message"]:
-        return None
+def process_row(row):
+    class_hash = row.class_hash
+    selector = row.selector
+    time = row.time["nanos"] + row.time["secs"] * 10e9
 
     return {
-        "class hash": event["span"]["class_hash"],
-        "time": float(event["fields"]["time"]),
+        "class_hash": class_hash,
+        "selector": selector,
+        "time": time,
     }
 
-def find_span(event, name):
-    for span in event["spans"]:
-        if name in span["name"]:
-            return span
-    return None
 
-def format_hash(class_hash):
-    return f"0x{class_hash[:6]}..."
+dataNative = load_dataset(args.native_data, process_row)
+dataNative["executor"] = "native"
+dataVM = load_dataset(args.vm_data, process_row)
+dataVM["executor"] = "vm"
+data = pd.concat([dataNative, dataVM])
+
+# GROUP BY SELECTOR
+
+# calculate mean by class hash
+data_by_selector = (
+    data.groupby(["executor", "class_hash", "selector"])
+    .agg(
+        total_time=("time", "sum"),
+        mean_time=("time", "mean"),
+        samples=("time", "size"),
+    )
+    .unstack("executor")
+)
+data_by_selector.columns = data_by_selector.columns.map("_".join)
+
+if (data_by_selector["samples_native"] != data_by_selector["samples_vm"]).any():
+    raise Exception("Native and VM should have the same number of samples")
+
+# calculate speedup
+data_by_selector["speedup"] = (
+    data_by_selector["total_time_vm"] / data_by_selector["total_time_native"]
+)
+total_native = data_by_selector["total_time_native"].sum() / 10e9
+total_vm = data_by_selector["total_time_vm"].sum() / 10e9
+print(f"Total Native: {total_native} seconds")
+print(f"Total VM: {total_vm} seconds")
+print("Total Speedup:", total_vm / total_native)
 
-datasetNative = datasetNative.apply(canonicalize_execution_time_by_contract_class).dropna().apply(pd.Series)
-datasetVM = datasetVM.apply(canonicalize_execution_time_by_contract_class).dropna().apply(pd.Series)
+# sort by decreasing time
+data_by_selector.sort_values(["total_time_vm"], ascending=[False], inplace=True)  # type: ignore
 
-datasetNative = datasetNative.groupby("class hash").mean()
-datasetVM = datasetVM.groupby("class hash").mean()
+s = io.StringIO()
+data_by_selector.to_csv(s)
+print(s.getvalue())
 
-figure, ax = plt.subplots()
+# GROUP BY CLASS
 
-sns.set_color_codes("bright")
+data_by_class = (
+    data.groupby(["executor", "class_hash"])
+    .agg(
+        total_time=("time", "sum"),
+        mean_time=("time", "mean"),
+        samples=("time", "size"),
+    )
+    .unstack("executor")
+)
+data_by_class.columns = data_by_class.columns.map("_".join)
+data_by_class["speedup"] = (
+    data_by_class["total_time_vm"] / data_by_class["total_time_native"]
+)
+data_by_class.sort_values(["total_time_vm"], ascending=[False], inplace=True)  # type: ignore
+data_by_class = data_by_class.nlargest(50, "total_time_vm")  # type: ignore
 
-sns.barplot(ax=ax, y="class hash", x="time", data=datasetVM, formatter=format_hash, label="VM Execution Time", color="r", alpha = 0.75) # type: ignore
-sns.barplot(ax=ax, y="class hash", x="time", data=datasetNative, formatter=format_hash, label="Native Execution Time", color="b", alpha = 0.75) # type: ignore
+# ======================
+# PLOTTING
+# ======================
 
-ax.set_xlabel("Mean Time (ms)")
+figure, axes = plt.subplots(1, 2)
+
+ax = axes[0]
+
+sns.barplot(
+    ax=ax,
+    y="class_hash",
+    x="total_time_vm",
+    data=data_by_class,  # type: ignore
+    formatter=format_hash,
+    label="VM Execution Time",
+    color="r",
+    alpha=0.75,
+)  # type: ignore
+sns.barplot(
+    ax=ax,
+    y="class_hash",
+    x="total_time_native",
+    data=data_by_class,  # type: ignore
+    formatter=format_hash,
+    label="Native Execution Time",
+    color="b",
+    alpha=0.75,
+)  # type: ignore
+
+ax.set_xlabel("Total Time (ns)")
 ax.set_ylabel("Class Hash")
-ax.set_title("Native vs. VM by Contract Class")
+ax.set_title("Total time by Contract Class")
+ax.set_xscale("log", base=2)
+
+ax = axes[1]
+
+sns.barplot(
+    ax=ax,
+    y="class_hash",
+    x="speedup",
+    data=data_by_class,  # type: ignore
+    formatter=format_hash,
+    label="Execution Speedup",
+    color="b",
+    alpha=0.75,
+)  # type: ignore
+
+ax.set_xlabel("Speedup")
+ax.set_ylabel("Class Hash")
+ax.set_title("Speedup by Contract Class")
+
+if args.speedup:
+    fig, ax = plt.subplots()
+    sns.violinplot(
+        ax=ax,
+        x="speedup",
+        data=data_by_class,  # type: ignore
+        cut=0,
+    )
+    ax.set_xlabel("Speedup")
+    ax.set_title("Speedup Distribution")
 
 plt.show()
```
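To make the aggregation in the new script easier to follow, here is a minimal, self-contained sketch of the same pandas pattern: group per-call samples by executor and class hash, pivot the executor level into columns, and derive a VM-over-Native speedup. The input values below are made up for illustration.

```python
import pandas as pd

# Fabricated per-call samples in long format: one row per (executor, class, time).
data = pd.DataFrame(
    {
        "executor": ["native", "native", "vm", "vm"],
        "class_hash": ["0xabc", "0xdef", "0xabc", "0xdef"],
        "time": [100.0, 250.0, 400.0, 500.0],  # nanoseconds, made up
    }
)

by_class = (
    data.groupby(["executor", "class_hash"])
    .agg(total_time=("time", "sum"), samples=("time", "size"))
    .unstack("executor")  # columns become (metric, executor) pairs
)
by_class.columns = by_class.columns.map("_".join)  # e.g. "total_time_native"
by_class["speedup"] = by_class["total_time_vm"] / by_class["total_time_native"]
print(by_class)
```

The `.unstack("executor")` step is what turns the long-format samples into paired `*_native`/`*_vm` columns, so the speedup can be computed row by row.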
2 changes: 2 additions & 0 deletions plotting/utils.py
```diff
@@ -0,0 +1,2 @@
+def format_hash(class_hash):
+    return f"{class_hash[:6]}..."
```
3 changes: 2 additions & 1 deletion replay/Cargo.toml
```diff
@@ -4,10 +4,11 @@ version = "0.1.0"
 edition = "2021"
 
 [features]
-benchmark = []
+benchmark = ["dep:serde", "dep:serde_json", "dep:serde_with"]
 # The only_cairo_vm feature is designed to avoid executing transitions with cairo_native and instead use cairo_vm exclusively
 only_cairo_vm = ["rpc-state-reader/only_casm"]
 structured_logging = []
 profiling = []
 state_dump = ["dep:serde", "dep:serde_json", "dep:serde_with", "dep:starknet-types-core"]
 
 [dependencies]
```