Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

This adds 10MiB downloads hack to tornettools [Discussion] #73

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tornettools/generate_tgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __generate_tgenrc_perfclient(server_peers, path):
G.add_node("stream_50k", sendsize="1000 bytes", recvsize="50 KiB", stallout="0 seconds", timeout="15 seconds")
G.add_node("stream_1m", sendsize="1000 bytes", recvsize="1 MiB", stallout="0 seconds", timeout="60 seconds")
G.add_node("stream_5m", sendsize="1000 bytes", recvsize="5 MiB", stallout="0 seconds", timeout="120 seconds")
G.add_node("stream_10m", sendsize="1000 bytes", recvsize="10 MiB", stallout="0 seconds", timeout="240 seconds")

G.add_edge("start", "pause")

Expand All @@ -89,6 +90,7 @@ def __generate_tgenrc_perfclient(server_peers, path):
G.add_edge("pause", "stream_50k", weight="12.0")
G.add_edge("pause", "stream_1m", weight="2.0")
G.add_edge("pause", "stream_5m", weight="1.0")
G.add_edge("pause", "stream_10m", weight="1.0")

write_graphml(G, path)

Expand Down
11 changes: 9 additions & 2 deletions tornettools/parse_onionperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@

def run(args):
db = {"circuit_rtt": [], "client_goodput": [], "client_goodput_5MiB": [],
"circuit_build_times": [], "download_times": {}, "daily_counts": {},
"relay_goodput": {}}
"client_goodput_10MiB": [], "circuit_build_times": [],
"download_times": {}, "daily_counts": {}, "relay_goodput": {}}

if args.bandwidth_data_path is not None:
logging.info(f"Parsing bandwidth data stored in '{args.bandwidth_data_path}'")
Expand Down Expand Up @@ -159,6 +159,11 @@ def __handle_stream(db, stream, day):
if goodput is not None:
db['client_goodput_5MiB'].append(goodput)

goodput = __goodput_bps(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI I think this isn't going to find anything since this code is parsing the public tor data, which doesn't have 10 MB downloads. (It shouldn't hurt anything either, though)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I did mention it above: "Please note that I have added the option to parse the same download size from onionperf. This is not needed at this point since we do not have onionperf clients producing these tests. I have mainly added this for consistency and in case we might consider having a few onionperf clients producing these tests in the not-so-distant future."

stream, aka_int(9437184, 9 * 2**20), aka_int(10485760, 10 * 2**20))
if goodput is not None:
db['client_goodput_10MiB'].append(goodput)

elif lb > 0 and cmd > 0:
__store_transfer_time(db, transfer_size_target, ttlb)

Expand Down Expand Up @@ -187,5 +192,7 @@ def __get_timeout_limit(num_bytes):
return 60.0
elif num_bytes == 5242880:
return 120.0
elif num_bytes == 10485760:
return 240.0
else:
return 3600.0
12 changes: 12 additions & 0 deletions tornettools/parse_tgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def extract_tgen_plot_data(args):
__extract_error_rate(args, data, circuittype, startts, stopts)
__extract_client_goodput(args, data, circuittype, startts, stopts)
__extract_client_goodput_5MiB(args, data, circuittype, startts, stopts)
__extract_client_goodput_10MiB(args, data, circuittype, startts, stopts)

def __extract_round_trip_time(args, data, circuittype, startts, stopts):
rtt = __get_round_trip_time(data, circuittype, startts, stopts)
Expand Down Expand Up @@ -99,6 +100,17 @@ def __extract_client_goodput_5MiB(args, data, circuittype, startts, stopts):
outpath = f"{args.prefix}/tornet.plot.data/perfclient_goodput_5MiB.{circuittype}.json"
dump_json_data(client_goodput, outpath, compress=False)

def __extract_client_goodput_10MiB(args, data, circuittype, startts, stopts):
    """Dump the client goodput measured over the 10th mebibyte of a transfer.

    The goodput is requested from ``__get_client_goodput`` for the byte range
    9 MiB .. 10 MiB and written, uncompressed, to
    ``<args.prefix>/tornet.plot.data/perfclient_goodput_10MiB.<circuittype>.json``.

    Args:
        args: parsed command-line arguments; only ``args.prefix`` is read here.
        data: parsed tgen data, passed through unchanged.
        circuittype: circuit type label; also used in the output file name.
        startts: start of the time window, passed through unchanged.
        stopts: end of the time window, passed through unchanged.

    References:
        https://gitlab.torproject.org/jnewsome/sponsor-61-sims/-/issues/15
        https://metrics.torproject.org/reproducible-metrics.html#performance
    """
    # NOTE(review): confirm that the references above apply to the 10 MiB
    # range as well; the public Tor data may not include 10 MiB downloads.

    # Byte offsets delimiting the last mebibyte of a 10 MiB download.
    range_begin = aka_int(9437184, 9 * 2**20)
    range_end = aka_int(10485760, 10 * 2**20)

    goodput_10th_mib = __get_client_goodput(
        data, circuittype, startts, stopts, range_begin, range_end)

    json_path = f"{args.prefix}/tornet.plot.data/perfclient_goodput_10MiB.{circuittype}.json"
    dump_json_data(goodput_10th_mib, json_path, compress=False)

def __get_download_time(data, circuittype, startts, stopts, bytekey):
dt = {'ALL': []}

Expand Down
36 changes: 35 additions & 1 deletion tornettools/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __plot_tornet(args):
__plot_transfer_time(args, circuittype, torperf_dbs, tornet_dbs, "51200")
__plot_transfer_time(args, circuittype, torperf_dbs, tornet_dbs, "1048576")
__plot_transfer_time(args, circuittype, torperf_dbs, tornet_dbs, "5242880")
__plot_transfer_time(args, circuittype, torperf_dbs, tornet_dbs, "10485760")

logging.info(f"Loading {circuittype} tornet goodput data")
tornet_dbs = __load_tornet_datasets(args, __pattern_for_basename(circuittype, 'perfclient_goodput'))
Expand All @@ -88,6 +89,11 @@ def __plot_tornet(args):
logging.info(f"Plotting {circuittype} client goodput [5 MiB]")
__plot_client_goodput_5MiB(args, circuittype, torperf_dbs, tornet_dbs)

logging.info(f"Loading {circuittype} tornet goodput data 10MiB")
tornet_dbs = __load_tornet_datasets(args, __pattern_for_basename(circuittype, 'perfclient_goodput_10MiB'))
logging.info(f"Plotting {circuittype} client goodput [10 MiB]")
__plot_client_goodput_10MiB(args, circuittype, torperf_dbs, tornet_dbs)

logging.info(f"Loading {circuittype} tornet transfer error rate data")
tornet_dbs = __load_tornet_datasets(args, __pattern_for_basename(circuittype, 'error_rate'))
logging.info(f"Plotting {circuittype} transfer error rates")
Expand Down Expand Up @@ -236,8 +242,13 @@ def __plot_transfer_time(args, circuittype, torperf_dbs, tornet_dbs, bytes_key):
# cache the corresponding data in the 'data' keyword for __plot_cdf_figure
for tornet_db in tornet_dbs:
tornet_db['data'] = [tornet_db['dataset'][i][bytes_key] for i, _ in enumerate(tornet_db['dataset']) if bytes_key in tornet_db['dataset'][i]]
# We do not have public Tor data about 10 MiB downloads, but we are still
# interested in calculating the transfer times for the simulated network.
for torperf_db in torperf_dbs:
torperf_db['data'] = [torperf_db['dataset']['download_times'][bytes_key]]
if int(bytes_key) <= 5242880:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok for a one-off, but if the aim was to merge this we'd probably want to avoid hard coding this constant here, and instead just make this generally handle a size missing from the public data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the idea was not to merge this like this. Should I refactor this so that it can be merged?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's fine to do this as a one-off; I was just double-checking.

It'd be helpful to add a note to the PR description that this PR is for discussion purposes and isn't actually a request to merge

torperf_db['data'] = [torperf_db['dataset']['download_times'][bytes_key]]
else:
torperf_db['data'] = []

dbs_to_plot = torperf_dbs + tornet_dbs

Expand Down Expand Up @@ -310,6 +321,29 @@ def __plot_client_goodput_5MiB(args, circuittype, torperf_dbs, tornet_dbs):
yscale="taillog",
xlabel=f"{circuittype} Client Transfer Goodput (Mbit/s): 4 to 5 MiB")

def __plot_client_goodput_10MiB(args, circuittype, torperf_dbs, tornet_dbs):
    """Plot a CDF of client goodput over the last mebibyte (9 to 10 MiB) of
    10 MiB transfers.

    Each entry of ``tornet_dbs`` gets its ``'data'`` key filled with the
    converted goodput values; each plotted entry of ``torperf_dbs`` gets an
    empty ``'data'`` list. The figure is then drawn by ``__plot_cdf_figure``.

    Args:
        args: parsed command-line arguments, forwarded to the plotting helper.
        circuittype: circuit type label, used in the file name and x-axis label.
        torperf_dbs: list of public Tor dataset dicts (ignored for
            'onionservice' circuits).
        tornet_dbs: list of simulated-network dataset dicts; each must carry
            a ``'dataset'`` iterable of numeric sequences.
    """
    # TODO: parse and split onionservice data
    public_dbs = [] if circuittype == 'onionservice' else torperf_dbs

    # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
    for sim_db in tornet_dbs:
        # For compatibility with legacy parsed data, the output of the parse
        # step is in *mebi* bits per second. Convert to *mega* here.
        converted = []
        for series in sim_db['dataset']:
            converted.append([mebibits * 2**20 / 1e6 for mebibits in series])
        sim_db['data'] = converted

    # We do not have public Tor data about 10 MiB downloads, so the public
    # datasets contribute no samples to this figure.
    for public_db in public_dbs:
        public_db['data'] = []

    __plot_cdf_figure(
        args,
        public_dbs + tornet_dbs,
        f'client_goodput_10MiB.{circuittype}',
        yscale="taillog",
        xlabel=f"{circuittype} Client Transfer Goodput (Mbit/s): 9 to 10 MiB",
    )

def __plot_cdf_figure(args, dbs, filename, xscale=None, yscale=None, xlabel=None, ylabel="CDF"):
color_cycle = cycle(DEFAULT_COLORS)
linestyle_cycle = cycle(DEFAULT_LINESTYLES)
Expand Down