-
Notifications
You must be signed in to change notification settings - Fork 281
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test/mpi: add p2p benchmarks in test/mpi/bench
Add point-to-point benchmark code in MyDef. The tests warm up automatically and adjust the number of iterations for measurement accuracy. They produce latency measurements with standard deviations and equivalent bandwidths. To run: mydef_page p2p.def # -> p2p_latency.c p2p_bw.c; mpicc p2p_latency.c && mpirun -n 2 ./a.out; mpicc p2p_bw.c && mpirun -n 2 ./a.out. Alternatively, use mydef_run (uses settings from config): mydef_run p2p.def
- Loading branch information
Showing
4 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
module: c | ||
output_dir: out | ||
CC: mpicc | ||
run: mpirun -n 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* bench_frame : boilerplate for mpi program | ||
* measure(iter) : measures `tf_dur` for $(iter) iterations | ||
* run_stat(N, var) : run N measurements and obtain (avg, std) in sum1, sum2 | ||
* warm_up(iter, dur): repeat until measurements (iter, dur) stabilize | ||
* report_latency(N) : print a line of latency result | ||
*/ | ||
|
||
# Boilerplate for an MPI benchmark program: emits the includes, the global
# rank/size variables and a main() that initializes MPI, allocates the message
# buffer, runs the optional report_title subcode followed by the page's main
# code, and then shuts MPI down.
subcode: bench_frame
    $include stdio
    $include stdlib
    $include mpi

    $global grank, gsize: int

    $function main
        int errs = 0;

        MPI_Init(NULL, NULL);

        MPI_Comm_rank(MPI_COMM_WORLD, &grank);
        MPI_Comm_size(MPI_COMM_WORLD, &gsize);

        MPI_Comm comm = MPI_COMM_WORLD;
        char *buf = malloc($(MAX_MSG));
        # buf is handed to the benchmark code below, so stop right away
        # instead of passing a NULL buffer on when the allocation fails
        $if buf == NULL
            printf("! Failed to allocate %d bytes for the message buffer !\n", $(MAX_MSG));
            MPI_Abort(MPI_COMM_WORLD, 1);

        $call @report_title
        $call main

        # the page's main code has finished; release the buffer before shutdown
        free(buf);
        MPI_Finalize();

        return errs
|
||
# Shared macros:
#   MAX_MSG    - size passed to malloc for the message buffer and upper bound of the size loops
#   data       - the (buffer, count, datatype) triple spliced into the MPI calls
#   use_double - presumably selects double for float-typed variables (MyDef setting; confirm)
macros:
    MAX_MSG: 5000000
    data: buf, size, MPI_CHAR
    use_double: 1
|
||
#---------------------------------------- | ||
# Type registration: associates the name prefix `comm` with MPI_Comm.
subcode: _autoload
    $register_prefix(comm) MPI_Comm
|
||
# Runs BLOCK once for size 0 and then for 1, 2, 4, ... while size < MAX_MSG.
# MSG_SIZE is set to `size` for the enclosed code.
subcode: foreach_size
    $for int size = 0; size < $(MAX_MSG); size = (size > 0) ? size * 2 : 1
        $(set:MSG_SIZE=size)
        BLOCK
|
||
# Times $(iter) repetitions of BLOCK with MPI_Wtime.
# Result: tf_dur holds the elapsed time of the whole loop (not per iteration).
subcode: measure(iter)
    # timestamp taken immediately before the loop
    tf_start = MPI_Wtime()
    $for 0:$(iter)
        BLOCK
    # elapsed time for all $(iter) repetitions
    tf_dur = MPI_Wtime() - tf_start
|
||
# Runs BLOCK $(N) times; BLOCK is expected to leave one sample in $(var).
# Result: sum1 holds the mean of the samples and sum2 their standard
# deviation, computed as sqrt(mean of squares - square of mean).
subcode: run_stat(N, var)
    $my double sum1=0, double sum2=0
    $for 0:$(N)
        BLOCK
        # accumulate the sample and its square
        sum1 += $(var)
        sum2 += $(var) * $(var)
    sum1 /= $(N)
    sum2 /= $(N)
    sum2 -= sum1 * sum1
    # with nearly identical samples, rounding can leave the variance slightly
    # negative; clamp it so sqrt() never produces NaN
    $if sum2 < 0
        sum2 = 0
    sum2 = sqrt(sum2)
|
||
# Repeats BLOCK until the measurement has settled. BLOCK is expected to run
# $(iter) iterations and store the elapsed time in $(dur).
#   - $(iter) starts at 2 and is raised to MIN_ITER, the iteration count that
#     would stretch one run to 0.001 in the units of $(dur), whenever it is
#     below that value.
#   - The loop ends after 10 runs whose duration exceeded the previous run's,
#     i.e. once the duration is no longer monotonically decreasing.
subcode: warm_up(iter, dur)
    $(set:MIN_ITER=(int) ($(iter) * 0.001 / $(dur)))
    $(iter) = 2
    $my double last_dur = 1.0
    $my int num_best = 0
    $while num_best < 10
        BLOCK
        # a run shorter than the timer resolution can read as zero, and
        # MIN_ITER divides by the duration; lengthen the run and retry
        $if $(dur) <= 0
            $(iter) *= 2
            num_best = 0
            continue
        $if $(iter) < $(MIN_ITER)
            # too few iterations for a meaningful timing: raise and start over
            $(iter) = $(MIN_ITER)
            num_best = 0
            continue
        # check that the duration is no longer monotonically decreasing
        $if $(dur) > last_dur
            num_best++
        last_dur = $(dur)
|
||
# Prints one result line from sum1 / sum2 (as left by run_stat).
# Both values are divided by $(N) and scaled by 1e6. When the MSG_SIZE macro
# is defined, the line also carries the message size and MSG_SIZE / latency.
subcode: report_latency(N)
    tf_latency = sum1 / ($(N)) * 1e6
    tf_sigma = sum2 / ($(N)) * 1e6
    $(if:MSG_SIZE)
        # message size known: add the size and bandwidth columns
        tf_bw = $(MSG_SIZE) / tf_latency
        printf(" %10d %10.3f %6.3f %10.3f\n", $(MSG_SIZE), tf_latency, tf_sigma, tf_bw)
    $(else)
        # no message size: latency and sigma only
        printf(" %10.3f %6.3f\n", tf_latency, tf_sigma)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Defines following functions: | ||
* bench_p2p | ||
* bench_send, bench_warmup | ||
* bench_recv | ||
* | ||
* For each measurement - | ||
* First, the sender tells the receiver the `iter` parameter. `iter = 0` means to quit. | ||
* Each iteration runs `send_side` on the sender and `recv_side` on the receiver; the time measured on the sender side is taken as the latency measurement. | ||
* | ||
* Caller page defines - | ||
* subcode: send_side, recv_side | ||
* macro: | ||
* params: function parameters for bench_p2p etc. | ||
* MSG_SIZE: if defined report_latency will include bw | ||
* MULTIPLICITY: divisor for each measurement | ||
*/ | ||
|
||
# Type registrations and message tags used by the p2p benchmark functions.
subcode: _autoload
    # SYNC_TAG carries the iteration-count handshake, TAG the benchmark payload
    $define SYNC_TAG 100
    $define TAG 0
    # src and dst are ranks
    $register_name(src) int
    $register_name(dst) int
|
||
# Expanded into main() by bench_frame after MPI_Init: checks that exactly two
# processes are running and lets rank 0 print the column header.
subcode: report_title
    $if gsize != 2
        printf("! Test $(_pagename) requires 2 processes !\n");
        # MPI has already been initialized at this point, so shut it down
        # properly before leaving main
        MPI_Finalize();
        return 0
    $if grank == 0
        printf("# Test $(_pagename): msg-size avg-latency sigma avg-bandwidth\n")
|
||
# Runs one point-to-point benchmark between ranks src and dst on comm.
#   src side: warms up to pick the iteration count, takes $(REPEAT) timed
#             samples (each divided by iter), prints the result line and then
#             tells the receiver to stop.
#   dst side: serves the sender's requests in bench_recv.
#   other ranks: do nothing.
# REPEAT defaults to 20 and MULTIPLICITY to 1 unless the page defines them.
fncode: bench_p2p(comm, src, dst, @params)
    $(if:!MULTIPLICITY)
        $(set:MULTIPLICITY=1)
    $(if:!REPEAT)
        $(set:REPEAT=20)

    int rank;
    MPI_Comm_rank(comm, &rank)

    $if rank == src
        iter = bench_warmup(comm, dst, $(params))
        &call run_stat, $(REPEAT), tf_latency
            # one sample: duration of iter iterations, reduced to per-iteration time
            tf_latency = bench_send(iter, comm, dst, $(params))
            tf_latency /= iter
        $call report_latency, $(MULTIPLICITY)
        $call send_stop
    $elif rank == dst
        bench_recv(comm, src, $(params))
|
||
# Tells the receiver to leave its loop: bench_recv quits on an iteration count of 0.
subcode: send_stop
    iter = 0;
    MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm)
|
||
#---------------------------------------- | ||
# Sender half of one measurement.
# Announces the iteration count to the receiver, then times `iter` repetitions
# of the page's send_side subcode.
# Returns: tf_dur, the elapsed time of the whole loop as set by `measure`.
fncode: bench_send(int iter, comm, dst, @params)
    # handshake: the receiver needs to know how many iterations to serve
    MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm);

    &call measure, iter
        $call @send_side

    return tf_dur
|
||
# Receiver half of the benchmark.
# Loops forever: receives an iteration count from src on SYNC_TAG, then runs
# the page's recv_side subcode that many times. A count of 0 ends the loop.
fncode: bench_recv(comm, src, @params)
    $while 1
        int iter;
        # handshake: wait for the sender's iteration count
        MPI_Recv(&iter, 1, MPI_INT, src, SYNC_TAG, comm, MPI_STATUS_IGNORE);
        $if iter == 0
            # the sender asked us to quit
            break
        $for i=0:iter
            $call @recv_side
|
||
# Sender-side warm-up: drives warm_up with bench_send as the measured block.
# Returns: the iteration count that warm_up settled on.
fncode: bench_warmup(comm, dst, @params): int
    &call warm_up, iter, tf_dur
        # each warm-up round is a full sender/receiver measurement
        tf_dur = bench_send(iter, comm, dst, $(params))
    return iter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* Instructions: | ||
* mydef_page p2p.def # -> p2p_latency.c p2p_bw.c | ||
* mpicc p2p_latency.c && mpirun -n 2 ./a.out | ||
* mpicc p2p_bw.c && mpirun -n 2 ./a.out | ||
* | ||
* Reference the output C code or bench_{frame,p2p}.def. | ||
*/ | ||
|
||
include: macros/bench_frame.def | ||
include: macros/bench_p2p.def | ||
|
||
# Type registrations for the benchmark parameters used by the pages in this file.
subcode: _autoload
    $register_name(batch_size) int
    $register_name(size) int
    $register_name(buf) void *
|
||
# Ping-pong latency test between ranks 0 and 1.
# One iteration is a full round trip (send + receive), hence MULTIPLICITY 2.
# Runs once with size 0 and then for sizes 1, 2, 4, ... below MAX_MSG.
page: p2p_latency, bench_frame
    MULTIPLICITY: 2
    MSG_SIZE: size
    params: buf, size

    bench_p2p(comm, 0, 1, buf, 0)
    $for int size = 1; size < $(MAX_MSG); size *= 2
        bench_p2p(comm, 0, 1, buf, size)

    subcode: send_side
        # ping, then wait for the pong
        MPI_Send($(data), dst, TAG, comm);
        MPI_Recv($(data), dst, TAG, comm, MPI_STATUS_IGNORE);

    subcode: recv_side
        # wait for the ping, then answer with the pong
        MPI_Recv($(data), src, TAG, comm, MPI_STATUS_IGNORE);
        MPI_Send($(data), src, TAG, comm);
|
||
# Bandwidth test between ranks 0 and 1.
# Each iteration posts batch_size nonblocking messages, waits for all of them
# and finishes with a zero-byte acknowledgement from the receiver.
# MULTIPLICITY is batch_size, so the reported time is per message.
page: p2p_bw, bench_frame
    params: buf, size, batch_size
    MSG_SIZE: size
    MULTIPLICITY: batch_size
    MAX_BATCH_SIZE: 64

    $for int size = 1; size < $(MAX_MSG); size *= 2
        bench_p2p(comm, 0, 1, buf, size, $(MAX_BATCH_SIZE))

    subcode: send_side
        # reqs has MAX_BATCH_SIZE slots; batch_size must not exceed that
        $my MPI_Request reqs[$(MAX_BATCH_SIZE)]
        $for j=0:batch_size
            MPI_Isend($(data), dst, TAG, comm, &reqs[j])
        MPI_Waitall(batch_size, reqs, MPI_STATUSES_IGNORE)
        # zero-byte acknowledgement; a real datatype is passed because
        # MPI_DATATYPE_NULL is not a valid datatype for communication calls
        MPI_Recv(NULL, 0, MPI_CHAR, dst, TAG, comm, MPI_STATUS_IGNORE)

    subcode: recv_side
        # reqs has MAX_BATCH_SIZE slots; batch_size must not exceed that
        $my MPI_Request reqs[$(MAX_BATCH_SIZE)]
        $for j=0:batch_size
            MPI_Irecv($(data), src, TAG, comm, &reqs[j])
        MPI_Waitall(batch_size, reqs, MPI_STATUSES_IGNORE)
        # zero-byte acknowledgement back to the sender
        MPI_Send(NULL, 0, MPI_CHAR, src, TAG, comm)