Skip to content

Commit

Permalink
Add Space Filling Curve
Browse files Browse the repository at this point in the history
Add Space Filling Curve as a load balance option and clean up some of the other files removing typos and removing more references to deleted code.
  • Loading branch information
ctvaugh authored Apr 23, 2020
1 parent 48e8fc0 commit 18dfcd7
Show file tree
Hide file tree
Showing 15 changed files with 912 additions and 488 deletions.
4 changes: 3 additions & 1 deletion ref/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ EXEC = miniAMR.x

OBJS = block.o check_sum.o comm_block.o comm.o comm_parent.o comm_refine.o \
comm_util.o driver.o init.o main.o move.o pack.o plot.o profile.o \
rcb.o refine.o stencil.o util.o
rcb.o refine.o sfc.o stencil.o util.o

$(EXEC): $(OBJS)
$(LD) $(LDFLAGS) -o $@ $(OBJS) $(LDLIBS)
Expand Down Expand Up @@ -54,6 +54,8 @@ rcb.o: block.h comm.h proto.h timer.h

refine.o: block.h comm.h proto.h timer.h

sfc.o: block.h comm.h proto.h timer.h

stencil.o: block.h comm.h proto.h

util.o: block.h comm.h proto.h timer.h
14 changes: 12 additions & 2 deletions ref/README
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,18 @@ The list of arguments and their defaults is as follows:

--reorder - ordering of blocks
This controls whether the blocks are ordered by the RCB algorithm
or by a natural ordering of the processors. The default is 1 which
selects the RCB ordering and the natural ordering is 0.
or by a natural ordering of the processors. A setting of 1 selects
the RCB ordering and the natural ordering is 0. The default depends
on which load balance algorithm is chosen. If the RCB algorithm is
chosen then the default is the RCB ordering and if the space filling
curve algorithm is chosen then the default is the natural ordering.

--rcb or --sfc - chooses the algorithm for load balancing
These two options choose the load balance algorithm. The Recursive
Coordinate Bisection (RCB) algorithm is the default, but the option
is included for completeness. The other option is the Space Filling
Curve (SFC). This option is based on a Morton style space filling
curve.

--npx - number of processors in the x direction
--npy - number of processors in the y direction
Expand Down
3 changes: 3 additions & 0 deletions ref/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void split_blocks(void)
num_refined++;
pp = &parents[p];
pp->number = bp->number;
pp->num_prime = bp->num_prime;
pp->level = bp->level;
pp->parent = bp->parent;
pp->parent_node = bp->parent_node;
Expand Down Expand Up @@ -138,6 +139,7 @@ void split_blocks(void)
(p2[level+1]*npy*init_block_y) +
(2*yp+j1))*(p2[level+1]*npx*init_block_x) +
2*xp + i1 + block_start[level+1];
bp1->num_prime = bp->num_prime + o*p8[num_refine - level - 1];
add_sorted_list(m, bp1->number, (level+1));
bp1->cen[0] = bp->cen[0] +
(2*i1 - 1)*p2[num_refine - level - 1];
Expand Down Expand Up @@ -375,6 +377,7 @@ void consolidate_blocks(void)
local_num_blocks[level]++;
local_num_blocks[level+1] -= 8;
bp->number = pp->number;
bp->num_prime = pp->num_prime;
pp->number = -1;
bp->level = pp->level;
bp->parent = pp->parent;
Expand Down
31 changes: 21 additions & 10 deletions ref/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typedef long long num_sz;

typedef struct {
num_sz number;
num_sz num_prime;
int level;
int refine;
int new_proc;
Expand All @@ -47,6 +48,7 @@ block *blocks;

typedef struct {
num_sz number;
num_sz num_prime;
int level;
num_sz parent; // -1 if original block
int parent_node;
Expand All @@ -73,38 +75,40 @@ int max_num_blocks;
int num_refine;
int uniform_refine;
int x_block_size, y_block_size, z_block_size;
int num_cells;
int num_vars;
int mat;
int comm_vars;
int init_block_x, init_block_y, init_block_z;
int reorder;
int npx, npy, npz;
int inbalance;
int refine_freq;
int report_diffusion;
int checksum_freq;
int stages_per_ts;
int error_tol;
int num_tsteps;
int use_time;
double end_time;
int stages_per_ts;
int checksum_freq;
int stencil;
int report_perf;
int plot_freq;
int num_objects;
int lb_opt;
int block_change;
int code;
int permute;
int nonblocking;
int refine_ghost;
int use_time;
double end_time;
int send_faces;
int change_dir;
int group_blocks;
int limit_move;
int send_faces;
int use_rcb;

int first;
int *dirs;

int num_cells;
int mat;
int max_num_parents;
int num_parents;
int max_active_parent;
Expand All @@ -130,7 +134,6 @@ double total_fp_divs;
double total_fp_adds;
double total_fp_muls;

int num_objects;
typedef struct {
int type;
int bounce;
Expand All @@ -148,10 +151,18 @@ int num_dots;
int max_num_dots;
int max_active_dot;
typedef struct {
int cen[3];
num_sz number;
int n;
int proc;
int new_proc;
int cen[3];
} dot;
dot *dots;
// Record type for the non-RCB (space filling curve) load balance path:
// the spots array is allocated in allocate() instead of dots when use_rcb
// is 0, and every entry's number is set to -1 at allocation time.
// Same fields as dot except that num_prime takes the place of cen[3].
// NOTE(review): the per-field meanings below are inferred from the
// like-named block fields and are not shown in this view -- confirm.
typedef struct {
num_sz number;     // -1 right after allocation; presumably a block number -- TODO confirm
num_sz num_prime;  // presumably a copy of the block's num_prime -- TODO confirm
int n;
int proc;
int new_proc;
} spot;
// Array of max_num_dots spot entries, allocated only when use_rcb is 0.
spot *spots;
9 changes: 5 additions & 4 deletions ref/driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void driver(void)
comm(start, number, comm_stage);
t4 = timer();
timer_comm_all += t4 - t3;
for (var = start; var < (start+number); var ++) {
for (var = start; var < (start+number); var++) {
stencil_driver(var, calc_stage);
t3 = timer();
timer_calc_all += t3 - t4;
Expand Down Expand Up @@ -120,11 +120,12 @@ void driver(void)
delta = calc_time_step();
if (sim_time >= end_time)
done = 1;
else
sim_time += delta;
} else
if (ts >= num_tsteps)
done = 1;

if (!done)
sim_time += delta;
}

end_time = sim_time;
Expand Down Expand Up @@ -160,7 +161,7 @@ double calc_time_step(void)
}
if (done)
break;
for (done = dir = 0; dir < 3; dir++) {
for (dir = 0; dir < 3; dir++) {
tmp = (fabs(op->move[dir]) + fabs(op->inc[dir]))*inv_cell_size[dir];
if (tmp > delta)
delta = tmp;
Expand Down
10 changes: 8 additions & 2 deletions ref/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,13 @@ void init(void)
max_mesh_size = mesh_size[2];
if ((num_pes+1) > max_mesh_size)
max_mesh_size = num_pes + 1;
bin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
if (use_rcb) {
bin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
gbin = (int *) ma_malloc(max_mesh_size*sizeof(int), __FILE__, __LINE__);
} else {
bin = (int *) ma_malloc(global_active*sizeof(int), __FILE__, __LINE__);
gbin = (int *) ma_malloc(global_active*sizeof(int), __FILE__, __LINE__);
}
if (stencil == 7)
f = 0;
else
Expand All @@ -217,6 +222,7 @@ void init(void)
bp = &blocks[o];
bp->level = 0;
bp->number = n;
bp->num_prime = n*p8[num_refine];
bp->parent = -1;
bp->parent_node = my_pe;
bp->cen[0] = i1*size + size/2;
Expand Down
33 changes: 25 additions & 8 deletions ref/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
int main(int argc, char** argv)
{
int i, ierr, object_num;
int params[38];
int params[39];
double *objs;
#include "param.h"

Expand Down Expand Up @@ -126,6 +126,10 @@ int main(int argc, char** argv)
limit_move = atoi(argv[++i]);
else if (!strcmp(argv[i], "--send_faces"))
send_faces = 1;
else if (!strcmp(argv[i], "--rcb"))
use_rcb = 1; // default, but included for completeness
else if (!strcmp(argv[i], "--sfc"))
use_rcb = 0;
else if (!strcmp(argv[i], "--num_objects")) {
num_objects = atoi(argv[++i]);
objects = (object *) ma_malloc(num_objects*sizeof(object),
Expand Down Expand Up @@ -165,6 +169,9 @@ int main(int argc, char** argv)
MPI_Abort(MPI_COMM_WORLD, -1);
}

if (reorder == -1)
reorder = use_rcb;

if (check_input())
MPI_Abort(MPI_COMM_WORLD, -1);

Expand Down Expand Up @@ -209,8 +216,9 @@ int main(int argc, char** argv)
params[35] = group_blocks;
params[36] = limit_move;
params[37] = send_faces;
params[38] = use_rcb;

MPI_Bcast(params, 38, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(params, 39, MPI_INT, 0, MPI_COMM_WORLD);

objs = (double *) ma_malloc(14*num_objects*sizeof(double),
__FILE__, __LINE__);
Expand All @@ -234,7 +242,7 @@ int main(int argc, char** argv)
MPI_Bcast(objs, (14*num_objects), MPI_DOUBLE, 0, MPI_COMM_WORLD);
free(objs);
} else {
MPI_Bcast(params, 38, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(params, 39, MPI_INT, 0, MPI_COMM_WORLD);
max_num_blocks = params[ 0];
num_refine = params[ 1];
uniform_refine = params[ 2];
Expand Down Expand Up @@ -273,6 +281,7 @@ int main(int argc, char** argv)
group_blocks = params[35];
limit_move = params[36];
send_faces = params[37];
use_rcb = params[38];

objects = (object *) ma_malloc(num_objects*sizeof(object),
__FILE__, __LINE__);
Expand Down Expand Up @@ -366,6 +375,8 @@ void print_help_message(void)
printf("--group_blocks - change the RCB algorithm so that a group of blocks with the same center all get put onto the same side of a cut\n");
printf("--limit_move - limit the number of blocks that can be moved during load balance (number that is a percentage of the total number of blocks)\n");
printf("--send_faces - send each face individually instead of packing all faces going to a rank together\n");
printf("--rcb - use RCB algorithm for load balancing (default)\n");
printf("--sfc - use Space Filling Curve algorithm for load balancing\n");
printf("--num_objects - (>= 0) number of objects to cause refinement\n");
printf("--object - type, position, movement, size, size rate of change\n");

Expand Down Expand Up @@ -420,9 +431,15 @@ void allocate(void)
parents[n].number = -1;

max_num_dots = 2*max_num_blocks; // Guess at number needed
dots = (dot *) ma_malloc(max_num_dots*sizeof(dot), __FILE__, __LINE__);
for (n = 0; n < max_num_dots; n++)
dots[n].number = -1;
if (use_rcb) {
dots = (dot *) ma_malloc(max_num_dots*sizeof(dot), __FILE__, __LINE__);
for (n = 0; n < max_num_dots; n++)
dots[n].number = -1;
} else {
spots = (spot *) ma_malloc(max_num_dots*sizeof(spot), __FILE__, __LINE__);
for (n = 0; n < max_num_dots; n++)
spots[n].number = -1;
}

grid_sum = (double *)ma_malloc(num_vars*sizeof(double), __FILE__, __LINE__);

Expand Down Expand Up @@ -529,8 +546,8 @@ void allocate(void)
if (num_refine) {
s_buf_size = (int) (0.10*((double)max_num_blocks))*comm_vars*
(x_block_size+2)*(y_block_size+2)*(z_block_size+2);
if (s_buf_size < (num_vars*x_block_size*y_block_size*z_block_size + 47))
s_buf_size = num_vars*x_block_size*y_block_size*z_block_size + 47;
if (s_buf_size < (num_vars*x_block_size*y_block_size*z_block_size + 49))
s_buf_size = num_vars*x_block_size*y_block_size*z_block_size + 49;
r_buf_size = 5*s_buf_size;
} else {
i = init_block_x*(x_block_size+2);
Expand Down
6 changes: 4 additions & 2 deletions ref/pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ void pack_block(int n)
send_ll[1] = (long long) (-2 - bp->parent);
else
send_ll[1] = (long long) bp->parent;
l = 4;
send_ll[2] = (long long) bp->num_prime;
l = 6;
send_int[l++] = bp->level;
send_int[l++] = bp->refine;
send_int[l++] = bp->parent_node;
Expand Down Expand Up @@ -81,7 +82,8 @@ void unpack_block(int n)

bp->number = (num_sz) recv_ll[0];
bp->parent = (num_sz) recv_ll[1];
l = 4;
bp->num_prime = (num_sz) recv_ll[2];
l = 6;
bp->level = recv_int[l++];
bp->refine = recv_int[l++];
bp->parent_node = recv_int[l++];
Expand Down
5 changes: 3 additions & 2 deletions ref/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ comm_vars = 0;
init_block_x = 1;
init_block_y = 1;
init_block_z = 1;
reorder = 1;
reorder = -1;
npx = 1;
npy = 1;
npz = 1;
Expand All @@ -49,7 +49,7 @@ end_time = 0.0;
stages_per_ts = 20;
checksum_freq = 5;
stencil = 7;
report_perf = 4;
report_perf = 12;
plot_freq = 0;
num_objects = 0;
lb_opt = 1;
Expand All @@ -62,3 +62,4 @@ change_dir = 0;
group_blocks = 0;
limit_move = 0;
send_faces = 0;
use_rcb = 1;
Loading

0 comments on commit 18dfcd7

Please sign in to comment.