Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU version of interface-discontinuity-based wavefield injection (pull request 1706). #1779

Merged
merged 3 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module load intel/2020u4 intelmpi/2020u4

## set number of processors
NPROC=4
GPU_MODE=false # set to true if using GPU

currentdir=`pwd`

Expand Down Expand Up @@ -95,10 +96,19 @@ mpirun -np $NPROC ./bin/xgenerate_databases
# DATABASES_MPI/proc*_wavefield_discontinuity.bin files
mpirun -np $NPROC ../fk_coupling/compute_fk_injection_field

echo
echo " launch solver..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
if ${GPU_MODE}; then
sed -i "/^GPU_MODE/c\GPU_MODE = .true." DATA/Par_file
echo
echo " launch solver on GPU..."
echo
CUDA_VISIBLE_DEVICES=0 mpirun -np $NPROC ./bin/xspecfem3D
else
sed -i "/^GPU_MODE/c\GPU_MODE = .false." DATA/Par_file
echo
echo " launch solver on CPU ..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
fi

# compute reference seismograms using FK
../fk_coupling/compute_fk_receiver
Expand Down
16 changes: 16 additions & 0 deletions src/gpu/compute_forces_viscoelastic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1013,6 +1017,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1039,6 +1047,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand All @@ -1063,6 +1075,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand Down
29 changes: 29 additions & 0 deletions src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,28 @@ __device__ __forceinline__ void load_shared_memory_displ_visco(const int* tx, c

/* ----------------------------------------------------------------------------------------------- */

// add displacement discontinuity in case of wavefield discontinuity

__device__ __forceinline__ void add_displacement_discontinuity_element(
const int* tx, const int* working_element,
realw_const_p d_displ_wd,
const int* ispec_to_elem_wd,
const int* ibool_wd,
realw* sh_displx,
realw* sh_disply,
realw* sh_displz) {
int ispec_wd = ispec_to_elem_wd[(*working_element)]-1;
if (ispec_wd >= 0) {
int offset = ispec_wd * NGLL3_PADDED + (*tx);
int iglob_wd = ibool_wd[offset]-1;
sh_displx[(*tx)] = sh_displx[(*tx)] + d_displ_wd[iglob_wd*3];
sh_disply[(*tx)] = sh_disply[(*tx)] + d_displ_wd[iglob_wd*3 + 1];
sh_displz[(*tx)] = sh_displz[(*tx)] + d_displ_wd[iglob_wd*3 + 2];
}
}

/* ----------------------------------------------------------------------------------------------- */

// loads hprime into shared memory for element

__device__ __forceinline__ void load_shared_memory_hprime(const int* tx,
Expand Down Expand Up @@ -738,6 +760,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_elem_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT){
Expand Down Expand Up @@ -849,6 +875,9 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
load_shared_memory_displ<3>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
}else{
load_shared_memory_displ<1>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
if (is_wavefield_discontinuity) {
add_displacement_discontinuity_element(&tx,&working_element,d_displ_wd,d_ispec_to_elem_wd,d_ibool_wd,sh_tempx,sh_tempy,sh_tempz);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/kernel_cuda.mk
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cuda_kernels_OBJS := \
$O/transfer_surface_to_host_kernel.cuda-kernel.o \
$O/UpdateDispVeloc_kernel.cuda-kernel.o \
$O/UpdatePotential_kernel.cuda-kernel.o \
$O/wavefield_discontinuity_kernel.cuda-kernel.o \
$(EMPTY_MACRO)


24 changes: 24 additions & 0 deletions src/gpu/kernels/kernel_proto.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_element_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT);
Expand Down Expand Up @@ -527,6 +531,26 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou
int backward_simulation) ;


//
// src/gpu/kernels/wavefield_discontinuity_kernel.cu
//

__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
);

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel);


//
// src/gpu/kernels/compute_coupling_ocean_cuda_kernel.cu
//
Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "wavefield_discontinuity_kernel.cu"
41 changes: 41 additions & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
) {
int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x;
int iglob = boundary_to_iglob_wd[id] - 1;
realw mass_in = mass_in_wd[id];
if (id < size) {
accel[iglob*3] = accel[iglob*3] - accel_wd[id*3] * mass_in;
accel[iglob*3 + 1] = accel[iglob*3 + 1] - accel_wd[id*3 + 1] * mass_in;
accel[iglob*3 + 2] = accel[iglob*3 + 2] - accel_wd[id*3 + 2] * mass_in;
}
}

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel) {
int igll = threadIdx.x;
int iface_wd = blockIdx.x + gridDim.x*blockIdx.y;
int i, j, k, ispec, iglob;
realw jacobianw;
if (iface_wd < size) {
ispec = face_ispec_wd[iface_wd] - 1;
i = face_ijk_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)]-1;
j = face_ijk_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)]-1;
k = face_ijk_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)]-1;

iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;

jacobianw = face_jacobian2Dw_wd[INDEX2(NGLL2,igll,iface_wd)];
atomicAdd(&accel[iglob*3], traction_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+1], traction_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+2], traction_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)] * jacobianw);
}
}
14 changes: 14 additions & 0 deletions src/gpu/mesh_constants_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,20 @@ typedef struct mesh_ {
int use_Kelvin_Voigt_damping;
realw* d_Kelvin_Voigt_eta;

// prescribed wavefield discontinuity on an interface
int is_wavefield_discontinuity;
int* d_ispec_to_elem_wd;
int* d_ibool_wd;
int* d_boundary_to_iglob_wd;
realw* d_mass_in_wd;
int* d_face_ijk_wd;
int* d_face_ispec_wd;
realw* d_face_normal_wd;
realw* d_face_jacobian2Dw_wd;
realw* d_displ_wd;
realw* d_accel_wd;
realw* d_traction_wd;

// for option NB_RUNS_FOR_ACOUSTIC_GPU
int* run_number_of_the_source;

Expand Down
49 changes: 49 additions & 0 deletions src/gpu/prepare_mesh_constants_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ void FC_FUNC_(prepare_constants_device,
int* SAVE_SEISMOGRAMS_ACCELERATION,int* SAVE_SEISMOGRAMS_PRESSURE,
int* h_NB_RUNS_ACOUSTIC_GPU,
int* FAULT_SIMULATION,
int* IS_WAVEFIELD_DISCONTINUITY,
int* UNDO_ATTENUATION_AND_OR_PML,
int* PML_CONDITIONS) {

Expand Down Expand Up @@ -420,6 +421,9 @@ void FC_FUNC_(prepare_constants_device,
// Kelvin_voigt initialization
mp->use_Kelvin_Voigt_damping = 0;

// prescribed wavefield discontinuity
mp->is_wavefield_discontinuity = *IS_WAVEFIELD_DISCONTINUITY;

GPU_ERROR_CHECKING("prepare_constants_device");
}

Expand Down Expand Up @@ -1542,6 +1546,51 @@ void FC_FUNC_(prepare_fault_device,
}


/* ----------------------------------------------------------------------------------------------- */

// wavefield discontinuity

/* ----------------------------------------------------------------------------------------------- */

extern EXTERN_LANG
void FC_FUNC_(prepare_wavefield_discontinuity_device,
PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
long* Mesh_pointer,
int* ispec_to_elem_wd,
int* nglob_wd,
int* nspec_wd,
int* ibool_wd,
int* boundary_to_iglob_wd,
realw* mass_in_wd,
int* nfaces_wd,
int* face_ijk_wd,
int* face_ispec_wd,
realw* face_normal_wd,
realw* face_jacobian2Dw_wd) {
TRACE("prepare_wavefield_discontinuity_device");
Mesh* mp = (Mesh*)(*Mesh_pointer);

// arrays
gpuCreateCopy_todevice_int((void**)&mp->d_ispec_to_elem_wd, ispec_to_elem_wd, mp->NSPEC_AB);
gpuCreateCopy_todevice_int((void**)&mp->d_boundary_to_iglob_wd, boundary_to_iglob_wd, (*nglob_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_mass_in_wd, mass_in_wd, (*nglob_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ispec_wd, face_ispec_wd, (*nfaces_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ijk_wd, face_ijk_wd, 3*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_normal_wd, face_normal_wd, NDIM*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_jacobian2Dw_wd, face_jacobian2Dw_wd, NGLL2*(*nfaces_wd));
// global indexing for wavefield discontinuity points (padded)
int size_padded = NGLL3_PADDED * (*nspec_wd);
gpuMalloc_int((void**) &mp->d_ibool_wd, size_padded);
gpuMemcpy2D_todevice_int(mp->d_ibool_wd, NGLL3_PADDED, ibool_wd, NGLL3, NGLL3, (*nspec_wd));

// allocate discontinuity field
int size = NDIM * (*nglob_wd);
gpuMalloc_realw((void**)&(mp->d_displ_wd),size);
gpuMalloc_realw((void**)&(mp->d_accel_wd),size);
size = NDIM * NGLL2 * (*nfaces_wd);
gpuMalloc_realw((void**)&(mp->d_traction_wd),size);
}

/* ----------------------------------------------------------------------------------------------- */

// cleanup
Expand Down
1 change: 1 addition & 0 deletions src/gpu/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ gpu_specfem3D_OBJECTS = \
$O/transfer_fields_cuda.o \
$O/update_displacement_cuda.o \
$O/write_seismograms_cuda.o \
$O/wavefield_discontinuity_cuda.o \
$(EMPTY_MACRO)

gpu_specfem3D_STUBS = \
Expand Down
33 changes: 32 additions & 1 deletion src/gpu/specfem3D_gpu_cuda_method_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,21 @@
int* KELVIN_VOIGT_DAMPING,
realw* Kelvin_Voigt_eta) {}

void FC_FUNC_(prepare_wavefield_discontinuity_device,

Check warning on line 642 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L642

Added line #L642 was not covered by tests
PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
long* Mesh_pointer,
int* ispec_to_elem_wd,
int* nglob_wd,
int* nspec_wd,
int* ibool_wd,
int* boundary_to_iglob_wd,
realw* mass_in_wd,
int* nfaces_wd,
int* face_ijk_wd,
int* face_ispec_wd,
realw* face_normal_wd,
realw* face_jacobian2Dw_wd) {}

Check warning on line 655 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L655

Added line #L655 was not covered by tests

void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer,
int* ACOUSTIC_SIMULATION,
Expand Down Expand Up @@ -675,7 +690,6 @@
GET_SMOOTH_gpu)(long * smooth_pointer,
realw * data_smooth) {}


//
// src/gpu/transfer_fields_cuda.cu
//
Expand Down Expand Up @@ -738,6 +752,12 @@
void FC_FUNC_(transfer_pml_displ_to_device,
TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {}

void FC_FUNC_(transfer_wavefield_discontinuity_to_device,

Check warning on line 755 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L755

Added line #L755 was not covered by tests
TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)(
int* size_point, int* size_face,
realw* displ_wd, realw* accel_wd,
realw* traction_wd, long* Mesh_pointer) {}

Check warning on line 759 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L759

Added line #L759 was not covered by tests

void FC_FUNC_(transfer_b_rmemory_to_device,
TRANSFER_B_RMEMORY_TO_DEVICE)(long* Mesh_pointer,
realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,
Expand Down Expand Up @@ -943,3 +963,14 @@
int* ELASTIC_SIMULATION,
int* USE_TRICK_FOR_BETTER_PRESSURE) {}

//
// src/gpu/wavefield_discontinuity_cuda.cu
//

void FC_FUNC_(wavefield_discontinuity_add_traction_cuda,

Check warning on line 970 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L970

Added line #L970 was not covered by tests
WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points,
int* size_faces,
long* Mesh_pointer){}

Check warning on line 973 in src/gpu/specfem3D_gpu_cuda_method_stubs.c

View check run for this annotation

Codecov / codecov/patch

src/gpu/specfem3D_gpu_cuda_method_stubs.c#L973

Added line #L973 was not covered by tests



Loading
Loading