From 679618e51863a28d762b2f3f8cad5db28c912a62 Mon Sep 17 00:00:00 2001 From: Joseph Zhang Date: Thu, 12 Dec 2024 09:51:19 -0500 Subject: [PATCH] Edited Jerome's commit on shared mem communicator. Tested with SH_MEM_COMM OFF; can compile with it on --- cmake/SCHISM.local.build | 1 + src/CMakeLists.txt | 1 + src/Core/schism_glbl.F90 | 5 +++- src/Core/schism_msgp.F90 | 11 +++++-- src/Hydro/misc_subs.F90 | 15 ++++++---- src/Hydro/schism_finalize.F90 | 2 +- src/Hydro/schism_init.F90 | 54 +++++++++++++++++------------------ src/Hydro/schism_step.F90 | 21 +++++++------- 8 files changed, 63 insertions(+), 47 deletions(-) diff --git a/cmake/SCHISM.local.build b/cmake/SCHISM.local.build index d65271f0d..63c31c2d7 100644 --- a/cmake/SCHISM.local.build +++ b/cmake/SCHISM.local.build @@ -13,6 +13,7 @@ ##################################################################### #Leave this on set(BLD_STANDALONE ON CACHE BOOLEAN "SCHISM standalone") +set(SH_MEM_COMM ON CACHE BOOLEAN "Use shared memory communicator") #Default is NO_PARMETIS=OFF, i.e. use ParMETIS set(NO_PARMETIS OFF CACHE BOOLEAN "Turn off ParMETIS") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8718685b..2cbdc33c6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -76,6 +76,7 @@ define_opt(USE_AGE "Enable age tracer module" OFF) define_opt(ICM_PH "Enable pH module in ICM" OFF) define_opt(USE_DVD "Enable DVD module" OFF) define_opt(BLD_STANDALONE "Standalone build mode" ON) +define_opt(SH_MEM_COMM "Shared mem communicator" OFF) define_opt(BUILD_TOOLS "Build pschism and all the utilities" ON) if(NOT DEFINED TVD_LIM) diff --git a/src/Core/schism_glbl.F90 b/src/Core/schism_glbl.F90 index d2f7f25cb..0e4066f0f 100644 --- a/src/Core/schism_glbl.F90 +++ b/src/Core/schism_glbl.F90 @@ -388,7 +388,10 @@ module schism_glbl &ath(:,:,:,:),carea(:),clen(:),eta_mean(:),q_block(:),vnth_block(:,:), & &dir_block(:,:),q_block_lcl(:) real(4),save,allocatable :: ath2(:,:,:,:,:) !used to read *.nc for b.c. time series -#ifndef SH_MEM_COMM +#ifdef SH_MEM_COMM + !Use more efficient share mem communicator for I/O + real(4),public,save,pointer :: ath3(:,:,:,:) +#else real(4),save,allocatable :: ath3(:,:,:,:) !used to read source/sink inputs #endif diff --git a/src/Core/schism_msgp.F90 b/src/Core/schism_msgp.F90 index abe8582bb..14214485b 100644 --- a/src/Core/schism_msgp.F90 +++ b/src/Core/schism_msgp.F90 @@ -39,6 +39,11 @@ module schism_msgp &ns_global,ns,nsg,nsa,islg,isgl,isdel,isidenode, & &errmsg,fdb,lfdb,ntracers,msc2,mdc2,i34,nea2, & &ielg2,iegl2,is_inter,iside_table,in_dir,out_dir,len_in_dir,len_out_dir + +#ifdef SH_MEM_COMM + use iso_c_binding, only: c_ptr, c_f_pointer +#endif + implicit none !#ifndef USE_MPIMODULE include 'mpif.h' @@ -91,10 +96,12 @@ module schism_msgp #ifdef SH_MEM_COMM ! variables supporting shared memory communications - integer,public,save :: h_win ! handle for shared window for ath3 + integer,public,save :: comm_node !communicator for SM + integer,public,save :: nproc_node, myrank_node + integer,public,save :: h_win ! handle for shared window for ath3 etc integer(KIND=MPI_ADDRESS_KIND),public,save :: win_size TYPE(C_PTR),public,save :: c_window_ptr - real(4),public,save,pointer :: ath3(:,:,:,:) +! real(4),public,save,pointer :: ath3(:,:,:,:) integer,public,save :: disp_unit ! displacement stride in shared window #endif ! SH_MEM_COMM diff --git a/src/Hydro/misc_subs.F90 b/src/Hydro/misc_subs.F90 index 8ae6e3e0b..d3eb88d63 100644 --- a/src/Hydro/misc_subs.F90 +++ b/src/Hydro/misc_subs.F90 @@ -720,12 +720,13 @@ subroutine other_hot_init(time) ath3(:,1,1,1:2)=0.d0 ath3(:,1,1,3)=-9999.d0 -#else ! USE_NWM_BMI +#else /*USE_NWM_BMI*/ + #ifdef SH_MEM_COMM if(if_source==1.and.myrank_node==0) then !ASCII -#else ! SH_MEM_COMM +#else if(if_source==1.and.myrank==0) then !ASCII -#endif ! SH_MEM_COMM +#endif if(nsources>0) then open(63,file=in_dir(1:len_in_dir)//'vsource.th',status='old') !values (>=0) in m^3/s rewind(63) @@ -778,9 +779,9 @@ subroutine other_hot_init(time) #ifdef SH_MEM_COMM if(if_source==-1.and.myrank_node==0) then !nc -#else ! SH_MEM_COMM +#else if(if_source==-1.and.myrank==0) then !nc -#endif ! SH_MEM_COMM +#endif if(nsources>0) then ninv=time/th_dt3(1) th_time3(1,1)=dble(ninv)*th_dt3(1) @@ -819,11 +820,13 @@ subroutine other_hot_init(time) ! Bcast if(if_source/=0) then + !First 2 vars are bcast from rank 0 of comm, which must be a member of myrank_node=0? call mpi_bcast(th_dt3,nthfiles3,rtype,0,comm,istat) call mpi_bcast(th_time3,2*nthfiles3,rtype,0,comm,istat) #ifndef SH_MEM_COMM + !For share mem, ath3 is already filled call mpi_bcast(ath3,max(1,nsources,nsinks)*ntracers*2*nthfiles3,MPI_REAL4,0,comm,istat) -#endif ! SH_MEM_COMM +#endif endif #endif /*USE_NWM_BMI*/ diff --git a/src/Hydro/schism_finalize.F90 b/src/Hydro/schism_finalize.F90 index 78a5825fc..b63010a95 100644 --- a/src/Hydro/schism_finalize.F90 +++ b/src/Hydro/schism_finalize.F90 @@ -119,7 +119,7 @@ subroutine schism_finalize #ifdef SH_MEM_COMM call mpi_win_free(h_win,ierr) ! free ath3 shared memory window -#endif ! SH_MEM_COMM +#endif if(ihydraulics/=0) call finalize_hydraulic_structures diff --git a/src/Hydro/schism_init.F90 b/src/Hydro/schism_init.F90 index ba862549d..221f8fbf4 100644 --- a/src/Hydro/schism_init.F90 +++ b/src/Hydro/schism_init.F90 @@ -32,7 +32,7 @@ subroutine schism_init(iorder,indir,iths,ntime) #ifdef SH_MEM_COMM use iso_c_binding, only: c_ptr, c_f_pointer -#endif ! SH_MEM_COMM +#endif #ifdef USE_PAHM use ParWind, only : ReadCsvBestTrackFile @@ -2822,13 +2822,13 @@ subroutine schism_init(iorder,indir,iths,ntime) if(iorder==0) then #ifdef SH_MEM_COMM allocate(ieg_sink(max(1,nsinks)),stat=istat) - if(istat/=0) call parallel_abort('INIT: ieg_sink failure') - ! On each node, rank 0 allocates the storage, other ranks allocate with size zero and get a pointer + if(istat/=0) call parallel_abort('INIT: ieg_sink failure(1)') + + !On each node, rank 0 allocates the storage, other ranks allocate with size zero and get a pointer disp_unit = 4 ! size of real(4) If (myrank_node==0) THEN win_size = disp_unit * max(1,nsources,nsinks) * ntracers * 2 * nthfiles3 - call MPI_Win_allocate_shared(win_size, disp_unit, MPI_INFO_NULL, comm_node, - c_window_ptr, h_win, istat) + call MPI_Win_allocate_shared(win_size, disp_unit, MPI_INFO_NULL, comm_node, c_window_ptr, h_win, istat) if(istat/=0) call parallel_abort('INIT: MPI_Win_allocate failed, node rank 0') ELSE win_size = 0 @@ -2836,13 +2836,13 @@ subroutine schism_init(iorder,indir,iths,ntime) if(istat/=0) call parallel_abort('INIT: MPI_Win_allocate failed, node rank>0') call MPI_Win_shared_query(h_win, 0, win_size, disp_unit, c_window_ptr, istat) if(istat/=0) call parallel_abort('INIT: MPI_Win_shared_query failed, node rank>0') - ENDIF - ! point ath3 array at the shared buffer + ENDIF !myrank_node + ! point ath3 array at the shared buffer call C_F_POINTER(c_window_ptr, ath3, SHAPE = [max(1,nsources,nsinks),ntracers,2,nthfiles3]) -#else ! SH_MEM_COMM +#else /*SH_MEM_COMM*/ allocate(ieg_sink(max(1,nsinks)),ath3(max(1,nsources,nsinks),ntracers,2,nthfiles3),stat=istat) if(istat/=0) call parallel_abort('INIT: ieg_sink failure') -#endif ! SH_MEM_COMM +#endif /*SH_MEM_COMM*/ endif if(myrank==0) then @@ -2857,11 +2857,11 @@ subroutine schism_init(iorder,indir,iths,ntime) endif !if_source if(if_source==-1) then !nc -#ifdef SH_MEM_COMM - if(myrank_node==0) then -#else ! SH_MEM_COMM +!#ifdef SH_MEM_COMM +! if(myrank_node==0) then +!#else /*SH_MEM_COMM*/ if(myrank==0) then -#endif ! SH_MEM_COMM +!#endif /*SH_MEM_COMM*/ j=nf90_open(in_dir(1:len_in_dir)//'source.nc',OR(NF90_NETCDF4,NF90_NOWRITE),ncid_source) if(j/=NF90_NOERR) call parallel_abort('init: source.nc') j=nf90_inq_dimid(ncid_source,'nsources',mm) @@ -2902,34 +2902,34 @@ subroutine schism_init(iorder,indir,iths,ntime) if(iorder==0) then #ifdef SH_MEM_COMM allocate(ieg_source(max(1,nsources)),ieg_sink(max(1,nsinks)),stat=istat) - if(istat/=0) call parallel_abort('INIT: ieg_sink failure') - ! On each node, rank 0 allocates the storage, other ranks allocate with size zero and get a pointer + if(istat/=0) call parallel_abort('INIT: ieg_sink failure(1)') + + !On each node, rank 0 allocates the storage, other ranks allocate with size zero and get a pointer disp_unit = 4 ! size of real(4) If (myrank_node==0) THEN win_size = disp_unit * max(1,nsources,nsinks) * ntracers * 2 * nthfiles3 - call MPI_Win_allocate_shared(win_size, disp_unit, MPI_INFO_NULL, comm_node, - c_window_ptr, h_win, istat) - if(istat/=0) call parallel_abort('INIT: MPI_Win_allocate failed, node rank 0') + call MPI_Win_allocate_shared(win_size, disp_unit, MPI_INFO_NULL, comm_node, c_window_ptr, h_win, istat) + if(istat/=0) call parallel_abort('INIT: MPI_Win_allocate failed, node rank 0(1)') ELSE win_size = 0 call MPI_Win_allocate_shared(win_size, disp_unit, MPI_INFO_NULL, comm_node, c_window_ptr, h_win, istat) if(istat/=0) call parallel_abort('INIT: MPI_Win_allocate failed, node rank>0') call MPI_Win_shared_query(h_win, 0, win_size, disp_unit, c_window_ptr, istat) - if(istat/=0) call parallel_abort('INIT: MPI_Win_shared_query failed, node rank>0') + if(istat/=0) call parallel_abort('INIT: MPI_Win_shared_query failed, node rank>0 (1)') ENDIF - ! point ath3 array at the shared buffer + !point ath3 array at the shared buffer call C_F_POINTER(c_window_ptr, ath3, SHAPE = [max(1,nsources,nsinks),ntracers,2,nthfiles3]) -#else ! SH_MEM_COMM - allocate(ieg_sink(max(1,nsinks)),ath3(max(1,nsources,nsinks),ntracers,2,nthfiles3),stat=istat) +#else /*SH_MEM_COMM*/ + allocate(ieg_source(max(1,nsources)),ieg_sink(max(1,nsinks)),ath3(max(1,nsources,nsinks),ntracers,2,nthfiles3),stat=istat) if(istat/=0) call parallel_abort('INIT: ieg_sink failure') -#endif ! SH_MEM_COMM +#endif /*SH_MEM_COMM*/ endif -#ifdef SH_MEM_COMM - if(myrank_node==0) then -#else ! SH_MEM_COMM +!#ifdef SH_MEM_COMM +! if(myrank_node==0) then +!#else if(myrank==0) then -#endif ! SH_MEM_COMM +!#endif if(nsources>0) then j=nf90_inq_varid(ncid_source, "source_elem",mm) if(j/=NF90_NOERR) call parallel_abort('init: source_elem') diff --git a/src/Hydro/schism_step.F90 b/src/Hydro/schism_step.F90 index a75256be5..8c37f8ee4 100644 --- a/src/Hydro/schism_step.F90 +++ b/src/Hydro/schism_step.F90 @@ -1620,13 +1620,14 @@ subroutine schism_step(it) th_time3(2,2)=th_time3(2,2)+th_dt3(2) endif -#else +#else /*USE_NWM_BMI*/ + !Reading by rank 0 #ifdef SH_MEM_COMM if(nsources>0.and.myrank_node==0) then -#else ! SH_MEM_COMM +#else if(nsources>0.and.myrank==0) then -#endif ! SH_MEM_COMM +#endif if(time>th_time3(2,1)) then !not '>=' to avoid last step ath3(:,1,1,1)=ath3(:,1,2,1) th_time3(1,1)=th_time3(2,1) @@ -1657,13 +1658,13 @@ subroutine schism_step(it) if(j/=NF90_NOERR) call parallel_abort('STEP: msource') endif !if_source endif !time - endif !nsources>0.and.myrank==0 + endif !nsources>0.and.myrank*==0 #ifdef SH_MEM_COMM if(nsinks>0.and.myrank_node==0) then -#else ! SH_MEM_COMM +#else if(nsinks>0.and.myrank==0) then -#endif ! SH_MEM_COMM +#endif if(time>th_time3(2,2)) then !not '>=' to avoid last step ath3(:,1,1,2)=ath3(:,1,2,2) th_time3(1,2)=th_time3(2,2) @@ -1679,14 +1680,14 @@ subroutine schism_step(it) endif !if_source endif !time endif !nsinks -! Finished reading; bcast +! Finished reading; bcast from rank 0 of comm (which must be a member of myrank_node=0) call mpi_bcast(th_time3,2*nthfiles3,rtype,0,comm,istat) #ifdef SH_MEM_COMM - ! ath3 data now in shared buffer, no longer necessary to broadcast + ! ath3 data now in shared buffer, no longer necessary to broadcast call mpi_barrier(comm_node, istat) - #else ! SH_MEM_COMM +#else call mpi_bcast(ath3,max(1,nsources,nsinks)*ntracers*2*nthfiles3,MPI_REAL4,0,comm,istat) -#endif ! SH_MEM_COMM +#endif #endif /*USE_NWM_BMI*/ if(nsources>0) then