rrfs.v1.0.0 release branch update with GDIT's performance optimization #811

Open · wants to merge 3 commits into base: release/rrfs.v1.0.0
6 changes: 5 additions & 1 deletion modulefiles/gsi_wcoss2.intel.lua
@@ -9,7 +9,9 @@ local cmake_ver= os.getenv("cmake_ver") or "3.20.2"
local python_ver=os.getenv("python_ver") or "3.8.6"
local prod_util_ver=os.getenv("prod_util_ver") or "2.0.10"

local netcdf_ver=os.getenv("netcdf_ver") or "4.7.4"
local zlib_ver=os.getenv("zlib_ver") or "1.2.11"
local hdf5_ver=os.getenv("hdf5_ver") or "1.14.0"
local netcdf_ver=os.getenv("netcdf_ver") or "4.9.2"
Contributor:
Why do we need these setup changes for this PR? And if they are needed for this PR, does that mean similar changes are needed for other machines?

Collaborator:
Yes, but we only run this on WCOSS2 for now.

Contributor:
Thanks! I suspect that the different modules used in this PR, compared with the "control" branch (release/rrfs.v1.0.0), caused the ctest failures I reported previously. I am working on this.

local bufr_ver=os.getenv("bufr_ver") or "11.7.0"
local bacio_ver=os.getenv("bacio_ver") or "2.4.1"
local w3emc_ver=os.getenv("w3emc_ver") or "2.9.2"
@@ -32,6 +34,8 @@ load(pathJoin("python", python_ver))

load(pathJoin("prod_util", prod_util_ver))

load(pathJoin("zlib", zlib_ver))
load(pathJoin("hdf5", hdf5_ver))
load(pathJoin("netcdf", netcdf_ver))
load(pathJoin("bufr", bufr_ver))
load(pathJoin("bacio", bacio_ver))
17 changes: 16 additions & 1 deletion src/enkf/observer_fv3reg.f90
@@ -45,7 +45,7 @@ subroutine setup_linhx(rlat, rlon, time, ix, delx, ixp, delxp, iy, dely, &
use params, only: nstatefields, nlons, nlats, nlevs, nhr_state, fhr_assim
use gridinfo, only: npts, latsgrd, lonsgrd
use statevec, only: nsdim
use constants, only: zero,one,pi
use constants, only: izero,zero,one,pi
use mpisetup
implicit none

@@ -54,6 +54,18 @@ subroutine setup_linhx(rlat, rlon, time, ix, delx, ixp, delxp, iy, dely, &
real(r_single) ,intent(in ) :: time ! observation time relative to middle of window
integer(i_kind), intent(out) :: ix, iy, it, ixp, iyp, itp
real(r_kind), intent(out) :: delx, dely, delxp, delyp, delt, deltp
ix=izero
iy=izero
it=izero
ixp=izero
iyp=izero
itp=izero
delx=zero
dely=zero
delxp=zero
delyp=zero
delt=zero
deltp=zero
write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
call stop2(555)

@@ -110,6 +122,7 @@ subroutine calc_linhx(hx, dens, dhx_dx, hxpert, hx_ens, &
type(raggedarr) ,intent(inout) :: hxpert ! interpolated background
real(r_single) ,intent( out) :: hx_ens ! H (x_ens)
integer(i_kind) i,j
hx_ens=zero
write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
call stop2(555)

@@ -145,6 +158,7 @@ subroutine calc_linhx_modens(hx, dhx_dx, hxpert, hx_ens, vscale)
!$$$
use kinds, only: r_kind,i_kind,r_single
use sparsearr, only: sparr, raggedarr
use constants, only: zero
use mpisetup
implicit none

@@ -155,6 +169,7 @@ subroutine calc_linhx_modens(hx, dhx_dx, hxpert, hx_ens, vscale)
real(r_single) ,intent( out) :: hx_ens(neigv)! H (x_ens)
real(r_double),dimension(neigv,nlevs+1) ,intent(in ) :: vscale ! vertical scaling (for modulated ens)
integer(i_kind) i
hx_ens=zero
write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
call stop2(555)

2 changes: 2 additions & 0 deletions src/gsi/CMakeLists.txt
@@ -62,6 +62,7 @@ find_package(NetCDF REQUIRED Fortran)
if(OPENMP)
find_package(OpenMP REQUIRED)
endif()
find_package(ZLIB REQUIRED)

# NCEPLibs dependencies
find_package(bacio REQUIRED)
@@ -157,6 +158,7 @@ target_link_libraries(gsi_fortran_obj PUBLIC w3emc::w3emc_d)
target_link_libraries(gsi_fortran_obj PUBLIC sp::sp_d)
target_link_libraries(gsi_fortran_obj PUBLIC bufr::bufr_d)
target_link_libraries(gsi_fortran_obj PUBLIC crtm::crtm)
target_link_libraries(gsi_fortran_obj PUBLIC ZLIB::ZLIB)
if(GSI_MODE MATCHES "Regional")
target_link_libraries(gsi_fortran_obj PUBLIC wrf_io::wrf_io)
endif()
47 changes: 43 additions & 4 deletions src/gsi/cplr_get_fv3_regional_ensperts.f90
@@ -76,6 +76,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
use netcdf_mod , only: nc_check
use gsi_rfv3io_mod, only: fv3lam_io_phymetvars3d_nouv
use obsmod, only: if_model_dbz,if_model_fed
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS, MPI_MAX


implicit none
@@ -119,6 +120,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
integer(i_kind):: imem_start,n_fv3sar

integer(i_kind):: i_caseflag
real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
integer(i_kind) :: ierr

if(n_ens/=(n_ens_gfs+n_ens_fv3sar)) then
write(6,*)'wrong, the sum of n_ens_gfs and n_ens_fv3sar not equal n_ens, stop'
@@ -275,7 +278,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
end if
end if


tb=MPI_Wtime()
do m=1,ntlevs_ens


@@ -452,7 +455,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
if( .not. parallelization_over_ensmembers )then
if (mype == 0) write(6,'(a,a)') &
'CALL READ_FV3_REGIONAL_ENSPERTS FOR ENS DATA with the filename str : ',trim(ensfilenam_str)


time_beg=MPI_Wtime()
select case (i_caseflag)
case (0)
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz)
Expand All @@ -469,9 +473,15 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_fed=fed)
case (5)
!write(6,'("get_fv3_regional_ensperts_run: Before general_read_fv3_regional")')
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_dbz=dbz,g_fed=fed)
end select
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime for general_read_fv3_regional" f15.4,I4)') walltime,i_caseflag

end if

if( parallelization_over_ensmembers )then
@@ -792,6 +802,11 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
end do

enddo ! it 4d loop
te=MPI_Wtime()
call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_fv3sar,wt

! CALCULATE ENSEMBLE SPREAD
if(write_ens_sprd ) then
call this%ens_spread_dualres_regional(mype,en_perts,nelen)
@@ -851,7 +866,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
use hybrid_ensemble_parameters, only: grd_ens,q_hyb_ens
use hybrid_ensemble_parameters, only: fv3sar_ensemble_opt,dual_res

use mpimod, only: mpi_comm_world,mpi_rtype
use mpimod, only: mpi_comm_world,mpi_rtype,mype
use gsi_rfv3io_mod,only: type_fv3regfilenameg
use gsi_rfv3io_mod,only:n2d
use constants, only: half,zero
@@ -870,6 +885,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
use directDA_radaruse_mod, only: l_use_cvpqx, cvpqx_pval, cld_nt_updt
use directDA_radaruse_mod, only: l_cvpnr, cvpnr_pval
use obsmod, only:if_model_dbz,if_model_fed
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_MAX, MPI_SUCCESS


implicit none
@@ -913,6 +929,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
character(len=:),allocatable :: sfcdata !='fv3_sfcdata'
character(len=:),allocatable :: couplerres!='coupler.res'
integer (i_kind) ier,istatus
real(kind=8) :: time_beg,time_end,walltime
integer(i_kind) :: ierr


associate( this => this ) ! eliminates warning for unused dummy argument needed for binding
Expand All @@ -930,6 +948,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
couplerres=fv3_filenameginput%couplerres


!write(6,'("general_read_fv3_regional: fv3sar_ensemble_opt= " I4)') fv3sar_ensemble_opt
Contributor:
These commented-out lines could be deleted, or simply uncommented if they are needed.



if (allocated(fv3lam_ens_io_dynmetvars2d_nouv) ) then
@@ -956,16 +975,28 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
end if


if(fv3sar_ensemble_opt == 0 ) then
if(fv3sar_ensemble_opt == 0 ) then
time_beg=MPI_Wtime()
call gsi_fv3ncdf_readuv(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_readuv" f15.4)') walltime

else
call gsi_fv3ncdf_readuv_v1(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
endif
if(fv3sar_ensemble_opt == 0) then
time_beg=MPI_Wtime()
call gsi_fv3ncdf_read(grd_fv3lam_ens_dynvar_io_nouv,gsibundle_fv3lam_ens_dynvar_nouv,&
fv3_filenameginput%dynvars,fv3_filenameginput,dual_res)
call gsi_fv3ncdf_read(grd_fv3lam_ens_tracer_io_nouv,gsibundle_fv3lam_ens_tracer_nouv,&
fv3_filenameginput%tracers,fv3_filenameginput,dual_res)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_read" f15.4)') walltime

if( if_model_dbz .or. if_model_fed ) then
call gsi_fv3ncdf_read(grd_fv3lam_ens_phyvar_io_nouv,gsibundle_fv3lam_ens_phyvar_nouv,&
fv3_filenameginput%phyvars,fv3_filenameginput,dual_res)
@@ -1013,6 +1044,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
endif

!! tsen2tv !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,g_q,g_tsen,g_tv,fv)
do k=1,grd_ens%nsig
do j=1,grd_ens%lon2
do i=1,grd_ens%lat2
@@ -1023,6 +1056,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
if (.not.q_hyb_ens) then
ice=.true.
iderivative=0
!$omp parallel do default(none),private(k,i,j,kp) &
!$omp shared(grd_ens,g_prsi,g_prsl)
do k=1,grd_ens%nsig
kp=k+1
do j=1,grd_ens%lon2
@@ -1032,6 +1067,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
end do
end do
call genqsat(g_rh,g_tsen(1,1,1),g_prsl(1,1,1),grd_ens%lat2,grd_ens%lon2,grd_ens%nsig,ice,iderivative)
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,g_rh,g_q)
do k=1,grd_ens%nsig
do j=1,grd_ens%lon2
do i=1,grd_ens%lat2
@@ -1051,6 +1088,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g


! CV transform
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,l_use_cvpqx,g_qr,cvpqx_pval,g_qs,g_qg,g_qnr,cld_nt_updt,l_cvpnr,cvpnr_pval)
do k=1,grd_ens%nsig
do i=1,grd_ens%lon2
do j=1,grd_ens%lat2
23 changes: 23 additions & 0 deletions src/gsi/crc32.F90
@@ -0,0 +1,23 @@
module crc32
Contributor:
could "crc32" be replaced with a more informative name?

use iso_c_binding
implicit none
public :: digest

interface
integer function digest_c(message) bind(c)
use iso_c_binding, only: c_char
character(kind=c_char), intent(in) :: message(*)
end function digest_c
end interface

contains

integer function digest(m)
use iso_c_binding, only: c_null_char
implicit none
character(len=*), intent(in) :: m
!m='nid001019'
digest=abs(digest_c(trim(m)//c_null_char))
Contributor:
I understand that digest_c returns a "checksum" that is used as a unique id later on. But to do this, GSI has to link zlib directly and add crc32_c.c, a C source file, to what has been a pure-Fortran package. Can we use a simpler, but still workable, way to produce a unique string here? For example, could we add a Fortran hash function to do this (a rough sketch follows this file's diff)?
I am interested in hearing other GSI developers' opinions on this.

!write(6,'("Digest ",I12)') digest
end function digest
end module crc32
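For reference in the discussion above, here is a minimal pure-Fortran sketch of the suggested hash approach. It is illustrative only and not part of this PR; the names string_hash and digest_f are placeholders. It uses a DJB2-style string hash, so neither zlib nor an extra C source file would be required.

module string_hash
  use iso_fortran_env, only: int64
  implicit none
  private
  public :: digest_f
contains
  integer function digest_f(m)
    ! Return a non-negative default-integer hash of the trimmed input string,
    ! e.g. digest_f('nid001019') gives a reproducible id for a node name.
    ! 64-bit intermediates plus masking keep the arithmetic well defined
    ! rather than relying on signed-integer overflow.
    character(len=*), intent(in) :: m
    integer(int64) :: h
    integer :: i
    h = 5381_int64
    do i = 1, len_trim(m)
       h = iand(h*33_int64 + int(ichar(m(i:i)), int64), 2147483647_int64)
    end do
    digest_f = int(h)
  end function digest_f
end module string_hash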
7 changes: 7 additions & 0 deletions src/gsi/crc32_c.c
@@ -0,0 +1,7 @@
#include <stdio.h>
Contributor (@TingLei-NOAA, Dec 3, 2024):
See the comment above. I hesitate to justify adding such a C file to the currently pure-Fortran GSI just for this function.
A correction: I see there is already one C file in GSI, blockIO.c. The remaining question, then, is about the additionally required zlib dependency.

#include <string.h>
#include <zlib.h>

uLong digest_c(char * message) {
return crc32(0, (const void*)message, strlen(message));
}
13 changes: 13 additions & 0 deletions src/gsi/get_gefs_for_regional.f90
@@ -83,6 +83,7 @@ subroutine get_gefs_for_regional
use get_wrf_mass_ensperts_mod, only: get_wrf_mass_ensperts_class
use gsi_io, only: verbose
use obsmod, only: l_wcp_cwm
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS
implicit none

type(sub2grid_info) grd_gfs,grd_mix,grd_gfst
@@ -187,6 +188,8 @@ subroutine get_gefs_for_regional
real(r_kind), pointer :: ges_q (:,:,:)=>NULL()
logical :: print_verbose
real(r_kind), allocatable :: ges_z_ens(:,:)
real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
integer(i_kind) :: ierr

print_verbose=.false.
if(verbose)print_verbose=.true.
@@ -621,6 +624,7 @@ subroutine get_gefs_for_regional

rewind(10)
inithead=.true.
tb=MPI_Wtime()
do n=1,n_ens_gfs
read(10,'(a)',err=20,end=20)filename
filename=trim(ensemble_path) // trim(filename)
@@ -641,8 +645,13 @@ subroutine get_gefs_for_regional
call general_read_gfsatm_nems(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,.true.,iret)
else if (use_gfs_ncio) then
time_beg=MPI_Wtime()
call general_read_gfsatm_nc(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,.true.,iret)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime for general_read_gfsatm_nc" f15.4)') walltime
else
call general_read_gfsatm(grd_gfst,sp_gfs,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,inithead,iret)
@@ -955,6 +964,10 @@ subroutine get_gefs_for_regional
! if(mype==0) write(6,*)' with halo, n,min,max ges_ps - matt ps =',n,pdiffmin0,pdiffmax0

end do ! end loop over ensemble members.
te=MPI_Wtime()
call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_gfs,wt

deallocate(ges_z_ens)

2 changes: 2 additions & 0 deletions src/gsi/gsi_files.cmake
@@ -1,5 +1,6 @@
list(APPEND GSI_SRC_C
blockIO.c
crc32_c.c
)

# Class files for WRF interfaces
@@ -264,6 +265,7 @@ guess_grids.F90
half_nmm_grid2.f90
hdraobmod.f90
hilbert_curve.f90
crc32.F90
hybrid_ensemble_isotropic.F90
hybrid_ensemble_parameters.f90
inc2guess.f90