rrfs.v1.0.0 release branch update with GDIT's performance optimization #811
base: release/rrfs.v1.0.0
@@ -76,6 +76,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
   use netcdf_mod , only: nc_check
   use gsi_rfv3io_mod, only: fv3lam_io_phymetvars3d_nouv
   use obsmod, only: if_model_dbz,if_model_fed
   use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS, MPI_MAX

   implicit none
@@ -119,6 +120,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
   integer(i_kind):: imem_start,n_fv3sar

   integer(i_kind):: i_caseflag
   real(kind=8)   :: time_beg,time_end,walltime, tb,te,wt
   integer(i_kind):: ierr

   if(n_ens/=(n_ens_gfs+n_ens_fv3sar)) then
      write(6,*)'wrong, the sum of n_ens_gfs and n_ens_fv3sar not equal n_ens, stop'
@@ -275,7 +278,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
         end if
      end if

      tb=MPI_Wtime()
      do m=1,ntlevs_ens
@@ -452,7 +455,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
         if( .not. parallelization_over_ensmembers )then
            if (mype == 0) write(6,'(a,a)') &
               'CALL READ_FV3_REGIONAL_ENSPERTS FOR ENS DATA with the filename str : ',trim(ensfilenam_str)

            time_beg=MPI_Wtime()
            select case (i_caseflag)
            case (0)
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz)
@@ -469,9 +473,15 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
                  g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_fed=fed)
            case (5)
               !write(6,'("get_fv3_regional_ensperts_run: Before general_read_fv3_regional")')
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
                  g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_dbz=dbz,g_fed=fed)
            end select
            time_end=MPI_Wtime()
            call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
            if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
            if(mype==0) write(6,'("Maximum Walltime for general_read_fv3_regional",f15.4,I4)') walltime,i_caseflag

         end if

         if( parallelization_over_ensmembers )then
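The instrumentation added above follows one pattern throughout the PR: bracket a call with MPI_Wtime, then MPI_Reduce the per-rank elapsed time with MPI_MAX so rank 0 reports the slowest rank, which bounds the wallclock cost. A minimal, self-contained sketch of that pattern (a hypothetical standalone program; the program and subroutine names are placeholders, and only the MPI calls mirror the diff):

   ! Standalone sketch of the max-walltime timing pattern used in this PR.
   ! Hypothetical names; compile with an MPI Fortran wrapper (e.g. mpif90).
   program max_walltime_sketch
      use mpi
      implicit none
      integer :: ierr, mype
      real(kind=8) :: time_beg, time_end, walltime

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, mype, ierr)

      time_beg = MPI_Wtime()
      call timed_work()                 ! stand-in for the ensemble read being timed
      time_end = MPI_Wtime()

      ! The slowest rank determines the elapsed wallclock, so reduce with MPI_MAX.
      call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
      if (ierr /= MPI_SUCCESS) print *, 'MPI_Reduce ', ierr
      if (mype == 0) write(6,'("Maximum Walltime ",f15.4)') walltime

      call MPI_Finalize(ierr)
   contains
      subroutine timed_work()
         ! placeholder for the operation under measurement
      end subroutine timed_work
   end program max_walltime_sketch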
@@ -792,6 +802,11 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
         end do

      enddo ! it 4d loop
      te=MPI_Wtime()
      call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
      if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
      if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ",f15.4)') n_ens_fv3sar,wt

      ! CALCULATE ENSEMBLE SPREAD
      if(write_ens_sprd ) then
         call this%ens_spread_dualres_regional(mype,en_perts,nelen)
@@ -851,7 +866,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   use hybrid_ensemble_parameters, only: grd_ens,q_hyb_ens
   use hybrid_ensemble_parameters, only: fv3sar_ensemble_opt,dual_res

-  use mpimod, only: mpi_comm_world,mpi_rtype
+  use mpimod, only: mpi_comm_world,mpi_rtype,mype
   use gsi_rfv3io_mod,only: type_fv3regfilenameg
   use gsi_rfv3io_mod,only:n2d
   use constants, only: half,zero
@@ -870,6 +885,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   use directDA_radaruse_mod, only: l_use_cvpqx, cvpqx_pval, cld_nt_updt
   use directDA_radaruse_mod, only: l_cvpnr, cvpnr_pval
   use obsmod, only:if_model_dbz,if_model_fed
   use mpi, only : MPI_Wtime, MPI_REAL8, MPI_MAX, MPI_SUCCESS

   implicit none
@@ -913,6 +929,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   character(len=:),allocatable :: sfcdata   !='fv3_sfcdata'
   character(len=:),allocatable :: couplerres!='coupler.res'
   integer (i_kind) ier,istatus
   real(kind=8) :: time_beg,time_end,walltime
   integer(i_kind) :: ierr

   associate( this => this ) ! eliminates warning for unused dummy argument needed for binding
@@ -930,6 +948,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   couplerres=fv3_filenameginput%couplerres

   !write(6,'("general_read_fv3_regional: fv3sar_ensemble_opt= " I4)') fv3sar_ensemble_opt

Review comment: Those commented-out lines could be deleted, or just uncommented if needed.

   if (allocated(fv3lam_ens_io_dynmetvars2d_nouv) ) then
@@ -956,16 +975,28 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   end if

   if(fv3sar_ensemble_opt == 0 ) then
      time_beg=MPI_Wtime()
      call gsi_fv3ncdf_readuv(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
      time_end=MPI_Wtime()
      call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
      if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
      if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_readuv",f15.4)') walltime

   else
      call gsi_fv3ncdf_readuv_v1(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
   endif
   if(fv3sar_ensemble_opt == 0) then
      time_beg=MPI_Wtime()
      call gsi_fv3ncdf_read(grd_fv3lam_ens_dynvar_io_nouv,gsibundle_fv3lam_ens_dynvar_nouv,&
           fv3_filenameginput%dynvars,fv3_filenameginput,dual_res)
      call gsi_fv3ncdf_read(grd_fv3lam_ens_tracer_io_nouv,gsibundle_fv3lam_ens_tracer_nouv,&
           fv3_filenameginput%tracers,fv3_filenameginput,dual_res)
      time_end=MPI_Wtime()
      call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
      if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
      if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_read",f15.4)') walltime

      if( if_model_dbz .or. if_model_fed ) then
         call gsi_fv3ncdf_read(grd_fv3lam_ens_phyvar_io_nouv,gsibundle_fv3lam_ens_phyvar_nouv,&
              fv3_filenameginput%phyvars,fv3_filenameginput,dual_res)
@@ -1013,6 +1044,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   endif

   !! tsen2tv !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !$omp parallel do default(none),private(k,i,j) &
   !$omp shared(grd_ens,g_q,g_tsen,g_tv,fv)
   do k=1,grd_ens%nsig
      do j=1,grd_ens%lon2
         do i=1,grd_ens%lat2
@@ -1023,6 +1056,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
   if (.not.q_hyb_ens) then
      ice=.true.
      iderivative=0
      !$omp parallel do default(none),private(k,i,j,kp) &
      !$omp shared(grd_ens,g_prsi,g_prsl)
      do k=1,grd_ens%nsig
         kp=k+1
         do j=1,grd_ens%lon2
@@ -1032,6 +1067,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
         end do
      end do
      call genqsat(g_rh,g_tsen(1,1,1),g_prsl(1,1,1),grd_ens%lat2,grd_ens%lon2,grd_ens%nsig,ice,iderivative)
      !$omp parallel do default(none),private(k,i,j) &
      !$omp shared(grd_ens,g_rh,g_q)
      do k=1,grd_ens%nsig
         do j=1,grd_ens%lon2
            do i=1,grd_ens%lat2
@@ -1051,6 +1088,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g

   ! CV transform
   !$omp parallel do default(none),private(k,i,j) &
   !$omp shared(grd_ens,l_use_cvpqx,g_qr,cvpqx_pval,g_qs,g_qg,g_qnr,cld_nt_updt,l_cvpnr,cvpnr_pval)
   do k=1,grd_ens%nsig
      do i=1,grd_ens%lon2
         do j=1,grd_ens%lat2
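The OpenMP directives added in the four hunks above all take the same form: !$omp parallel do default(none) over the outer vertical loop, with the loop indices listed as private and every referenced array and derived type listed as shared. A minimal sketch of that scoping style follows; the array names, sizes, and the tsen-to-tv formula are stand-ins for the grd_ens fields, not the PR's code:

   ! Sketch of the default(none) loop scoping used above; names and sizes are
   ! hypothetical. Named constants (parameters) need no data-sharing clause.
   program omp_scoping_sketch
      implicit none
      integer, parameter :: nsig=64, lon2=40, lat2=40
      real(kind=8), parameter :: fv=0.61d0
      real(kind=8) :: g_tsen(lat2,lon2,nsig), g_q(lat2,lon2,nsig), g_tv(lat2,lon2,nsig)
      integer :: i, j, k

      call random_number(g_tsen)
      call random_number(g_q)

   !$omp parallel do default(none),private(k,i,j) &
   !$omp shared(g_tsen,g_q,g_tv)
      do k=1,nsig
         do j=1,lon2
            do i=1,lat2
               ! sensible temperature to virtual temperature, as in the tsen2tv loop
               g_tv(i,j,k)=g_tsen(i,j,k)*(1.0d0+fv*g_q(i,j,k))
            end do
         end do
      end do

      write(6,'("g_tv(1,1,1)= ",f10.6)') g_tv(1,1,1)
   end program omp_scoping_sketch

default(none) is a deliberate choice here: the compiler rejects any variable whose sharing is not stated, which prevents accidentally shared temporaries in loops this large.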
@@ -0,0 +1,23 @@
module crc32

Review comment: could "crc32" be replaced with a more informative name?

   use iso_c_binding
   implicit none
   public :: digest

   interface
      integer function digest_c(message) bind(c)
         use iso_c_binding, only: c_char
         character(kind=c_char), intent(in) :: message(*)
      end function digest_c
   end interface

contains

   integer function digest(m)
      use iso_c_binding, only: c_null_char
      implicit none
      character(len=*), intent(in) :: m
      !m='nid001019'
      digest=abs(digest_c(trim(m)//c_null_char))

Review comment: I understand digest_c will return a unique "checksum" to be used as a unique "id" for later use.

      !write(6,'("Digest ",I12)') digest
   end function digest
end module crc32
@@ -0,0 +1,7 @@
#include <stdio.h>

Review comment: see the above comment.

#include <string.h>
#include <zlib.h>

uLong digest_c(char * message) {
    return crc32(0, (const void*)message, strlen(message));
}
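For reference, a hypothetical usage sketch of the new module: digest maps a string, such as a node name (the commented-out 'nid001019' suggests this use), to a nonnegative integer id. It assumes the Fortran module and the C file above are compiled together and linked against zlib (-lz). Note the C side returns uLong while the Fortran interface declares a default integer; a CRC-32 fits in 32 bits, but the binding relies on that, and abs() handles values with the sign bit set.

   ! Hypothetical driver: build with the two files above and link with -lz.
   program digest_demo
      use crc32, only: digest
      implicit none
      integer :: id
      id = digest('nid001019')   ! example node name from the commented-out line
      write(6,'("Digest ",I12)') id
   end program digest_demo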
Review comment: Why do we need those setup changes for this PR? If those changes are needed for this PR, does that mean similar changes are needed for other machines?

Reply: Yes, but we only run this on WCOSS2 for now.

Reply: Thanks! I suspect the different modules used in this PR, compared with the "control" branch (release/rrfs.v1.0.0), caused the ctest failures I reported previously. I am working on this.