Skip to content

Commit

Permalink
Make MPI restart test more robust
Browse files Browse the repository at this point in the history
Sometimes the shared filesystem is slow and the folder is not properly
synced across MPI processes. This commit adds an extra check to ensure
that all the MPI processes see the temporary folder before moving forward.
  • Loading branch information
Sbozzolo committed Oct 30, 2024
1 parent 01a1d9e commit e98385f
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions test/restart.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import ClimaCore.DataLayouts: AbstractData
import ClimaCore.Geometry: AxisTensor
import ClimaCore.Spaces: AbstractSpace
import ClimaComms
import ClimaUtilities.OutputPathGenerator: maybe_wait_filesystem
pkgversion(ClimaComms) >= v"0.6" && ClimaComms.@import_required_backends
import Logging
import NCDatasets
Expand Down Expand Up @@ -379,6 +380,10 @@ if MANYTESTS
ClimaComms.iamroot(comms_ctx) ? mktempdir(pwd()) :
""
output_loc = ClimaComms.bcast(comms_ctx, output_loc)
# Sometimes the shared filesystem doesn't work properly
# and the folder is not synced across MPI processes.
# Let's add an additional check here.
maybe_wait_filesystem(comms_ctx, output_loc)

job_id = "$(configuration)_$(moisture)_$(precip)_$(topography)_$(radiation)_$(turbconv_mode)"
test_dict = Dict(
Expand Down

0 comments on commit e98385f

Please sign in to comment.