Skip to content

Commit

Permalink
Restructure output directory
Browse files Browse the repository at this point in the history
And don't use `atmos.p.output_dir`

The output structure looks like this:
```
coupler_output_dir_amip/
├── checkpoints
│       └── checkpoints for the various models
├── output_0000/
│   ├── atmos/
│   │   └── output of the atmos model
│   └── ocean/
│       └── output of the ocean model
├── output_0001/
│   └── ... component model outputs in their folders ...
├── output_0002/
│   └── ... component model outputs in their folders ...
└── output_active -> output_0002/
```
  • Loading branch information
Sbozzolo committed Dec 5, 2024
1 parent 32de9c3 commit b77d68d
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 37 deletions.
29 changes: 24 additions & 5 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,33 @@ by identifying where elevation is greater than 0. Note, this can lead to
misidentification of ocean in some areas of the globe that are inland but below
sea level (Dead Sea, Death Valley, ...).

### Code cleanup

#### Output path updates - PRs [#1058](https://github.com/CliMA/ClimaCoupler.jl/pull/1058),
[#1106](https://github.com/CliMA/ClimaCoupler.jl/pull/1106)

### Code cleanup
#### Output path update - PR [#1058](https://github.com/CliMA/ClimaCoupler.jl/pull/1058)
Previously, ClimaEarth simulation outputs were saved in a path
`experiments/ClimaEarth/output/$mode_name/$job_id/artifacts/`.
This PR removes `mode_name` from this pattern, so output will now be in
`experiments/ClimaEarth/output/$job_id/artifacts/`.
`experiments/ClimaEarth/output/$mode_name/$job_id/artifacts/`. Now, `ClimaEarth`
creates output folders with an increment (increasing the counter every time the
simulation is run). This is in preparation for restarts. The output now looks
like
```
coupler_output_dir_amip/
├── checkpoints
│ └── checkpoints for the various models
├── artifacts
│ └── plots produced by the postprocessing step
├── output_0000/
│ ├── atmos/
│ │ └── output of the atmos model
│ └── ocean/
│ └── output of the ocean model
├── output_0001/
│ └── ... component model outputs in their folders ...
├── output_0002/
│ └── ... component model outputs in their folders ...
└── output_active -> output_0002/
```
Note that any external scripts that assume an output path will need to be updated.
#### Remove ClimaCoupler.Diagnostics module - PR [#953](https://github.com/CliMA/ClimaCoupler.jl/pull/953)
Expand Down
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
ClimaCoreTempestRemap = "d934ef94-cdd4-4710-83d6-720549644b70"
ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Expand All @@ -22,6 +23,7 @@ Artifacts = "1"
ClimaComms = "0.5.6, 0.6"
ClimaCore = "0.14.19"
ClimaCoreTempestRemap = "0.3"
ClimaUtilities = "0.1.14"
Dates = "1"
JLD2 = "0.4, 0.5"
Logging = "1"
Expand Down
6 changes: 3 additions & 3 deletions experiments/ClimaEarth/components/atmosphere/climaatmos.jl
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ FluxCalculator.get_surface_params(sim::ClimaAtmosSimulation) = CAP.surface_fluxe
### ClimaAtmos.jl model-specific functions (not explicitly required by ClimaCoupler.jl)
###
"""
get_atmos_config_dict(coupler_dict::Dict, job_id::String)
get_atmos_config_dict(coupler_dict::Dict, job_id::String, atmos_output_dir)
Returns the specified atmospheric configuration (`atmos_config`) overwritten by arguments
in the coupler dictionary (`coupler_dict`).
Expand All @@ -313,7 +313,7 @@ The TOML parameter file to use is chosen using the following priority:
If a coupler TOML file is provided, it is used. Otherwise we use an atmos TOML
file if it's provided. If neither is provided, we use a default coupler TOML file.
"""
function get_atmos_config_dict(coupler_dict::Dict, job_id::String)
function get_atmos_config_dict(coupler_dict::Dict, job_id::String, atmos_output_dir)
atmos_config_file = coupler_dict["atmos_config_file"]
atmos_config_repo = coupler_dict["atmos_config_repo"]
# override default or specified configs with coupler arguments, and set the correct atmos config_file
Expand Down Expand Up @@ -357,7 +357,7 @@ function get_atmos_config_dict(coupler_dict::Dict, job_id::String)
end

# Specify atmos output directory to be inside the coupler output directory
atmos_output_dir = joinpath(coupler_dict["coupler_output_dir"], job_id, "clima_atmos")
atmos_config["output_dir_style"] = "RemovePreexisting"
atmos_config["output_dir"] = atmos_output_dir

# Access extra atmosphere diagnostics from coupler so we can rename for atmos code
Expand Down
37 changes: 19 additions & 18 deletions experiments/ClimaEarth/run_amip.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,25 @@ add_extra_diagnostics!(config_dict)
plot_diagnostics,
) = get_coupler_args(config_dict)

#=
### I/O Directory Setup
`Utilities.setup_output_dirs` returns `dir_paths.output = COUPLER_OUTPUT_DIR`,
which is the directory where the output of the simulation will be saved;
`dir_paths.artifacts`, which is the directory where the plots (from
postprocessing and the conservation checks) of the simulation will be saved;
and `dir_paths.checkpoints`, where restart files are saved.
=#

COUPLER_OUTPUT_DIR = joinpath(output_dir_root, job_id)
dir_paths = Utilities.setup_output_dirs(output_dir = COUPLER_OUTPUT_DIR, comms_ctx = comms_ctx)
@info "Coupler output directory $(dir_paths.output)"
@info "Coupler artifacts directory $(dir_paths.artifacts)"
@info "Coupler checkpoint directory $(dir_paths.checkpoints)"

## get component model dictionaries (if applicable)
## Note this step must come after parsing the coupler config dictionary, since
## some parameters are passed from the coupler config to the component model configs
atmos_config_dict = get_atmos_config_dict(config_dict, job_id)
atmos_output_dir = joinpath(dir_paths.output, "clima_atmos")
atmos_config_dict = get_atmos_config_dict(config_dict, job_id, atmos_output_dir)
(; dt_rad, output_default_diagnostics) = get_atmos_args(atmos_config_dict)

## set unique random seed if desired, otherwise use default
Expand All @@ -138,18 +153,6 @@ Random.seed!(random_seed)

tspan = (t_start, t_end)

#=
### I/O Directory Setup
`Utilities.setup_output_dirs` returns `dir_paths.output = COUPLER_OUTPUT_DIR`, which is the directory where the output of the simulation will be saved, and `dir_paths.artifacts` is the directory where
the plots (from postprocessing and the conservation checks) of the simulation will be saved. `dir_paths.regrid` is the directory where the regridding
temporary files will be saved.
=#

COUPLER_OUTPUT_DIR = joinpath(output_dir_root, job_id)
dir_paths = Utilities.setup_output_dirs(output_dir = COUPLER_OUTPUT_DIR, comms_ctx = comms_ctx)
@info "Coupler output directory $(dir_paths.output)"
@info "Coupler artifacts directory $(dir_paths.artifacts)"

#=
## Data File Paths
=#
Expand Down Expand Up @@ -855,13 +858,12 @@ if ClimaComms.iamroot(comms_ctx)

# define variable names and output directories for each diagnostic
amip_short_names_atmos = ["ta", "ua", "hus", "clw", "pr", "ts", "toa_fluxes_net"]
output_dir_atmos = atmos_sim.integrator.p.output_dir
amip_short_names_coupler = ["F_turb_energy"]
output_dir_coupler = dir_paths.output

# Check if all output variables are available in the specified directories
make_diagnostics_plots(
output_dir_atmos,
atmos_output_dir,
dir_paths.artifacts,
short_names = amip_short_names_atmos,
output_prefix = "atmos_",
Expand All @@ -877,16 +879,15 @@ if ClimaComms.iamroot(comms_ctx)
# Check this because we only want monthly data for making plots
if t_end > 84600 * 31 * 3 && output_default_diagnostics
include("leaderboard/leaderboard.jl")
diagnostics_folder_path = atmos_sim.integrator.p.output_dir
leaderboard_base_path = dir_paths.artifacts
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
compute_leaderboard(leaderboard_base_path, atmos_output_dir)
end
end
## plot extra atmosphere diagnostics if specified
if plot_diagnostics
@info "Plotting diagnostics"
include("user_io/diagnostics_plots.jl")
make_diagnostics_plots(atmos_sim.integrator.p.output_dir, dir_paths.artifacts)
make_diagnostics_plots(atmos_output_dir, dir_paths.artifacts)
end

## plot all model states and coupler fields (useful for debugging)
Expand Down
44 changes: 33 additions & 11 deletions src/Utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import Artifacts
import ClimaComms
import ClimaCore as CC
import Logging
import ClimaUtilities.OutputPathGenerator: generate_output_path

export swap_space!, get_device, get_comms_context, show_memory_usage, setup_output_dirs, time_to_seconds

Expand Down Expand Up @@ -100,33 +101,52 @@ Create output directories for the experiment. If `comms_ctx` is provided, only t
By default, the regrid directory is created as a temporary directory inside the output directory,
and the artifacts directory is created inside the output directory with the name `artifacts/`.
`ClimaUtilities.OutputPathGenerator` is used so that simulations can be re-run and re-started.
The output path looks like:
```
coupler_output_dir_amip/
├── checkpoints
│ └── checkpoints for the various models
├── artifacts
│ └── plots produced by the postprocessing step
├── output_0000/
│ ├── atmos/
│ │ └── output of the atmos model
│ └── ocean/
│ └── output of the ocean model
├── output_0001/
│ └── ... component model outputs in their folders ...
├── output_0002/
│ └── ... component model outputs in their folders ...
└── output_active -> output_0002/
```
# Arguments
- `output_dir::String`: The directory where the output files will be stored. Default is the current directory.
- `regrid_dir::String`: The directory where the regridded files will be stored. Default is `output_dir/regrid_tmp/`.
- `checkpoints_dir::String`: The directory where the checkpoint files will be stored. Default is `output_dir/checkpoints/`.
- `artifacts_dir::String`: The directory where the artifacts will be stored. Default is `output_dir/artifacts/`.
- `comms_ctx::Union{Nothing, ClimaComms.AbstractCommsContext}`: The communicator context. If provided, only the root process will create the directories.
# Returns
- A named tuple with the paths to the output, artifacts, regrid, and checkpoints directories.
"""
function setup_output_dirs(; output_dir = nothing, artifacts_dir = nothing, comms_ctx)
if output_dir === nothing
output_dir = "."
end
if artifacts_dir === nothing
artifacts_dir = joinpath(output_dir, "artifacts")
end

@info(output_dir)
function setup_output_dirs(;
output_dir = pwd(),
artifacts_dir = joinpath(output_dir, "artifacts"),
checkpoints_dir = joinpath(output_dir, "checkpoints"),
comms_ctx,
)
output_dir = generate_output_path(output_dir)
regrid_dir = nothing
if ClimaComms.iamroot(comms_ctx)
mkpath(output_dir)
mkpath(artifacts_dir)
mkpath(checkpoints_dir)
regrid_dir = mktempdir(output_dir, prefix = "regrid_tmp_")
end
regrid_dir = ClimaComms.bcast(comms_ctx, regrid_dir)

return (; output = output_dir, artifacts = artifacts_dir, regrid = regrid_dir)
return (; output = output_dir, artifacts = artifacts_dir, regrid = regrid_dir, checkpoints = checkpoints_dir)
end

"""
Expand All @@ -152,4 +172,6 @@ function time_to_seconds(s::String)
end
error("Uncaught case in computing time from given string.")
end


end # module

0 comments on commit b77d68d

Please sign in to comment.