Skip to content

Commit

Permalink
Add pmap for backend
Browse files Browse the repository at this point in the history
  • Loading branch information
nefrathenrici committed Jul 31, 2024
1 parent c6033ec commit 56b90ab
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .buildkite/clima_server_test/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ steps:
key: "init_cpu_env"
command:
- echo "--- Instantiate SurfaceFluxes calibration project"
- julia --project=experiments/surface_fluxes_perfect_model -e 'using Pkg; Pkg.precompile()'
- julia --project=experiments/surface_fluxes_perfect_model -e 'using Pkg; Pkg.develop(;path="."); Pkg.precompile()'

- wait
- label: "SurfaceFluxes perfect model calibration"
Expand Down
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ steps:
key: "init_cpu_env"
command:
- echo "--- Instantiate SurfaceFluxes calibration project"
- julia --project=experiments/surface_fluxes_perfect_model -e 'using Pkg; Pkg.precompile()'
- julia --project=experiments/surface_fluxes_perfect_model -e 'using Pkg; Pkg.develop(;path="."); Pkg.precompile()'

- wait
- label: "SurfaceFluxes perfect model calibration"
Expand Down
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name = "ClimaCalibrate"
uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
authors = ["Climate Modeling Alliance"]
version = "0.0.2"
version = "0.0.3"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Expand Down
33 changes: 23 additions & 10 deletions src/backends.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Distributed

export get_backend, calibrate, model_run

abstract type AbstractBackend end
Expand Down Expand Up @@ -58,28 +60,39 @@ function module_load_string(::Type{DerechoBackend})
"""
end

calibrate(config::ExperimentConfig; ekp_kwargs...) =
calibrate(get_backend(), config; ekp_kwargs...)
calibrate(config::ExperimentConfig; reruns = 0, ekp_kwargs...) =
calibrate(get_backend(), config; reruns, ekp_kwargs...)

calibrate(experiment_dir::AbstractString; ekp_kwargs...) =
calibrate(get_backend(), ExperimentConfig(experiment_dir); ekp_kwargs...)
calibrate(experiment_dir::AbstractString; reruns = 0, ekp_kwargs...) =
calibrate(
get_backend(),
ExperimentConfig(experiment_dir);
reruns,
ekp_kwargs...,
)

calibrate(
b::Type{JuliaBackend},
experiment_dir::AbstractString;
reruns = 0,
ekp_kwargs...,
) = calibrate(b, ExperimentConfig(experiment_dir); ekp_kwargs...)
) = calibrate(b, ExperimentConfig(experiment_dir); reruns, ekp_kwargs...)

function calibrate(
::Type{JuliaBackend},
config::ExperimentConfig;
reruns = 0,
ekp_kwargs...,
)
(; n_iterations, ensemble_size) = config
eki = initialize(config; ekp_kwargs...)
on_error(e::InterruptException) = rethrow(e)
on_error(e) =
@error "Single ensemble member has errored. See stacktrace" exception =
(e, catch_backtrace())
for i in 0:(n_iterations - 1)
@info "Running iteration $i"
for m in 1:ensemble_size
pmap(1:ensemble_size; retry_delays = reruns, on_error) do m
run_forward_model(set_up_forward_model(m, i, config))
@info "Completed member $m"
end
Expand All @@ -100,11 +113,11 @@ Takes either an ExperimentConfig or an experiment folder.
Available Backends: CaltechHPCBackend, ClimaGPUBackend, DerechoBackend, JuliaBackend
# Keyword Arguments
- `experiment_dir: Directory containing experiment configurations.
- `model_interface: Path to the model interface file.
- `hpc_kwargs`: Dictionary of resource arguments, passed to the job scheduler.
- `reruns`: Number of times to retry a failed ensemble member.
- `verbose::Bool`: Enable verbose logging.
# Usage
Expand Down Expand Up @@ -168,7 +181,7 @@ function calibrate(
)
end

statuses = wait_for_jobs(
wait_for_jobs(
jobids,
output_dir,
iter,
Expand Down Expand Up @@ -206,7 +219,7 @@ Arguments:
- hpc_kwargs: Dictionary containing the resources for the job. Easily generated using [`kwargs`](@ref).
"""
model_run(
b::Type{<:SlurmBackend},
::Type{<:SlurmBackend},
iter,
member,
output_dir,
Expand All @@ -224,7 +237,7 @@ model_run(
hpc_kwargs,
)
model_run(
b::Type{DerechoBackend},
::Type{DerechoBackend},
iter,
member,
output_dir,
Expand Down
3 changes: 3 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
EnsembleKalmanProcesses = "< 1.1.6"

0 comments on commit 56b90ab

Please sign in to comment.