diff --git a/.buildkite/pipeline.sh b/.buildkite/pipeline.sh index aa6a0a9c..96ad9a34 100644 --- a/.buildkite/pipeline.sh +++ b/.buildkite/pipeline.sh @@ -28,7 +28,7 @@ steps: - label: Initialize key: init command: | - julia --project=$exp_dir -e 'import Pkg; Pkg.instantiate(;verbose=true)' + julia --project=$exp_dir -e 'import Pkg; Pkg.build("CalibrateAtmos"); Pkg.instantiate(;verbose=true)' EOM if [ "$generate_data" == "true" ] ; then diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e83910a6..42215e21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,7 @@ jobs: - name: Surface Fluxes Perfect Model Test run: | julia --project=experiments/surface_fluxes_perfect_model -e 'using Pkg; Pkg.instantiate(;verbose=true)' - julia -t 10 --project=experiments/surface_fluxes_perfect_model test/test_sf.jl + julia --project=experiments/surface_fluxes_perfect_model test/test_sf.jl - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 diff --git a/Project.toml b/Project.toml index cf11c223..d75052b0 100644 --- a/Project.toml +++ b/Project.toml @@ -10,9 +10,7 @@ ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" @@ -23,8 +21,6 @@ ClimaParams = "0.10" Distributions = "0.25" EnsembleKalmanProcesses = "1" JLD2 = "0.4" -PrecompileTools = "1" Random = "1" -SciMLBase = "1, 2" TOML = "1" YAML = "0.4" diff --git a/docs/src/api.md b/docs/src/api.md index 89b44cbc..b988c959 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -10,6 +10,12 @@ CalibrateAtmos.observation_map ## EnsembleKalmanProcesses Interface ```@docs +CalibrateAtmos.calibrate CalibrateAtmos.initialize +CalibrateAtmos.save_G_ensemble CalibrateAtmos.update_ensemble +CalibrateAtmos.ExperimentConfig +CalibrateAtmos.get_prior +CalibrateAtmos.get_param_dict +CalibrateAtmos.path_to_iteration ``` diff --git a/docs/src/experiment_setup_guide.md b/docs/src/experiment_setup_guide.md index b67b79f9..87b7fa01 100644 --- a/docs/src/experiment_setup_guide.md +++ b/docs/src/experiment_setup_guide.md @@ -148,8 +148,8 @@ Pseudocode for `observation_map(iteration)`: function observation_map(::Val{:sphere_held_suarez_rhoe_equilmoist}, iteration) # Get Configuration experiment_id = "sphere_held_suarez_rhoe_equilmoist" - config = load_config(experiment_id) - ensemble_size = config["ensemble_size"] + config = ExperimentConfig(experiment_id) + ensemble_size = config.ensemble_size # Setup output array # dims = size of individual member observation map output @@ -215,8 +215,8 @@ The EKP configuration file must contain the following: - `n_iterations`, the number of iterations to run - `ensemble_size`, the ensemble size - `prior_path`, the path to the TOML file with the prior parameter distributions -- `truth_data`, the truth data -- `truth_noise`, the covariance of the truth data +- `observations`, the observational data +- `noise`, the covariance of the observational data - `output_dir`, the folder where you want calibration data and logs to be output. This must be the same as the `output_dir` in the model configuration file. Example: ``` @@ -224,8 +224,8 @@ output_dir: output/sphere_held_suarez_rhoe_equilmoist prior_path: experiments/sphere_held_suarez_rhoe_equilmoist/prior.toml ensemble_size: 10 n_iterations: 3 -truth_data: experiments/sphere_held_suarez_rhoe_equilmoist/obs_mean.jld2 -truth_noise: experiments/sphere_held_suarez_rhoe_equilmoist/obs_noise_cov.jld2 +observations: experiments/sphere_held_suarez_rhoe_equilmoist/obs_mean.jld2 +noise: experiments/sphere_held_suarez_rhoe_equilmoist/obs_noise_cov.jld2 ``` ## Plotting Results diff --git a/docs/src/precompilation.md b/docs/src/precompilation.md index 076977a4..e2a5400a 100644 --- a/docs/src/precompilation.md +++ b/docs/src/precompilation.md @@ -16,11 +16,8 @@ import ClimaAtmos as CA import YAML @setup_workload begin - output = joinpath("precompilation") job_id = "your configuration" - config_file = joinpath("experiments", job_id, "atmos_config.yml") - config_dict = YAML.load_file(config_file) - config_dict["output_dir"] = output + ExperimentConfig(job_id; output_dir = "precompilation") @compile_workload begin initialize(job_id) config = CA.AtmosConfig(config_dict) diff --git a/experiments/sphere_held_suarez_rhoe_equilmoist/ekp_config.yml b/experiments/sphere_held_suarez_rhoe_equilmoist/ekp_config.yml index 235a1a41..b1ceac52 100644 --- a/experiments/sphere_held_suarez_rhoe_equilmoist/ekp_config.yml +++ b/experiments/sphere_held_suarez_rhoe_equilmoist/ekp_config.yml @@ -2,5 +2,5 @@ output_dir: output/sphere_held_suarez_rhoe_equilmoist prior_path: experiments/sphere_held_suarez_rhoe_equilmoist/prior.toml ensemble_size: 10 n_iterations: 3 -truth_data: experiments/sphere_held_suarez_rhoe_equilmoist/obs_mean.jld2 -truth_noise: experiments/sphere_held_suarez_rhoe_equilmoist/obs_noise_cov.jld2 +observations: experiments/sphere_held_suarez_rhoe_equilmoist/obs_mean.jld2 +noise: experiments/sphere_held_suarez_rhoe_equilmoist/obs_noise_cov.jld2 diff --git a/experiments/sphere_held_suarez_rhoe_equilmoist/model_interface.jl b/experiments/sphere_held_suarez_rhoe_equilmoist/model_interface.jl index 568366c1..a9e91724 100644 --- a/experiments/sphere_held_suarez_rhoe_equilmoist/model_interface.jl +++ b/experiments/sphere_held_suarez_rhoe_equilmoist/model_interface.jl @@ -69,11 +69,7 @@ Runs the atmosphere model with the given an AtmosConfig object. Currently only has basic error handling. """ -function run_forward_model( - ::ClimaAtmosModel, - atmos_config::CA.AtmosConfig; - lk = nothing, -) +function run_forward_model(::ClimaAtmosModel, atmos_config::CA.AtmosConfig) simulation = CA.get_simulation(atmos_config) sol_res = CA.solve_atmos!(simulation) if sol_res.ret_code == :simulation_crashed diff --git a/experiments/sphere_held_suarez_rhoe_equilmoist/observation_map.jl b/experiments/sphere_held_suarez_rhoe_equilmoist/observation_map.jl index d6530088..651823ea 100644 --- a/experiments/sphere_held_suarez_rhoe_equilmoist/observation_map.jl +++ b/experiments/sphere_held_suarez_rhoe_equilmoist/observation_map.jl @@ -3,15 +3,14 @@ using Statistics import YAML import EnsembleKalmanProcesses: TOMLInterface import JLD2 -import CalibrateAtmos: observation_map, get_ekp_config +import CalibrateAtmos: observation_map, ExperimentConfig using ClimaAnalysis export observation_map function observation_map(::Val{:sphere_held_suarez_rhoe_equilmoist}, iteration) experiment_id = "sphere_held_suarez_rhoe_equilmoist" - config = get_ekp_config(experiment_id) - output_dir = config["output_dir"] - ensemble_size = config["ensemble_size"] + configuration = ExperimentConfig(experiment_id) + (; ensemble_size, output_dir) = configuration dims = 1 G_ensemble = Array{Float64}(undef, dims..., ensemble_size) diff --git a/experiments/surface_fluxes_perfect_model/Manifest.toml b/experiments/surface_fluxes_perfect_model/Manifest.toml index 7cb9a2d7..b192e3f6 100644 --- a/experiments/surface_fluxes_perfect_model/Manifest.toml +++ b/experiments/surface_fluxes_perfect_model/Manifest.toml @@ -4,11 +4,6 @@ julia_version = "1.10.2" manifest_format = "2.0" project_hash = "2edfa41926269520ea7e69208df499eef7522bd0" -[[deps.ADTypes]] -git-tree-sha1 = "41c37aa88889c171f1300ceac1313c06e891d245" -uuid = "47edcb42-4c32-4615-8424-f2b9edc5f35b" -version = "0.2.6" - [[deps.AMD]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] git-tree-sha1 = "45a1272e3f809d36431e57ab22703c6896b8908f" @@ -294,7 +289,7 @@ uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.1+1" [[deps.CalibrateAtmos]] -deps = ["CalibrateEmulateSample", "ClimaComms", "ClimaParams", "Distributions", "EnsembleKalmanProcesses", "JLD2", "PrecompileTools", "Random", "SciMLBase", "TOML", "YAML"] +deps = ["CalibrateEmulateSample", "ClimaComms", "ClimaParams", "Distributions", "EnsembleKalmanProcesses", "JLD2", "Random", "TOML", "YAML"] path = "../.." uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2" version = "0.1.0" @@ -396,11 +391,6 @@ git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" version = "1.0.2" -[[deps.CommonSolve]] -git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c" -uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.4" - [[deps.CommonSubexpressions]] deps = ["MacroTools", "Test"] git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" @@ -769,17 +759,6 @@ git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91" uuid = "559328eb-81f9-559d-9380-de523a88c83c" version = "1.0.10+0" -[[deps.FunctionWrappers]] -git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e" -uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" -version = "1.1.3" - -[[deps.FunctionWrappersWrappers]] -deps = ["FunctionWrappers"] -git-tree-sha1 = "b104d487b34566608f8b4e1c39fb0b10aa279ff8" -uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf" -version = "0.1.3" - [[deps.Future]] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" @@ -1879,12 +1858,6 @@ git-tree-sha1 = "40b9edad2e5287e05bd413a38f61a8ff55b9557b" uuid = "5eaf0fd0-dfba-4ccb-bf02-d820a40db705" version = "0.2.1" -[[deps.RuntimeGeneratedFunctions]] -deps = ["ExprTools", "SHA", "Serialization"] -git-tree-sha1 = "6aacc5eefe8415f47b3e34214c1d79d2674a0ba2" -uuid = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" -version = "0.5.12" - [[deps.SCS]] deps = ["MathOptInterface", "Requires", "SCS_GPU_jll", "SCS_jll", "SparseArrays"] git-tree-sha1 = "8d908b7c81e199ee92d17b6192849e8c43d2f31d" @@ -1918,37 +1891,6 @@ git-tree-sha1 = "3aac6d68c5e57449f5b9b865c9ba50ac2970c4cf" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" version = "0.6.42" -[[deps.SciMLBase]] -deps = ["ADTypes", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] -git-tree-sha1 = "3a281a9fce9cd62b849d7f16e412933a5fe755cb" -uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "2.29.0" - - [deps.SciMLBase.extensions] - SciMLBaseChainRulesCoreExt = "ChainRulesCore" - SciMLBaseMakieExt = "Makie" - SciMLBasePartialFunctionsExt = "PartialFunctions" - SciMLBasePyCallExt = "PyCall" - SciMLBasePythonCallExt = "PythonCall" - SciMLBaseRCallExt = "RCall" - SciMLBaseZygoteExt = "Zygote" - - [deps.SciMLBase.weakdeps] - ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" - PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b" - PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" - PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" - RCall = "6f49c342-dc21-5d91-9882-a32aef131414" - Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" - -[[deps.SciMLOperators]] -deps = ["ArrayInterface", "DocStringExtensions", "LinearAlgebra", "MacroTools", "Setfield", "SparseArrays", "StaticArraysCore"] -git-tree-sha1 = "10499f619ef6e890f3f4a38914481cc868689cd5" -uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" -version = "0.3.8" - [[deps.ScientificTypesBase]] git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b" uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161" diff --git a/experiments/surface_fluxes_perfect_model/ekp_config.yml b/experiments/surface_fluxes_perfect_model/ekp_config.yml index 412e0eee..cfbc0e55 100644 --- a/experiments/surface_fluxes_perfect_model/ekp_config.yml +++ b/experiments/surface_fluxes_perfect_model/ekp_config.yml @@ -2,6 +2,6 @@ output_dir: output/surface_fluxes_perfect_model prior_path: experiments/surface_fluxes_perfect_model/prior.toml ensemble_size: 10 n_iterations: 10 -truth_data: experiments/surface_fluxes_perfect_model/data/obs_mean.jld2 -truth_noise: experiments/surface_fluxes_perfect_model/data/obs_noise_cov.jld2 +observations: experiments/surface_fluxes_perfect_model/data/obs_mean.jld2 +noise: experiments/surface_fluxes_perfect_model/data/obs_noise_cov.jld2 generate_data: true diff --git a/experiments/surface_fluxes_perfect_model/model_interface.jl b/experiments/surface_fluxes_perfect_model/model_interface.jl index 37dc3d30..9d02c61b 100644 --- a/experiments/surface_fluxes_perfect_model/model_interface.jl +++ b/experiments/surface_fluxes_perfect_model/model_interface.jl @@ -1,7 +1,11 @@ import EnsembleKalmanProcesses as EKP using CalibrateAtmos import CalibrateAtmos: - AbstractPhysicalModel, get_config, run_forward_model, get_forward_model + AbstractPhysicalModel, + get_config, + run_forward_model, + get_forward_model, + ExperimentConfig import YAML """ @@ -43,13 +47,12 @@ function get_config( iteration, experiment_id::AbstractString, ) - config_dict = YAML.load_file("experiments/$experiment_id/model_config.yml") - return get_config(model, member, iteration, config_dict) + return get_config(model, member, iteration, ExperimentConfig(experiment_id)) end """ get_config(member, iteration, experiment_id::AbstractString) - get_config(member, iteration, config_dict::AbstractDict) + get_config(member, iteration, experiment_config::ExperimentConfig) Returns an config dictionary object for the given member and iteration. If given an experiment id string, it will load the config from the corresponding YAML file. @@ -60,22 +63,29 @@ function get_config( ::SurfaceFluxModel, member, iteration, - config_dict::AbstractDict, + experiment_config::ExperimentConfig, ) # Specify member path for output_dir - output_dir = config_dict["output_dir"] + model_config = YAML.load_file( + joinpath( + "experiments", + "surface_fluxes_perfect_model", + "model_config.yml", + ), + ) + output_dir = (experiment_config.output_dir) # Set TOML to use EKP parameter(s) member_path = EKP.TOMLInterface.path_to_ensemble_member(output_dir, iteration, member) - config_dict["output_dir"] = member_path + model_config["output_dir"] = member_path parameter_path = joinpath(member_path, "parameters.toml") - if haskey(config_dict, "toml") - push!(config_dict["toml"], parameter_path) + if haskey(model_config, "toml") + push!(model_config["toml"], parameter_path) else - config_dict["toml"] = [parameter_path] + model_config["toml"] = [parameter_path] end - return config_dict + return model_config end """ @@ -84,18 +94,8 @@ end Runs the model with the given an AbstractDict object. """ -function run_forward_model( - ::SurfaceFluxModel, - config::AbstractDict; - lk = nothing, -) - x_inputs = if isnothing(lk) - load_profiles(config["x_data_file"]) - else - lock(lk) do - load_profiles(config["x_data_file"]) - end - end +function run_forward_model(::SurfaceFluxModel, config::AbstractDict) + x_inputs = load_profiles(config["x_data_file"]) FT = typeof(x_inputs.profiles_int[1].T) obtain_ustar(FT, x_inputs, config) end diff --git a/experiments/surface_fluxes_perfect_model/observation_map.jl b/experiments/surface_fluxes_perfect_model/observation_map.jl index 61d078be..fd828313 100644 --- a/experiments/surface_fluxes_perfect_model/observation_map.jl +++ b/experiments/surface_fluxes_perfect_model/observation_map.jl @@ -2,7 +2,7 @@ using Statistics import YAML import EnsembleKalmanProcesses: TOMLInterface import JLD2 -import CalibrateAtmos: observation_map, get_ekp_config +import CalibrateAtmos: observation_map """ observation_map(::Val{:surface_fluxes_perfect_model}, iteration) @@ -11,10 +11,8 @@ Returns the observation map (from the raw model output to the observable y), as specified by process_member_data, for the given iteration. """ function observation_map(::Val{:surface_fluxes_perfect_model}, iteration) - experiment_id = "surface_fluxes_perfect_model" - config = get_ekp_config(experiment_id) - output_dir = config["output_dir"] - ensemble_size = config["ensemble_size"] + config = ExperimentConfig("surface_fluxes_perfect_model") + (; output_dir, ensemble_size) = config model_output = "model_ustar_array.jld2" dims = 1 diff --git a/experiments/surface_fluxes_perfect_model/plot.jl b/experiments/surface_fluxes_perfect_model/plot.jl index 639021ff..d283cff4 100644 --- a/experiments/surface_fluxes_perfect_model/plot.jl +++ b/experiments/surface_fluxes_perfect_model/plot.jl @@ -104,11 +104,9 @@ pkg_dir = pkgdir(CalibrateAtmos) model_config = YAML.load_file( joinpath(pkg_dir, "experiments", experiment_id, "model_config.yml"), ) -ekp_config = YAML.load_file( - joinpath(pkg_dir, "experiments", experiment_id, "ekp_config.yml"), -) -N_iter = ekp_config["n_iterations"] -N_mem = ekp_config["ensemble_size"] +ekp_config = ExperimentConfig(experiment_id) +N_iter = ekp_config.n_iterations +N_mem = ekp_config.ensemble_size eki_path = joinpath( joinpath(pkg_dir, model_config["output_dir"]), "iteration_$(lpad(N_iter, 3, '0'))", @@ -116,8 +114,7 @@ eki_path = joinpath( ); eki = JLD2.load_object(eki_path); EKP.get_u(eki) -prior_path = joinpath(pkg_dir, ekp_config["prior_path"]) -prior = CalibrateAtmos.get_prior(prior_path) +prior = ekp_config.prior theta_star_vec = (; coefficient_a_m_businger = 4.7, coefficient_a_h_businger = 4.7) diff --git a/src/ekp_interface.jl b/src/ekp_interface.jl index 49428a5a..e702bf57 100644 --- a/src/ekp_interface.jl +++ b/src/ekp_interface.jl @@ -7,6 +7,70 @@ using EnsembleKalmanProcesses.ParameterDistributions using EnsembleKalmanProcesses.TOMLInterface import ClimaComms +export ExperimentConfig + +""" + ExperimentConfig(experiment_id) + + ExperimentConfig( + experiment_id, + n_iterations, + ensemble_size, + observations, + noise, + prior, + output_dir, + generate_plots, + emulate_sample, + ) + +ExperimentConfig stores the configuration for a specific experiment. +If just the experiment ID string is passed in, the config will be constructed from the +file `experiments/experiment_id/ekp_config.yml`. +For customizable interactive experiments, arguments can be passed in directly. +""" +struct ExperimentConfig + id::AbstractString + n_iterations::Integer + ensemble_size::Integer + observations::Any + noise::Any + prior::ParameterDistribution + output_dir::Any + generate_plots::Bool + emulate_sample::Bool + +end + +function ExperimentConfig(experiment_id) + + config_file = joinpath("experiments", experiment_id, "ekp_config.yml") + config_dict = YAML.load_file(config_file) + + default_output = + haskey(ENV, "CI") ? experiment_id : joinpath("output", experiment_id) + output_dir = get(config_dict, "output_dir", default_output) + + n_iterations = config_dict["n_iterations"] + ensemble_size = config_dict["ensemble_size"] + observations = JLD2.load_object(config_dict["observations"]) + noise = JLD2.load_object(config_dict["noise"]) + prior = get_prior(config_dict["prior_path"]) + + return ExperimentConfig( + experiment_id, + n_iterations, + ensemble_size, + observations, + noise, + prior, + output_dir, + get(config_dict, "generate_plots", false), + get(config_dict, "emulate_sample", false), + ) +end + + """ path_to_iteration(output_dir, iteration) Returns the path to the iteration folder within `output_dir` for the given iteration number. @@ -34,69 +98,66 @@ function get_prior(param_dict::AbstractDict; names = nothing) end """ - get_ekp_config(experiment_id) + get_param_dict(distribution; names) -Load the EKP configuration for a given `experiment_id` +Generate a parameter dictionary for use in `EKP.TOMLInterface.save_parameter_ensemble`. +Assumes that all variables in the distribution are floating-point types. """ -get_ekp_config(experiment_id) = - YAML.load_file(joinpath("experiments", experiment_id, "ekp_config.yml")) +function get_param_dict( + distribution::PD; + names = distribution.name, +) where {PD <: ParameterDistributions.ParameterDistribution} + return Dict( + name => Dict{Any, Any}("type" => "float") for name in distribution.name + ) +end """ save_G_ensemble(experiment_id, iteration, G_ensemble) Save an ensemble's observation map output to the correct folder. """ -function save_G_ensemble(experiment_id, iteration, G_ensemble) - config = get_ekp_config(experiment_id) - iter_path = path_to_iteration(config["output_dir"], iteration) +function save_G_ensemble(experiment_id, iteration, G_ensemble; config_kwargs...) + config = ExperimentConfig(experiment_id; config_kwargs...) + return save_G_ensemble(config, iteration, G_ensemble) +end + +function save_G_ensemble(config::ExperimentConfig, iteration, G_ensemble) + iter_path = path_to_iteration(config.output_dir, iteration) JLD2.save_object(joinpath(iter_path, "G_ensemble.jld2"), G_ensemble) + return G_ensemble end + """ + initialize(config::ExperimentConfig) initialize( - experiment_id; - config = YAML.load_file("experiments/\$experiment_id/ekp_config.yml"), - Γ = JLD2.load(config["truth_noise"]), - y = JLD2.load(config["truth_data"]), - rng_seed = 1234, + experiment_id::AbstractString; + kwargs... ) -Initializes the EKP object and the model ensemble. - -Takes in - - `experiment_id`: the name of the experiment, which corresponds to the name of the subfolder in `experiments/` - - `config`: a dictionary of configuration values -""" -function initialize( - experiment_id; - config = YAML.load_file( - joinpath("experiments", experiment_id, "ekp_config.yml"), - ), - Γ = JLD2.load_object(config["truth_noise"]), - y = JLD2.load_object(config["truth_data"]), - rng_seed = 1234, -) +Initializes the EKP object and the model ensemble. See ExperimentConfig for a full list of keyword arguments. +""" +initialize(experiment_id::AbstractString; kwargs...) = + initialize(ExperimentConfig(experiment_id); kwargs...) + +function initialize(config::ExperimentConfig; rng_seed = 1234) Random.seed!(rng_seed) rng_ekp = Random.MersenneTwister(rng_seed) - output_dir = config["output_dir"] - ensemble_size = config["ensemble_size"] - # Save in EKI object in iteration_000 folder - eki_path = joinpath(output_dir, "iteration_000", "eki_file.jld2") - - param_dict = TOML.parsefile(config["prior_path"]) - prior = get_prior(param_dict) - + (; observations, ensemble_size, noise, prior, output_dir) = config initial_ensemble = EKP.construct_initial_ensemble(rng_ekp, prior, ensemble_size) eki = EKP.EnsembleKalmanProcess( initial_ensemble, - y, - Γ, + observations, + noise, EKP.Inversion(); rng = rng_ekp, failure_handler_method = EKP.SampleSuccGauss(), ) + param_dict = get_param_dict(prior) + save_parameter_ensemble( EKP.get_u_final(eki), # constraints applied when saving prior, @@ -105,6 +166,9 @@ function initialize( "parameters.toml", 0, # Initial iteration = 0 ) + + # Save in EKI object in iteration_000 folder + eki_path = joinpath(output_dir, "iteration_000", "eki_file.jld2") JLD2.save_object(eki_path, eki) return eki end @@ -118,27 +182,22 @@ end Updates the EKI object and saves parameters for the next iteration. Assumes that the observation map has been run and saved in the current iteration folder. """ -function update_ensemble( - experiment_id, - iteration; - config = YAML.load_file("experiments/$experiment_id/ekp_config.yml"), -) - output_dir = config["output_dir"] +update_ensemble(experiment_id, iteration; kwargs...) = + update_ensemble(ExperimentConfig(experiment_id; kwargs...), iteration) + +function update_ensemble(configuration::ExperimentConfig, iteration;) + (; prior, output_dir) = configuration # Load EKI object from iteration folder iter_path = path_to_iteration(output_dir, iteration) - eki_path = joinpath(iter_path, "eki_file.jld2") - eki = JLD2.load_object(eki_path) + eki = JLD2.load_object(joinpath(iter_path, "eki_file.jld2")) # Load data from the ensemble G_ens = JLD2.load_object(joinpath(iter_path, "G_ensemble.jld2")) - # Update EKP.update_ensemble!(eki, G_ens) iteration += 1 - # Update and save parameters for next iteration - param_dict = TOML.parsefile(config["prior_path"]) - prior = get_prior(param_dict) + param_dict = get_param_dict(prior) save_parameter_ensemble( EKP.get_u_final(eki), # constraints applied when saving @@ -174,33 +233,39 @@ include(joinpath(experiment_path, "generate_data.jl")) eki = CalibrateAtmos.calibrate(experiment_id) ``` """ -function calibrate(experiment_id; device = ClimaComms.device()) - ekp_config = get_ekp_config(experiment_id) +function calibrate(experiment_id; device = ClimaComms.device(), kwargs...) + configuration = ExperimentConfig(experiment_id; kwargs...) + return calibrate(configuration; device, kwargs...) +end + +function calibrate( + configuration::ExperimentConfig; + device = ClimaComms.device(), + kwargs..., +) # initialize the CalibrateAtmos - initialize(experiment_id) + initialize(configuration) # run experiment with CalibrateAtmos for N_iter iterations - N_iter = ekp_config["n_iterations"] - N_mem = ekp_config["ensemble_size"] - output_dir = ekp_config["output_dir"] + (; n_iterations, id, ensemble_size) = configuration + eki = nothing - physical_model = get_forward_model(Val(Symbol(experiment_id))) - lk = ReentrantLock() - for i in 0:(N_iter - 1) - ClimaComms.@threaded device for m in 1:N_mem + physical_model = get_forward_model(Val(Symbol(id))) + for i in 0:(n_iterations - 1) + @info "Running iteration $i" + for m in 1:ensemble_size # model run for each ensemble member run_forward_model( physical_model, - get_config(physical_model, m, i, experiment_id); - lk, + get_config(physical_model, m, i, id), ) - @info "Finished model run for member $m at iteration $i" + @info "Completed member $m" end # update EKP with the ensemble output and update calibrated parameters - G_ensemble = observation_map(Val(Symbol(experiment_id)), i) - save_G_ensemble(experiment_id, i, G_ensemble) - eki = update_ensemble(experiment_id, i) + G_ensemble = observation_map(Val(Symbol(id)), i) + save_G_ensemble(configuration, i, G_ensemble; kwargs...) + eki = update_ensemble(configuration, i) end return eki end diff --git a/src/model_interface.jl b/src/model_interface.jl index 9de3fd8e..6c9564c8 100644 --- a/src/model_interface.jl +++ b/src/model_interface.jl @@ -8,35 +8,31 @@ function get_config( physical_model::AbstractPhysicalModel, member, iteration, - experiment_id::AbstractString, + experiment_id::AbstractString; + kwargs..., ) - config_dict = YAML.load_file("experiments/$experiment_id/model_config.yml") - return get_config(physical_model, member, iteration, config_dict) + experiment_config = ExperimentConfig(experiment_id; kwargs...) + return get_config(physical_model, member, iteration, experiment_config) end """ get_config(member, iteration, experiment_id::AbstractString) - get_config(member, iteration, config_dict::AbstractDict) + get_config(member, iteration, experiment_config::AbstractDict) Returns an AtmosConfig object for the given member and iteration. If given an experiment id string, it will load the config from the corresponding YAML file. Turns off default diagnostics and sets the TOML parameter file to the member's path. This assumes that the config dictionary has `output_dir` and `restart_file` keys. """ -get_config( - physical_model::AbstractPhysicalModel, - member, - iteration, - config_dict::AbstractDict, -) = error("get_config not implemented for $physical_model") +get_config(physical_model::AbstractPhysicalModel, member, iteration, _) = + error("get_config not implemented for $physical_model") """ - run_forward_model(config::AbstractDict) + run_forward_model(physical_model, config) -Runs the atmosphere model with the given an AtmosConfig object. -Currently only has basic error handling. +Runs the forward model with the given configuration, returned by `get_config`. """ -run_forward_model(physical_model::AbstractPhysicalModel, config::AbstractDict) = +run_forward_model(physical_model::AbstractPhysicalModel, config) = error("run_forward_model not implemented for $physical_model") """ diff --git a/test/e2e_test.jl b/test/e2e_test.jl new file mode 100644 index 00000000..1d494f1d --- /dev/null +++ b/test/e2e_test.jl @@ -0,0 +1,95 @@ +using Test + +import EnsembleKalmanProcesses as EKP +using EnsembleKalmanProcesses.ParameterDistributions +using EnsembleKalmanProcesses.TOMLInterface +import ClimaParams as CP + +import CalibrateAtmos: + AbstractPhysicalModel, + get_config, + run_forward_model, + get_forward_model, + ExperimentConfig, + calibrate, + observation_map + +import JLD2 + +# Experiment Info +id = "e2e_test" +output_file = "model_output.jld2" +prior = constrained_gaussian("test_param", 10, 5, 0, Inf) +n_iterations = 1 +ensemble_size = 10 +observations = [20.0] +noise = [0.01;;] +output_dir = joinpath("test", "e2e_test_output") + +experiment_config = ExperimentConfig( + id, + n_iterations, + ensemble_size, + observations, + noise, + prior, + output_dir, + false, + false, +) + +# Model interface +struct TestModel end + +TestModel <: AbstractPhysicalModel + +get_forward_model(::Val{:e2e_test}) = TestModel() + +function get_config( + physical_model::TestModel, + member, + iteration, + experiment_id::AbstractString, +) + model_config = Dict() + output_dir = (experiment_config.output_dir) + member_path = path_to_ensemble_member(output_dir, iteration, member) + model_config["output_dir"] = member_path + parameter_path = joinpath(member_path, "parameters.toml") + model_config["toml"] = parameter_path + return model_config +end + +function run_forward_model(::TestModel, config) + toml_dict = CP.create_toml_dict(Float64; override_file = config["toml"]) + (; test_param) = CP.get_parameter_values(toml_dict, "test_param") + output = test_param + JLD2.save_object(joinpath(config["output_dir"], output_file), output) +end + +# Observation map +function observation_map(::Val{:e2e_test}, iteration) + + (; ensemble_size) = experiment_config + dims = 1 + G_ensemble = Array{Float64}(undef, dims..., ensemble_size) + for m in 1:ensemble_size + member_path = + TOMLInterface.path_to_ensemble_member(output_dir, iteration, m) + output = JLD2.load_object(joinpath(member_path, output_file)) + G_ensemble[:, m] .= output + end + return G_ensemble +end + +# Test! +ekp = calibrate(experiment_config) + +@testset "Test dummy end-to-end calibration" begin + parameter_values = + [EKP.get_ϕ_mean(prior, ekp, it) for it in 1:(n_iterations + 1)] + @test parameter_values[1][1] ≈ 9.779 rtol = 0.01 + @test parameter_values[end][1] ≈ 19.63 rtol = 0.01 +end + +rm(output_dir; recursive = true) diff --git a/test/runtests.jl b/test/runtests.jl index b6a72385..6bd1221e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,3 +7,4 @@ include("test_init.jl") # include("test_atmos_config.jl") # TODO: to be moved to ClimaAtmos include("test_model_interface.jl") include("test_emulate_sample.jl") +include("e2e_test.jl") diff --git a/test/test_init.jl b/test/test_init.jl index e7835126..3ed8251c 100644 --- a/test/test_init.jl +++ b/test/test_init.jl @@ -9,23 +9,38 @@ FT = Float64 output_dir = "test_init" prior_path = joinpath("test_case_inputs", "prior.toml") param_names = ["one", "two"] -config = Dict( - "output_dir" => output_dir, - "prior_path" => prior_path, - "ensemble_size" => 1, + +prior = CalibrateAtmos.get_prior(prior_path) +noise = 0.1 * I +observations = zeros(Float64, 1) +n_iterations = 1 +ensemble_size = 10 + +config = CalibrateAtmos.ExperimentConfig( + "test", + n_iterations, + ensemble_size, + observations, + noise, + prior, + output_dir, + false, + false, ) -Γ = 0.1 * I -y = zeros(Float64, 1) -CalibrateAtmos.initialize("test"; config, Γ, y, rng_seed = 4444) +CalibrateAtmos.initialize(config) -override_file = - joinpath(output_dir, "iteration_000", "member_001", "parameters.toml") +override_file = joinpath( + config.output_dir, + "iteration_000", + "member_001", + "parameters.toml", +) td = CP.create_toml_dict(FT; override_file) params = CP.get_parameter_values(td, param_names) @testset "Initialized parameter values" begin # This checks for random seed as well - @test params.one == 2.7212695972038583 - @test params.two == 4.891199610995353 + @test params.one == 1.8171573383720587 + @test params.two == 5.408386812503563 end diff --git a/test/test_model_interface.jl b/test/test_model_interface.jl index 3ce8f863..ea700f85 100644 --- a/test/test_model_interface.jl +++ b/test/test_model_interface.jl @@ -1,4 +1,6 @@ import CalibrateAtmos + +using EnsembleKalmanProcesses.ParameterDistributions using Test # Tests for ensuring CalibrateAtmos has protected interfaces. The API tested below must be defined for each model, @@ -6,36 +8,48 @@ using Test struct TestPhysicalModel <: CalibrateAtmos.AbstractPhysicalModel end -@testset "get_config" begin - @test_throws ErrorException( - "get_config not implemented for TestPhysicalModel()", - ) CalibrateAtmos.get_config(TestPhysicalModel(), 1, 1, Dict{Any, Any}()) -end +@testset "Model Interface stubs" begin -@testset "run_forward_model" begin - @test_throws ErrorException( - "run_forward_model not implemented for TestPhysicalModel()", - ) CalibrateAtmos.run_forward_model(TestPhysicalModel(), Dict{Any, Any}()) -end + @testset "get_config" begin + @test_throws ErrorException( + "get_config not implemented for TestPhysicalModel()", + ) CalibrateAtmos.get_config(TestPhysicalModel(), 1, 1, Dict{Any, Any}()) + end -@testset "get_forward_model" begin - @test_throws ErrorException( - "get_forward_model not implemented for Val{:test}()", - ) CalibrateAtmos.get_forward_model(Val(:test)) -end + @testset "run_forward_model" begin + @test_throws ErrorException( + "run_forward_model not implemented for TestPhysicalModel()", + ) CalibrateAtmos.run_forward_model( + TestPhysicalModel(), + Dict{Any, Any}(), + ) + end -@testset "observation_map" begin - @test_throws ErrorException( - "observation_map not implemented for experiment Val{:test}() at iteration 1", - ) CalibrateAtmos.observation_map(Val(:test), 1) -end + @testset "get_forward_model" begin + @test_throws ErrorException( + "get_forward_model not implemented for Val{:test}()", + ) CalibrateAtmos.get_forward_model(Val(:test)) + end + + @testset "observation_map" begin + @test_throws ErrorException( + "observation_map not implemented for experiment Val{:test}() at iteration 1", + ) CalibrateAtmos.observation_map(Val(:test), 1) + end + + @testset "calibrate func" begin + experiment_config = CalibrateAtmos.ExperimentConfig( + "test", + 1, + 1, + [20.0], + [0.01;;], + constrained_gaussian("test_param", 10, 5, 0, Inf), + joinpath("test", "e2e_test_output"), + false, + false, + ) + @test_throws ErrorException CalibrateAtmos.calibrate(experiment_config) + end -# This test depends on `surface_fluxes_perfect_model` in the `experiments/` folder -@testset "calibrate func" begin - dir = pwd() - cd(pkgdir(CalibrateAtmos)) - @test_throws ErrorException CalibrateAtmos.calibrate( - "surface_fluxes_perfect_model", - ) - cd(dir) end diff --git a/test/test_sf.jl b/test/test_sf.jl index faef8715..b0f8c4cd 100644 --- a/test/test_sf.jl +++ b/test/test_sf.jl @@ -8,9 +8,10 @@ include(joinpath(experiment_path, "model_interface.jl")) include(joinpath(experiment_path, "generate_data.jl")) prior = CalibrateAtmos.get_prior(joinpath(experiment_path, "prior.toml")) + eki = CalibrateAtmos.calibrate(experiment_id) -@testset "Test pure Julia calibration (surface fluxes perfect model)" begin +@testset "End to end test using file config (surface fluxes perfect model)" begin parameter_values = get_ϕ_mean_final(prior, eki) test_parameter_values = [4.8684152849621976, 5.2022848059758875] @test all(isapprox.(parameter_values, test_parameter_values; rtol = 1e-3))