From e5eeb6a89c05f42c5dc11b455cc3053216c86598 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:11:17 -0800 Subject: [PATCH 1/6] Add Artifacts.ilamb_dataset_path --- Artifacts.toml | 7 +++++++ src/Artifacts.jl | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Artifacts.toml b/Artifacts.toml index 9d421d220e..81abdead25 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -77,3 +77,10 @@ git-tree-sha1 = "49386718eda19d2fa00ae6981d83948c6664057a" [[lehmann2008_evaporation.download]] sha256 = "e9603add174709a8eb7f70487bdc0f519843c03d83cb40cf07acd428f0d9d881" url = "https://caltech.box.com/shared/static/khhop0nvuumrtzdakemaab8hsq8emh8e.gz" + +[ilamb_data] +git-tree-sha1 = "839224a62b59d73073bdb9a5c55d3dc75e30fe33" + + [[ilamb_data.download]] + sha256 = "64a9a344ebfbb0113014178a1f93a655c401431565907c07fd33aff8860b62d6" + url = "https://caltech.box.com/shared/static/eii2bfwfp47axfeuysgxlgzbczz27u5g.gz" diff --git a/src/Artifacts.jl b/src/Artifacts.jl index 0e951bc7b8..ff6d1e52f3 100644 --- a/src/Artifacts.jl +++ b/src/Artifacts.jl @@ -316,4 +316,17 @@ function bareground_albedo_dataset_path(; context = nothing) ) end +""" + ilamb_dataset_path(; context = nothing) + +Triggers the download of the ILAMB dataset, if not already downloaded, using +Julia Artifacts, and returns the path to this file. + +There are only three datasets available which are "rlus_CERESed4.2_rlus.nc", +"gpp_FLUXCOM_gpp.nc", and "evspsbl_MODIS_et_0.5x0.5.nc". +""" +function ilamb_dataset_path(; context = nothing) + return joinpath(@clima_artifact("ilamb_data"), context) +end + end From a99b2462d43959edd47bcb9053ff84df8d4caff3 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:17:52 -0700 Subject: [PATCH 2/6] Update ClimaAnalysis to 0.5.12 --- .buildkite/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/Project.toml b/.buildkite/Project.toml index 7f298bb8a0..3e15187523 100644 --- a/.buildkite/Project.toml +++ b/.buildkite/Project.toml @@ -43,6 +43,6 @@ Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [compat] -ClimaAnalysis = "0.5.7" +ClimaAnalysis = "0.5.12" ClimaTimeSteppers = "0.7" Statistics = "1" From 76f372c81b597ed7dbab0b3051707a26ec05f734 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:11:34 -0800 Subject: [PATCH 3/6] Run snowy_land simulation for 2 years --- experiments/long_runs/snowy_land.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/experiments/long_runs/snowy_land.jl b/experiments/long_runs/snowy_land.jl index 21ebcd8790..65dcdef361 100644 --- a/experiments/long_runs/snowy_land.jl +++ b/experiments/long_runs/snowy_land.jl @@ -1,7 +1,7 @@ # # Global run of land model # The code sets up and runs ClimaLand v1, which -# includes soil, canopy, and snow, for 365 days on a spherical domain, +# includes soil, canopy, and snow, for 730 days on a spherical domain, # using ERA5 data as forcing. In this simulation, we have # turned lateral flow off because horizontal boundary conditions and the # land/sea mask are not yet supported by ClimaCore. @@ -9,7 +9,7 @@ # Simulation Setup # Number of spatial elements: 101 in horizontal, 15 in vertical # Soil depth: 50 m -# Simulation duration: 365 d +# Simulation duration: 730 d # Timestep: 450 s # Timestepper: ARS111 # Fixed number of iterations: 3 @@ -389,7 +389,7 @@ end function setup_and_solve_problem(; greet = false) t0 = 0.0 - tf = 60 * 60.0 * 24 * 365 * 1 + tf = 60 * 60.0 * 24 * 365 * 2 Δt = 450.0 nelements = (101, 15) if greet From 04957755b134114690c2213f7b19d38ca0945847 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:34:54 -0800 Subject: [PATCH 4/6] Save start date for .nc files in snowy land --- experiments/long_runs/snowy_land.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/experiments/long_runs/snowy_land.jl b/experiments/long_runs/snowy_land.jl index 65dcdef361..514a555f43 100644 --- a/experiments/long_runs/snowy_land.jl +++ b/experiments/long_runs/snowy_land.jl @@ -367,7 +367,11 @@ function setup_prob(t0, tf, Δt; outdir = outdir, nelements = (101, 15)) # ClimaDiagnostics - nc_writer = ClimaDiagnostics.Writers.NetCDFWriter(subsurface_space, outdir) + nc_writer = ClimaDiagnostics.Writers.NetCDFWriter( + subsurface_space, + outdir; + start_date, + ) diags = ClimaLand.default_diagnostics( land, From 251ed5d7817ffaf87ff14a20e36a270e589b4201 Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:12:56 -0800 Subject: [PATCH 5/6] Add postprocessing pipeline --- .../long_runs/leaderboard/data_sources.jl | 209 ++++++++++++++++++ .../long_runs/leaderboard/leaderboard.jl | 177 +++++++++++++++ experiments/long_runs/snowy_land.jl | 5 + 3 files changed, 391 insertions(+) create mode 100644 experiments/long_runs/leaderboard/data_sources.jl create mode 100644 experiments/long_runs/leaderboard/leaderboard.jl diff --git a/experiments/long_runs/leaderboard/data_sources.jl b/experiments/long_runs/leaderboard/data_sources.jl new file mode 100644 index 0000000000..abf9276e01 --- /dev/null +++ b/experiments/long_runs/leaderboard/data_sources.jl @@ -0,0 +1,209 @@ +import ClimaAnalysis + +""" + get_sim_var_dict(diagnostics_folder_path) + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +simulation data. This is used by the function `compute_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`sim_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. For each variable, any +preprocessing should be done in the corresponding anonymous function which +includes unit conversion and shifting the dates. + +The variable should have only three dimensions: time, longitude, and latitude. +""" +function get_sim_var_dict(diagnostics_folder_path) + # Dict for loading in simulation data + sim_var_dict = Dict{String, Any}() + + sim_var_dict["lwu"] = + () -> begin + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "lwu", + ) + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end + + sim_var_dict["et"] = + () -> begin + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "et", + ) + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + (ClimaAnalysis.units(sim_var) == "kg m^-2 s^-1") && ( + sim_var = ClimaAnalysis.convert_units( + sim_var, + "mm / day", + conversion_function = units -> units * 86400.0, + ) + ) + return sim_var + end + + + sim_var_dict["gpp"] = + () -> begin + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "gpp", + ) + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + # converting from to `mol CO2 m^-2 s^-1` in sim to `g C m-2 day-1` in obs + (ClimaAnalysis.units(sim_var) == "mol CO2 m^-2 s^-1") && ( + sim_var = ClimaAnalysis.convert_units( + sim_var, + "g m-2 day-1", + conversion_function = units -> units * 86400.0 * 12.011, + ) + ) + return sim_var + end + return sim_var_dict +end + +""" + get_obs_var_dict() + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +observational data. This is used by the function `compute_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`obs_var_dict` whose key is the short name of the variable and the value is an +anonymous function that returns a `OutputVar`. The function must take in a +start date which is used to align the times in the observational data to match +the simulation data. The short name must be the same as in `sim_var_dict` in the +function `sim_var_dict`. For each variable, any preprocessing is done in the +corresponding anonymous function which includes unit conversion and shifting the +dates. + +The variable should have only three dimensions: latitude, longitude, and time. +""" +function get_obs_var_dict() + # Dict for loading in observational data + obs_var_dict = Dict{String, Any}() + obs_var_dict["et"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "evspsbl_MODIS_et_0.5x0.5.nc", + ), + "et", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + (ClimaAnalysis.units(obs_var) == "kg/m2/s") && ( + obs_var = ClimaAnalysis.convert_units( + obs_var, + "mm / day", + conversion_function = units -> units * 86400.0, + ) + ) + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end + + obs_var_dict["gpp"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "gpp_FLUXCOM_gpp.nc", + ), + "gpp", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + ClimaAnalysis.dim_units(obs_var, "lon") == "degree" && + (obs_var.dim_attributes["lon"]["units"] = "degrees_east") + ClimaAnalysis.dim_units(obs_var, "lat") == "degree" && + (obs_var.dim_attributes["lat"]["units"] = "degrees_north") + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end + + obs_var_dict["lwu"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "rlus_CERESed4.2_rlus.nc", + ), + "rlus", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + ClimaAnalysis.units(obs_var) == "W m-2" && + (obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2")) + return obs_var + end + return obs_var_dict +end + +""" + get_mask_dict() + +Return a dictionary mapping short names to a function which takes in `sim_var`, +a `OutputVar` containing simulation data, and `obs_var`, a `OutputVar` +containing observational data, and return a masking function. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`mask_dict` whose key is the same short name in `sim_var_dict` and the value is +a function that takes in a `OutputVar` representing simulation data and a +`OutputVar` representing observational data and returns a masking function or +`nothing` if a masking function is not needed. The masking function is used to +correctly normalize the global bias and global RMSE. +""" +function get_mask_dict() + # Dict for loading in masks + mask_dict = Dict{String, Any}() + + mask_dict["et"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + set_to_val = isnan, + ) + end + + mask_dict["gpp"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + set_to_val = isnan, + ) + end + + mask_dict["lwu"] = (sim_var, obs_var) -> begin + return nothing + end + return mask_dict +end + +""" + get_compare_vars_biases_plot_extrema() + +Return a dictionary mapping short names to ranges for the bias plots. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_plot_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_plot_extrema() + compare_vars_biases_plot_extrema = + Dict("et" => (-2.0, 2.0), "gpp" => (-6.0, 6.0), "lwu" => (-40.0, 40.0)) + return compare_vars_biases_plot_extrema +end diff --git a/experiments/long_runs/leaderboard/leaderboard.jl b/experiments/long_runs/leaderboard/leaderboard.jl new file mode 100644 index 0000000000..1a377ac311 --- /dev/null +++ b/experiments/long_runs/leaderboard/leaderboard.jl @@ -0,0 +1,177 @@ +import ClimaAnalysis +import GeoMakie +import CairoMakie +import Dates + +include("data_sources.jl") + +""" + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases and a leaderboard of various variables defined over longitude, latitude, and +time. + +The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots, +and `diagnostics_folder_path` is the path to the simulation data. + +Loading and preprocessing simulation data is done by `get_sim_var_dict`. Loading and +preprocessing observational data is done by `get_obs_var_dict`. The masks are normalizing +the global RMSE and bias are determined by `get_mask_dict`. The ranges of the bias plots are +determined by `get_compare_vars_biases_plot_extrema`. See the functions defined in +data_sources.jl. +""" +function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + sim_var_dict = get_sim_var_dict(diagnostics_folder_path) + obs_var_dict = get_obs_var_dict() + mask_dict = get_mask_dict() + compare_vars_biases_plot_extrema = get_compare_vars_biases_plot_extrema() + + @info "Error against observations" + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + + # Print dates for debugging + _, var_func = first(sim_var_dict) + var = var_func() + output_dates = + Dates.DateTime(var.attributes["start_date"]) .+ + Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + short_names = keys(sim_var_dict) + for short_name in short_names + @info short_name + # Simulation data + sim_var = sim_var_dict[short_name]() + + # Observational data + obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"]) + + # Remove first spin_up_months months from simulation + spin_up_months = 12 + spinup_cutoff = spin_up_months * 30 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && ( + sim_var = + ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff) + ) + + # Get 12 or less months of data + num_times = length(ClimaAnalysis.times(sim_var)) + num_times > 12 && ( + sim_var = ClimaAnalysis.window( + sim_var, + "time", + right = ClimaAnalysis.times(sim_var)[12], + ) + ) + + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + + sim_obs_comparsion_dict[short_name] = (sim_var, obs_var) + end + + # Get the right number of months for plotting + _, sim_obs_tuple = first(sim_obs_comparsion_dict) + num_times = length(ClimaAnalysis.times(sim_obs_tuple[1])) + months = Dates.monthname.(1:num_times) + + # Plot monthly comparsions + for short_name in short_names + fig = CairoMakie.Figure(size = (650 * ceil(num_times / 2), 450 * 2)) + sim_var, obs_var = sim_obs_comparsion_dict[short_name] + mask = mask_dict[short_name](sim_var, obs_var) + times = ClimaAnalysis.times(sim_var) |> copy + times = vcat( + times, + Array{Union{Missing, eltype(times)}}(missing, 12 - num_times), + ) + times = reshape(times, (2, 6)) + for ((indices, t), month) in zip(pairs(times), months) + layout = fig[Tuple(indices)...] = CairoMakie.GridLayout() + ClimaAnalysis.Visualize.plot_bias_on_globe!( + layout, + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + cmap_extrema = compare_vars_biases_plot_extrema[short_name], + mask = mask, + ) + CairoMakie.Label( + layout[0, 1], + month, + tellwidth = false, + fontsize = 30, + ) + end + CairoMakie.save( + joinpath(leaderboard_base_path, "$(short_name)_bias_plot.png"), + fig, + ) + end + + # Plot month (x-axis) and global bias and global RMSE (y-axis) + fig = CairoMakie.Figure(size = (450 * length(keys(sim_var_dict)), 600)) + for (col, short_name) in enumerate(keys(sim_var_dict)) + sim_var, obs_var = sim_obs_comparsion_dict[short_name] + mask = mask_dict[short_name](sim_var, obs_var) + # Compute globas bias and global rmse + rmse_vec = [] + bias_vec = [] + times = ClimaAnalysis.times(sim_var) |> copy + for t in times + g_rmse = ClimaAnalysis.global_rmse( + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + mask = mask, + ) + g_bias = ClimaAnalysis.global_bias( + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + mask = mask, + ) + push!(rmse_vec, g_rmse) + push!(bias_vec, g_bias) + end + ax_rmse = CairoMakie.Axis( + fig[1, col], + title = "Global RMSE for $short_name", + xlabel = "Month", + ylabel = "Global RMSE ($(ClimaAnalysis.units(sim_var)))", + xticks = ( + 1:num_times, + Dates.monthname.(1:num_times) .|> x -> x[1:3], + ), + ) + CairoMakie.lines!(ax_rmse, 1:num_times |> collect, rmse_vec) + CairoMakie.scatter!(ax_rmse, 1:num_times |> collect, rmse_vec) + + ax_bias = CairoMakie.Axis( + fig[2, col], + title = "Global Bias for $short_name", + xlabel = "Month", + ylabel = "Global Bias ($(ClimaAnalysis.units(sim_var)))", + xticks = ( + 1:num_times, + Dates.monthname.(1:num_times) .|> x -> x[1:3], + ), + ) + CairoMakie.lines!(ax_bias, 1:num_times |> collect, bias_vec) + CairoMakie.scatter!(ax_bias, 1:num_times |> collect, bias_vec) + end + CairoMakie.save( + joinpath(leaderboard_base_path, "global_rmse_and_bias_graphs.png"), + fig, + ) +end + +if abspath(PROGRAM_FILE) == @__FILE__ + if length(ARGS) < 2 + error( + "Usage: julia leaderboard.jl ", + ) + end + leaderboard_base_path = ARGS[begin] + diagnostics_folder_path = ARGS[begin + 1] + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) +end diff --git a/experiments/long_runs/snowy_land.jl b/experiments/long_runs/snowy_land.jl index 514a555f43..461dc42597 100644 --- a/experiments/long_runs/snowy_land.jl +++ b/experiments/long_runs/snowy_land.jl @@ -500,3 +500,8 @@ mktempdir(root_path) do tmpdir ) end end + +include("leaderboard/leaderboard.jl") +diagnostics_folder_path = outdir +leaderboard_base_path = root_path +compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) From 9871b4f859cda02f748b7077d26b5c0f1c901ecd Mon Sep 17 00:00:00 2001 From: ph-kev <98072684+ph-kev@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:00:52 -0800 Subject: [PATCH 6/6] Add documentation for postprocessing --- docs/make.jl | 1 + docs/src/leaderboard/leaderboard.md | 122 ++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 docs/src/leaderboard/leaderboard.md diff --git a/docs/make.jl b/docs/make.jl index 037fab5311..584e9e74d8 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -55,6 +55,7 @@ pages = Any[ "Tutorials" => tutorials, "Standalone models" => standalone_models, "Diagnostics" => diagnostics, + "Leaderboard" => "leaderboard/leaderboard.md", "Restarts" => "restarts.md", "Contribution guide" => "Contributing.md", "Repository structure" => "folderstructure.md", diff --git a/docs/src/leaderboard/leaderboard.md b/docs/src/leaderboard/leaderboard.md new file mode 100644 index 0000000000..13bb32fcff --- /dev/null +++ b/docs/src/leaderboard/leaderboard.md @@ -0,0 +1,122 @@ +# Leaderboard + +## Long run + +### Add a new variable to compare against observations +The infrastructure to compute errors against observations is in the `leaderboard` folder. +This folder contains two files: `data_sources.jl`, responsible for loading and preprocessing +variables of interest, and `leaderboard.jl`, which computes error and draw plots. To add a +new variable to the comparison, you modify the `data_sources.jl`. + +### Computation +As of now, the leaderboard produces bias plots with the global bias and global root mean +squared error (RMSE). These quantities are computed for each month with the first year of +the simulation not considered as that is the spinup time. The start date of the simulation +is 2008 which means that only the year 2009 is used to compare against observational data. + +### Add a new variable to the bias plots +There are four functions that you need to modify to add a new variable which are +`get_sim_var_dict`, `get_obs_var_dict`, `get_mask_dict`, and +`get_compare_vars_biases_plot_extrema`. Each function returns a dictionary that must be +modified to add a new variable to the leaderboard. The dictionaries are `sim_var_dict`, +`obs_var_dict`, `mask_dict`, and `compare_vars_biases_plot_extrema`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary `sim_var_dict` +whose key is the short name of the variable and the value is a function that returns a +[`OutputVar`](https://clima.github.io/ClimaAnalysis.jl/dev/var/). Any preprocessing is done +in the function which includes unit conversion and shifting the dates. + +```julia +sim_var_dict["et"] = + () -> begin + # Load in variable + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "et", + ) + # Shift to the first day and subtract one month as preprocessing + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end +``` + +Then, add a key-value pair to the dictionary `obs_var_dict` whose key is the same short name +as before and the value is a function that takes in a start date and returns a `OutputVar`. +Any preprocessing is done in the function. + +```julia +obs_var_dict["et"] = + (start_date) -> begin + # We use ClimaArtifacts to use a dataset from ILAMB + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "evspsbl_MODIS_et_0.5x0.5.nc", + ), + "et", + # start_date is used to align the dates in the observational data + # with the simulation data + new_start_date = start_date, + # Shift dates to the first day of the month before aligning the dates + shift_by = Dates.firstdayofmonth, + ) + # More preprocessing to match the units with the simulation data + ClimaAnalysis.units(obs_var) == "kg/m2/s" && + (obs_var = ClimaAnalysis.set_units(obs_var, "kg m^-2 s^-1")) + # ClimaAnalysis cannot handle `missing` values, but does support handling NaNs + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end +``` + +!!! tip "Preprocessing" + Observational and simulational data should be preprocessed for dates and units. When + using ClimaDiagnostics to report monthly averages from a simulation, monthly averages + are output on the first day following the month when the average was computed. For + instance, the monthly average corresponding to January 2010 is on the date 1 Feb 2010. + Preprocessing is done to shift this date to 1 Jan 2010. When preprocessing data, we + follow the convention that the first day corresponds to the monthly average for that + month. For observational data, you should check the convention being followed and + preprocess the dates if necessary. + + For `obs_var_dict`, the anonymous function must take in a start date. The start date is + used in `leaderboard.jl` to adjust the seconds in the `OutputVar` to match between start + date in the simulation data. + + Units should be the same between the simulation and observational data. + +Next, add a key-value pair to the dictionary `mask_dict` whose key is the same short name +as before and the value is a function that takes in a `OutputVar` representing simulation +data and a `OutputVar` representing observational data and returns a masking function or +`nothing` if no masking function is needed. The masking function is used to correctly +normalize the global bias and global RMSE. See the example below where a mask is made using +the observational data. + +```julia +mask_dict["et"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + # We do this to get a `OutputVar` with only two dimensions: + # longitude and latitude + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + # Any values that are NaN should be 0.0 + set_to_val = isnan, + true_val = 0.0 + ) + end +``` + +Finally, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose +key is the same short name as before and the value is a tuple of floats which determine +the range of the bias plots. + +```julia +compare_vars_biases_plot_extrema = Dict( + "et" => (-0.00001, 0.00001), + "gpp" => (-8.0, 8.0), + "lwu" => (-40.0, 40.0), +) +```