Skip to content

Commit

Permalink
Add postprocessing pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
ph-kev committed Dec 6, 2024
1 parent 0495775 commit 251ed5d
Show file tree
Hide file tree
Showing 3 changed files with 391 additions and 0 deletions.
209 changes: 209 additions & 0 deletions experiments/long_runs/leaderboard/data_sources.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import ClimaAnalysis

"""
get_sim_var_dict(diagnostics_folder_path)
Return a dictionary mapping short names to `OutputVar` containing preprocessed
simulation data. This is used by the function `compute_leaderboard`.
To add a variable for the leaderboard, add a key-value pair to the dictionary
`sim_var_dict` whose key is the short name of the variable and the value is an
anonymous function that returns a `OutputVar`. For each variable, any
preprocessing should be done in the corresponding anonymous function which
includes unit conversion and shifting the dates.
The variable should have only three dimensions: time, longitude, and latitude.
"""
function get_sim_var_dict(diagnostics_folder_path)
# Dict for loading in simulation data
sim_var_dict = Dict{String, Any}()

sim_var_dict["lwu"] =
() -> begin
sim_var = get(
ClimaAnalysis.SimDir(diagnostics_folder_path),
short_name = "lwu",
)
sim_var =
ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
return sim_var
end

sim_var_dict["et"] =
() -> begin
sim_var = get(
ClimaAnalysis.SimDir(diagnostics_folder_path),
short_name = "et",
)
sim_var =
ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
(ClimaAnalysis.units(sim_var) == "kg m^-2 s^-1") && (
sim_var = ClimaAnalysis.convert_units(
sim_var,
"mm / day",
conversion_function = units -> units * 86400.0,
)
)
return sim_var
end


sim_var_dict["gpp"] =
() -> begin
sim_var = get(
ClimaAnalysis.SimDir(diagnostics_folder_path),
short_name = "gpp",
)
sim_var =
ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
# converting from to `mol CO2 m^-2 s^-1` in sim to `g C m-2 day-1` in obs
(ClimaAnalysis.units(sim_var) == "mol CO2 m^-2 s^-1") && (
sim_var = ClimaAnalysis.convert_units(
sim_var,
"g m-2 day-1",
conversion_function = units -> units * 86400.0 * 12.011,
)
)
return sim_var
end
return sim_var_dict
end

"""
get_obs_var_dict()
Return a dictionary mapping short names to `OutputVar` containing preprocessed
observational data. This is used by the function `compute_leaderboard`.
To add a variable for the leaderboard, add a key-value pair to the dictionary
`obs_var_dict` whose key is the short name of the variable and the value is an
anonymous function that returns a `OutputVar`. The function must take in a
start date which is used to align the times in the observational data to match
the simulation data. The short name must be the same as in `sim_var_dict` in the
function `sim_var_dict`. For each variable, any preprocessing is done in the
corresponding anonymous function which includes unit conversion and shifting the
dates.
The variable should have only three dimensions: latitude, longitude, and time.
"""
function get_obs_var_dict()
# Dict for loading in observational data
obs_var_dict = Dict{String, Any}()
obs_var_dict["et"] =
(start_date) -> begin
obs_var = ClimaAnalysis.OutputVar(
ClimaLand.Artifacts.ilamb_dataset_path(;
context = "evspsbl_MODIS_et_0.5x0.5.nc",
),
"et",
new_start_date = start_date,
shift_by = Dates.firstdayofmonth,
)
(ClimaAnalysis.units(obs_var) == "kg/m2/s") && (
obs_var = ClimaAnalysis.convert_units(
obs_var,
"mm / day",
conversion_function = units -> units * 86400.0,
)
)
obs_var = ClimaAnalysis.replace(obs_var, missing => NaN)
return obs_var
end

obs_var_dict["gpp"] =
(start_date) -> begin
obs_var = ClimaAnalysis.OutputVar(
ClimaLand.Artifacts.ilamb_dataset_path(;
context = "gpp_FLUXCOM_gpp.nc",
),
"gpp",
new_start_date = start_date,
shift_by = Dates.firstdayofmonth,
)
ClimaAnalysis.dim_units(obs_var, "lon") == "degree" &&
(obs_var.dim_attributes["lon"]["units"] = "degrees_east")
ClimaAnalysis.dim_units(obs_var, "lat") == "degree" &&
(obs_var.dim_attributes["lat"]["units"] = "degrees_north")
obs_var = ClimaAnalysis.replace(obs_var, missing => NaN)
return obs_var
end

obs_var_dict["lwu"] =
(start_date) -> begin
obs_var = ClimaAnalysis.OutputVar(
ClimaLand.Artifacts.ilamb_dataset_path(;
context = "rlus_CERESed4.2_rlus.nc",
),
"rlus",
new_start_date = start_date,
shift_by = Dates.firstdayofmonth,
)
ClimaAnalysis.units(obs_var) == "W m-2" &&
(obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2"))
return obs_var
end
return obs_var_dict
end

"""
get_mask_dict()
Return a dictionary mapping short names to a function which takes in `sim_var`,
a `OutputVar` containing simulation data, and `obs_var`, a `OutputVar`
containing observational data, and return a masking function.
To add a variable to the leaderboard, add a key-value pair to the dictionary
`mask_dict` whose key is the same short name in `sim_var_dict` and the value is
a function that takes in a `OutputVar` representing simulation data and a
`OutputVar` representing observational data and returns a masking function or
`nothing` if a masking function is not needed. The masking function is used to
correctly normalize the global bias and global RMSE.
"""
function get_mask_dict()
# Dict for loading in masks
mask_dict = Dict{String, Any}()

mask_dict["et"] =
(sim_var, obs_var) -> begin
return ClimaAnalysis.make_lonlat_mask(
ClimaAnalysis.slice(
obs_var,
time = ClimaAnalysis.times(obs_var) |> first,
);
set_to_val = isnan,
)
end

mask_dict["gpp"] =
(sim_var, obs_var) -> begin
return ClimaAnalysis.make_lonlat_mask(
ClimaAnalysis.slice(
obs_var,
time = ClimaAnalysis.times(obs_var) |> first,
);
set_to_val = isnan,
)
end

mask_dict["lwu"] = (sim_var, obs_var) -> begin
return nothing
end
return mask_dict
end

"""
get_compare_vars_biases_plot_extrema()
Return a dictionary mapping short names to ranges for the bias plots.
To add a variable to the leaderboard, add a key-value pair to the dictionary
`compare_vars_biases_plot_extrema` whose key is a short name key is the same
short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and
the value is a tuple, where the first element is the lower bound and the last
element is the upper bound for the bias plots.
"""
function get_compare_vars_biases_plot_extrema()
compare_vars_biases_plot_extrema =
Dict("et" => (-2.0, 2.0), "gpp" => (-6.0, 6.0), "lwu" => (-40.0, 40.0))
return compare_vars_biases_plot_extrema
end
177 changes: 177 additions & 0 deletions experiments/long_runs/leaderboard/leaderboard.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import ClimaAnalysis
import GeoMakie
import CairoMakie
import Dates

include("data_sources.jl")

"""
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
Plot the biases and a leaderboard of various variables defined over longitude, latitude, and
time.
The parameter `leaderboard_base_path` is the path to save the leaderboards and bias plots,
and `diagnostics_folder_path` is the path to the simulation data.
Loading and preprocessing simulation data is done by `get_sim_var_dict`. Loading and
preprocessing observational data is done by `get_obs_var_dict`. The masks are normalizing
the global RMSE and bias are determined by `get_mask_dict`. The ranges of the bias plots are
determined by `get_compare_vars_biases_plot_extrema`. See the functions defined in
data_sources.jl.
"""
function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
sim_var_dict = get_sim_var_dict(diagnostics_folder_path)
obs_var_dict = get_obs_var_dict()
mask_dict = get_mask_dict()
compare_vars_biases_plot_extrema = get_compare_vars_biases_plot_extrema()

@info "Error against observations"

# Set up dict for storing simulation and observational data after processing
sim_obs_comparsion_dict = Dict()

# Print dates for debugging
_, var_func = first(sim_var_dict)
var = var_func()
output_dates =
Dates.DateTime(var.attributes["start_date"]) .+
Dates.Second.(ClimaAnalysis.times(var))
@info "Working with dates:"
@info output_dates

short_names = keys(sim_var_dict)
for short_name in short_names
@info short_name
# Simulation data
sim_var = sim_var_dict[short_name]()

# Observational data
obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"])

# Remove first spin_up_months months from simulation
spin_up_months = 12
spinup_cutoff = spin_up_months * 30 * 86400.0
ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && (
sim_var =
ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)
)

# Get 12 or less months of data
num_times = length(ClimaAnalysis.times(sim_var))
num_times > 12 && (
sim_var = ClimaAnalysis.window(
sim_var,
"time",
right = ClimaAnalysis.times(sim_var)[12],
)
)

obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var)

sim_obs_comparsion_dict[short_name] = (sim_var, obs_var)
end

# Get the right number of months for plotting
_, sim_obs_tuple = first(sim_obs_comparsion_dict)
num_times = length(ClimaAnalysis.times(sim_obs_tuple[1]))
months = Dates.monthname.(1:num_times)

# Plot monthly comparsions
for short_name in short_names
fig = CairoMakie.Figure(size = (650 * ceil(num_times / 2), 450 * 2))
sim_var, obs_var = sim_obs_comparsion_dict[short_name]
mask = mask_dict[short_name](sim_var, obs_var)
times = ClimaAnalysis.times(sim_var) |> copy
times = vcat(
times,
Array{Union{Missing, eltype(times)}}(missing, 12 - num_times),
)
times = reshape(times, (2, 6))
for ((indices, t), month) in zip(pairs(times), months)
layout = fig[Tuple(indices)...] = CairoMakie.GridLayout()
ClimaAnalysis.Visualize.plot_bias_on_globe!(
layout,
ClimaAnalysis.slice(sim_var, time = t),
ClimaAnalysis.slice(obs_var, time = t),
cmap_extrema = compare_vars_biases_plot_extrema[short_name],
mask = mask,
)
CairoMakie.Label(
layout[0, 1],
month,
tellwidth = false,
fontsize = 30,
)
end
CairoMakie.save(
joinpath(leaderboard_base_path, "$(short_name)_bias_plot.png"),
fig,
)
end

# Plot month (x-axis) and global bias and global RMSE (y-axis)
fig = CairoMakie.Figure(size = (450 * length(keys(sim_var_dict)), 600))
for (col, short_name) in enumerate(keys(sim_var_dict))
sim_var, obs_var = sim_obs_comparsion_dict[short_name]
mask = mask_dict[short_name](sim_var, obs_var)
# Compute globas bias and global rmse
rmse_vec = []
bias_vec = []
times = ClimaAnalysis.times(sim_var) |> copy
for t in times
g_rmse = ClimaAnalysis.global_rmse(
ClimaAnalysis.slice(sim_var, time = t),
ClimaAnalysis.slice(obs_var, time = t),
mask = mask,
)
g_bias = ClimaAnalysis.global_bias(
ClimaAnalysis.slice(sim_var, time = t),
ClimaAnalysis.slice(obs_var, time = t),
mask = mask,
)
push!(rmse_vec, g_rmse)
push!(bias_vec, g_bias)
end
ax_rmse = CairoMakie.Axis(
fig[1, col],
title = "Global RMSE for $short_name",
xlabel = "Month",
ylabel = "Global RMSE ($(ClimaAnalysis.units(sim_var)))",
xticks = (
1:num_times,
Dates.monthname.(1:num_times) .|> x -> x[1:3],
),
)
CairoMakie.lines!(ax_rmse, 1:num_times |> collect, rmse_vec)
CairoMakie.scatter!(ax_rmse, 1:num_times |> collect, rmse_vec)

ax_bias = CairoMakie.Axis(
fig[2, col],
title = "Global Bias for $short_name",
xlabel = "Month",
ylabel = "Global Bias ($(ClimaAnalysis.units(sim_var)))",
xticks = (
1:num_times,
Dates.monthname.(1:num_times) .|> x -> x[1:3],
),
)
CairoMakie.lines!(ax_bias, 1:num_times |> collect, bias_vec)
CairoMakie.scatter!(ax_bias, 1:num_times |> collect, bias_vec)
end
CairoMakie.save(
joinpath(leaderboard_base_path, "global_rmse_and_bias_graphs.png"),
fig,
)
end

if abspath(PROGRAM_FILE) == @__FILE__
if length(ARGS) < 2
error(
"Usage: julia leaderboard.jl <Filepath to save leaderboard and bias plots> <Filepath to simulation data>",
)
end
leaderboard_base_path = ARGS[begin]
diagnostics_folder_path = ARGS[begin + 1]
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
end
5 changes: 5 additions & 0 deletions experiments/long_runs/snowy_land.jl
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,8 @@ mktempdir(root_path) do tmpdir
)
end
end

include("leaderboard/leaderboard.jl")
diagnostics_folder_path = outdir
leaderboard_base_path = root_path
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)

0 comments on commit 251ed5d

Please sign in to comment.