Skip to content

Commit

Permalink
Regrid truncated data
Browse files Browse the repository at this point in the history
  • Loading branch information
anastasia-popova committed Apr 11, 2024
1 parent 8a70236 commit a49cc15
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 4 deletions.
18 changes: 18 additions & 0 deletions artifacts/artifact_funcs.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import ArtifactWrappers as AW
#using NCDatasets

function sst_dataset_path()
sst_dataset = AW.ArtifactWrapper(
Expand Down Expand Up @@ -59,3 +60,20 @@ function pr_obs_data_path()
)
return AW.get_data_folder(pr_obs_data)
end

"""
artifact_data(datapath_full, name; datapath_trunc, date0, time_start, time_end, comms_ctx)
Truncates given data set, and constructs a new dataset containing only the dates that are used in the simulation
"""
function artifact_data(datapath_full, name)
datafile_truncated = joinpath(datapath_full, string(name, ".nc"))
return datafile_truncated
end

function artifact_data(datapath_full, name, datapath_trunc, date0, time_start, time_end, comms_ctx)
datafile = joinpath(datapath_full, string(name, ".nc"))
datafile_truncated =
Regridder.truncate_dataset(datafile, name, datapath_trunc, date0, time_start, time_end, comms_ctx)
return datafile_truncated
end
8 changes: 4 additions & 4 deletions experiments/AMIP/coupler_driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ original sources.
=#

include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl"))
sst_data = joinpath(sst_dataset_path(), "sst.nc")
sic_data = joinpath(sic_dataset_path(), "sic.nc")
co2_data = joinpath(co2_dataset_path(), "mauna_loa_co2.nc")
land_mask_data = joinpath(mask_dataset_path(), "seamask.nc")
sst_data = artifact_data(sst_dataset_path(), "sst", REGRID_DIR, date0, t_start, t_end, comms_ctx)
sic_data = artifact_data(sic_dataset_path(), "sic", REGRID_DIR, date0, t_start, t_end, comms_ctx)
co2_data = artifact_data(co2_dataset_path(), "mauna_loa_co2", REGRID_DIR, date0, t_start, t_end, comms_ctx)
land_mask_data = artifact_data(mask_dataset_path(), "seamask")

#=
## Component Model Initialization
Expand Down
69 changes: 69 additions & 0 deletions src/Regridder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -633,5 +633,74 @@ function cgll2latlonz(field; DIR = "cgll2latlonz_dir", nlat = 360, nlon = 720, c
return new_data, coords
end

"""
truncate_dataset(date0, timeStart, timeEnd, datafile, datapath, name)
Truncates given data set, and constructs a new dataset containing only the dates that are used in the simulation
"""
function truncate_dataset(
datafile,
name,
datapath_trunc,
date0,
time_start,
time_end,
comms_ctx::ClimaComms.AbstractCommsContext,
)
date_start = date0 + Dates.Second(time_start)
date_end = date0 + Dates.Second(time_start + time_end)

file_name = replace(string(name, "_truncated_data_", string(date_start), string(date_end), ".nc"), r":" => "")
datafile_truncated = joinpath(datapath_trunc, file_name)

if ClimaComms.iamroot(comms_ctx)
ds = NCDataset(datafile, "r")
dates = ds["time"][:]

# if the simulation start date is before our first date in the dataset
# leave the beginning of the truncated dataset to be first date available
if date_start < dates[1]
start_id = 1
# if the simulation start date is after the last date in the dataset
# start the truncated dataset at its last possible date
elseif date_start > last(dates)
start_id = length(dates)
# if the simulation start date falls within the range of the dataset
# find the closest date to the start date and truncate there
else
(~, start_id) = findmin(x -> abs(x - date_start), dates)
# if the closest date is after the start date, add one more date before
if dates[start_id] > date_start
start_id = start_id - 1
end
end

# if the simulation end date is before our first date in the dataset
# truncate the end of the dataset to be the first date
if date_end < dates[1]
end_id = 1
# if the simulation end date is after the last date in the dataset
# leave the end of the dataset as is
elseif date_end > last(dates)
end_id = length(dates)
# if the simulation end date falls within the range of the dataset
# find the closest date to the end date and truncate there
else
(~, end_id) = findmin(x -> abs(x - date_end), dates)
# if the closest date is before the end date, add one more date after
if dates[end_id] < date_end
end_id = end_id + 1
end
end

ds_truncated = NCDataset(datafile_truncated, "c")
ds_truncated = write(ds_truncated, view(ds, time = start_id:end_id))

close(ds)
close(ds_truncated)

return datafile_truncated
end
end

end # Module
65 changes: 65 additions & 0 deletions test/regridder_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Unit tests for ClimaCoupler Regridder module
=#

using ClimaCoupler
using ClimaCore: Geometry, Meshes, Domains, Topologies, Spaces, Fields, InputOutput
using ClimaComms
using Test
Expand Down Expand Up @@ -314,3 +315,67 @@ for FT in (Float32, Float64)
end
end
end
# test dataset truncation
@testset "test dataset truncation" begin
# Get the original dataset set up
include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl"))
sst_data_all = joinpath(sst_dataset_path(), "sst.nc")
ds = NCDataset(sst_data_all, "r")
dates = ds["time"][:]
first_date = dates[1]
last_date = last(dates)

# set up comms_ctx
device = ClimaComms.device()
comms_ctx = ClimaComms.context(device)
ClimaComms.init(comms_ctx)

# make path for truncated datasets
COUPLER_OUTPUT_DIR = joinpath("experiments", "AMIP", "output", "tests")
mkpath(COUPLER_OUTPUT_DIR)

REGRID_DIR = joinpath(COUPLER_OUTPUT_DIR, "regrid_tmp", "")
mkpath(REGRID_DIR)

# values for the truncations
time_start = 0.0
time_end = 1.728e6
# tests five cases:
# 1. both start and end are earlier than in datafile
# 2. start is early, end is within datafine
# 3. both start and end within datafile
# 4. start is in datafile end is not
# 5. both start and end are later than in datafile
date0test = ["18690101", "18700101", "19790228", "20220301", "20230101"]
for date in date0test
date0 = DateTime(date, dateformat"yyyymmdd")
sst_data = Regridder.truncate_dataset(sst_data_all, "test", REGRID_DIR, date0, time_start, time_end, comms_ctx)
ds_truncated = NCDataset(sst_data, "r")
new_dates = ds_truncated["time"][:]

date_start = date0 + Dates.Second(time_start)
date_end = date0 + Dates.Second(time_start + time_end)

if date_start < first_date
@test new_dates[1] == first_date
elseif date_start > last_date
@test new_dates[1] == last_date
else
@test new_dates[1] <= date_start
@test new_dates[2] >= date_start
end

if date_end < first_date
@test last(new_dates) == first_date
elseif date_end > last_date
@test last(new_dates) == last_date
else
@test last(new_dates) >= date_end
@test new_dates[length(new_dates) - 1] <= date_end
end

close(ds_truncated)
end

close(ds)
end

0 comments on commit a49cc15

Please sign in to comment.