diff --git a/artifacts/artifact_funcs.jl b/artifacts/artifact_funcs.jl index 93eac7cf82..2ad520781a 100644 --- a/artifacts/artifact_funcs.jl +++ b/artifacts/artifact_funcs.jl @@ -1,4 +1,5 @@ import ArtifactWrappers as AW +#using NCDatasets function sst_dataset_path() sst_dataset = AW.ArtifactWrapper( @@ -59,3 +60,20 @@ function pr_obs_data_path() ) return AW.get_data_folder(pr_obs_data) end + +""" + artifact_data(datapath_full, name; datapath_trunc, date0, time_start, time_end, comms_ctx) + +Truncates given data set, and constructs a new dataset containing only the dates that are used in the simulation +""" +function artifact_data(datapath_full, name) + datafile_truncated = joinpath(datapath_full, string(name, ".nc")) + return datafile_truncated +end + +function artifact_data(datapath_full, name, datapath_trunc, date0, time_start, time_end, comms_ctx) + datafile = joinpath(datapath_full, string(name, ".nc")) + datafile_truncated = + Regridder.truncate_dataset(datafile, name, datapath_trunc, date0, time_start, time_end, comms_ctx) + return datafile_truncated +end diff --git a/experiments/AMIP/coupler_driver.jl b/experiments/AMIP/coupler_driver.jl index 806eb62be5..89878c1084 100644 --- a/experiments/AMIP/coupler_driver.jl +++ b/experiments/AMIP/coupler_driver.jl @@ -177,10 +177,10 @@ original sources. =# include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl")) -sst_data = joinpath(sst_dataset_path(), "sst.nc") -sic_data = joinpath(sic_dataset_path(), "sic.nc") -co2_data = joinpath(co2_dataset_path(), "mauna_loa_co2.nc") -land_mask_data = joinpath(mask_dataset_path(), "seamask.nc") +sst_data = artifact_data(sst_dataset_path(), "sst", REGRID_DIR, date0, t_start, t_end, comms_ctx) +sic_data = artifact_data(sic_dataset_path(), "sic", REGRID_DIR, date0, t_start, t_end, comms_ctx) +co2_data = artifact_data(co2_dataset_path(), "mauna_loa_co2", REGRID_DIR, date0, t_start, t_end, comms_ctx) +land_mask_data = artifact_data(mask_dataset_path(), "seamask") #= ## Component Model Initialization diff --git a/src/Regridder.jl b/src/Regridder.jl index 4f715b049d..ada39da579 100644 --- a/src/Regridder.jl +++ b/src/Regridder.jl @@ -633,5 +633,74 @@ function cgll2latlonz(field; DIR = "cgll2latlonz_dir", nlat = 360, nlon = 720, c return new_data, coords end +""" + truncate_dataset(date0, timeStart, timeEnd, datafile, datapath, name) + +Truncates given data set, and constructs a new dataset containing only the dates that are used in the simulation +""" +function truncate_dataset( + datafile, + name, + datapath_trunc, + date0, + time_start, + time_end, + comms_ctx::ClimaComms.AbstractCommsContext, +) + date_start = date0 + Dates.Second(time_start) + date_end = date0 + Dates.Second(time_start + time_end) + + file_name = replace(string(name, "_truncated_data_", string(date_start), string(date_end), ".nc"), r":" => "") + datafile_truncated = joinpath(datapath_trunc, file_name) + + if ClimaComms.iamroot(comms_ctx) + ds = NCDataset(datafile, "r") + dates = ds["time"][:] + + # if the simulation start date is before our first date in the dataset + # leave the beginning of the truncated dataset to be first date available + if date_start < dates[1] + start_id = 1 + # if the simulation start date is after the last date in the dataset + # start the truncated dataset at its last possible date + elseif date_start > last(dates) + start_id = length(dates) + # if the simulation start date falls within the range of the dataset + # find the closest date to the start date and truncate there + else + (~, start_id) = findmin(x -> abs(x - date_start), dates) + # if the closest date is after the start date, add one more date before + if dates[start_id] > date_start + start_id = start_id - 1 + end + end + + # if the simulation end date is before our first date in the dataset + # truncate the end of the dataset to be the first date + if date_end < dates[1] + end_id = 1 + # if the simulation end date is after the last date in the dataset + # leave the end of the dataset as is + elseif date_end > last(dates) + end_id = length(dates) + # if the simulation end date falls within the range of the dataset + # find the closest date to the end date and truncate there + else + (~, end_id) = findmin(x -> abs(x - date_end), dates) + # if the closest date is before the end date, add one more date after + if dates[end_id] < date_end + end_id = end_id + 1 + end + end + + ds_truncated = NCDataset(datafile_truncated, "c") + ds_truncated = write(ds_truncated, view(ds, time = start_id:end_id)) + + close(ds) + close(ds_truncated) + + return datafile_truncated + end +end end # Module diff --git a/test/regridder_tests.jl b/test/regridder_tests.jl index b553fda251..8184e3ff4d 100644 --- a/test/regridder_tests.jl +++ b/test/regridder_tests.jl @@ -2,6 +2,7 @@ Unit tests for ClimaCoupler Regridder module =# +using ClimaCoupler using ClimaCore: Geometry, Meshes, Domains, Topologies, Spaces, Fields, InputOutput using ClimaComms using Test @@ -314,3 +315,67 @@ for FT in (Float32, Float64) end end end +# test dataset truncation +@testset "test dataset truncation" begin + # Get the original dataset set up + include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl")) + sst_data_all = joinpath(sst_dataset_path(), "sst.nc") + ds = NCDataset(sst_data_all, "r") + dates = ds["time"][:] + first_date = dates[1] + last_date = last(dates) + + # set up comms_ctx + device = ClimaComms.device() + comms_ctx = ClimaComms.context(device) + ClimaComms.init(comms_ctx) + + # make path for truncated datasets + COUPLER_OUTPUT_DIR = joinpath("experiments", "AMIP", "output", "tests") + mkpath(COUPLER_OUTPUT_DIR) + + REGRID_DIR = joinpath(COUPLER_OUTPUT_DIR, "regrid_tmp", "") + mkpath(REGRID_DIR) + + # values for the truncations + time_start = 0.0 + time_end = 1.728e6 + # tests five cases: + # 1. both start and end are earlier than in datafile + # 2. start is early, end is within datafine + # 3. both start and end within datafile + # 4. start is in datafile end is not + # 5. both start and end are later than in datafile + date0test = ["18690101", "18700101", "19790228", "20220301", "20230101"] + for date in date0test + date0 = DateTime(date, dateformat"yyyymmdd") + sst_data = Regridder.truncate_dataset(sst_data_all, "test", REGRID_DIR, date0, time_start, time_end, comms_ctx) + ds_truncated = NCDataset(sst_data, "r") + new_dates = ds_truncated["time"][:] + + date_start = date0 + Dates.Second(time_start) + date_end = date0 + Dates.Second(time_start + time_end) + + if date_start < first_date + @test new_dates[1] == first_date + elseif date_start > last_date + @test new_dates[1] == last_date + else + @test new_dates[1] <= date_start + @test new_dates[2] >= date_start + end + + if date_end < first_date + @test last(new_dates) == first_date + elseif date_end > last_date + @test last(new_dates) == last_date + else + @test last(new_dates) >= date_end + @test new_dates[length(new_dates) - 1] <= date_end + end + + close(ds_truncated) + end + + close(ds) +end