From 308dbfcded7645778558c467b2162df1a41dac60 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Wed, 13 Mar 2024 11:12:31 -0700
Subject: [PATCH 1/2] Use SpaceVaryingInput for runoff

---
 .../standalone/Soil/richards_runoff.jl | 30 +++----------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/experiments/standalone/Soil/richards_runoff.jl b/experiments/standalone/Soil/richards_runoff.jl
index 027e44701d..49674eb858 100644
--- a/experiments/standalone/Soil/richards_runoff.jl
+++ b/experiments/standalone/Soil/richards_runoff.jl
@@ -11,6 +11,7 @@ using ClimaLand
 using ClimaLand.Soil
 import ClimaLand
 import ClimaLand.Parameters as LP
+import ClimaLand.SpaceVaryingInputs: SpaceVaryingInput
 
 context = ClimaComms.context()
 outdir = joinpath(pkgdir(ClimaLand), "experiments/standalone/Soil/artifacts")
@@ -37,31 +38,12 @@ topmodel_dataset = ArtifactWrapper(
     ),],
 )
 infile_path = joinpath(get_data_folder(topmodel_dataset), "means_2.5_new.nc")
-regrid_dirpath = "regrid"
 outfile_root = joinpath(pkgdir(ClimaLand), "experiments/standalone/Soil/static_data_cgll")
-ClimaLand.Regridder.hdwrite_regridfile_rll_to_cgll(
-    FT,
-    regrid_dirpath,
-    infile_path,
-    ["fmax", "landsea_mask"],
-    surface_space,
-    outfile_root;
-    mono = true,
-)
-file_info = ClimaLand.FileReader.FileInfo(
-    infile_path,
-    regrid_dirpath,
-    ["fmax", "landsea_mask"],
-    outfile_root,
-    [],
-    [],
-)
-data = ClimaLand.FileReader.PrescribedDataStatic{typeof(file_info)}(file_info)
-f_max = ClimaLand.FileReader.get_data_at_date(data, surface_space, "fmax")
-mask =
-    ClimaLand.FileReader.get_data_at_date(data, surface_space, "landsea_mask")
+f_max = SpaceVaryingInput(infile_path, "fmax", surface_space)
+mask = SpaceVaryingInput(infile_path, "landsea_mask", surface_space)
+
 oceans_to_zero(field, mask) = mask > 0.5 ? field : eltype(field)(0)
 f_over = FT(3.28) # 1/m
 R_sb = FT(1.484e-4 / 1000) # m/s
@@ -311,7 +293,3 @@ CairoMakie.heatmap!(
 Colorbar(fig[1, 4], colorrange = clims2)
 outfile = joinpath(outdir, string("heatmap_∫ϑdz.png"))
 CairoMakie.save(outfile, fig)
-
-
-# Delete testing directory and files
-rm(regrid_dirpath; recursive = true, force = true)

From 1f020ff4bb26f40faf854bcea18847e50b786666 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Thu, 14 Mar 2024 11:30:02 -0700
Subject: [PATCH 2/2] Add support for reading input maps

Okay, this is a massive commit (and I feel bad about it).

This commit splits PrescribedDataStatic and PrescribedDataTemporal into
multiple independent modules.

Before, PrescribedData was responsible for:
- reading files
- regridding
- discovering the available dates
- interpolating
- taking care of edge cases (e.g., what to do when the requested date is
  outside of the data's definition range)

This commit splits these responsibilities into independent modules so that
they can be more easily maintained and upgraded.

This commit also introduces InterpolationsRegridder, which uses
Interpolations.jl to do online and distributed remapping from files. This
will soon be needed to efficiently support reading and buffering reads for
GPU runs.
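
To illustrate the intended layering, usage will look roughly like this
(sketch; the file path and variable name are illustrative):

    data_handler = DataHandling.DataHandler(
        "era5_example.nc",
        "u",
        target_space;
        regridder_type = :InterpolationsRegridder,
    )
    field = DataHandling.regridded_snapshot(
        data_handler,
        first(DataHandling.available_times(data_handler)),
    )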
--- Project.toml | 3 +- docs/Manifest.toml | 18 +- docs/src/APIs/Regridder.md | 6 +- experiments/Manifest.toml | 18 +- .../Bucket/global_bucket_staticmap.jl | 17 +- .../Bucket/global_bucket_temporalmap.jl | 21 +- src/ClimaLand.jl | 7 +- src/shared_utilities/DataHandling.jl | 330 ++++++++ src/shared_utilities/FileReader.jl | 549 -------------- src/shared_utilities/FileReaders.jl | 229 ++++++ .../{Regridder.jl => Regridders.jl} | 278 ++++++- src/shared_utilities/SpaceVaryingInputs.jl | 42 +- src/shared_utilities/TimeVaryingInputs.jl | 39 +- .../analytic_time_varying_input.jl | 16 +- .../interpolating_time_varying_input0d.jl | 34 +- .../interpolating_time_varying_input2d.jl | 100 +++ .../interpolating_time_varying_inputs.jl | 18 +- src/standalone/Bucket/Bucket.jl | 95 +-- test/runtests.jl | 7 +- test/shared_utilities/data_handling.jl | 166 ++++ test/shared_utilities/file_reader.jl | 716 ------------------ test/shared_utilities/file_readers.jl | 73 ++ test/shared_utilities/regridders.jl | 53 ++ test/shared_utilities/time_varying_inputs.jl | 115 ++- test/standalone/Bucket/albedo_types.jl | 145 ++-- 25 files changed, 1503 insertions(+), 1592 deletions(-) create mode 100644 src/shared_utilities/DataHandling.jl delete mode 100644 src/shared_utilities/FileReader.jl create mode 100644 src/shared_utilities/FileReaders.jl rename src/shared_utilities/{Regridder.jl => Regridders.jl} (58%) create mode 100644 src/shared_utilities/interpolating_time_varying_input2d.jl create mode 100644 test/shared_utilities/data_handling.jl delete mode 100644 test/shared_utilities/file_reader.jl create mode 100644 test/shared_utilities/file_readers.jl create mode 100644 test/shared_utilities/regridders.jl diff --git a/Project.toml b/Project.toml index 1e9454f5b9..2ae3ab0100 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" Insolation = "e98cc03f-d57e-4e3c-b70c-8d51efe9e0d8" +Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" @@ -38,12 +39,12 @@ CreateParametersExt = "ClimaParams" Adapt = "3, 4" ArtifactWrappers = "0.2" CFTime = "0.1" -ClimaParams = "0.10.2" CSV = "0.10" CUDA = "5" ClimaComms = "0.5.6" ClimaCore = "0.13.2" ClimaCoreTempestRemap = "0.3" +ClimaParams = "0.10.2" DataFrames = "1" Dates = "1" DocStringExtensions = "0.8, 0.9" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 54473f1025..ca8e9cde9c 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.1" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "5f713b750235cf07465e21e92a6bd2f152ad3358" @@ -273,9 +273,9 @@ version = "0.5.7" [[deps.ClimaCore]] deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] -git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969" +git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157" uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.13.2" 
+version = "0.13.1" weakdeps = ["Krylov"] [deps.ClimaCore.extensions] @@ -299,9 +299,9 @@ weakdeps = ["ClimaParams"] [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] -git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee" +git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01" uuid = "5c42b081-d73a-476f-9059-fd94b934656c" -version = "0.10.3" +version = "0.10.2" [[deps.ClimaTimeSteppers]] deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] @@ -379,7 +379,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.1.0+0" +version = "1.0.5+1" [[deps.CompositionsBase]] git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" @@ -394,9 +394,9 @@ version = "0.1.2" [[deps.ConcurrentUtilities]] deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" +git-tree-sha1 = "9c4708e3ed2b799e6124b5673a712dda0b596a9b" uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" +version = "2.3.1" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -1495,7 +1495,7 @@ version = "0.3.24+0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+4" +version = "0.3.23+2" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] diff --git a/docs/src/APIs/Regridder.md b/docs/src/APIs/Regridder.md index 861d9155a9..2da979ece0 100644 --- a/docs/src/APIs/Regridder.md +++ b/docs/src/APIs/Regridder.md @@ -1,12 +1,12 @@ # Bucket ```@meta -CurrentModule = ClimaLand.Regridder +CurrentModule = ClimaLand.Regridders ``` ## Functions ```@docs -ClimaLand.Regridder.hdwrite_regridfile_rll_to_cgll -ClimaLand.Regridder.swap_space +ClimaLand.Regridders.hdwrite_regridfile_rll_to_cgll +ClimaLand.Regridders.swap_space ``` diff --git a/experiments/Manifest.toml b/experiments/Manifest.toml index 5516c46643..e1f64ad70d 100644 --- a/experiments/Manifest.toml +++ b/experiments/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.1" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "960bdc0a1ed4b3f2aee66ed2e0c02065769614a9" @@ -330,9 +330,9 @@ version = "0.5.7" [[deps.ClimaCore]] deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] -git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969" +git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157" uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.13.2" +version = "0.13.1" weakdeps = ["Krylov"] [deps.ClimaCore.extensions] @@ -356,9 +356,9 @@ weakdeps = ["ClimaParams"] [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] -git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee" +git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01" uuid = "5c42b081-d73a-476f-9059-fd94b934656c" -version = "0.10.3" +version = "0.10.2" [[deps.ClimaTimeSteppers]] deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", 
"LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] @@ -453,7 +453,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.1.0+0" +version = "1.0.5+1" [[deps.CompositionsBase]] git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" @@ -468,9 +468,9 @@ version = "0.1.2" [[deps.ConcurrentUtilities]] deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" +git-tree-sha1 = "9c4708e3ed2b799e6124b5673a712dda0b596a9b" uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" +version = "2.3.1" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -1793,7 +1793,7 @@ version = "0.3.24+0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+4" +version = "0.3.23+2" [[deps.OpenEXR]] deps = ["Colors", "FileIO", "OpenEXR_jll"] diff --git a/experiments/standalone/Bucket/global_bucket_staticmap.jl b/experiments/standalone/Bucket/global_bucket_staticmap.jl index 39e9ff15f6..132747e9bd 100644 --- a/experiments/standalone/Bucket/global_bucket_staticmap.jl +++ b/experiments/standalone/Bucket/global_bucket_staticmap.jl @@ -87,18 +87,9 @@ tf = 7 * 86400; Δt = 3600.0; # Construct albedo parameter object using static map -# Use separate regridding directory for CPU and GPU runs to avoid race condition -device_suffix = - typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? - "cpu" : "gpu" -regrid_dir = joinpath( - pkgdir(ClimaLand), - "experiments/standalone/Bucket/$device_suffix/regrid-static/", -) -!ispath(regrid_dir) && mkpath(regrid_dir) surface_space = bucket_domain.space.surface α_snow = FT(0.8) -albedo = PrescribedBaregroundAlbedo{FT}(α_snow, regrid_dir, surface_space); +albedo = PrescribedBaregroundAlbedo{FT}(α_snow, surface_space); bucket_parameters = BucketModelParameters(FT; albedo, z_0m, z_0b, τc); @@ -213,6 +204,9 @@ F_sfc = [ ]; # save prognostic state to CSV - for comparison between GPU and CPU output +device_suffix = + typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? + "cpu" : "gpu" open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',') end; @@ -261,6 +255,3 @@ for (i, (field_ts, field_name)) in enumerate( end outfile = joinpath(outdir, string("ts_$device_suffix.png")) CairoMakie.save(outfile, fig_ts) - -# delete regrid directory -rm(regrid_dir, recursive = true) diff --git a/experiments/standalone/Bucket/global_bucket_temporalmap.jl b/experiments/standalone/Bucket/global_bucket_temporalmap.jl index 724436352d..fd91ac92c0 100644 --- a/experiments/standalone/Bucket/global_bucket_temporalmap.jl +++ b/experiments/standalone/Bucket/global_bucket_temporalmap.jl @@ -68,21 +68,12 @@ outdir = joinpath( "experiments/standalone/Bucket/artifacts_temporalmap", ) !ispath(outdir) && mkpath(outdir) -# Use separate output directory for CPU and GPU runs to avoid race condition -device_suffix = - typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? 
-    "cpu" : "gpu"
-regrid_dir = joinpath(
-    pkgdir(ClimaLand),
-    "experiments/standalone/Bucket/$device_suffix/regrid-temporal/",
-)
-!ispath(regrid_dir) && mkpath(regrid_dir)
+
 t0 = 0.0;
 # run for 50 days to test monthly file update
 tf = 50 * 86400;
 Δt = 3600.0;
-
 function setup_prob(t0, tf, Δt)
     # We set up the problem in a function so that we can make multiple copies (for profiling)
 
@@ -108,8 +99,7 @@ function setup_prob(t0, tf, Δt)
     surface_space = bucket_domain.space.surface
 
     # Construct albedo parameter object using temporal map
-    albedo =
-        PrescribedSurfaceAlbedo{FT}(regrid_dir, ref_time, t0, surface_space)
+    albedo = PrescribedSurfaceAlbedo{FT}(ref_time, t0, surface_space)
 
     bucket_parameters = BucketModelParameters(FT; albedo, z_0m, z_0b, τc)
 
@@ -148,7 +138,6 @@ function setup_prob(t0, tf, Δt)
         ref_time,
     )
 
-
     model = BucketModel(
         parameters = bucket_parameters,
         domain = bucket_domain,
@@ -262,6 +251,9 @@ F_sfc = [
 ];
 
 # save prognostic state to CSV - for comparison between GPU and CPU output
+device_suffix =
+    typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ?
+    "cpu" : "gpu"
 open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io
     writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',')
 end;
@@ -310,6 +302,3 @@ for (i, (field_ts, field_name)) in enumerate(
 end
 outfile = joinpath(outdir, string("ts_$device_suffix.png"))
 CairoMakie.save(outfile, fig_ts)
-
-# delete regrid directory
-rm(regrid_dir, recursive = true)
diff --git a/src/ClimaLand.jl b/src/ClimaLand.jl
index f1101846dd..5db6ab8cb8 100644
--- a/src/ClimaLand.jl
+++ b/src/ClimaLand.jl
@@ -8,12 +8,13 @@ include("shared_utilities/Parameters.jl")
 import .Parameters as LP
 
 include("shared_utilities/general_utils.jl")
+include("shared_utilities/Domains.jl")
+include("shared_utilities/FileReaders.jl")
+include("shared_utilities/Regridders.jl")
+include("shared_utilities/DataHandling.jl")
 include("shared_utilities/TimeVaryingInputs.jl")
 using .TimeVaryingInputs
 export TimeVaryingInput, evaluate!
-include("shared_utilities/Regridder.jl")
-include("shared_utilities/Domains.jl")
-include("shared_utilities/FileReader.jl")
 include("shared_utilities/SpaceVaryingInputs.jl")
 using .SpaceVaryingInputs
 export SpaceVaryingInput
diff --git a/src/shared_utilities/DataHandling.jl b/src/shared_utilities/DataHandling.jl
new file mode 100644
index 0000000000..ec30ee1379
--- /dev/null
+++ b/src/shared_utilities/DataHandling.jl
@@ -0,0 +1,330 @@
+"""
+    DataHandling
+
+The `DataHandling` module is responsible for reading data from files and resampling it onto
+the simulation grid.
+
+This is no trivial task. Among the challenges:
+- data can be large and cannot be read all in one go and/or held in memory
+- regridding onto the simulation grid can be very expensive
+- IO can be very expensive
+- CPU/GPU communication can be a bottleneck
+
+The `DataHandling` module takes a divide-and-conquer approach: the various core tasks and
+features are split into independent modules (chiefly `FileReaders` and `Regridders`).
+Such modules can be developed, tested, and extended independently (as long as they maintain
+a consistent interface). For instance, if the need arises, the `DataHandler` can be used
+(almost) directly to process files with a different format from NetCDF.
+
+The key struct in `DataHandling` is the `DataHandler`. The `DataHandler` contains a
+`FileReader`, a `Regridder`, and other metadata necessary to perform its operations (e.g.,
+the target `ClimaCore.Space`).
+The `DataHandler` can be used for static or temporal data, and exposes the following key
+functions:
+- `regridded_snapshot(time)`: to obtain the regridded field at the given `time`. `time` has to be
+  available in the data.
+- `available_times` (`available_dates`): to list all the `times` (`dates`) over which the
+  data is defined.
+- `previous_time(time/date)` (`next_time(time/date)`): to obtain the time of the snapshot
+  before (after) the given `time` or `date`. This can be used to compute the
+  interpolation weight for linear interpolation, or in combination
+  with `regridded_snapshot` to read a particular snapshot.
+Most `DataHandling` functions take either `time` or `date`, with the difference being that
+`time` is intended as "simulation time" and is expected to be in seconds; `date` is a
+calendar date (from `Dates.DateTime`). Conversion between time and date is performed using
+the reference date and simulation starting time provided to the `DataHandler`.
+
+The `DataHandler` has a caching mechanism in place: once a field is read and regridded, it
+is stored in the local cache to be used again (if needed).
+
+"""
+module DataHandling
+
+using Dates
+
+import ClimaCore
+
+import ..FileReaders: AbstractFileReader, NCFileReader, read
+import ..Regridders
+import ..Regridders:
+    AbstractRegridder, TempestRegridder, regrid, AVAILABLE_REGRIDDERS
+
+"""
+    DataHandler{
+        FR <: AbstractFileReader,
+        REG <: AbstractRegridder,
+        SPACE <: ClimaCore.Spaces.AbstractSpace,
+        REF_DATE <: Dates.DateTime,
+        TSTART <: AbstractFloat,
+        DATES <: AbstractArray{<:Dates.DateTime},
+        DIMS,
+        TIMES <: AbstractArray{<:AbstractFloat},
+        CACHE,
+    }
+
+Currently, the `DataHandler` works with one variable at a time. This might not be the most
+efficient way to tackle the problem: we should be able to reuse the interpolation weights when
+multiple variables are defined on the same grid and have to be remapped to the same target.
+
+Assumptions:
+- There is only one file with the entire time development of the given variable
+- The file has well-defined physical dimensions (e.g., lat/lon)
+- Currently, the time dimension has to be either "time" or "date", the spatial
+  dimensions have to be lat and lon (restriction from TempestRegridder)
+
+The DataHandler is meant to live on the CPU, but the Fields can be on the GPU as well.
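+
+For reference, simulation times relate to calendar dates as computed in the constructor
+(sketch):
+
+    available_times = Second.(available_dates .- reference_date) ./ Second(1) .- t_start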
+""" +struct DataHandler{ + FR <: AbstractFileReader, + REG <: AbstractRegridder, + SPACE <: ClimaCore.Spaces.AbstractSpace, + REF_DATE <: Dates.DateTime, + TSTART <: AbstractFloat, + DATES <: AbstractArray{<:Dates.DateTime}, + DIMS, + TIMES <: AbstractArray{<:AbstractFloat}, + CACHE, +} + """Object responsible for getting the data from disk to memory""" + file_reader::FR + + """Object responsible for resampling a rectangular grid to the simulation grid""" + regridder::REG + + """ClimaCore Space over which the data has to be resampled""" + target_space::SPACE + + """Tuple of linear arrays where the data is defined (typically long/lat)""" + dimensions::DIMS + + """Calendar dates over which the data is defined""" + available_dates::DATES + + """Simulation time at the beginning of the simulation in seconds (typically 0, but + could be different, e.g., for restarted simulations)""" + t_start::TSTART + + """Reference calendar date at the beginning of the simulation.""" + reference_date::REF_DATE + + """Timesteps over which the data is defined (in seconds)""" + available_times::TIMES + + """Private field where cached data is stored""" + _cached_regridded_fields::CACHE +end + +""" + DataHandler(file_path::AbstractString, + varname::AbstractString, + target_space::ClimaCore.Spaces.AbstractSpace; + reference_date::Dates.DateTime = Dates.DateTime(1979, 1, 1), + t_start::AbstractFloat = 0.0, + regridder_type = :TempestRegridder) + +Create a `DataHandler` to read `varname` from `file_path` and remap it to `target_space`. + +The DataHandler maintains a cache of Fields that were previously computed. + +TODO: Add function to clear cache, and/or CACHE_MAX_SIZE (this will probably require +developing a LRU cache scheme) + +Positional arguments +===================== + +- `file_path`: Path of the NetCDF file that contains the data. +- `varname`: Name of the dataset in the NetCDF that has to be read and processed. +- `target_space`: Space where the simulation is run, where the data has to be regridded to. + +Keyword arguments +=================== + +Time/date information will be ignored for static input files. (They are still set to make +everything more type stable.) + +- `reference_date`: Calendar date corresponding to the start of the simulation. +- `t_start`: Simulation time at the beginning of the simulation. Typically this is 0 + (seconds), but if might be different if the simulation was restarted. +- `regridder_type`: What type of regridding to perform. Currently, the only one implemented + is `:tempest` to use `TempestRemap`. `TempestRemap` regrids everything + ahead of time and saves the result to HDF5 files. 
+""" +function DataHandler( + file_path::AbstractString, + varname::AbstractString, + target_space::ClimaCore.Spaces.AbstractSpace; + reference_date::Dates.DateTime = Dates.DateTime(1979, 1, 1), + t_start::AbstractFloat = 0.0, + regridder_type = :TempestRegridder, +) + + # File reader, deals with ingesting data, possibly buffered/cached + file_reader = NCFileReader(file_path, varname) + + # Regridder, deals with converting and interpolating the input data onto the simulation + # grid + regridder_type in AVAILABLE_REGRIDDERS || + error("Regridder $regridder_type not implemented") + + regridder_args = () + + if regridder_type == :TempestRegridder + regridder_args = (target_space, mktempdir(), varname, file_path) + elseif regridder_type == :InterpolationsRegridder + regridder_args = (target_space,) + end + + RegridderConstructor = getfield(Regridders, regridder_type) + regridder = RegridderConstructor(regridder_args...) + + # NOTE: this is not concretely typed + _cached_regridded_fields = Dict{Dates.DateTime, ClimaCore.Fields.Field}() + + available_dates = file_reader.available_dates + times_s = Second.(available_dates .- reference_date) ./ Second(1) + available_times = times_s .- t_start + + return DataHandler( + file_reader, + regridder, + target_space, + file_reader.dimensions, + available_dates, + t_start, + reference_date, + available_times, + _cached_regridded_fields, + ) +end + +""" + close(data_handler::DataHandler) + +Close any file associated to the given `data_handler`. +""" +function Base.close(data_handler::DataHandler) + close(data_handler.file_reader) + return nothing +end + +""" + available_times(data_handler::DataHandler) + +Return the time in seconds of the snapshots in the data, measured considering +the starting time of the simulation and the reference date +""" +function available_times(data_handler::DataHandler) + return data_handler.available_times +end + +""" + available_dates(data_handler::DataHandler) + +Return the dates of the snapshots in the data. +""" +function available_dates(data_handler::DataHandler) + return data_handler.available_dates +end + +""" + previous_time(data_handler::DataHandler, time::AbstractFloat) + previous_time(data_handler::DataHandler, date::Dates.DateTime) + +Return the time in seconds of the snapshot before the given `time`. +If `time` is one of the snapshots, return itself. +""" +function previous_time(data_handler::DataHandler, time::AbstractFloat) + time in data_handler.available_times && return time + index = searchsortedfirst(data_handler.available_times, time) - 1 + return data_handler.available_times[index] +end + +function previous_time(data_handler::DataHandler, date::Dates.DateTime) + if date in data_handler.available_dates + index = searchsortedfirst(data_handler.available_dates, date) + else + index = searchsortedfirst(data_handler.available_dates, date) - 1 + end + return data_handler.available_times[index] +end + +""" + next_time(data_handler::DataHandler, time::AbstractFloat) + next_time(data_handler::DataHandler, date::Dates.DateTime) + +Return the time in seconds of the snapshot after the given `time`. +If `time` is one of the snapshots, return the next time. 
+""" +function next_time(data_handler::DataHandler, time::AbstractFloat) + index = searchsortedfirst(data_handler.available_times, time) + time in data_handler.available_times && (index += 1) + return data_handler.available_times[index] +end + +function next_time(data_handler::DataHandler, date::Dates.DateTime) + if date in data_handler.available_dates + index = searchsortedfirst(data_handler.available_dates, date) + 1 + else + index = searchsortedfirst(data_handler.available_dates, date) + end + return data_handler.available_times[index] +end + +""" + regridded_snapshot(data_handler::DataHandler, date::Dates.DateTime) + regridded_snapshot(data_handler::DataHandler, time::AbstractFloat) + regridded_snapshot(data_handler::DataHandler) + +Return the regridded snapshot from `data_handler` associated to the given `time` (if relevant). + +The `time` has to be available in the `data_handler`. + +`regridded_snapshot` potentially modifies the internal state of `data_handler` and it might be a very +expensive operation. + +TODO: Add `regridded_snapshot!` +""" +function regridded_snapshot(data_handler::DataHandler, date::Dates.DateTime) + + # Dates.DateTime(0) is the cache key for static maps + if date != Dates.DateTime(0) + date in data_handler.available_dates || error("date not available") + end + + regridder_type = nameof(typeof(data_handler.regridder)) + regrid_args = () + + regridded_snapshot = get!(data_handler._cached_regridded_fields, date) do + if regridder_type == :TempestRegridder + regrid_args = (date,) + elseif regridder_type == :InterpolationsRegridder + regrid_args = ( + read(data_handler.file_reader, date), + data_handler.dimensions, + ) + else + error("Uncaught case") + end + regrid(data_handler.regridder, regrid_args...) + end + + return regridded_snapshot +end + +function regridded_snapshot(data_handler::DataHandler, time::AbstractFloat) + date = + data_handler.reference_date + + Second(round(data_handler.t_start)) + + Second(round(time)) + return regridded_snapshot(data_handler, date) +end + +function regridded_snapshot(data_handler::DataHandler) + # This function can be called only when there are no dates (ie, the dataset is static) + isempty(data_handler.available_dates) || + error("DataHandler is function of time") + + # In this case, we use as cache key Dates.DateTime(0) + return regridded_snapshot(data_handler, Dates.DateTime(0)) +end + +end diff --git a/src/shared_utilities/FileReader.jl b/src/shared_utilities/FileReader.jl deleted file mode 100644 index e9d8e9b4b6..0000000000 --- a/src/shared_utilities/FileReader.jl +++ /dev/null @@ -1,549 +0,0 @@ -""" - FileReader - -This module coordinates reading, regridding, and interpolating -data from NetCDF files that is required for global land model -simulations, including globally varying parameters which may -or may not change in time. It also includes regridding and -temporal interpolations of this data. - -This is based on ClimaCoupler.jl's BCReader and TimeManager modules. -""" -module FileReader - -using ClimaComms -using ClimaCore: Fields, Spaces -using Dates -using CFTime -using NCDatasets - -using ClimaLand.Regridder - -export AbstractPrescribedData, - PrescribedDataTemporal, - PrescribedDataStatic, - FileInfo, - FileState, - SimInfo, - read_data_fields!, - next_date_in_file, - get_data_at_date, - to_datetime - - -""" - abstract type AbstractPrescribedData - -An abstract type for storing prescribed data info. Subtypes -include temporally-varying prescribed data and static prescribed data. 
-""" -abstract type AbstractPrescribedData end - -""" - PrescribedDataStatic <: AbstractPrescribedData - -Stores information to read in a prescribed variable from a file. -The data is read in once and stored without changing for the duration of a -simulation. This type is meant to be used with input data that does not have -a time dimension. -Each of the fields of this struct is itself a struct. - -# Inputs: -- file_info::FI # unchanging info about the input data file -""" -struct PrescribedDataStatic{FI} <: AbstractPrescribedData - file_info::FI -end - -""" - PrescribedDataTemporal <: AbstractPrescribedData - -Stores information to read in prescribed data from a file. -Contains sufficient information to read in the variables at various -timesteps, and to coordinate this reading between data coming from -different files. This type is meant to be used with input data that has -a time dimension. -The `file_states` field is a dictionary mapping variable names to `FileState` -structs, which contain information about the current data for that variable. - -# Inputs: -- file_info::FI # unchanging info about the input data file -- file_states::Dict{S, FS} # info about the data currently being read from file for each variable -- sim_info::SI # unchanging info about the start date/time of the simulation -""" -struct PrescribedDataTemporal{FI, S, FS, SI} <: AbstractPrescribedData - file_info::FI - file_states::Dict{S, FS} - sim_info::SI -end - -""" - FileInfo - -Stores information about the current data being read in from a file. - -# Inputs: -- infile_path::String # path to the input NetCDF data file -- regrid_dirpath::String # directory for storing files used in regridding -- varnames::Vector{String} # names of the variables we're reading from the input file -- outfile_root::String # root for regridded data files generated when writing data at each time from input file -- all_dates::Vector # vector containing all dates of the input file, which we assume are `DateTime`s or `DateTimeNoLeap`s -- date_idx0::Vector{Int} # index of the first data in the file being used for this simulation -""" -struct FileInfo - infile_path::String - regrid_dirpath::String - varnames::Vector{String} - outfile_root::String - all_dates::Vector - date_idx0::Vector{Int} -end - -""" - FileState - -Stores information about the current data being read in from a file for one variable. - -# Inputs: -- data_fields::F # tuple of two fields at consecutive dates, that will be used for interpolation -- date_idx::Vector{Int} # index in the input file of the first data field currently being used -- segment_length::Vector{Int} # length of the time interval between the two data field entries; used in temporal interpolation -""" -struct FileState{F} - data_fields::F - date_idx::Vector{Int} - segment_length::Vector{Int} -end - -""" - SimInfo - -Stores information about the simulation being run. We may want to store -multiple copies of an instance of this struct in multiple PrescribedDataTemporal -objects if we're reading in data over time for multiple variables. - -# Inputs: -- date_ref::D # a reference date before or at the start of the simulation -- t_start # time in seconds since `date_ref` -""" -struct SimInfo{D} - date_ref::D - t_start::Any -end - -""" - PrescribedDataStatic{FT}( - get_infile::Function, - regrid_dirpath::String, - varnames::Vector{String}, - surface_space::Spaces.AbstractSpace, - mono::Bool = true, - ) where {FT <: AbstractFloat} - -Constructor for the `PrescribedDataStatic`` type. 
-Regrids from the input lat-lon grid to the simulation cgll grid, saving -the regridded output in new files found at `regrid_dirpath`. The `mono` flag -here is used to determine whether or not the remapping is monotone. - -Creates a `FileInfo` object containing all the information needed to read in -the data stored in the input file, which will later be regridded to our -simulation grid. Date-related args (last 3 passed to FileInfo) are unused for -static data maps. -""" -function PrescribedDataStatic{FT}( - get_infile::Function, - regrid_dirpath::String, - varnames::Vector{String}, - surface_space::Spaces.AbstractSpace; - mono::Bool = true, -) where {FT <: AbstractFloat} - comms_ctx = ClimaComms.context(surface_space) - outfile_root = "static_data_cgll" - - # Download `infile_path` artifact on root process first to avoid race condition - if ClimaComms.iamroot(comms_ctx) - infile_path = get_infile() - Regridder.hdwrite_regridfile_rll_to_cgll( - FT, - regrid_dirpath, - infile_path, - varnames, - surface_space, - outfile_root; - mono = mono, - ) - end - ClimaComms.barrier(comms_ctx) - infile_path = get_infile() - - file_info = - FileInfo(infile_path, regrid_dirpath, varnames, outfile_root, [], []) - return PrescribedDataStatic{typeof(file_info)}(file_info) -end - - -""" - PrescribedDataTemporal{FT}( - regrid_dirpath, - get_infile, - varnames, - date_ref, - t_start, - surface_space; - mono = true, - ) where {FT <: AbstractFloat} - -Constructor for the `PrescribedDataTemporal` type. -Regrids from the input lat-lon grid to the simulation cgll grid, saving -the regridded output in a new file found at `regrid_dirpath`, and -returns the info required to run the simulation using this prescribed -data packaged into a single `PrescribedDataTemporal` struct. - -# Arguments -- `regrid_dirpath` # directory the data file is stored in. -- `get_infile` # function returning path to NCDataset file containing data to regrid. -- `varnames` # names of the variables to be regridded. -- `date_ref` # reference date to coordinate start of the simulation -- `t_start` # start time of the simulation relative to `date_ref` (date_start = date_ref + t_start) -- `surface_space` # the space to which we are mapping. -- `mono` # flag for monotone remapping of `infile_path`. 
- -# Returns -- `PrescribedDataTemporal` object -""" -function PrescribedDataTemporal{FT}( - regrid_dirpath::String, - get_infile::Function, - varnames::Vector{String}, - date_ref::Union{DateTime, DateTimeNoLeap}, - t_start, - surface_space::Spaces.AbstractSpace; - mono::Bool = true, -) where {FT <: AbstractFloat} - comms_ctx = ClimaComms.context(surface_space) - outfile_root = "temporal_data_cgll" - - # Initialize dummy date to be overwritten by actual dates during file read - all_dates = [DateTime(0)] - - # Regrid data at all times from lat/lon (RLL) to simulation grid (CGLL) - # Download `infile_path` artifact on root process first to avoid race condition - if ClimaComms.iamroot(comms_ctx) - infile_path = get_infile() - all_dates = Regridder.hdwrite_regridfile_rll_to_cgll( - FT, - regrid_dirpath, - infile_path, - varnames, - surface_space, - outfile_root; - mono = mono, - ) - - NCDataset(infile_path, "r") do ds - if !("time" in keys(ds)) - error( - "Using a temporal albedo map requires data with time dimension.", - ) - end - end - end - all_dates = ClimaComms.bcast(comms_ctx, all_dates) - ClimaComms.barrier(comms_ctx) - infile_path = get_infile() - - # Init time tracking info - data_fields = - Fields.zeros(FT, surface_space), Fields.zeros(FT, surface_space) - # Store `segment_length` as an array so we can modify it as a field of a struct - segment_length = Int[0] - - date_start = date_ref + Dates.Second(t_start) - if date_start < all_dates[1] - @warn "Simulation start date is before first file data" - end - - # Find the index of the start file closest to this simulation's start date - # Like `segment_length`, store in an array so we can modify in struct - date_idx0 = - [argmin(abs.(Dates.value(date_start) .- Dates.value.(all_dates[:])))] - - # Construct component structs of PrescribedDataTemporal object - file_info = FileInfo( - infile_path, - regrid_dirpath, - varnames, - outfile_root, - all_dates, - date_idx0, - ) - file_states = Dict{String, FileState{typeof(data_fields)}}() - for varname in varnames - file_states[varname] = - FileState(deepcopy(data_fields), copy(date_idx0), segment_length) - end - sim_info = SimInfo(date_ref, t_start) - - args = (file_info, file_states, sim_info) - - # Get types of `file_info`, the first `file_states` Dict pair, and `sim_info` - type_args = ( - typeof(file_info), - typeof(first(file_states)[1]), - typeof(first(file_states)[2]), - typeof(sim_info), - ) - return PrescribedDataTemporal{type_args...}(args...) -end - -""" - read_data_fields!( - prescribed_data::PrescribedDataTemporal, - date::DateTime, - space::Spaces.AbstractSpace - ) - -Extracts data from regridded (to model grid) NetCDF files. -The times for which data is extracted depends on the specifications in the -`prescribed_data` struct). -Data at one point in time is stored in `prescribed_data.file_state.data_fields[1]`, and -data at the next time is stored in `prescribed_data.file_state.data_fields[2]`. With these -two data fields saved, we can interpolate between them for any dates -in this range of time. - -# Arguments -- `prescribed_data` # containing data and file information. -- `date` # current date to read in data for. -- `space` # space we're remapping the data onto. 
-""" -function read_data_fields!( - prescribed_data::PrescribedDataTemporal, - date::DateTime, - space::Spaces.AbstractSpace, -) - comms_ctx = ClimaComms.context(space) - pd_file_info = prescribed_data.file_info - pd_file_states = prescribed_data.file_states - - (; regrid_dirpath, outfile_root, all_dates, varnames) = pd_file_info - - date_idx0 = pd_file_info.date_idx0[1] - # Assumes that all variables in `prescribed_data` have the same dates - date_idx = pd_file_states[varnames[1]].date_idx[1] - - # Case 1: Current date is before or at first date in data file - # Load in data at first date for both `data_fields[1]` and `data_fields[2]` - if (date_idx == date_idx0) && (date <= all_dates[date_idx]) - if date !== all_dates[date_idx] - @warn "this time period is before input data - using file from $(all_dates[date_idx0])" - end - - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx0)], - varname, - space, - ) - file_state.data_fields[2] .= file_state.data_fields[1] - file_state.segment_length .= 0 - end - - # Case 2: current date is at or after last date in input file - # Load in data at last date for both `data_fields[1]` and `data_fields[2]` - elseif date >= all_dates[end - 1] - @warn "this time period is after input data - using file from $(all_dates[end - 1])" - - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[end], - varname, - space, - ) - file_state.data_fields[2] .= file_state.data_fields[1] - file_state.segment_length .= 0 - end - - # Case 3: current date is later than date of data being read in - # Load in data at most recent past date in `data_fields[1]` and - # next date in `data_fields[2]` - # elseif Dates.days(date - all_dates[Int(date_idx)]) > 0 - elseif date > all_dates[Int(date_idx)] - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state = pd_file_states[varname] - - # Increment `date_idx` to use next date - date_idx = file_state.date_idx[1] += Int(1) - # Time between consecutive dates being stored gives `segment_length` - file_state.segment_length .= - (all_dates[Int(date_idx + 1)] - all_dates[Int(date_idx)]).value - - # Read in data fields at both dates - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx)], - varname, - space, - ) - file_state.data_fields[2] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx + 1)], - varname, - space, - ) - end - # Case 4: Everything else - else - throw(ErrorException("Check input file specification")) - end -end - -""" - next_date_in_file(prescribed_data::PrescribedDataTemporal) - -Returns the next date stored in the file `prescribed_data` struct after the -current date index given by `date_idx`. -Note: this function does not update `date_idx`, so repeated calls will -return the same value unless `date_idx` is modified elsewhere in between. -Assumes that all variables in `prescribed_data` have the same dates. - -# Arguments -- `prescribed_data` # contains all input file information needed for the simulation. 
- -# Returns -- DateTime or DateTimeNoLeap -""" -next_date_in_file(prescribed_data::PrescribedDataTemporal) = - prescribed_data.file_info.all_dates[first( - prescribed_data.file_states, - )[2].date_idx[1] + Int(1)] - -""" - get_data_at_date( - prescribed_data::PrescribedDataStatic, - space::Spaces.AbstractSpace, - varname::String, - ) - -Returns the data at a given date, interpolated if necessary. - -# Arguments -- `prescribed_data` # contains fields to be interpolated. -- `space` # the space of our simulation. -- `varname` # the name of the variable we want to read in. - -# Returns -- Fields.field -""" -function get_data_at_date( - prescribed_data::PrescribedDataStatic, - space::Spaces.AbstractSpace, - varname::String, -) - (; regrid_dirpath, outfile_root) = prescribed_data.file_info - - comms_ctx = ClimaComms.context(space) - field = Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - Dates.DateTime(0), # dummy date - varname, - space, - ) - return field -end - -""" - get_data_at_date( - prescribed_data::PrescribedDataTemporal, - space::Spaces.AbstractSpace, - varname::String, - date::Union{DateTime, DateTimeNoLeap}, - ) - -Returns the data for a specific variable at a given date, -interpolated if necessary. - -# Arguments -- `prescribed_data` # contains fields to be interpolated. -- `space` # the space of our simulation. -- `varname` # the name of the variable we want to read in. -- `date` # start date for data. - -# Returns -- Fields.field -""" -function get_data_at_date( - prescribed_data::PrescribedDataTemporal, - space::Spaces.AbstractSpace, - varname::String, - date::Union{DateTime, DateTimeNoLeap}, -) - FT = Spaces.undertype(space) - (; all_dates) = prescribed_data.file_info - - # Use the file state of the variable we want - file_state = prescribed_data.file_states[varname] - (; segment_length, date_idx, data_fields) = file_state - # Interpolate if the time period between dates is nonzero - if segment_length[1] > FT(0) && date != all_dates[Int(date_idx[1])] - Δt_tt1 = FT((date - all_dates[Int(date_idx[1])]).value) - interp_fraction = Δt_tt1 / FT(segment_length[1]) - @assert abs(interp_fraction) <= FT(1) "time interpolation weights must be <= 1, but `interp_fraction` = $interp_fraction" - return interpol.( - data_fields[1], - data_fields[2], - Δt_tt1, - FT(segment_length[1]), - ) - # Otherwise use the data at the first date - else - return data_fields[1] - end -end - - -""" - interpol(f1::FT, f2::FT, Δt_tt1::FT, Δt_t2t1::FT) - -Performs linear interpolation of `f` at time `t` within -a segment `Δt_t2t1 = (t2 - t1)`, of fields `f1` and `f2`, with `t2 > t1`. - -# Arguments -- `f1`::FT # first value to be interpolated (`f(t1) = f1`). -- `f2`::FT # second value to be interpolated. -- `Δt_tt1`::FT # time between `t1` and some `t` (`Δt_tt1 = (t - t1)`). -- `Δt_t2t1`::FT # time between `t1` and `t2`. - -# Returns -- FT -""" -function interpol(f1::FT, f2::FT, Δt_tt1::FT, Δt_t2t1::FT) where {FT} - interp_fraction = Δt_tt1 / Δt_t2t1 - return f1 * (FT(1) - interp_fraction) + f2 * (interp_fraction) -end - -""" - to_datetime(date) - -Convert a DateTime-like object (e.g. DateTimeNoLeap) to a DateTime, -using CFTime.jl. We need this since the CESM2 albedo file contains -DateTimeNoLeap objects for dates, which can't be used for math with DateTimes. - -Note that this conversion may fail if the date to convert doesn't -exist in the DateTime calendar. 
-
-# Arguments
-- `date`: object to be converted to DateTime
-"""
-to_datetime(date) = CFTime.reinterpret(DateTime, date)
-
-end
diff --git a/src/shared_utilities/FileReaders.jl b/src/shared_utilities/FileReaders.jl
new file mode 100644
index 0000000000..1ef2b7c5d9
--- /dev/null
+++ b/src/shared_utilities/FileReaders.jl
@@ -0,0 +1,229 @@
+"""
+    FileReaders
+
+The `FileReaders` module implements backends to read and process input files.
+
+Given that reading from disk can be an expensive operation, this module provides a pathway
+to optimize the performance (if needed).
+
+The FileReaders module contains a global cache of all the NCDatasets that are currently open.
+This allows multiple NCFileReaders to share the underlying file without overhead.
+"""
+module FileReaders
+
+import Dates
+import NCDatasets
+
+abstract type AbstractFileReader end
+
+# We allow multiple NCFileReaders to share the same underlying NCDataset. For this, we put
+# all the NCDatasets into a dictionary where we keep track of them. OPEN_NCFILES is a
+# dictionary that maps file paths to a Tuple with the first element being the NCDataset and
+# the second element being a Set of Strings, the variables that are being read from that
+# file. Every time an NCFileReader is created or closed, this Set is updated by adding or
+# removing a varname.
+const OPEN_NCFILES = Dict{String, Tuple{NCDatasets.NCDataset, Set{String}}}()
+
+"""
+    NCFileReader
+
+A struct to read and process NetCDF files.
+
+NCFileReader is designed to be smart, e.g., by caching reads, or by spinning the I/O off to
+a different thread (not implemented yet).
+"""
+struct NCFileReader{
+    STR1 <: AbstractString,
+    STR2 <: AbstractString,
+    DIMS <: Tuple,
+    NC <: NCDatasets.NCDataset,
+    DATES <: AbstractArray{<:Dates.DateTime},
+    CACHE,
+    PREP <: Function,
+} <: AbstractFileReader
+    """Path of the NetCDF file"""
+    file_path::STR1
+
+    """Name of the dataset in the NetCDF file"""
+    varname::STR2
+
+    """A tuple of arrays with the various physical dimensions where the data is defined
+    (e.g., lon/lat)"""
+    dimensions::DIMS
+
+    """A vector of DateTime collecting all the available dates in the file"""
+    available_dates::DATES
+
+    """NetCDF file opened by NCDataset"""
+    dataset::NC
+
+    """Optional function that is applied to the read dataset. Useful to do unit conversion
+    or remove NaNs. Not suitable for anything more complicated than that."""
+    preprocess_func::PREP
+
+    """A place to store values that have already been read. A dictionary mapping dates
+    to arrays. For static datasets, a sentinel date DateTime(0) is used as key."""
+    _cached_reads::CACHE
+
+    """Index of the time dimension in the array (typically first). -1 for static datasets"""
+    time_index::Int
+end
+
+"""
+    NCFileReader(file_path::AbstractString, varname::AbstractString; preprocess_func = identity)
+
+Construct an `NCFileReader` to efficiently read and process `varname` from the NetCDF file
+at `file_path`.
+"""
+function NCFileReader(
+    file_path::AbstractString,
+    varname::AbstractString;
+    preprocess_func = identity,
+)
+
+    # Get dataset from global dictionary. If not available, open new file and add entry to
+    # global dictionary
+    dataset, open_varnames = get!(
+        OPEN_NCFILES,
+        file_path,
+        (NCDatasets.NCDataset(file_path), Set([varname])),
+    )
+    # push! will do nothing when file is opened for the first time
+    push!(open_varnames, varname)
+
+    # NOTE: We are hardcoding the dimension names.
+    # At the moment, this is required by TempestRemap
+    dim_names = ["lon", "lat"]
+    if all(d in keys(dataset) for d in dim_names)
+        dimensions = Tuple(dataset[d] for d in dim_names)
+    else
+        @warn "Dimensions not found in $file_path"
+        # If lat and lon, we can try deducing them from the number of points
+        if dim_names == ["lon", "lat"] || dim_names == ["long", "lat"]
+            @warn "Assuming cell-centered lon/lat (-180/180, -90/90)"
+            # N cell centers are the midpoints of N + 1 cell edges
+            lon_edges = range(-180, 180, dataset.dim[dim_names[1]] + 1)
+            lon = (lon_edges[1:(end - 1)] .+ lon_edges[2:end]) ./ 2.0
+
+            lat_edges = range(-90, 90, dataset.dim[dim_names[2]] + 1)
+            lat = (lat_edges[1:(end - 1)] .+ lat_edges[2:end]) ./ 2.0
+            dimensions = (lon, lat)
+        else
+            error("$file_path does not contain information about dimensions")
+        end
+    end
+    available_dates = read_available_dates(dataset)
+
+    time_index = -1
+
+    if !isempty(available_dates)
+        time_index_vec = findall(
+            x -> x == "time" || x == "date" || x == "t",
+            NCDatasets.dimnames(dataset[varname]),
+        )
+        length(time_index_vec) == 1 ||
+            error("Could not find (unique) time dimension")
+        time_index = time_index_vec[]
+
+        issorted(available_dates) ||
+            error("Cannot process files that are not sorted in time")
+    end
+
+    # NOTE: This is not concretely typed.
+    _cached_reads = Dict{Dates.DateTime, Array}()
+
+    return NCFileReader(
+        file_path,
+        varname,
+        dimensions,
+        available_dates,
+        dataset,
+        preprocess_func,
+        _cached_reads,
+        time_index,
+    )
+end
+
+"""
+    close(file_reader::NCFileReader)
+
+Close the `NCFileReader`. If no other `NCFileReader` is using the same file, close the NetCDF file.
+"""
+function Base.close(file_reader::NCFileReader)
+    open_variables = OPEN_NCFILES[file_reader.file_path][end]
+    pop!(open_variables, file_reader.varname)
+    if isempty(open_variables)
+        NCDatasets.close(file_reader.dataset)
+        delete!(OPEN_NCFILES, file_reader.file_path)
+    end
+    return nothing
+end
+
+
+"""
+    read_available_dates(ds::NCDatasets.NCDataset)
+
+Return all the dates in the given NCDataset. The dates are read from the "time"
+or "date" variables. If neither is available, return an empty vector.
+"""
+function read_available_dates(ds::NCDatasets.NCDataset)
+    if "time" in keys(ds.dim)
+        return Dates.DateTime.(
+            reinterpret.(Ref(NCDatasets.DateTimeStandard), ds["time"][:])
+        )
+    elseif "date" in keys(ds.dim)
+        return strdate_to_datetime.(string.(ds["date"][:]))
+    else
+        return Dates.DateTime[]
+    end
+end
+
+"""
+    read(file_reader::NCFileReader, date::Dates.DateTime)
+
+Read and preprocess the data at the given `date`.
+"""
+function read(file_reader::NCFileReader, date::Dates.DateTime)
+    # DateTime(0) is the sentinel value for static datasets
+    if date == Dates.DateTime(0)
+        return get!(file_reader._cached_reads, date) do
+            file_reader.preprocess_func.(
+                Array(file_reader.dataset[file_reader.varname])
+            )
+        end
+    end
+
+    index = findall(d -> d == date, file_reader.available_dates)
+    length(index) == 1 ||
+        error("Problem with date $date in $(file_reader.file_path)")
+    index = index[]
+
+    return get!(file_reader._cached_reads, date) do
+        var = file_reader.dataset[file_reader.varname]
+        slicer = [
+            i == file_reader.time_index ? index : Colon() for
+            i in 1:length(NCDatasets.dimnames(var))
+        ]
+        return file_reader.preprocess_func.(
+            file_reader.dataset[file_reader.varname][slicer...]
+        )
+    end
+end
+
+"""
+    read(file_reader::NCFileReader)
+
+Read and preprocess data (for static datasets).
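+
+A sketch of intended usage (paths and variable names are hypothetical):
+
+    reader = NCFileReader("temporal_data.nc", "u"; preprocess_func = x -> 2x)
+    data = read(reader, first(reader.available_dates))
+    close(reader)
+
+    static_reader = NCFileReader("static_map.nc", "landsea_mask")
+    mask_data = read(static_reader)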
+""" +function read(file_reader::NCFileReader) + isempty(file_reader.available_dates) || + error("File contains temporal data, date required") + + # When there's no dates, we use DateTime(0) as key + return get!(file_reader._cached_reads, Dates.DateTime(0)) do + return file_reader.preprocess_func.( + Array(file_reader.dataset[file_reader.varname]) + ) + end +end + + +end diff --git a/src/shared_utilities/Regridder.jl b/src/shared_utilities/Regridders.jl similarity index 58% rename from src/shared_utilities/Regridder.jl rename to src/shared_utilities/Regridders.jl index a06ae4f802..c60e9601d8 100644 --- a/src/shared_utilities/Regridder.jl +++ b/src/shared_utilities/Regridders.jl @@ -1,11 +1,215 @@ -# Code from ClimaCoupler Regridder.jl -module Regridder -using ClimaCore -using ClimaComms -using NCDatasets -using ClimaCoreTempestRemap +""" + Regridders + +The `Regridders` module implement structs and functions to remap datasets to simulation +grids. + +Currently, the schemes implemented are `TempestRegridder`, which uses +`ClimaCoreTempestRemap`, and `InterpolationsRegridder`, which uses `Interpolations.jl`. + +The key function exposed by `Regridder` is the `regrid` method. +""" +module Regridders + using Dates -using DocStringExtensions + +import Adapt + +import ClimaComms +import ClimaCore +import ClimaCore: Spaces, Fields, Geometry +import ClimaCoreTempestRemap +import Interpolations as Intp +import ..FileReaders: read_available_dates + +using NCDatasets + +# When adding a new regridder, you also have to change some functions in the DataHandler +# module. Find where :TempestRegridder is used. +abstract type AbstractRegridder end + +const AVAILABLE_REGRIDDERS = [:TempestRegridder, :InterpolationsRegridder] + +""" + InterpolationsRegridder + +An online regridder that uses Interpolations.jl + +InterpolationsRegridder is only implemented for LatLong and LatLongZ spaces. It performs +linear interpolation along each of the directions (separately), while imposing periodic +boundary conditions for longitude, flat for latitude, and throwing errors when extrapolating +in z. + +InterpolationsRegridder is GPU and MPI compatible in the simplest possible way. Each MPI +process has the entire input data and everything is copied to GPU. +""" +struct InterpolationsRegridder{ + SPACE <: ClimaCore.Spaces.AbstractSpace, + FIELD <: ClimaCore.Fields.Field, + BC, +} <: AbstractRegridder + target_space::SPACE + coordinates::FIELD + extrapolation_bc::BC +end + +# Note, we swap Lat and Long! This is because according to the CF conventions longitude +# should be first, so files will have longitude as first dimension. +totuple(pt::Geometry.LatLongZPoint) = pt.long, pt.lat, pt.z +totuple(pt::Geometry.LatLongPoint) = pt.long, pt.lat + +function InterpolationsRegridder(target_space::ClimaCore.Spaces.AbstractSpace) + coordinates = Fields.coordinate_field(target_space) + + extrapolation_bc = () + if eltype(coordinates) <: Geometry.LatLongPoint + extrapolation_bc = (Intp.Periodic(), Intp.Flat()) + elseif eltype(coordinates) <: Geometry.LatLongZPoint + extrapolation_bc = (Intp.Periodic(), Intp.Flat(), Intp.Throw()) + else + error("Only lat-long, lat-long-z spaces are supported") + end + + return InterpolationsRegridder(target_space, coordinates, extrapolation_bc) +end + +""" + regrid(regridder::InterpolationsRegridder, data, dimensions)::Field + +Regrid the given data as defined on the given dimensions to the `target_space` in `regridder`. + +This function is allocating. 
+""" +function regrid(regridder::InterpolationsRegridder, data, dimensions) + FT = Spaces.undertype(regridder.target_space) + dimensions_FT = map(d -> FT.(d), dimensions) + + # Make a linear spline + itp = Intp.extrapolate( + Intp.interpolate(dimensions_FT, FT.(data), Intp.Gridded(Intp.Linear())), + regridder.extrapolation_bc, + ) + + # Move it to GPU (if needed) + gpuitp = Adapt.adapt(ClimaComms.array_type(regridder.target_space), itp) + + return map(regridder.coordinates) do coord + gpuitp(totuple(coord)...) + end +end + +""" + TempestRegridder + +A struct to resample a given NetCDF file to `the target_space`. + +Currently, this work with ClimaCoreTempestRemap. `ClimaCoreTempestRemap` uses TempestRemap, +a command-line utility. Hence, This function has to process files, so, for this regridder, +we cannot really split file processing and remapping. + +TempestRegridder only works on CPU and on a single process. + +Implicit assumptions in the code: +- input_file contains "lat" and "lon" +- the time dimension, if available is called "time" or "date" +- the name of the variable in the NetCDF file is `varname` +- all the data is in one single file +""" +struct TempestRegridder{ + SPACE <: ClimaCore.Spaces.AbstractSpace, + STR1 <: AbstractString, + STR2 <: AbstractString, + STR3 <: AbstractString, +} <: AbstractRegridder + target_space::SPACE + regrid_dir::STR1 + outfile_root::STR2 + varname::STR3 + mono::Bool +end + +""" + TempestRegridder(target_space::ClimaCore.Spaces.AbstractSpace, + input_file::AbstractString, + varname::AbstractString, + regrid_dir::AbstractString; + mono::Bool) + +Set up a `TempestRegridder` object to regrid the variable in the given `input_file` to the +`target_space`. `TempestRegridder` works only on CPU and on a single process. + +Positional arguments +===================== + +- `target_space`: the ClimaCore Space where the simulation is being performed. +- `input_file`: the path of the NetCDF file that has to be read and processed. +- `varname`: the name of the variable in the NetCDF file. +- `regrid_dir`: a folder where regridded Fields are saved as HDF5 files. + +Keyword arguments +================== + +- `mono`: Whether remapping has to be monotonic or not. +""" +function TempestRegridder( + target_space::ClimaCore.Spaces.AbstractSpace, + regrid_dir::AbstractString, + varname::AbstractString, + input_file::AbstractString; + mono::Bool = true, +) + space = target_space + comms_ctx = ClimaComms.context(space) + + if ClimaComms.iamroot(comms_ctx) + @info "Saving TempestRegrid files to $regrid_dir" + + FT = Spaces.undertype(space) + outfile_root = varname + varnames = [varname] + + hdwrite_regridfile_rll_to_cgll( + FT, + regrid_dir, + input_file, + varnames, + target_space, + outfile_root; + mono, + ) + end + # We have to make sure that all the processes wait on the (expensive) regridding, + # otherwise they won't have access to the regridded fields + ClimaComms.barrier(comms_ctx) + + return TempestRegridder{ + typeof(target_space), + typeof(regrid_dir), + typeof(outfile_root), + typeof(varname), + }( + target_space, + regrid_dir, + outfile_root, + varname, + mono, + ) +end + +""" + regrid(regridder::TempestRegridder, date::Dates.DateTime) + +Return the field associated to the `regridder` at the given `date`. 
+""" +function regrid(regridder::TempestRegridder, date::Dates.DateTime) + return read_from_hdf5( + regridder.regrid_dir, + regridder.outfile_root, + date, + regridder.varname, + regridder.target_space, + ) +end export hdwrite_regridfile_rll_to_cgll @@ -238,22 +442,26 @@ function hdwrite_regridfile_rll_to_cgll( if isfile(datafile_cgll) == false isdir(REGRID_DIR) ? nothing : mkpath(REGRID_DIR) - nlat, nlon = NCDataset(datafile_rll) do ds + nlat, nlon = NCDatasets.NCDataset(datafile_rll) do ds (ds.dim["lat"], ds.dim["lon"]) end # write lat-lon mesh - rll_mesh(meshfile_rll; nlat = nlat, nlon = nlon) + ClimaCoreTempestRemap.rll_mesh(meshfile_rll; nlat = nlat, nlon = nlon) # write cgll mesh, overlap mesh and weight file - write_exodus(meshfile_cgll, topology) - overlap_mesh(meshfile_overlap, meshfile_rll, meshfile_cgll) + ClimaCoreTempestRemap.write_exodus(meshfile_cgll, topology) + ClimaCoreTempestRemap.overlap_mesh( + meshfile_overlap, + meshfile_rll, + meshfile_cgll, + ) # 'in_np = 1' and 'mono = true' arguments ensure mapping is conservative and monotone # Note: for a kwarg not followed by a value, set it to true here (i.e. pass 'mono = true' to produce '--mono') # Note: out_np = degrees of freedom = polynomial degree + 1 kwargs = (; out_type = out_type, out_np = Nq) kwargs = mono ? (; (kwargs)..., in_np = 1, mono = mono) : kwargs - remap_weights( + ClimaCoreTempestRemap.remap_weights( weightfile, meshfile_rll, meshfile_cgll, @@ -261,28 +469,16 @@ function hdwrite_regridfile_rll_to_cgll( kwargs..., ) - apply_remap(datafile_cgll, datafile_rll, weightfile, varnames) + ClimaCoreTempestRemap.apply_remap( + datafile_cgll, + datafile_rll, + weightfile, + varnames, + ) else @warn "Using the existing $datafile_cgll : check topology is consistent" end - function get_time(ds) - if "time" in keys(ds.dim) - data_dates = - Dates.DateTime.( - reinterpret.( - Ref(NCDatasets.DateTimeStandard), - Array(ds["time"]), - ) - ) - elseif "date" in keys(ds.dim) - data_dates = strdate_to_datetime.(string.(Array(ds["date"]))) - else - @warn "No dates available in file $datafile_rll" - data_dates = [Dates.DateTime(0)] - end - end - # weightfile info needed to populate all nodes and save into fields with # sparse matrices _, _, row_indices = NCDataset(weightfile, "r") do ds_wt @@ -309,13 +505,17 @@ function hdwrite_regridfile_rll_to_cgll( # Save regridded HDF5 file for each variable in `varnames` for varname in varnames # read the remapped file with sparse matrices - offline_outvector, times = NCDataset(datafile_cgll, "r") do ds_wt - ( - # read the data in, and remove missing type (will error if missing data is present) - offline_outvector = nomissing(Array(ds_wt[varname])[:, :]), # ncol, times - times = get_time(ds_wt), - ) - end + offline_outvector, times = + NCDatasets.NCDataset(datafile_cgll, "r") do ds_wt + ( + # read the data in, and remove missing type (will error if missing data is present) + offline_outvector = nomissing(Array(ds_wt[varname])[:, :]), # ncol, times + times = read_available_dates(ds_wt), + ) + end + + # Use dummy date when there are no real dates + isempty(times) && (times = [DateTime(0)]) # Convert input data float type if needed if eltype(offline_outvector) <: AbstractFloat && @@ -323,7 +523,9 @@ function hdwrite_regridfile_rll_to_cgll( @warn "Converting $varname data in $datafile_cgll from $(eltype(offline_outvector)) to $FT" offline_outvector = Array{FT}(offline_outvector) end - @assert length(times) == size(offline_outvector, 2) "Inconsistent time dimension in $datafile_cgll for $varname" 
+        length_times = length(times)
+        size_outvector = size(offline_outvector, 2)
+        @assert length_times == size_outvector "Inconsistent time dimension in $datafile_cgll for $varname ($length_times != $size_outvector)"
 
     offline_fields = ntuple(x -> similar(offline_field), length(times))
 
     ntuple(
diff --git a/src/shared_utilities/SpaceVaryingInputs.jl b/src/shared_utilities/SpaceVaryingInputs.jl
index 843012df5e..4ae0127d28 100644
--- a/src/shared_utilities/SpaceVaryingInputs.jl
+++ b/src/shared_utilities/SpaceVaryingInputs.jl
@@ -20,22 +20,22 @@ using ClimaComms
 using DocStringExtensions
 import ..searchsortednearest
 import ..linear_interpolation
-using ClimaLand.FileReader
+import ..DataHandling: DataHandler, regridded_snapshot
 export SpaceVaryingInput
 
 # Analytic case
 """
-    SpaceVaryingInput(data_function::Function, space::ClimaCore.Spaces.AbstractSpace)
-    
+    SpaceVaryingInput(data_function::Function, space::ClimaCore.Spaces.AbstractSpace)
+
 Returns the parameter field to be used in the model; appropriate when
 a parameter is defined using a function
 of the coordinates of the space.
 
-Pass the ``data" as a function `data_function` which takes coordinates as arguments, 
+Pass the "data" as a function `data_function` which takes coordinates as arguments,
 and the ClimaCore space of the model simulation.
 
 This returns a scalar field.
 Note that data_function is broadcasted over the coordinate field. Internally, inside
-your function, this must be unpacked (coords.lat, coords.lon, e.g.) for 
+your function, this must be unpacked (coords.lat, coords.lon, e.g.) for
 use of the coordinate values directly.
 """
 function SpaceVaryingInput(
@@ -55,7 +55,7 @@ end
         space::S,
     ) where {S <: ClimaCore.Spaces.CenterFiniteDifferenceSpace}
 
-Given a set of depths `data_z` and the observed values `data_values` 
+Given a set of depths `data_z` and the observed values `data_values`
 at those depths, create an interpolated field of values at each value of z in
 the model grid - defined implicitly by `space`.
 
@@ -92,8 +92,8 @@ end
         DT,
     }
 
-Returns a field of parameter structs to be used in the model; 
-appropriate when the parameter struct values vary in depth; 
+Returns a field of parameter structs to be used in the model;
+appropriate when the parameter struct values vary in depth;
 the `dest_type` argument is the struct type - we assumed that
 your struct as a constructor which accepts the values of its
 arguments by kwarg,
@@ -137,29 +137,21 @@ function SpaceVaryingInput(
 end
 
 """
-    SpaceVaryingInput(data::PDS, varname::N; space::S)
-        where {PDS <: FileReader.PrescribedDataStatic, N <: String, S <: ClimaCore.Spaces.SpectralElement2D}
-    
+    SpaceVaryingInput(data_handler::DataHandler)
+    SpaceVaryingInput(file_path::AbstractString, varname::AbstractString, target_space::Spaces.AbstractSpace)
+
 Returns the parameter field to be used in the model; appropriate when
 a parameter is defined on the surface of the Earth.
 
-Pass the data as a `FileReader.PrescribedDataStatic` object
-as well as the variable name of the variable in the data file, and the ClimaCore space
-of the model simulation.
-
 Returns a ClimaCore.Fields.Field of scalars; analogous to the 1D case
 which also returns a ClimaCore.Fields.Field of scalars.
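+
+For example, a minimal sketch (the file path and variable name are hypothetical):
+
+```julia
+α_bareground = SpaceVaryingInput("albedo.nc", "sw_alb", surface_space)
+```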
""" -function SpaceVaryingInput( - data::PDS, - varname::N, - space::S, -) where { - PDS <: FileReader.PrescribedDataStatic, - N <: String, - S <: ClimaCore.Spaces.SpectralElementSpace2D, -} - return FileReader.get_data_at_date(data, space, varname) +function SpaceVaryingInput(data_handler::DataHandler) + return regridded_snapshot(data_handler) +end + +function SpaceVaryingInput(file_path, varname, target_space) + return SpaceVaryingInput(DataHandler(file_path, varname, target_space)) end end diff --git a/src/shared_utilities/TimeVaryingInputs.jl b/src/shared_utilities/TimeVaryingInputs.jl index 528787f91f..7ea1c21e37 100644 --- a/src/shared_utilities/TimeVaryingInputs.jl +++ b/src/shared_utilities/TimeVaryingInputs.jl @@ -19,7 +19,7 @@ # The three TimeVaryingInputs are: # - AnalyticTimeVaryingInput, # - InterpolatingTimeVaryingInput0D, -# - InterpolatingTimeVaryingInput3D. +# - InterpolatingTimeVaryingInput2D. # # Along side these TimeVaryingInputs, we also define InterpolationMethods that implement # specific interpolation strategies (e.g., linear interpolation). @@ -81,19 +81,54 @@ When passing single-site data When a `times` and `vals` are passed, `times` have to be sorted and the two arrays have to have the same length. +======= +When the input is a function, the signature of the function can be `func(time, args...; +kwargs...)`. The function will be called with the additional arguments and keyword arguments +passed to `evaluate!`. This can be used to access the state and the cache and use those to +set the output field. + +For example: +```julia +CO2fromp(time, Y, p) = p.atmos.co2 +input = TimeVaryingInput(CO2fromY) +evaluate!(dest, input, t, Y, p) +``` """ function TimeVaryingInput end """ - evaluate!(dest, input, time) + evaluate!(dest, input, time, args...; kwargs...) Evaluate the `input` at the given `time`, writing the output in-place to `dest`. Depending on the details of `input`, this function might do I/O and communication. + +Extra arguments +================ + +`args` and `kwargs` are used only when the `input` is a non-interpolating function, e.g., +an analytic one. In that case, `args` and `kwargs` are passed down to the function itself. """ function evaluate! end include("analytic_time_varying_input.jl") + +""" + NearestNeighbor + +Return the value corresponding to the point closest to the input time. +""" +struct NearestNeighbor <: AbstractInterpolationMethod end + +""" + LinearInterpolation + +Perform linear interpolation between the two neighboring points. +""" +struct LinearInterpolation <: AbstractInterpolationMethod end + include("interpolating_time_varying_input0d.jl") +include("interpolating_time_varying_input2d.jl") +include("interpolating_time_varying_inputs.jl") # Shared stuff end diff --git a/src/shared_utilities/analytic_time_varying_input.jl b/src/shared_utilities/analytic_time_varying_input.jl index acd7458d6c..228867c9fa 100644 --- a/src/shared_utilities/analytic_time_varying_input.jl +++ b/src/shared_utilities/analytic_time_varying_input.jl @@ -1,15 +1,23 @@ struct AnalyticTimeVaryingInput{F <: Function} <: AbstractTimeVaryingInput - # func here as to be GPU-compatible (e.g., splines are not) + # func here has to be GPU-compatible (e.g., splines are not) and reasonably fast (e.g., + # no large allocations) func::F end -function TimeVaryingInput(input::Function; method = nothing, device = nothing) +# _kwargs... is needed to seamlessly support the other TimeVaryingInputs. +function TimeVaryingInput(input::Function; method = nothing, _kwargs...) 
isnothing(method) ||
         @warn "Interpolation method is ignored for analytical functions"
     return AnalyticTimeVaryingInput(input)
 end
 
-function evaluate!(dest, input::AnalyticTimeVaryingInput, time)
-    dest .= input.func(time)
+function evaluate!(
+    dest,
+    input::AnalyticTimeVaryingInput,
+    time,
+    args...;
+    kwargs...,
+)
+    dest .= input.func(time, args...; kwargs...)
     return nothing
 end
diff --git a/src/shared_utilities/interpolating_time_varying_input0d.jl b/src/shared_utilities/interpolating_time_varying_input0d.jl
index 9dde4b3910..412a754f96 100644
--- a/src/shared_utilities/interpolating_time_varying_input0d.jl
+++ b/src/shared_utilities/interpolating_time_varying_input0d.jl
@@ -1,19 +1,3 @@
-import CUDA
-
-"""
-    NearestNeighbor
-
-Return the value corresponding to the point closest to the input time.
-"""
-struct NearestNeighbor <: AbstractInterpolationMethod end
-
-"""
-    LinearInterpolation
-
-Perform linear interpolation between the two neighboring points.
-"""
-struct LinearInterpolation <: AbstractInterpolationMethod end
-
 """
     InterpolatingTimeVaryingInput0D
 
@@ -67,7 +51,13 @@ function Adapt.adapt_structure(to, itp::InterpolatingTimeVaryingInput0D)
     )
 end
 
-function evaluate!(destination, itp::InterpolatingTimeVaryingInput0D, time)
+function evaluate!(
+    destination,
+    itp::InterpolatingTimeVaryingInput0D,
+    time,
+    args...;
+    kwargs...,
+)
     time in itp || error("TimeVaryingInput does not cover time $time")
     if ClimaComms.device(itp.context) isa ClimaComms.CUDADevice
         CUDA.@cuda evaluate!(parent(destination), itp, time, itp.method)
@@ -101,16 +91,6 @@ function TimeVaryingInput(
     )
 end
 
-"""
-    in(time, itp::InterpolatingTimeVaryingInput0D)
-
-Check if the given `time` is in the range of definition for `itp`.
-"""
-function Base.in(time, itp::InterpolatingTimeVaryingInput0D)
-    return itp.range[1] <= time <= itp.range[2]
-end
-
-
 function evaluate!(
     dest,
     itp::InterpolatingTimeVaryingInput0D,
diff --git a/src/shared_utilities/interpolating_time_varying_input2d.jl b/src/shared_utilities/interpolating_time_varying_input2d.jl
new file mode 100644
index 0000000000..c96f52b6bd
--- /dev/null
+++ b/src/shared_utilities/interpolating_time_varying_input2d.jl
@@ -0,0 +1,100 @@
+import ..DataHandling
+import ..DataHandling:
+    DataHandler, previous_time, next_time, regridded_snapshot, available_times
+
+"""
+    InterpolatingTimeVaryingInput2D
+
+The constructor for InterpolatingTimeVaryingInput2D is not supposed to be used directly, unless you
+know what you are doing. The constructor does not perform any checks and does not take care of
+GPU compatibility. It is the responsibility of the user-facing constructor TimeVaryingInput() to do so.
+"""
+struct InterpolatingTimeVaryingInput2D{
+    DH <: DataHandler,
+    M <: AbstractInterpolationMethod,
+    CC <: ClimaComms.AbstractCommsContext,
+    R <: Tuple,
+} <: AbstractTimeVaryingInput
+    """Object that has all the information on how to deal with files, data, and so on.
+       Since it has to deal with files, it lives on the CPU."""
+    data_handler::DH
+
+    """Interpolation method"""
+    method::M
+
+    """ClimaComms context"""
+    context::CC
+
+    """Range of times over which the interpolator is defined. range is always defined on the
+    CPU. 
Used by the in() function.""" + range::R +end + +function TimeVaryingInput( + data_handler::DataHandler; + method = LinearInterpolation(), + context = ClimaComms.context(), +) + available_times = DataHandling.available_times(data_handler) + isempty(available_times) && + error("DataHandler does not contain temporal data") + issorted(available_times) || error("Can only interpolate with sorted times") + range = (available_times[begin], available_times[end]) + return InterpolatingTimeVaryingInput2D(data_handler, method, context, range) +end + +function evaluate!( + dest, + itp::InterpolatingTimeVaryingInput2D, + time, + args...; + kwargs..., +) + time in itp || error("TimeVaryingInput does not cover time $time") + evaluate!(dest, itp, time, itp.method) + return nothing +end + +function evaluate!( + dest, + itp::InterpolatingTimeVaryingInput2D, + time, + ::NearestNeighbor, + args...; + kwargs..., +) + t0, t1 = + previous_time(itp.data_handler, time), next_time(itp.data_handler, time) + + # The closest regridded_snapshot could be either the previous or the next one + if (time - t0) <= (t1 - time) + dest .= regridded_snapshot(itp.data_handler, t0) + else + dest .= regridded_snapshot(itp.data_handler, t1) + end + return nothing +end + +function evaluate!( + dest, + itp::InterpolatingTimeVaryingInput2D, + time, + ::LinearInterpolation, + args...; + kwargs..., +) + # Linear interpolation is: + # y = y0 + (y1 - y0) * (time - t0) / (t1 - t0) + # + # Define coeff = (time - t0) / (t1 - t0) + # + # y = (1 - coeff) * y0 + coeff * y1 + + t0, t1 = + previous_time(itp.data_handler, time), next_time(itp.data_handler, time) + coeff = (time - t0) / (t1 - t0) + dest .= + (1 - coeff) .* regridded_snapshot(itp.data_handler, t0) .+ + coeff .* regridded_snapshot(itp.data_handler, t1) + return nothing +end diff --git a/src/shared_utilities/interpolating_time_varying_inputs.jl b/src/shared_utilities/interpolating_time_varying_inputs.jl index f220071d28..1c0e273166 100644 --- a/src/shared_utilities/interpolating_time_varying_inputs.jl +++ b/src/shared_utilities/interpolating_time_varying_inputs.jl @@ -2,24 +2,10 @@ InterpolatingTimeVaryingInput = Union{InterpolatingTimeVaryingInput0D, InterpolatingTimeVaryingInput2D} """ - NearestNeighbor - -Return the value corresponding to the point closest to the input time. -""" -struct NearestNeighbor <: AbstractInterpolationMethod end - -""" - LinearInterpolation - -Perform linear interpolation between the two neighboring points. -""" -struct LinearInterpolation <: AbstractInterpolationMethod end - -""" - in(time, itp::InterpolatingTimeVaryingInput0D) + in(time, itp::InterpolatingTimeVaryingInput) Check if the given `time` is in the range of definition for `itp`. 
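+
+For example, this is the guard used by `evaluate!` (error message abbreviated here):
+
+```julia
+time in itp || error("TimeVaryingInput does not cover the requested time")
+```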
""" -function Base.in(time, itp::InterpolatingTimeVaryingInput0D) +function Base.in(time, itp::InterpolatingTimeVaryingInput) return itp.range[1] <= time <= itp.range[2] end diff --git a/src/standalone/Bucket/Bucket.jl b/src/standalone/Bucket/Bucket.jl index ce3cc77e6a..64efd7ba36 100644 --- a/src/standalone/Bucket/Bucket.jl +++ b/src/standalone/Bucket/Bucket.jl @@ -11,12 +11,12 @@ using ClimaComms using ClimaLand -using ClimaLand.FileReader import ..Parameters as LP import ClimaLand.Domains: coordinates, SphericalShell using ClimaLand: AbstractAtmosphericDrivers, AbstractRadiativeDrivers, + AbstractTimeVaryingInput, liquid_precipitation, snow_precipitation, turbulent_fluxes, @@ -27,6 +27,7 @@ using ClimaLand: heaviside, PrescribedAtmosphere, add_dss_buffer_to_aux +import ClimaLand: DataHandling, TimeVaryingInputs import ClimaLand: make_update_aux, make_compute_exp_tendency, @@ -103,10 +104,9 @@ end """ PrescribedBaregroundAlbedo{FT}(α_snow::FT, - regrid_dirpath::String, surface_space::ClimaCore.Spaces.AbstractSpace; varnames = ["sw_alb"], - get_infile::Function = Bucket.bareground_albedo_dataset_path, + albedo_file_path::AbstractString = Bucket.bareground_albedo_dataset_path(), ) where{FT} An outer constructor for the PrescribedBaregroundAlbedo model which uses data @@ -116,24 +116,16 @@ This particular method can only be used with global runs. """ function PrescribedBaregroundAlbedo{FT}( α_snow::FT, - regrid_dirpath::String, surface_space::ClimaCore.Spaces.AbstractSpace; varnames = ["sw_alb"], - get_infile::Function = Bucket.bareground_albedo_dataset_path, + albedo_file_path::AbstractString = Bucket.bareground_albedo_dataset_path(), ) where {FT} if surface_space isa ClimaCore.Spaces.PointSpace error("Using an albedo map requires a global run.") end - α_bareground_data = PrescribedDataStatic{FT}( - get_infile, - regrid_dirpath, - varnames, - surface_space, - ) - # Albedo file only has one variable, so access first `varname` - varname = varnames[1] - α_bareground = SpaceVaryingInput(α_bareground_data, varname, surface_space) + α_bareground = + SpaceVaryingInput(albedo_file_path, varnames[begin], surface_space) return PrescribedBaregroundAlbedo{FT, typeof(α_bareground)}( α_snow, α_bareground, @@ -141,7 +133,7 @@ function PrescribedBaregroundAlbedo{FT}( end """ - PrescribedSurfaceAlbedo{FT, FR <: FileReader.PrescribedDataTemporal} + PrescribedSurfaceAlbedo{FT, TV <: AbstractTimeVaryingInput} <: AbstractBucketAlbedoModel An albedo model where the albedo of different surface types @@ -152,14 +144,13 @@ This albedo type changes over time according to the input file. Note that this option should only be used with global simulations, i.e. with a `ClimaLand.LSMSphericalShellDomain.` """ -struct PrescribedSurfaceAlbedo{FT, FR <: FileReader.PrescribedDataTemporal} <: +struct PrescribedSurfaceAlbedo{FT, TV <: AbstractTimeVaryingInput} <: AbstractBucketAlbedoModel{FT} - albedo_info::FR + albedo::TV end """ PrescribedSurfaceAlbedo{FT}( - regrid_dirpath::String, date_ref::Union{DateTime, DateTimeNoLeap}, t_start, Space::ClimaCore.Spaces.AbstractSpace; @@ -175,11 +166,10 @@ and download the data if it doesn't already exist on the machine. The input data file must have a time component. 
""" function PrescribedSurfaceAlbedo{FT}( - regrid_dirpath::String, date_ref::Union{DateTime, DateTimeNoLeap}, t_start, space::ClimaCore.Spaces.AbstractSpace; - get_infile = Bucket.cesm2_albedo_dataset_path, + albedo_file_path = Bucket.cesm2_albedo_dataset_path(), varname = "sw_alb", ) where {FT} # Verify inputs @@ -187,16 +177,17 @@ function PrescribedSurfaceAlbedo{FT}( error("Using an albedo map requires a global run.") end - # Construct object containing info to read in surface albedo over time - data_info = PrescribedDataTemporal{FT}( - regrid_dirpath, - get_infile, - [varname], - date_ref, + data_handler = DataHandling.DataHandler( + albedo_file_path, + varname, + space; + reference_date = date_ref, t_start, - space, ) - return PrescribedSurfaceAlbedo{FT, typeof(data_info)}(data_info) + + # Construct object containing info to read in surface albedo over time + albedo = TimeVaryingInput(data_handler) + return PrescribedSurfaceAlbedo{FT, typeof(albedo)}(albedo) end @@ -460,16 +451,24 @@ function make_update_aux(model::BucketModel{FT}) where {FT} p.bucket.R_n .= net_radiation(model.radiation, model, Y, p, t) # Surface albedo - p.bucket.α_sfc .= - next_albedo(model.parameters.albedo, model.parameters, Y, p, t) + next_albedo!( + p.bucket.α_sfc, + model.parameters.albedo, + model.parameters, + Y, + p, + t, + ) end return update_aux! end """ - next_albedo(model_albedo::PrescribedBaregroundAlbedo{FT}, parameters, Y, p, t) where {FT} + next_albedo!(next_α_sfc, + model_albedo::PrescribedBaregroundAlbedo{FT}, + parameters, Y, p, t) -Update the surface albedo for time `t`: the albedo is calculated by +Update the surface albedo for time `t`: the albedo is calculated by linearly interpolating between the albedo of snow and of the bareground surface, based on the snow water equivalent `S` relative to the parameter `S_c`. The linear interpolation is taken from Lague et al 2019. @@ -480,7 +479,8 @@ fractions that are not a heaviside function, we have a small inconsistency for 0 < σS < eps(FT) where the snow cover fraction is zero, but there is a small contribution of snow to the albedo. """ -function next_albedo( +function next_albedo!( + next_α_sfc, model_albedo::PrescribedBaregroundAlbedo{FT}, parameters, Y, @@ -490,42 +490,25 @@ function next_albedo( (; α_snow, α_bareground) = model_albedo (; σS_c) = parameters σS = max.(Y.bucket.σS, FT(0))# if σS is small and negative, clip to zero for albedo estimation - return @. ( - (1 - σS / (σS + σS_c)) * α_bareground + σS / (σS + σS_c) * α_snow - ) + @. next_α_sfc = + ((1 - σS / (σS + σS_c)) * α_bareground + σS / (σS + σS_c) * α_snow) end """ - next_albedo(model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t) where {FT} + next_albedo!(next_α_sfc, model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t) Update the surface albedo for time `t`: for a file containing surface albedo information over time, this reads in the value for time t. 
""" -function next_albedo( +function next_albedo!( + next_α_sfc, model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t, ) where {FT} - # Get the current date from `t` - sim_info = model_albedo.albedo_info.sim_info - sim_date = to_datetime( - sim_info.date_ref + Second(round(sim_info.t_start)) + Second(round(t)), - ) - # Read next data fields if initializing or next date is closest to current time - # This maintains `all_dates[date_idx]` <= `sim_date` < `all_dates[date_idx + 1]` - if t == sim_info.t_start || - sim_date >= to_datetime(next_date_in_file(model_albedo.albedo_info)) - read_data_fields!(model_albedo.albedo_info, sim_date, axes(Y.bucket.W)) - end - # Interpolate data value to current time - return get_data_at_date( - model_albedo.albedo_info, - axes(Y.bucket.W), - "sw_alb", - sim_date, - ) + TimeVaryingInputs.evaluate!(next_α_sfc, model_albedo.albedo, t) end function ClimaLand.get_drivers(model::BucketModel) diff --git a/test/runtests.jl b/test/runtests.jl index 06f322e97e..9c4f166b89 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,8 +14,11 @@ end @safetestset "Domains module tests" begin include("shared_utilities/domains.jl") end -@safetestset "FileReader module tests" begin - include("shared_utilities/file_reader.jl") +@safetestset "FileReaders module tests" begin + include("shared_utilities/file_readers.jl") +end +@safetestset "DataHandling module tests" begin + include("shared_utilities/data_handling.jl") end @safetestset "SpaceVaryingInput module tests" begin include("shared_utilities/space_varying_inputs.jl") diff --git a/test/shared_utilities/data_handling.jl b/test/shared_utilities/data_handling.jl new file mode 100644 index 0000000000..b29cedad6a --- /dev/null +++ b/test/shared_utilities/data_handling.jl @@ -0,0 +1,166 @@ +#= + Unit tests for ClimaLand DataHandling module +=# + +using Dates +using Test + +import ClimaCore: Fields +import ClimaLand +import ClimaLand: DataHandling +using NCDatasets + +include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) +temporal_data_path = era5_t2m_sp_u10n_dataset_path() +static_data_path = era5_t2m_sp_u10n_static_dataset_path() + +@testset "DataHandler, static" begin + PATH = static_data_path + varname = "sp" + for regridder_type in (:InterpolationsRegridder, :TempestRegridder) + for FT in (Float32, Float64) + target_space = + ClimaLand.Domains.SphericalShell(; + radius = FT(6731e3), + depth = FT(1.0), + nelements = (40, 40), + npolynomial = 4, + ).space.surface + + data_handler = DataHandling.DataHandler( + PATH, + varname, + target_space; + regridder_type, + ) + + @test DataHandling.available_dates(data_handler) == DateTime[] + + field = DataHandling.regridded_snapshot(data_handler) + # Most basic testing, to get a sense that we are doing things right + @test field isa Fields.Field + @test axes(field) == target_space + @test eltype(field) == FT + @test minimum(field) >= + minimum(data_handler.file_reader.dataset[varname][:, :]) + @test maximum(field) <= + maximum(data_handler.file_reader.dataset[varname][:, :]) + + close(data_handler) + end + end +end + +@testset "DataHandler, TempestRegridder, time data" begin + PATH = temporal_data_path + varname = "sp" + for regridder_type in (:InterpolationsRegridder, :TempestRegridder) + for FT in (Float32, Float64) + target_space = + ClimaLand.Domains.SphericalShell(; + radius = FT(6731e3), + depth = FT(1.0), + nelements = (4, 4), + npolynomial = 4, + ).space.surface + data_handler = DataHandling.DataHandler( + PATH, + varname, + target_space, + 
reference_date = Dates.DateTime(2000, 1, 1), + t_start = 0.0; + regridder_type, + ) + + @test DataHandling.available_dates(data_handler) == + data_handler.file_reader.available_dates + @test data_handler.reference_date .+ + Second.(DataHandling.available_times(data_handler)) == + DataHandling.available_dates(data_handler) + + available_times = DataHandling.available_times(data_handler) + available_dates = DataHandling.available_dates(data_handler) + + # Previous time with time + @test DataHandling.previous_time( + data_handler, + available_times[10] + 1, + ) == available_times[10] + @test DataHandling.previous_time( + data_handler, + available_times[1] + 1, + ) == available_times[1] + # Previous time with date + @test DataHandling.previous_time( + data_handler, + available_dates[10] + Second(1), + ) == available_times[10] + @test DataHandling.previous_time( + data_handler, + available_dates[10], + ) == available_times[10] + + # On node + @test DataHandling.previous_time( + data_handler, + available_times[10], + ) == available_times[10] + @test DataHandling.previous_time( + data_handler, + available_times[1], + ) == available_times[1] + @test DataHandling.previous_time( + data_handler, + available_dates[10], + ) == available_times[10] + + # Next time with time + @test DataHandling.next_time( + data_handler, + available_times[10] + 1, + ) == available_times[11] + @test DataHandling.next_time( + data_handler, + available_times[1] + 1, + ) == available_times[2] + # Next time with date + @test DataHandling.next_time( + data_handler, + available_dates[10] + Second(1), + ) == available_times[11] + + # On node + @test DataHandling.next_time(data_handler, available_times[10]) == + available_times[11] + @test DataHandling.next_time(data_handler, available_dates[10]) == + available_times[11] + + # Asking for a regridded_snapshot without specifying the time + @test_throws ErrorException DataHandling.regridded_snapshot( + data_handler, + ) + + # Asking for a regridded_snapshot with time that does not exist + @test_throws ErrorException DataHandling.regridded_snapshot( + data_handler, + -1234.0, + ) + + field = DataHandling.regridded_snapshot( + data_handler, + available_times[10], + ) + + # Most basic testing, to get a sense that we are doing things right + @test field isa Fields.Field + @test axes(field) == target_space + @test eltype(field) == FT + @test minimum(field) >= + minimum(data_handler.file_reader.dataset[varname][:, :, 10]) + @test maximum(field) <= + maximum(data_handler.file_reader.dataset[varname][:, :, 10]) + + close(data_handler) + end + end +end diff --git a/test/shared_utilities/file_reader.jl b/test/shared_utilities/file_reader.jl deleted file mode 100644 index 0960f9fdab..0000000000 --- a/test/shared_utilities/file_reader.jl +++ /dev/null @@ -1,716 +0,0 @@ -#= - Unit tests for ClimaLand FileReader module -=# - -using ClimaLand -using ClimaLand: Bucket, Regridder, FileReader -using ClimaCore -using ClimaCore: Fields, Meshes, Domains, Topologies, Spaces -using ClimaComms -using Test -using Dates -using NCDatasets - -# Include testing artifacts from Bucket -albedo_temporal_data = Bucket.cesm2_albedo_dataset_path -albedo_bareground_data = Bucket.bareground_albedo_dataset_path - -# Include testing artifacts from test directory -include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) -era5_t2m_sp_u10n_data = era5_t2m_sp_u10n_dataset_path -era5_t2m_sp_u10n_static_data = era5_t2m_sp_u10n_static_dataset_path - -comms_ctx = ClimaComms.SingletonCommsContext() - -# Use two 
separate regrid dirs to avoid duplicate filenames since files have same varname -regrid_dir_static = joinpath(pkgdir(ClimaLand), "test", "regridding") -regrid_dir_temporal = - joinpath(pkgdir(ClimaLand), "test", "regridding", "temporal") - -""" -Set NaN and Missing to zero (GPU compatible) -""" -function replace_nan_missing!(field::Fields.Field) - # For GPU runs, we perform the substitution on the CPU and move back to GPU - parent_without_NaN_missing = - replace(Array(parent(field)), NaN => 0, missing => 0) - ArrayType = ClimaComms.array_type(ClimaComms.device()) - parent(field) .= ArrayType(parent_without_NaN_missing) -end - -FT = Float32 -@testset "test interpol, FT = $FT" begin - # Setup - x1 = FT(0) - x2 = FT(10) - - f1 = FT(-1) - f2 = FT(1) - - # Case 1: x1 < x < x2 - x = FT(5) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == 0 - - # Case 2: x1 = x < x2 - x = FT(0) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == f1 - - # Case 3: x1 < x = x2 - x = FT(10) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == f2 -end - -@testset "test to_datetime, FT = $FT" begin - year = 2000 - dt_noleap = DateTimeNoLeap(year) - @test FileReader.to_datetime(dt_noleap) == DateTime(year) -end - -# Add tests which use TempestRemap here - -# TempestRemap is not built on Windows because of NetCDF support limitations -# `PrescribedData` constructors use TR via a call to `hdwrite_regridfile_rll_to_cgll` -if !Sys.iswindows() - @testset "test PrescribedDataStatic construction, FT = $FT" begin - # setup for test - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - # Loop over files with one and multiple variables - for (get_infile, varnames) in [ - (albedo_bareground_data, ["sw_alb"]), - (era5_t2m_sp_u10n_static_data, ["t2m", "sp", "u10n"]), - ] - # reset regrid directory between files - isdir(regrid_dir_static) ? nothing : mkpath(regrid_dir_static) - - ps_data_spatial = FileReader.PrescribedDataStatic{FT}( - get_infile, - regrid_dir_static, - varnames, - surface_space_t, - ) - - # test that created object exists - @test @isdefined(ps_data_spatial) - @test ps_data_spatial isa FileReader.AbstractPrescribedData - - # test fields that we've passed into constructor as args - @test ps_data_spatial.file_info.infile_path == get_infile() - @test ps_data_spatial.file_info.regrid_dirpath == regrid_dir_static - @test ps_data_spatial.file_info.varnames == varnames - - # test fields which are set internally by constructor - @test ps_data_spatial.file_info.outfile_root == "static_data_cgll" - @test ps_data_spatial.file_info.all_dates == [] - @test ps_data_spatial.file_info.date_idx0 == [] - rm(regrid_dir_static; recursive = true, force = true) - end - end - - @testset "test get_data_at_date for PrescribedDataStatic, FT = $FT" begin - isdir(regrid_dir_static) ? 
nothing : mkpath(regrid_dir_static) - # test `get_data_at_date` with interpolation (default) - dummy_dates = [DateTime(1999, 1, 1)] - date_idx0 = Int[1] - - # construct space and dummy field - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain_sphere = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain_sphere, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - data_field = ones(surface_space_t) .* FT(0.5) - - # Add 2 variables to `FileInfo`, but only read/write `var1` - varnames = ["var1", "var2"] - outfile_root = "static_data_cgll" - - # write data to dummy HDF5 file, which gets read in by `get_data_at_date` - field = Regridder.write_to_hdf5( - regrid_dir_static, - outfile_root, - Dates.DateTime(0), # dummy date - data_field, - varnames[1], - comms_ctx, - ) - - # create structs manually since we aren't reading from a data file - file_info = FileReader.FileInfo( - "", # infile_path - regrid_dir_static, # regrid_dirpath - varnames, # varnames - outfile_root, # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - - prescribed_data_static = - FileReader.PrescribedDataStatic{typeof(file_info)}(file_info) - - # Read in dummy data that has been written by `write_to_hdf5` - field_out = FileReader.get_data_at_date( - prescribed_data_static, - surface_space_t, - varnames[1], - ) - @test field_out == data_field - rm(regrid_dir_static; recursive = true, force = true) - end - - @testset "test PrescribedDataTemporal construction, FT = $FT" begin - # setup for test - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - date_idx0 = Int[1] - date_ref = DateTime(1800, 1, 1) - t_start = Float64(0) - - # Loop over files with one and multiple variables - for (get_infile, varnames) in [ - (albedo_temporal_data, ["sw_alb"]), - (era5_t2m_sp_u10n_data, ["t2m", "sp", "u10n"]), - ] - # reset regrid directory between files - isdir(regrid_dir_temporal) ? 
nothing : mkpath(regrid_dir_temporal) - - ps_data_temp = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # test that created object exists - @test @isdefined(ps_data_temp) - @test ps_data_temp isa FileReader.AbstractPrescribedData - - # test fields that we've passed into constructor as args - @test ps_data_temp.file_info.regrid_dirpath == regrid_dir_temporal - @test ps_data_temp.file_info.varnames == varnames - @test ps_data_temp.file_info.date_idx0 == date_idx0 - @test ps_data_temp.sim_info.date_ref == date_ref - @test ps_data_temp.sim_info.t_start == t_start - - # test fields which are set internally by constructor - @test ps_data_temp.file_info.outfile_root == "temporal_data_cgll" - @test !isnothing(ps_data_temp.file_info.all_dates) - @test sort(collect(keys(ps_data_temp.file_states))) == - sort(varnames) - - # check varnames individually - for varname in varnames - @test ps_data_temp.file_states[varname].date_idx == date_idx0 - @test ps_data_temp.file_states[varname].date_idx == date_idx0 - @test !isnothing(ps_data_temp.file_states[varname].data_fields) - @test !isnothing( - ps_data_temp.file_states[varname].segment_length, - ) - end - rm(regrid_dir_temporal; recursive = true, force = true) - end - end - - @testset "test get_data_at_date for PrescribedDataTemporal, FT = $FT" begin - # test `get_data_at_date` with interpolation (default) - dummy_dates = - Vector(range(DateTime(1999, 1, 1); step = Day(1), length = 100)) - date_idx0 = Int[1] - date_ref = dummy_dates[Int(date_idx0[1]) + 1] - t_start = Float64(0) - date0 = date_ref + Dates.Second(t_start) - - # these values give an `interp_fraction` of 0.5 in `interpol` for ease of testing - segment_length = - [Int(2) * ((date_ref - dummy_dates[Int(date_idx0[1])]).value)] - - # construct space and dummy field - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain_sphere = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain_sphere, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - data_fields = (zeros(surface_space_t), ones(surface_space_t)) - - # Add 2 variables to `FileInfo`, but only read/write `var1` - varnames = ["var1", "var2"] - - # create structs manually since we aren't reading from a data file - file_info = FileReader.FileInfo( - "", # infile_path - "", # regrid_dirpath - varnames, # varnames - "", # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - file_state1 = FileReader.FileState( - data_fields, # data_fields - copy(date_idx0), # date_idx - segment_length, # segment_length - ) - file_state2 = FileReader.FileState( - data_fields, # data_fields - copy(date_idx0), # date_idx - segment_length, # segment_length - ) - file_states = - Dict(varnames[1] => file_state1, varnames[2] => file_state2) - sim_info = FileReader.SimInfo( - date_ref, # date_ref - t_start, # t_start - ) - - pd_args = (file_info, file_states, sim_info) - pd_type_args = ( - typeof(file_info), - typeof(varnames[1]), - typeof(file_states[varnames[1]]), - typeof(sim_info), - ) - prescribed_data = - FileReader.PrescribedDataTemporal{pd_type_args...}(pd_args...) - - # Note: we expect this to give a warning "No dates available in file..." 
- @test FileReader.get_data_at_date( - prescribed_data, - surface_space_t, - varnames[1], - date0, - ) == ones(surface_space_t) .* FT(0.5) - end - - @testset "test next_date_in_file with PrescribedDataTemporal, FT = $FT" begin - dummy_dates = - Vector(range(DateTime(1999, 1, 1); step = Day(1), length = 10)) - date_ref = dummy_dates[1] - t_start = Float64(0) - date0 = date_ref + Dates.Second(t_start) - date_idx0 = - [argmin(abs.(Dates.value(date0) .- Dates.value.(dummy_dates[:])))] - varname = "var" - - # manually create structs to avoid creating space for outer constructor - file_info = FileReader.FileInfo( - "", # infile_path - "", # regrid_dirpath - [varname], # varnames - "", # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - file_state = FileReader.FileState( - nothing, # data_fields - copy(date_idx0), # date_idx - Int[], # segment_length - ) - file_states = Dict(varname => file_state) - sim_info = nothing - - pd_args = (file_info, file_states, sim_info) - pd_type_args = ( - typeof(file_info), - typeof(varname), - typeof(file_states[varname]), - typeof(sim_info), - ) - prescribed_data = - FileReader.PrescribedDataTemporal{pd_type_args...}(pd_args...) - - # read in next dates, manually compare to `dummy_dates` array - idx = date_idx0[1] - next_date = date0 - for i in 1:(length(dummy_dates) - 1) - current_date = next_date - next_date = FileReader.next_date_in_file(prescribed_data) - - @test next_date == dummy_dates[idx + 1] - - prescribed_data.file_states[varname].date_idx[1] += 1 - idx = date_idx0[1] + i - end - end - - @testset "test read_data_fields! for single variable, FT = $FT" begin - # Create regridding directory - isdir(regrid_dir_temporal) ? nothing : mkpath(regrid_dir_temporal) - - get_infile = albedo_temporal_data - infile_path = get_infile() - varname = "sw_alb" - varnames = [varname] - - # Start with first date in data file - date0 = NCDataset(infile_path) do ds - ds["time"][1] - end - date0 = FileReader.to_datetime(date0) - dates = collect(date0:Day(10):(date0 + Day(100))) # includes both endpoints - date_ref = date0 - t_start = Float64(0) - - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - prescribed_data = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # Test each case (1-4) - current_fields = - Fields.zeros(FT, surface_space_t), Fields.zeros(FT, surface_space_t) - - # Use this function to reset values between test cases - function reset_ps_data(ps_data) - ps_data.file_states[varname].data_fields[1] .= current_fields[1] - ps_data.file_states[varname].data_fields[2] .= current_fields[2] - ps_data.file_states[varname].segment_length[1] = - ps_data.file_states[varname].date_idx[1] = - ps_data.file_info.date_idx0[1] = Int(1) - end - - # Case 1: test date before first date of file, and date aligned with first date of file - for date in [date0 - Day(1), date0] - reset_ps_data(prescribed_data) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!(prescribed_data, date, surface_space_t) - - # Test that both data fields store the same data - # Remove NaNs and missings before comparison - (; data_fields) = prescribed_data.file_states[varname] - - 
foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - # date_idx and date_idx0 should be unchanged - @test prescribed_data.file_states[varname].segment_length == Int[0] - @test prescribed_data.file_states[varname].date_idx[1] == - prescribed_data_copy.file_states[varname].date_idx[1] - @test prescribed_data.file_info.date_idx0[1] == - prescribed_data_copy.file_info.date_idx0[1] - end - - # Case 2: test date after dates in file - # This case should print 1 warning "this time period is after input data..." - reset_ps_data(prescribed_data) - date_after = prescribed_data.file_info.all_dates[end] + Dates.Day(1) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!( - prescribed_data, - date_after, - surface_space_t, - ) - - # Test that both data fields store the same data - @test prescribed_data.file_states[varname].segment_length == Int[0] - - # Remove NaNs and missings before comparison - (; data_fields) = prescribed_data.file_states[varname] - foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - - # Case 3: loop over simulation dates (general case) - # This case should print 3 warnings "updating data files: ..." - data_saved = [] - updating_dates = [] - - # Read in data fields over dummy times - # With the current test setup, read_data_fields! should get called 3 times - for date in dates - callback_date = FileReader.next_date_in_file(prescribed_data) - - if (date >= callback_date) - FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - push!( - data_saved, - deepcopy( - prescribed_data.file_states[varname].data_fields[1], - ), - ) - push!(updating_dates, deepcopy(date)) - end - end - - # Replace NaNs and missings for testing - (; data_fields) = prescribed_data.file_states[varname] - foreach(replace_nan_missing!, data_saved) - - # Manually read in data from HDF5 - f = prescribed_data.file_states[varname].data_fields[1] - data_manual = [similar(f), similar(f), similar(f)] - (; regrid_dirpath, outfile_root, varnames, all_dates) = - prescribed_data.file_info - for i in eachindex(data_saved) - data_manual[i] = Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[i + 1], - varnames[1], - surface_space_t, - ) - # Replace NaNs and missings for testing comparison - replace_nan_missing!(data_manual[i]) - - @test parent(data_saved[i]) == parent(data_manual[i]) - end - - # Test that the data_saved array was modified - @test length(data_saved) > 0 - # Check that the final saved date is as expected (midmonth day of last month) - midmonth_end = - DateTime(year(dates[end]), month(dates[end]), 15, hour(dates[end])) - @test updating_dates[end] == midmonth_end - - # Case 4: everything else - reset_ps_data(prescribed_data) - prescribed_data.file_states[varname].date_idx[1] = - prescribed_data.file_info.date_idx0[1] + Int(1) - date = - prescribed_data.file_info.all_dates[prescribed_data.file_states[varname].date_idx[1]] - - Dates.Day(1) - - @test_throws ErrorException FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - # Delete regridding directory and files - rm(regrid_dir_temporal; recursive = true, force = true) - end - - @testset "test read_data_fields! for multiple variables, FT = $FT" begin - # Create regridding directory - isdir(regrid_dir_temporal) ? 
nothing : mkpath(regrid_dir_temporal) - - get_infile = era5_t2m_sp_u10n_data - infile_path = get_infile() - varnames = ["t2m", "sp", "u10n"] - - # Start with first date in data file - date0 = NCDataset(infile_path) do ds - ds["time"][1] - end - date0 = FileReader.to_datetime(date0) - # Gives dates [00:45, 01:30, 02:15, 03:00, 03:45, 04:30, 05:15, 06:00, 06:45, 07:30] - dates = collect(date0:Minute(45):(date0 + Minute(450))) # includes both endpoints - date_ref = date0 - t_start = Float64(0) - - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - prescribed_data = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # Test each case (1-4) - current_fields = - Fields.zeros(FT, surface_space_t), Fields.zeros(FT, surface_space_t) - - # Use this function to reset values between test cases - function reset_ps_data(ps_data) - for varname in varnames - ps_data.file_states[varname].data_fields[1] .= current_fields[1] - ps_data.file_states[varname].data_fields[2] .= current_fields[2] - ps_data.file_states[varname].segment_length[1] = - ps_data.file_states[varname].date_idx[1] = - ps_data.file_info.date_idx0[1] = Int(1) - end - end - - # Case 1: test date before first date of file, and date aligned with first date of file - for date in [date0 - Day(1), date0] - reset_ps_data(prescribed_data) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!(prescribed_data, date, surface_space_t) - - # Test that both data fields store the same data - # Remove NaNs and missings before comparison - for varname in varnames - (; data_fields) = prescribed_data.file_states[varname] - - foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - # date_idx and date_idx0 should be unchanged - @test prescribed_data.file_states[varname].segment_length == - Int[0] - @test prescribed_data.file_states[varname].date_idx[1] == - prescribed_data_copy.file_states[varname].date_idx[1] - @test prescribed_data.file_info.date_idx0[1] == - prescribed_data_copy.file_info.date_idx0[1] - end - end - - # Case 2: test date after dates in file - # This case should print 1 warning "this time period is after input data..." - reset_ps_data(prescribed_data) - date_after = prescribed_data.file_info.all_dates[end] + Dates.Day(1) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!( - prescribed_data, - date_after, - surface_space_t, - ) - - for varname in varnames - # Test that both data fields store the same data - @test prescribed_data.file_states[varname].segment_length == Int[0] - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - end - - # Case 3: loop over simulation dates (general case) - # This case should print 3 warnings "updating data files: ..." - reset_ps_data(prescribed_data) - - # Read in data fields over dummy times - # With the current test setup, read_data_fields! 
should get called 3 times - data_saved = Dict{String, Vector{Fields.Field}}( - varnames[1] => [], - varnames[2] => [], - varnames[3] => [], - ) - updating_dates = [] - for date in dates - callback_date = FileReader.next_date_in_file(prescribed_data) - - if (date >= callback_date) - FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - for varname in varnames - push!( - data_saved[varname], - deepcopy( - prescribed_data.file_states[varname].data_fields[1], - ), - ) - # Replace NaNs and missings for testing - replace_nan_missing!(data_saved[varname][end]) - end - - push!(updating_dates, callback_date) - end - end - - # Manually read in data from HDF5 - data_manual = Dict{String, Vector{Fields.Field}}( - varnames[1] => [], - varnames[2] => [], - varnames[3] => [], - ) - (; regrid_dirpath, outfile_root, varnames, all_dates) = - prescribed_data.file_info - - for i in eachindex(updating_dates) - for varname in varnames - push!( - data_manual[varname], - Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - updating_dates[i], - varname, - surface_space_t, - ), - ) - # Replace NaNs and missings for testing comparison - replace_nan_missing!(data_manual[varname][end]) - end - end - - # Test read_data_fields and manually read data are the same - for varname in varnames - @test all( - parent(data_saved[varname]) .== parent(data_manual[varname]), - ) - end - - # Test that the data_saved array was modified - @test length(data_saved) > 0 - # Check that the final saved date is as expected (midmonth day of last month) - midmonth_end = DateTime( - year(dates[end]), - month(dates[end]), - day(dates[end]), - hour(dates[end]), - ) - @test updating_dates[end] == midmonth_end - - # Case 4: everything else - reset_ps_data(prescribed_data) - for varname in varnames - prescribed_data.file_states[varname].date_idx[1] = - prescribed_data.file_info.date_idx0[1] + Int(1) - date = - prescribed_data.file_info.all_dates[prescribed_data.file_states[varname].date_idx[1]] - - Dates.Day(1) - - @test_throws ErrorException FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - end - # Delete regridding directory and files - rm(regrid_dir_temporal; recursive = true, force = true) - end -end - -# Delete testing directory and files -rm(regrid_dir_static; recursive = true, force = true) -rm(regrid_dir_temporal; recursive = true, force = true) diff --git a/test/shared_utilities/file_readers.jl b/test/shared_utilities/file_readers.jl new file mode 100644 index 0000000000..06cb717912 --- /dev/null +++ b/test/shared_utilities/file_readers.jl @@ -0,0 +1,73 @@ +#= + Unit tests for ClimaLand FileReaders module +=# + +using Dates +using Test + +import ClimaLand +import ClimaLand: FileReaders +using NCDatasets + +# Include testing artifacts from Bucket +albedo_temporal_path = ClimaLand.Bucket.cesm2_albedo_dataset_path() +albedo_bareground_path = ClimaLand.Bucket.bareground_albedo_dataset_path() +include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) +multiple_variables_path = era5_t2m_sp_u10n_dataset_path() + +@testset "NCFileReader with time" begin + PATH = albedo_temporal_path + NCDataset(PATH) do nc + available_dates = FileReaders.read_available_dates(nc) + @test length(available_dates) == 1980 + @test available_dates[2] == DateTime(1850, 02, 14) + + ncreader = FileReaders.NCFileReader(PATH, "sw_alb") + + @test ncreader.dimensions[1] == nc["lon"][:] + @test ncreader.dimensions[2] == nc["lat"][:] + + @test FileReaders.read(ncreader, DateTime(1850, 02, 14)) 
==
+        nc["sw_alb"][:, :, 2]
+
+        # Read it a second time to check that the cache works
+        @test FileReaders.read(ncreader, DateTime(1850, 02, 14)) ==
+              nc["sw_alb"][:, :, 2]
+
+        close(ncreader)
+
+        @test isempty(FileReaders.OPEN_NCFILES)
+    end
+
+    PATH = multiple_variables_path
+    NCDataset(PATH) do nc
+        ncreader_sp = FileReaders.NCFileReader(PATH, "sp")
+        ncreader_u = FileReaders.NCFileReader(PATH, "u10n")
+
+        # Test that the underlying dataset is the same
+        @test ncreader_u.dataset === ncreader_sp.dataset
+
+        # Test that we need to close all the variables to close the file
+        close(ncreader_sp)
+        @test !isempty(FileReaders.OPEN_NCFILES)
+        close(ncreader_u)
+        @test isempty(FileReaders.OPEN_NCFILES)
+    end
+end
+
+@testset "NCFileReader without time" begin
+    PATH = albedo_bareground_path
+    NCDataset(PATH) do nc
+        available_dates = FileReaders.read_available_dates(nc)
+        @test isempty(available_dates)
+
+        ncreader = FileReaders.NCFileReader(PATH, "sw_alb")
+
+        @test ncreader.dimensions[1] == nc["lon"][:]
+        @test ncreader.dimensions[2] == nc["lat"][:]
+
+        @test FileReaders.read(ncreader) == nc["sw_alb"][:, :]
+
+        close(ncreader)
+    end
+end
diff --git a/test/shared_utilities/regridders.jl b/test/shared_utilities/regridders.jl
new file mode 100644
index 0000000000..c18b71586c
--- /dev/null
+++ b/test/shared_utilities/regridders.jl
@@ -0,0 +1,53 @@
+using Test
+
+import ClimaCore
+import ClimaLand
+import ClimaLand: Regridders
+
+@testset "InterpolationsRegridder" begin
+
+    lon, lat = collect(0.0:1:360), collect(-90.0:1:90)
+    dimensions = (lon, lat)
+    sized = (361, 181)
+    data_lat = zeros(sized)
+    data_lon = zeros(sized)
+    for i in 1:length(lon)
+        data_lat[i, :] .= lat
+    end
+    for i in 1:length(lat)
+        data_lon[:, i] .= lon
+    end
+
+    for FT in (Float32, Float64)
+        target_space =
+            ClimaLand.Domains.SphericalShell(;
+                radius = FT(6731e3),
+                depth = FT(1.0),
+                nelements = (4, 4),
+                npolynomial = 4,
+            ).space.surface
+
+        reg = Regridders.InterpolationsRegridder(target_space)
+
+        regridded_lat = Regridders.regrid(reg, data_lat, dimensions)
+        regridded_lon = Regridders.regrid(reg, data_lon, dimensions)
+
+        coordinates = ClimaCore.Fields.coordinate_field(target_space)
+
+        # Compute the maximum absolute error
+        err_lat = coordinates.lat .- regridded_lat
+        err_lon = coordinates.long .- regridded_lon
+
+        @test maximum(abs.(err_lat)) < 1e-5
+        @test maximum(abs.(err_lon)) < 1e-5
+    end
+
+    @test_throws ErrorException Regridders.InterpolationsRegridder(
+        ClimaLand.Domains.HybridBox(;
+            xlim = (-1.0, 1.0),
+            ylim = (-1.0, 1.0),
+            zlim = (-1.0, 1.0),
+            nelements = (3, 3, 3),
+            npolynomial = 0,
+        ).space.surface,
+    )
+end
diff --git a/test/shared_utilities/time_varying_inputs.jl b/test/shared_utilities/time_varying_inputs.jl
index 973cc51fd3..f798808e56 100644
--- a/test/shared_utilities/time_varying_inputs.jl
+++ b/test/shared_utilities/time_varying_inputs.jl
@@ -1,5 +1,8 @@
 using Test
+using Dates
+
 import ClimaLand
+import ClimaLand: DataHandling
 import ClimaLand: TimeVaryingInputs
 
 import ClimaCore: Domains, Geometry, Fields, Meshes, Topologies, Spaces
@@ -28,9 +31,16 @@ context = ClimaComms.SingletonCommsContext(device)
     TimeVaryingInputs.evaluate!(column_field, input, 10.0)
 
     @test Array(parent(column_field))[1] == fun(10.0)
+
+    # Check with args and kwargs
+    fun2 = (x, y; z) -> 2x * y * z
+    input2 = TimeVaryingInputs.TimeVaryingInput(fun2)
+
+    TimeVaryingInputs.evaluate!(column_field, input2, 10.0, 20.0; z = 30.0)
+    @test Array(parent(column_field))[1] == fun2(10.0, 20.0; z = 30.0)
 end
 
-@testset "Temporal TimeVaryingInput 1D" begin
+@testset 
"InteprolatingTimeVaryingInput0D" begin # Check times not sorted xs = [1.0, 0.0] ys = [1.0, 2.0] @@ -102,3 +112,106 @@ end end end end + +@testset "InteprolatingTimeVaryingInput2D" begin + PATH = ClimaLand.Bucket.cesm2_albedo_dataset_path() + for FT in (Float32, Float64) + target_space = + ClimaLand.Domains.SphericalShell(; + radius = FT(6731e3), + depth = FT(1.0), + nelements = (4, 4), + npolynomial = 4, + ).space.surface + data_handler = DataHandling.DataHandler( + PATH, + "sw_alb", + target_space, + reference_date = Dates.DateTime(2000, 1, 1), + t_start = 0.0, + ) + + input_nearest = TimeVaryingInputs.TimeVaryingInput( + data_handler, + method = TimeVaryingInputs.NearestNeighbor(), + ) + + @test 0.0 in input_nearest + @test !(1e23 in input_nearest) + + dest = Fields.zeros(target_space) + + available_times = DataHandling.available_times(data_handler) + + # We are testing NearestNeighbor, so we can just have to check if the fields agree + + # Left nearest point + target_time = available_times[10] + 1 + TimeVaryingInputs.evaluate!(dest, input_nearest, target_time) + + # We use isequal to handle NaNs + @test isequal( + Array(parent(dest)), + Array( + parent( + DataHandling.regridded_snapshot( + data_handler, + available_times[10], + ), + ), + ), + ) + + # Right nearest point + target_time = available_times[9] - 1 + TimeVaryingInputs.evaluate!(dest, input_nearest, target_time) + + @test isequal( + Array(parent(dest)), + Array( + parent( + DataHandling.regridded_snapshot( + data_handler, + available_times[9], + ), + ), + ), + ) + + # On node + target_time = available_times[11] + TimeVaryingInputs.evaluate!(dest, input_nearest, target_time) + + @test isequal( + parent(dest), + parent( + DataHandling.regridded_snapshot( + data_handler, + available_times[11], + ), + ), + ) + + # Now testing LinearInterpolation + input_linear = TimeVaryingInputs.TimeVaryingInput(data_handler) + + left_value = + DataHandling.regridded_snapshot(data_handler, available_times[10]) + right_value = + DataHandling.regridded_snapshot(data_handler, available_times[11]) + + target_time = available_times[10] + 30 + left_time = available_times[10] + right_time = available_times[11] + + TimeVaryingInputs.evaluate!(dest, input_linear, target_time) + + expected = Fields.zeros(target_space) + expected .= + left_value .+ + (target_time - left_time) / (right_time - left_time) .* + (right_value .- left_value) + + @test isapprox(parent(dest), parent(expected), nans = true) + end +end diff --git a/test/standalone/Bucket/albedo_types.jl b/test/standalone/Bucket/albedo_types.jl index 93a54d18a1..cdc3b5580f 100644 --- a/test/standalone/Bucket/albedo_types.jl +++ b/test/standalone/Bucket/albedo_types.jl @@ -7,8 +7,7 @@ import ClimaParams as CP using Dates using NCDatasets -using ClimaLand.Regridder: read_from_hdf5 -using ClimaLand.FileReader: next_date_in_file, to_datetime, get_data_at_date +using ClimaLand.Regridders: read_from_hdf5 using ClimaLand.Bucket: BucketModel, BucketModelParameters, @@ -16,7 +15,7 @@ using ClimaLand.Bucket: PrescribedSurfaceAlbedo, bareground_albedo_dataset_path, cesm2_albedo_dataset_path, - next_albedo + next_albedo! 
 using ClimaLand.Domains: coordinates, Column, SphericalShell
 using ClimaLand:
@@ -24,7 +23,11 @@ using ClimaLand:
     make_set_initial_cache,
     PrescribedAtmosphere,
     PrescribedRadiativeFluxes,
-    TimeVaryingInput
+    DataHandling,
+    Regridders,
+    TimeVaryingInput,
+    SpaceVaryingInput
+
 # Bucket model parameters
 import ClimaLand
@@ -55,11 +58,6 @@ function replace_nan_missing!(field::Fields.Field)
 end
 
 FT = Float32
-# Use two separate regrid dirs to avoid duplicate filenames since files have same varname
-regrid_dir_static = joinpath(pkgdir(ClimaLand), "test", "static")
-regrid_dir_temporal = joinpath(pkgdir(ClimaLand), "test", "temporal")
-isdir(regrid_dir_static) ? nothing : mkpath(regrid_dir_static)
-isdir(regrid_dir_temporal) ? nothing : mkpath(regrid_dir_temporal)
 
 @testset "Test next_albedo for PrescribedBaregroundAlbedo, from a function, FT = $FT" begin
     # set up each argument for function call
@@ -88,7 +86,9 @@ isdir(regrid_dir_temporal) ? nothing : mkpath(regrid_dir_temporal)
         Y_σS / (Y_σS + param_σS_c) * a_α_snow
     )
 
-    @test next_albedo(albedo, parameters, Y, p, FT(0)) == next_alb_manual
+    next_albedo!(p.bucket.α_sfc, albedo, parameters, Y, p, FT(0))
+
+    @test p.bucket.α_sfc == next_alb_manual
 end
 
 # Add tests which use TempestRemap here
-
@@ -104,8 +104,7 @@ if !Sys.iswindows()
         # set up each argument for function call
         α_snow = FT(0.8)
-        albedo =
-            PrescribedBaregroundAlbedo{FT}(α_snow, regrid_dir_static, space)
+        albedo = PrescribedBaregroundAlbedo{FT}(α_snow, space)
 
         σS_c = FT(0.2)
         parameters = (; σS_c = σS_c)
@@ -126,7 +125,8 @@ if !Sys.iswindows()
            Y_σS / (Y_σS + param_σS_c) * a_α_snow
        )
 
-        @test next_albedo(albedo, parameters, Y, p, FT(0)) == next_alb_manual
+        next_albedo!(p.bucket.α_sfc, albedo, parameters, Y, p, FT(0))
+        @test p.bucket.α_sfc == next_alb_manual
     end
 
     @testset "Test next_albedo for PrescribedSurfaceAlbedo, FT = $FT" begin
@@ -136,24 +136,20 @@ if !Sys.iswindows()
         surface_coords = Fields.coordinate_field(space)
 
         infile_path = cesm2_albedo_dataset_path()
-        date_ref = to_datetime(NCDataset(infile_path, "r") do ds
+        date_ref_noleap = NCDataset(infile_path, "r") do ds
             ds["time"][1]
-        end)
+        end
+        date_ref = CFTime.reinterpret(DateTime, date_ref_noleap)
         t_start = Float64(0)
 
-        albedo = PrescribedSurfaceAlbedo{FT}(
-            regrid_dir_temporal,
-            date_ref,
-            t_start,
-            space,
-        )
+        albedo = PrescribedSurfaceAlbedo{FT}(date_ref, t_start, space)
 
         Y = (; bucket = (; W = Fields.zeros(space)))
         p = (; bucket = (; α_sfc = Fields.zeros(space)))
 
         # set up for manual data reading
         varname = "sw_alb"
-        file_dates = albedo.albedo_info.file_info.all_dates
+        file_dates = DataHandling.available_dates(albedo.albedo.data_handler)
 
         new_date = date_ref + Second(t_start)
         t_curr = t_start
@@ -161,12 +157,13 @@ if !Sys.iswindows()
             @assert new_date == file_dates[i]
 
             # manually read in data at this date (not testing interpolation)
-            field =
-                get_data_at_date(albedo.albedo_info, space, varname, new_date)
-            albedo_next = next_albedo(albedo, (;), Y, p, t_curr)
-            replace_nan_missing!(albedo_next)
-            replace_nan_missing!(field)
-            @test albedo_next == field
+            field = DataHandling.regridded_snapshot(
+                albedo.albedo.data_handler,
+                new_date,
+            )
+
+            next_albedo!(p.bucket.α_sfc, albedo, (;), Y, p, t_curr)
+            @test p.bucket.α_sfc == field
 
             # Update manual date to match next date in file
             dt = Second(file_dates[i + 1] - file_dates[i])
@@ -179,8 +176,6 @@ if !Sys.iswindows()
         earth_param_set = LP.LandParameters(FT)
         varname = "sw_alb"
         path = bareground_albedo_dataset_path()
-        regrid_dirpath = joinpath(pkgdir(ClimaLand), "test/albedo_tmpfiles/")
-        mkpath(regrid_dirpath)
         α_snow = FT(0.8)
         σS_c = FT(0.2)
         W_f = FT(0.15)
@@ -203,14 +198,10 @@ if !Sys.iswindows()
         for bucket_domain in bucket_domains
             if bucket_domain isa SphericalShell
                 surface_space = bucket_domain.space.surface
-                albedo = PrescribedBaregroundAlbedo{FT}(
-                    α_snow,
-                    regrid_dirpath,
-                    surface_space,
-                )
+                albedo = PrescribedBaregroundAlbedo{FT}(α_snow, surface_space)
 
                 # Radiation
-                ref_time = DateTime(2005)
+                ref_time = DateTime(2005, 1, 15, 12)
                 SW_d = (t) -> 0.0
                 LW_d = (t) -> 5.67e-8 * 280.0^4.0
                 bucket_rad = PrescribedRadiativeFluxes(
@@ -256,59 +247,25 @@ if !Sys.iswindows()
                 set_initial_cache!(p, Y, FT(0.0))
 
                 # Read in data manually
-                outfile_root = "static_data_cgll"
                 comms_ctx = ClimaComms.context(surface_space)
-                field = read_from_hdf5(
-                    regrid_dirpath,
-                    outfile_root,
-                    Dates.DateTime(0), # dummy date
-                    varname,
-                    surface_space,
-                )
-                replace_nan_missing!(field)
-                data_manual = field
-
-                @test p.bucket.α_sfc == data_manual
+                α_bareground = SpaceVaryingInput(path, varname, surface_space)
+                @test p.bucket.α_sfc == α_bareground
             else
                 surface_space = bucket_domain.space.surface
                 @test_throws "Using an albedo map requires a global run." PrescribedBaregroundAlbedo{
                     FT,
                 }(
                     α_snow,
-                    regrid_dirpath,
                     surface_space,
                 )
             end
         end
-        rm(regrid_dirpath, recursive = true)
-    end
-
-    @testset "Test PrescribedSurfaceAlbedo error with static map, FT = $FT" begin
-        get_infile = bareground_albedo_dataset_path
-        date_ref = Dates.DateTime(1900, 1, 1)
-        t_start = Float64(0)
-
-        # Test error for non-time varying data
-        domain = create_domain_2d(FT)
-        space = domain.space.surface
-
-        @test_throws "Using a temporal albedo map requires data with time dimension." PrescribedSurfaceAlbedo{
-            FT,
-        }(
-            regrid_dir_temporal,
-            date_ref,
-            t_start,
-            space,
-            get_infile = get_infile,
-        )
     end
 
     @testset "Test PrescribedSurfaceAlbedo - albedo from map over time, FT = $FT" begin
         earth_param_set = LP.LandParameters(FT)
         varname = "sw_alb"
         infile_path = cesm2_albedo_dataset_path()
-        regrid_dirpath = joinpath(pkgdir(ClimaLand), "test/albedo_tmpfiles/")
-        mkpath(regrid_dirpath)
 
         σS_c = FT(0.2)
         W_f = FT(0.15)
@@ -319,9 +276,10 @@ if !Sys.iswindows()
         init_temp(z::FT, value::FT) where {FT} = FT(value)
 
         t_start = Float64(0)
-        file_dates = to_datetime(NCDataset(infile_path, "r") do ds
+        file_dates_noleap = NCDataset(infile_path, "r") do ds
            ds["time"][:]
-        end)
+        end
+        file_dates = CFTime.reinterpret.(Ref(DateTime), file_dates_noleap)
         date_ref = file_dates[1]
 
         bucket_domains = [
@@ -337,14 +295,10 @@ if !Sys.iswindows()
         for bucket_domain in bucket_domains
             space = bucket_domain.space.surface
             if bucket_domain isa SphericalShell
-                albedo_model = PrescribedSurfaceAlbedo{FT}(
-                    regrid_dirpath,
-                    date_ref,
-                    t_start,
-                    space,
-                )
+                albedo_model =
+                    PrescribedSurfaceAlbedo{FT}(date_ref, t_start, space)
 
                 # Radiation
-                ref_time = DateTime(2005)
+                ref_time = DateTime(2005, 1, 15, 12)
                 SW_d = (t) -> 0
                 LW_d = (t) -> 5.67e-8 * 280.0^4.0
                 bucket_rad = PrescribedRadiativeFluxes(
@@ -360,7 +314,7 @@ if !Sys.iswindows()
                 q_atmos = (t) -> 0.0 # no atmos water
                 h_atmos = FT(1e-8)
                 P_atmos = (t) -> 101325
-                ref_time = DateTime(2005)
+                ref_time = DateTime(2005, 1, 15, 12)
                 bucket_atmos = PrescribedAtmosphere(
                     TimeVaryingInput(precip),
                     TimeVaryingInput(precip),
@@ -394,15 +348,11 @@ if !Sys.iswindows()
                 Y.bucket.σS .= 0.0
                 set_initial_cache! = make_set_initial_cache(model)
                 set_initial_cache!(p, Y, FT(0.0))
-                data_manual = get_data_at_date(
-                    albedo_model.albedo_info,
-                    model.domain.space.surface,
-                    varname,
+                data_manual = DataHandling.regridded_snapshot(
+                    albedo_model.albedo.data_handler,
                     date_ref,
                 )
-                # If there are any NaNs in the input data, replace them so we can compare results
-                replace_nan_missing!(p.bucket.α_sfc)
-                replace_nan_missing!(data_manual)
+
                 @test p.bucket.α_sfc == data_manual
 
                 update_aux! = make_update_aux(model)
@@ -412,14 +362,11 @@ if !Sys.iswindows()
                     @assert new_date == file_dates[i]
 
                     update_aux!(p, Y, t_curr)
-                    data_manual = get_data_at_date(
-                        albedo_model.albedo_info,
-                        model.domain.space.surface,
-                        varname,
-                        file_dates[i],
+                    data_manual = DataHandling.regridded_snapshot(
+                        albedo_model.albedo.data_handler,
+                        new_date,
                     )
-                    replace_nan_missing!(p.bucket.α_sfc)
-                    replace_nan_missing!(data_manual)
+
                     @test p.bucket.α_sfc == data_manual
 
                     # Update manual date to match next date in file
@@ -431,17 +378,11 @@ if !Sys.iswindows()
                 @test_throws "Using an albedo map requires a global run." PrescribedSurfaceAlbedo{
                     FT,
                 }(
-                    regrid_dirpath,
                     date_ref,
                     t_start,
                     space,
                 )
             end
         end
-        rm(regrid_dirpath, recursive = true)
     end
 end
-
-# Delete testing directory and files
-rm(regrid_dir_static; recursive = true, force = true)
-rm(regrid_dir_temporal; recursive = true, force = true)
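
The pieces exercised by these tests compose into one small pipeline. The
sketch below is assembled from the calls that appear in the tests above
(the dataset path helpers, domain parameters, and keyword names are taken
from them); it illustrates the intended usage, not a frozen API:

    import Dates
    import ClimaLand
    import ClimaLand: DataHandling, TimeVaryingInputs
    using ClimaLand: SpaceVaryingInput
    import ClimaCore: Fields

    FT = Float32
    surface_space =
        ClimaLand.Domains.SphericalShell(;
            radius = FT(6731e3),
            depth = FT(1.0),
            nelements = (4, 4),
            npolynomial = 4,
        ).space.surface

    # Time-independent map: read a NetCDF variable and regrid it onto the
    # target space in a single call.
    α_bareground = SpaceVaryingInput(
        ClimaLand.Bucket.bareground_albedo_dataset_path(),
        "sw_alb",
        surface_space,
    )

    # Time-dependent map: a DataHandler pairs the NCFileReader with the
    # regridder; a TimeVaryingInput adds interpolation in time on top.
    data_handler = DataHandling.DataHandler(
        ClimaLand.Bucket.cesm2_albedo_dataset_path(),
        "sw_alb",
        surface_space,
        reference_date = Dates.DateTime(2000, 1, 1),
        t_start = 0.0,
    )
    albedo_input = TimeVaryingInputs.TimeVaryingInput(
        data_handler,
        method = TimeVaryingInputs.NearestNeighbor(),
    )

    # Fill a Field with the value at simulation time t = 0.0 (seconds past
    # t_start); omitting `method` falls back to linear interpolation in time.
    dest = Fields.zeros(surface_space)
    TimeVaryingInputs.evaluate!(dest, albedo_input, 0.0)

This is the same composition that PrescribedBaregroundAlbedo and
PrescribedSurfaceAlbedo now use internally, which is why the regrid
scratch-directory arguments disappear from their constructors in this patch.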