diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 54473f1025..ca8e9cde9c 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.1" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "5f713b750235cf07465e21e92a6bd2f152ad3358" @@ -273,9 +273,9 @@ version = "0.5.7" [[deps.ClimaCore]] deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] -git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969" +git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157" uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.13.2" +version = "0.13.1" weakdeps = ["Krylov"] [deps.ClimaCore.extensions] @@ -299,9 +299,9 @@ weakdeps = ["ClimaParams"] [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] -git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee" +git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01" uuid = "5c42b081-d73a-476f-9059-fd94b934656c" -version = "0.10.3" +version = "0.10.2" [[deps.ClimaTimeSteppers]] deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] @@ -379,7 +379,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.1.0+0" +version = "1.0.5+1" [[deps.CompositionsBase]] git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" @@ -394,9 +394,9 @@ version = "0.1.2" [[deps.ConcurrentUtilities]] deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" +git-tree-sha1 = "9c4708e3ed2b799e6124b5673a712dda0b596a9b" uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" +version = "2.3.1" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -1495,7 +1495,7 @@ version = "0.3.24+0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+4" +version = "0.3.23+2" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] diff --git a/docs/src/APIs/Regridder.md b/docs/src/APIs/Regridder.md index 861d9155a9..2da979ece0 100644 --- a/docs/src/APIs/Regridder.md +++ b/docs/src/APIs/Regridder.md @@ -1,12 +1,12 @@ # Bucket ```@meta -CurrentModule = ClimaLand.Regridder +CurrentModule = ClimaLand.Regridders ``` ## Functions ```@docs -ClimaLand.Regridder.hdwrite_regridfile_rll_to_cgll -ClimaLand.Regridder.swap_space +ClimaLand.Regridders.hdwrite_regridfile_rll_to_cgll +ClimaLand.Regridders.swap_space ``` diff --git a/experiments/Manifest.toml b/experiments/Manifest.toml index 5516c46643..e1f64ad70d 100644 --- a/experiments/Manifest.toml +++ b/experiments/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.1" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "960bdc0a1ed4b3f2aee66ed2e0c02065769614a9" @@ -330,9 +330,9 @@ version = "0.5.7" [[deps.ClimaCore]] deps = ["Adapt", "BandedMatrices", "BlockArrays", "CUDA", "ClimaComms", "CubedSphere", "DataStructures", 
"DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] -git-tree-sha1 = "bc6a0154e3bcc1657d3a75f697e216fb70121969" +git-tree-sha1 = "844afbd6a7c4f112f974e0031ebf4fb13d0b4157" uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.13.2" +version = "0.13.1" weakdeps = ["Krylov"] [deps.ClimaCore.extensions] @@ -356,9 +356,9 @@ weakdeps = ["ClimaParams"] [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] -git-tree-sha1 = "ec67949db856e01df4cbf7d6ddafefeda02f93ee" +git-tree-sha1 = "323dd6c5423caf31f0da81bb9c288683cbdafb01" uuid = "5c42b081-d73a-476f-9059-fd94b934656c" -version = "0.10.3" +version = "0.10.2" [[deps.ClimaTimeSteppers]] deps = ["ClimaComms", "Colors", "DataStructures", "DiffEqBase", "DiffEqCallbacks", "KernelAbstractions", "Krylov", "LinearAlgebra", "LinearOperators", "NVTX", "SciMLBase", "StaticArrays"] @@ -453,7 +453,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.1.0+0" +version = "1.0.5+1" [[deps.CompositionsBase]] git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" @@ -468,9 +468,9 @@ version = "0.1.2" [[deps.ConcurrentUtilities]] deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" +git-tree-sha1 = "9c4708e3ed2b799e6124b5673a712dda0b596a9b" uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" +version = "2.3.1" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -1793,7 +1793,7 @@ version = "0.3.24+0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+4" +version = "0.3.23+2" [[deps.OpenEXR]] deps = ["Colors", "FileIO", "OpenEXR_jll"] diff --git a/experiments/standalone/Bucket/global_bucket_staticmap.jl b/experiments/standalone/Bucket/global_bucket_staticmap.jl index 39e9ff15f6..132747e9bd 100644 --- a/experiments/standalone/Bucket/global_bucket_staticmap.jl +++ b/experiments/standalone/Bucket/global_bucket_staticmap.jl @@ -87,18 +87,9 @@ tf = 7 * 86400; Δt = 3600.0; # Construct albedo parameter object using static map -# Use separate regridding directory for CPU and GPU runs to avoid race condition -device_suffix = - typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? - "cpu" : "gpu" -regrid_dir = joinpath( - pkgdir(ClimaLand), - "experiments/standalone/Bucket/$device_suffix/regrid-static/", -) -!ispath(regrid_dir) && mkpath(regrid_dir) surface_space = bucket_domain.space.surface α_snow = FT(0.8) -albedo = PrescribedBaregroundAlbedo{FT}(α_snow, regrid_dir, surface_space); +albedo = PrescribedBaregroundAlbedo{FT}(α_snow, surface_space); bucket_parameters = BucketModelParameters(FT; albedo, z_0m, z_0b, τc); @@ -213,6 +204,9 @@ F_sfc = [ ]; # save prognostic state to CSV - for comparison between GPU and CPU output +device_suffix = + typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? 
+ "cpu" : "gpu" open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',') end; @@ -261,6 +255,3 @@ for (i, (field_ts, field_name)) in enumerate( end outfile = joinpath(outdir, string("ts_$device_suffix.png")) CairoMakie.save(outfile, fig_ts) - -# delete regrid directory -rm(regrid_dir, recursive = true) diff --git a/experiments/standalone/Bucket/global_bucket_temporalmap.jl b/experiments/standalone/Bucket/global_bucket_temporalmap.jl index 895f89f415..5ba990080b 100644 --- a/experiments/standalone/Bucket/global_bucket_temporalmap.jl +++ b/experiments/standalone/Bucket/global_bucket_temporalmap.jl @@ -70,7 +70,7 @@ bucket_domain = ClimaLand.Domains.SphericalShell(; npolynomial = 1, dz_tuple = FT.((1.0, 0.05)), ); -ref_time = DateTime(2005); +ref_time = DateTime(2005, 01, 15, 12); # Initialize parameters σS_c = FT(0.2); @@ -86,17 +86,8 @@ tf = 50 * 86400; Δt = 3600.0; # Construct albedo parameter object using temporal map -# Use separate regridding directory for CPU and GPU runs to avoid race condition -device_suffix = - typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? - "cpu" : "gpu" -regrid_dir = joinpath( - pkgdir(ClimaLand), - "experiments/standalone/Bucket/$device_suffix/regrid-temporal/", -) -!ispath(regrid_dir) && mkpath(regrid_dir) surface_space = bucket_domain.space.surface -albedo = PrescribedSurfaceAlbedo{FT}(regrid_dir, ref_time, t0, surface_space); +albedo = PrescribedSurfaceAlbedo{FT}(ref_time, t0, surface_space); bucket_parameters = BucketModelParameters(FT; albedo, z_0m, z_0b, τc); @@ -211,6 +202,9 @@ F_sfc = [ ]; # save prognostic state to CSV - for comparison between GPU and CPU output +device_suffix = + typeof(ClimaComms.context().device) <: ClimaComms.CPUSingleThreaded ? + "cpu" : "gpu" open(joinpath(outdir, "tf_state_$device_suffix.txt"), "w") do io writedlm(io, hcat(T_sfc[end][:], W[end][:], Ws[end][:], σS[end][:]), ',') end; @@ -259,6 +253,3 @@ for (i, (field_ts, field_name)) in enumerate( end outfile = joinpath(outdir, string("ts_$device_suffix.png")) CairoMakie.save(outfile, fig_ts) - -# delete regrid directory -rm(regrid_dir, recursive = true) diff --git a/src/ClimaLand.jl b/src/ClimaLand.jl index f1101846dd..5db6ab8cb8 100644 --- a/src/ClimaLand.jl +++ b/src/ClimaLand.jl @@ -8,12 +8,13 @@ include("shared_utilities/Parameters.jl") import .Parameters as LP include("shared_utilities/general_utils.jl") +include("shared_utilities/Domains.jl") +include("shared_utilities/FileReaders.jl") +include("shared_utilities/Regridders.jl") +include("shared_utilities/DataHandling.jl") include("shared_utilities/TimeVaryingInputs.jl") using .TimeVaryingInputs export TimeVaryingInput, evaluate! -include("shared_utilities/Regridder.jl") -include("shared_utilities/Domains.jl") -include("shared_utilities/FileReader.jl") include("shared_utilities/SpaceVaryingInputs.jl") using .SpaceVaryingInputs export SpaceVaryingInput diff --git a/src/shared_utilities/DataHandling.jl b/src/shared_utilities/DataHandling.jl new file mode 100644 index 0000000000..9a375da61c --- /dev/null +++ b/src/shared_utilities/DataHandling.jl @@ -0,0 +1,317 @@ +""" + DataHandling + +The `DataHandling` module is responsible for read data from files and resample the data onto +the simulation grid. + +This is no trivial task. 
Among the challenges: +- data can be large and cannot be read all in one go and/or held in memory +- regridding onto the simulation grid can be very expensive +- IO can be very expensive +- CPU/GPU communication can be a bottleneck + +The `DataHandling` module takes a divide-and-conquer approach: the various core tasks and +features are split into independent modules (chiefly `FileReaders` and `Regridders`). +Such modules can be developed, tested, and extended independently (as long as they maintain +a consistent interface). For instance, if the need arises, the `DataHandler` can be used +(almost) directly to process files with a different format from NetCDF. + +The key struct in `DataHandling` is the `DataHandler`. The `DataHandler` contains a +file reader, a regridder, and other metadata necessary to perform its operations (e.g., +target `ClimaCore.Space`). The `DataHandler` can be used for static or temporal data, and +exposes the following key functions: +- `regridded_snapshot(time)`: to obtain the regridded field at the given `time`. `time` has to be + available in the data. +- `available_times` (`available_dates`): to list all the `times` (`dates`) over which the + data is defined. +- `previous_time(time/date)` (`next_time(time/date)`): to obtain the time of the snapshot + before the given `time` or `date`. This can be used to compute the + interpolation weight for linear interpolation, or in combination + with `regridded_snapshot` to read a particular snapshot. +Most `DataHandling` functions take either `time` or `date`, with the difference being that +`time` is intended as "simulation time" and is expected to be in seconds; `date` is a +calendar date (from `Dates.DateTime`). Conversion between time and date is performed using +the reference date and simulation starting time provided to the `DataHandler`. + +The `DataHandler` has a caching mechanism in place: once a field is read and regridded, it +is stored in the local cache to be used again (if needed). + +""" +module DataHandling + +using Dates + +import ClimaCore + +import ..FileReaders: AbstractFileReader, NCFileReader +import ..Regridders +import ..Regridders: + AbstractRegridder, TempestRegridder, regrid, AVAILABLE_REGRIDDERS + +""" + DataHandler{ + FR <: AbstractFileReader, + REG <: AbstractRegridder, + SPACE <: ClimaCore.Spaces.AbstractSpace, + REF_DATE <: Dates.DateTime, + TSTART <: AbstractFloat, + DATES <: AbstractArray{<:Dates.DateTime}, + DIMS, + TIMES <: AbstractArray{<:AbstractFloat}, + CACHE, + } + +Currently, the `DataHandler` works with one variable at a time. This might not be the most +efficient way to tackle the problem: we should be able to reuse the interpolation weights if +multiple variables are defined on the same grid and have to be remapped to the same grid. + +Assumptions: +- There is only one file with the entire time development of the given variable +- The file has well-defined physical dimensions (e.g., lat/lon) +- Currently, the time dimension has to be either "time" or "date", the spatial + dimensions have to be lat and lon (restriction from TempestRegridder) + +DataHandler is meant to live on the CPU, but the Fields can be on the GPU as well.
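+
+For example, a minimal usage sketch (the file name `era5_albedo.nc` and variable name `alb`
+are purely illustrative, and `target_space` is assumed to be a 2D ClimaCore space):
+
+```julia
+data_handler = DataHandler(
+    "era5_albedo.nc",
+    "alb",
+    target_space;
+    reference_date = Dates.DateTime(2000, 1, 1),
+    t_start = 0.0,
+)
+available_times(data_handler)  # times (in seconds) at which the data is defined
+# Read and regrid the snapshot one day into the simulation (86400.0 s has to map
+# to a date that is present in the file)
+field = regridded_snapshot(data_handler, 86400.0)
+close(data_handler)
+```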
+""" +struct DataHandler{ + FR <: AbstractFileReader, + REG <: AbstractRegridder, + SPACE <: ClimaCore.Spaces.AbstractSpace, + REF_DATE <: Dates.DateTime, + TSTART <: AbstractFloat, + DATES <: AbstractArray{<:Dates.DateTime}, + DIMS, + TIMES <: AbstractArray{<:AbstractFloat}, + CACHE, +} + """Object responsible for getting the data from disk to memory""" + file_reader::FR + + """Object responsible for resampling a rectangular grid to the simulation grid""" + regridder::REG + + """ClimaCore Space over which the data has to be resampled""" + target_space::SPACE + + """Tuple of linear arrays where the data is defined (typically long/lat)""" + dimensions::DIMS + + """Calendar dates over which the data is defined""" + available_dates::DATES + + """Simulation time at the beginning of the simulation in seconds (typically 0, but + could be different, e.g., for restarted simulations)""" + t_start::TSTART + + """Reference calendar date at the beginning of the simulation.""" + reference_date::REF_DATE + + """Timesteps over which the data is defined (in seconds)""" + available_times::TIMES + + """Private field where cached data is stored""" + _cached_regridded_fields::CACHE +end + +""" + DataHandler(file_path::AbstractString, + varname::AbstractString, + target_space::ClimaCore.Spaces.AbstractSpace; + reference_date::Dates.DateTime = Dates.DateTime(1979, 1, 1), + t_start::AbstractFloat = 0.0, + regridder_type = :TempestRegridder) + +Create a `DataHandler` to read `varname` from `file_path` and remap it to `target_space`. + +The DataHandler maintains a cache of Fields that were previously computed. + +TODO: Add function to clear cache, and/or CACHE_MAX_SIZE (this will probably require +developing a LRU cache scheme) + +Positional arguments +===================== + +- `file_path`: Path of the NetCDF file that contains the data. +- `varname`: Name of the dataset in the NetCDF that has to be read and processed. +- `target_space`: Space where the simulation is run, where the data has to be regridded to. + +Keyword arguments +=================== + +Time/date information will be ignored for static input files. (They are still set to make +everything more type stable.) + +- `reference_date`: Calendar date corresponding to the start of the simulation. +- `t_start`: Simulation time at the beginning of the simulation. Typically this is 0 + (seconds), but if might be different if the simulation was restarted. +- `regridder_type`: What type of regridding to perform. Currently, the only one implemented + is `:tempest` to use `TempestRemap`. `TempestRemap` regrids everything + ahead of time and saves the result to HDF5 files. +""" +function DataHandler( + file_path::AbstractString, + varname::AbstractString, + target_space::ClimaCore.Spaces.AbstractSpace; + reference_date::Dates.DateTime = Dates.DateTime(1979, 1, 1), + t_start::AbstractFloat = 0.0, + regridder_type = :TempestRegridder, +) + + # File reader, deals with ingesting data, possibly buffered/cached + file_reader = NCFileReader(file_path, varname) + + # Regridder, deals with converting and interpolating the input data onto the simulation + # grid + regridder_type in AVAILABLE_REGRIDDERS || + error("Regridder $regridder_type not implemented") + + regridder_args = Dict( + :TempestRegridder => + (target_space, mktempdir(), varname, file_path), + ) + RegridderConstructor = getfield(Regridders, regridder_type) + regridder = RegridderConstructor(regridder_args[regridder_type]...) 
+ + # NOTE: this is not concretely typed + _cached_regridded_fields = Dict{Dates.DateTime, ClimaCore.Fields.Field}() + + available_dates = file_reader.available_dates + times_s = Second.(available_dates .- reference_date) ./ Second(1) + available_times = times_s .- t_start + + return DataHandler( + file_reader, + regridder, + target_space, + file_reader.dimensions, + available_dates, + t_start, + reference_date, + available_times, + _cached_regridded_fields, + ) +end + +""" + close(data_handler::DataHandler) + +Close any file associated with the given `data_handler`. +""" +function Base.close(data_handler::DataHandler) + close(data_handler.file_reader) + return nothing +end + +""" + available_times(data_handler::DataHandler) + +Return the time in seconds of the snapshots in the data, measured considering +the starting time of the simulation and the reference date. +""" +function available_times(data_handler::DataHandler) + return data_handler.available_times +end + +""" + available_dates(data_handler::DataHandler) + +Return the dates of the snapshots in the data. +""" +function available_dates(data_handler::DataHandler) + return data_handler.available_dates +end + +""" + previous_time(data_handler::DataHandler, time::AbstractFloat) + previous_time(data_handler::DataHandler, date::Dates.DateTime) + +Return the time in seconds of the snapshot before the given `time`. +If `time` is one of the snapshots, return itself. +""" +function previous_time(data_handler::DataHandler, time::AbstractFloat) + time in data_handler.available_times && return time + index = searchsortedfirst(data_handler.available_times, time) - 1 + return data_handler.available_times[index] +end + +function previous_time(data_handler::DataHandler, date::Dates.DateTime) + if date in data_handler.available_dates + index = searchsortedfirst(data_handler.available_dates, date) + else + index = searchsortedfirst(data_handler.available_dates, date) - 1 + end + return data_handler.available_times[index] +end + +""" + next_time(data_handler::DataHandler, time::AbstractFloat) + next_time(data_handler::DataHandler, date::Dates.DateTime) + +Return the time in seconds of the snapshot after the given `time`. +If `time` is one of the snapshots, return the next time. +""" +function next_time(data_handler::DataHandler, time::AbstractFloat) + index = searchsortedfirst(data_handler.available_times, time) + time in data_handler.available_times && (index += 1) + return data_handler.available_times[index] +end + +function next_time(data_handler::DataHandler, date::Dates.DateTime) + if date in data_handler.available_dates + index = searchsortedfirst(data_handler.available_dates, date) + 1 + else + index = searchsortedfirst(data_handler.available_dates, date) + end + return data_handler.available_times[index] +end + +""" + regridded_snapshot(data_handler::DataHandler, date::Dates.DateTime) + regridded_snapshot(data_handler::DataHandler, time::AbstractFloat) + regridded_snapshot(data_handler::DataHandler) + +Return the regridded snapshot from `data_handler` associated with the given `time` (if relevant). + +The `time` has to be available in the `data_handler`. + +`regridded_snapshot` potentially modifies the internal state of `data_handler` and might be a very +expensive operation.
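+
+For instance, `previous_time`/`next_time` can be combined with `regridded_snapshot` for
+linear interpolation in time (a sketch, mirroring what the time interpolation in
+`TimeVaryingInputs` does internally; `time` is assumed to be covered by the data):
+
+```julia
+t0, t1 = previous_time(data_handler, time), next_time(data_handler, time)
+coeff = (time - t0) / (t1 - t0)
+y = (1 - coeff) .* regridded_snapshot(data_handler, t0) .+
+    coeff .* regridded_snapshot(data_handler, t1)
+```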
+ +TODO: Add `regridded_snapshot!` +""" +function regridded_snapshot(data_handler::DataHandler, date::Dates.DateTime) + + # Dates.DateTime(0) is the cache key for static maps + if date != Dates.DateTime(0) + date in data_handler.available_dates || error("date not available") + end + + regridder_type = nameof(typeof(data_handler.regridder)) + + regridded_snapshot = get!(data_handler._cached_regridded_fields, date) do + # Add here the arguments when we have more regridders + regrid_args = Dict(:TempestRegridder => (date,)) + regrid(data_handler.regridder, regrid_args[regridder_type]...) + end + + return regridded_snapshot +end + +function regridded_snapshot(data_handler::DataHandler, time::AbstractFloat) + date = + data_handler.reference_date + + Second(round(data_handler.t_start)) + + Second(round(time)) + return regridded_snapshot(data_handler, date) +end + +function regridded_snapshot(data_handler::DataHandler) + # This function can be called only when there are no dates (i.e., the dataset is static) + isempty(data_handler.available_dates) || + error("DataHandler is a function of time") + + # In this case, we use Dates.DateTime(0) as the cache key + return regridded_snapshot(data_handler, Dates.DateTime(0)) +end + +end diff --git a/src/shared_utilities/FileReader.jl b/src/shared_utilities/FileReader.jl deleted file mode 100644 index e9d8e9b4b6..0000000000 --- a/src/shared_utilities/FileReader.jl +++ /dev/null @@ -1,549 +0,0 @@ -""" - FileReader - -This module coordinates reading, regridding, and interpolating -data from NetCDF files that is required for global land model -simulations, including globally varying parameters which may -or may not change in time. It also includes regridding and -temporal interpolations of this data. - -This is based on ClimaCoupler.jl's BCReader and TimeManager modules. -""" -module FileReader - -using ClimaComms -using ClimaCore: Fields, Spaces -using Dates -using CFTime -using NCDatasets - -using ClimaLand.Regridder - -export AbstractPrescribedData, - PrescribedDataTemporal, - PrescribedDataStatic, - FileInfo, - FileState, - SimInfo, - read_data_fields!, - next_date_in_file, - get_data_at_date, - to_datetime - - -""" - abstract type AbstractPrescribedData - -An abstract type for storing prescribed data info. Subtypes -include temporally-varying prescribed data and static prescribed data. -""" -abstract type AbstractPrescribedData end - -""" - PrescribedDataStatic <: AbstractPrescribedData - -Stores information to read in a prescribed variable from a file. -The data is read in once and stored without changing for the duration of a -simulation. This type is meant to be used with input data that does not have -a time dimension. -Each of the fields of this struct is itself a struct. - -# Inputs: -- file_info::FI # unchanging info about the input data file -""" -struct PrescribedDataStatic{FI} <: AbstractPrescribedData - file_info::FI -end - -""" - PrescribedDataTemporal <: AbstractPrescribedData - -Stores information to read in prescribed data from a file. -Contains sufficient information to read in the variables at various -timesteps, and to coordinate this reading between data coming from -different files. This type is meant to be used with input data that has -a time dimension. -The `file_states` field is a dictionary mapping variable names to `FileState` -structs, which contain information about the current data for that variable.
- -# Inputs: -- file_info::FI # unchanging info about the input data file -- file_states::Dict{S, FS} # info about the data currently being read from file for each variable -- sim_info::SI # unchanging info about the start date/time of the simulation -""" -struct PrescribedDataTemporal{FI, S, FS, SI} <: AbstractPrescribedData - file_info::FI - file_states::Dict{S, FS} - sim_info::SI -end - -""" - FileInfo - -Stores information about the current data being read in from a file. - -# Inputs: -- infile_path::String # path to the input NetCDF data file -- regrid_dirpath::String # directory for storing files used in regridding -- varnames::Vector{String} # names of the variables we're reading from the input file -- outfile_root::String # root for regridded data files generated when writing data at each time from input file -- all_dates::Vector # vector containing all dates of the input file, which we assume are `DateTime`s or `DateTimeNoLeap`s -- date_idx0::Vector{Int} # index of the first data in the file being used for this simulation -""" -struct FileInfo - infile_path::String - regrid_dirpath::String - varnames::Vector{String} - outfile_root::String - all_dates::Vector - date_idx0::Vector{Int} -end - -""" - FileState - -Stores information about the current data being read in from a file for one variable. - -# Inputs: -- data_fields::F # tuple of two fields at consecutive dates, that will be used for interpolation -- date_idx::Vector{Int} # index in the input file of the first data field currently being used -- segment_length::Vector{Int} # length of the time interval between the two data field entries; used in temporal interpolation -""" -struct FileState{F} - data_fields::F - date_idx::Vector{Int} - segment_length::Vector{Int} -end - -""" - SimInfo - -Stores information about the simulation being run. We may want to store -multiple copies of an instance of this struct in multiple PrescribedDataTemporal -objects if we're reading in data over time for multiple variables. - -# Inputs: -- date_ref::D # a reference date before or at the start of the simulation -- t_start # time in seconds since `date_ref` -""" -struct SimInfo{D} - date_ref::D - t_start::Any -end - -""" - PrescribedDataStatic{FT}( - get_infile::Function, - regrid_dirpath::String, - varnames::Vector{String}, - surface_space::Spaces.AbstractSpace, - mono::Bool = true, - ) where {FT <: AbstractFloat} - -Constructor for the `PrescribedDataStatic`` type. -Regrids from the input lat-lon grid to the simulation cgll grid, saving -the regridded output in new files found at `regrid_dirpath`. The `mono` flag -here is used to determine whether or not the remapping is monotone. - -Creates a `FileInfo` object containing all the information needed to read in -the data stored in the input file, which will later be regridded to our -simulation grid. Date-related args (last 3 passed to FileInfo) are unused for -static data maps. 
-""" -function PrescribedDataStatic{FT}( - get_infile::Function, - regrid_dirpath::String, - varnames::Vector{String}, - surface_space::Spaces.AbstractSpace; - mono::Bool = true, -) where {FT <: AbstractFloat} - comms_ctx = ClimaComms.context(surface_space) - outfile_root = "static_data_cgll" - - # Download `infile_path` artifact on root process first to avoid race condition - if ClimaComms.iamroot(comms_ctx) - infile_path = get_infile() - Regridder.hdwrite_regridfile_rll_to_cgll( - FT, - regrid_dirpath, - infile_path, - varnames, - surface_space, - outfile_root; - mono = mono, - ) - end - ClimaComms.barrier(comms_ctx) - infile_path = get_infile() - - file_info = - FileInfo(infile_path, regrid_dirpath, varnames, outfile_root, [], []) - return PrescribedDataStatic{typeof(file_info)}(file_info) -end - - -""" - PrescribedDataTemporal{FT}( - regrid_dirpath, - get_infile, - varnames, - date_ref, - t_start, - surface_space; - mono = true, - ) where {FT <: AbstractFloat} - -Constructor for the `PrescribedDataTemporal` type. -Regrids from the input lat-lon grid to the simulation cgll grid, saving -the regridded output in a new file found at `regrid_dirpath`, and -returns the info required to run the simulation using this prescribed -data packaged into a single `PrescribedDataTemporal` struct. - -# Arguments -- `regrid_dirpath` # directory the data file is stored in. -- `get_infile` # function returning path to NCDataset file containing data to regrid. -- `varnames` # names of the variables to be regridded. -- `date_ref` # reference date to coordinate start of the simulation -- `t_start` # start time of the simulation relative to `date_ref` (date_start = date_ref + t_start) -- `surface_space` # the space to which we are mapping. -- `mono` # flag for monotone remapping of `infile_path`. 
- -# Returns -- `PrescribedDataTemporal` object -""" -function PrescribedDataTemporal{FT}( - regrid_dirpath::String, - get_infile::Function, - varnames::Vector{String}, - date_ref::Union{DateTime, DateTimeNoLeap}, - t_start, - surface_space::Spaces.AbstractSpace; - mono::Bool = true, -) where {FT <: AbstractFloat} - comms_ctx = ClimaComms.context(surface_space) - outfile_root = "temporal_data_cgll" - - # Initialize dummy date to be overwritten by actual dates during file read - all_dates = [DateTime(0)] - - # Regrid data at all times from lat/lon (RLL) to simulation grid (CGLL) - # Download `infile_path` artifact on root process first to avoid race condition - if ClimaComms.iamroot(comms_ctx) - infile_path = get_infile() - all_dates = Regridder.hdwrite_regridfile_rll_to_cgll( - FT, - regrid_dirpath, - infile_path, - varnames, - surface_space, - outfile_root; - mono = mono, - ) - - NCDataset(infile_path, "r") do ds - if !("time" in keys(ds)) - error( - "Using a temporal albedo map requires data with time dimension.", - ) - end - end - end - all_dates = ClimaComms.bcast(comms_ctx, all_dates) - ClimaComms.barrier(comms_ctx) - infile_path = get_infile() - - # Init time tracking info - data_fields = - Fields.zeros(FT, surface_space), Fields.zeros(FT, surface_space) - # Store `segment_length` as an array so we can modify it as a field of a struct - segment_length = Int[0] - - date_start = date_ref + Dates.Second(t_start) - if date_start < all_dates[1] - @warn "Simulation start date is before first file data" - end - - # Find the index of the start file closest to this simulation's start date - # Like `segment_length`, store in an array so we can modify in struct - date_idx0 = - [argmin(abs.(Dates.value(date_start) .- Dates.value.(all_dates[:])))] - - # Construct component structs of PrescribedDataTemporal object - file_info = FileInfo( - infile_path, - regrid_dirpath, - varnames, - outfile_root, - all_dates, - date_idx0, - ) - file_states = Dict{String, FileState{typeof(data_fields)}}() - for varname in varnames - file_states[varname] = - FileState(deepcopy(data_fields), copy(date_idx0), segment_length) - end - sim_info = SimInfo(date_ref, t_start) - - args = (file_info, file_states, sim_info) - - # Get types of `file_info`, the first `file_states` Dict pair, and `sim_info` - type_args = ( - typeof(file_info), - typeof(first(file_states)[1]), - typeof(first(file_states)[2]), - typeof(sim_info), - ) - return PrescribedDataTemporal{type_args...}(args...) -end - -""" - read_data_fields!( - prescribed_data::PrescribedDataTemporal, - date::DateTime, - space::Spaces.AbstractSpace - ) - -Extracts data from regridded (to model grid) NetCDF files. -The times for which data is extracted depends on the specifications in the -`prescribed_data` struct). -Data at one point in time is stored in `prescribed_data.file_state.data_fields[1]`, and -data at the next time is stored in `prescribed_data.file_state.data_fields[2]`. With these -two data fields saved, we can interpolate between them for any dates -in this range of time. - -# Arguments -- `prescribed_data` # containing data and file information. -- `date` # current date to read in data for. -- `space` # space we're remapping the data onto. 
-""" -function read_data_fields!( - prescribed_data::PrescribedDataTemporal, - date::DateTime, - space::Spaces.AbstractSpace, -) - comms_ctx = ClimaComms.context(space) - pd_file_info = prescribed_data.file_info - pd_file_states = prescribed_data.file_states - - (; regrid_dirpath, outfile_root, all_dates, varnames) = pd_file_info - - date_idx0 = pd_file_info.date_idx0[1] - # Assumes that all variables in `prescribed_data` have the same dates - date_idx = pd_file_states[varnames[1]].date_idx[1] - - # Case 1: Current date is before or at first date in data file - # Load in data at first date for both `data_fields[1]` and `data_fields[2]` - if (date_idx == date_idx0) && (date <= all_dates[date_idx]) - if date !== all_dates[date_idx] - @warn "this time period is before input data - using file from $(all_dates[date_idx0])" - end - - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx0)], - varname, - space, - ) - file_state.data_fields[2] .= file_state.data_fields[1] - file_state.segment_length .= 0 - end - - # Case 2: current date is at or after last date in input file - # Load in data at last date for both `data_fields[1]` and `data_fields[2]` - elseif date >= all_dates[end - 1] - @warn "this time period is after input data - using file from $(all_dates[end - 1])" - - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[end], - varname, - space, - ) - file_state.data_fields[2] .= file_state.data_fields[1] - file_state.segment_length .= 0 - end - - # Case 3: current date is later than date of data being read in - # Load in data at most recent past date in `data_fields[1]` and - # next date in `data_fields[2]` - # elseif Dates.days(date - all_dates[Int(date_idx)]) > 0 - elseif date > all_dates[Int(date_idx)] - # Loop over all variables we need to read in - for (varname, file_state) in pd_file_states - file_state = pd_file_states[varname] - - # Increment `date_idx` to use next date - date_idx = file_state.date_idx[1] += Int(1) - # Time between consecutive dates being stored gives `segment_length` - file_state.segment_length .= - (all_dates[Int(date_idx + 1)] - all_dates[Int(date_idx)]).value - - # Read in data fields at both dates - file_state.data_fields[1] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx)], - varname, - space, - ) - file_state.data_fields[2] .= Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[Int(date_idx + 1)], - varname, - space, - ) - end - # Case 4: Everything else - else - throw(ErrorException("Check input file specification")) - end -end - -""" - next_date_in_file(prescribed_data::PrescribedDataTemporal) - -Returns the next date stored in the file `prescribed_data` struct after the -current date index given by `date_idx`. -Note: this function does not update `date_idx`, so repeated calls will -return the same value unless `date_idx` is modified elsewhere in between. -Assumes that all variables in `prescribed_data` have the same dates. - -# Arguments -- `prescribed_data` # contains all input file information needed for the simulation. 
- -# Returns -- DateTime or DateTimeNoLeap -""" -next_date_in_file(prescribed_data::PrescribedDataTemporal) = - prescribed_data.file_info.all_dates[first( - prescribed_data.file_states, - )[2].date_idx[1] + Int(1)] - -""" - get_data_at_date( - prescribed_data::PrescribedDataStatic, - space::Spaces.AbstractSpace, - varname::String, - ) - -Returns the data at a given date, interpolated if necessary. - -# Arguments -- `prescribed_data` # contains fields to be interpolated. -- `space` # the space of our simulation. -- `varname` # the name of the variable we want to read in. - -# Returns -- Fields.field -""" -function get_data_at_date( - prescribed_data::PrescribedDataStatic, - space::Spaces.AbstractSpace, - varname::String, -) - (; regrid_dirpath, outfile_root) = prescribed_data.file_info - - comms_ctx = ClimaComms.context(space) - field = Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - Dates.DateTime(0), # dummy date - varname, - space, - ) - return field -end - -""" - get_data_at_date( - prescribed_data::PrescribedDataTemporal, - space::Spaces.AbstractSpace, - varname::String, - date::Union{DateTime, DateTimeNoLeap}, - ) - -Returns the data for a specific variable at a given date, -interpolated if necessary. - -# Arguments -- `prescribed_data` # contains fields to be interpolated. -- `space` # the space of our simulation. -- `varname` # the name of the variable we want to read in. -- `date` # start date for data. - -# Returns -- Fields.field -""" -function get_data_at_date( - prescribed_data::PrescribedDataTemporal, - space::Spaces.AbstractSpace, - varname::String, - date::Union{DateTime, DateTimeNoLeap}, -) - FT = Spaces.undertype(space) - (; all_dates) = prescribed_data.file_info - - # Use the file state of the variable we want - file_state = prescribed_data.file_states[varname] - (; segment_length, date_idx, data_fields) = file_state - # Interpolate if the time period between dates is nonzero - if segment_length[1] > FT(0) && date != all_dates[Int(date_idx[1])] - Δt_tt1 = FT((date - all_dates[Int(date_idx[1])]).value) - interp_fraction = Δt_tt1 / FT(segment_length[1]) - @assert abs(interp_fraction) <= FT(1) "time interpolation weights must be <= 1, but `interp_fraction` = $interp_fraction" - return interpol.( - data_fields[1], - data_fields[2], - Δt_tt1, - FT(segment_length[1]), - ) - # Otherwise use the data at the first date - else - return data_fields[1] - end -end - - -""" - interpol(f1::FT, f2::FT, Δt_tt1::FT, Δt_t2t1::FT) - -Performs linear interpolation of `f` at time `t` within -a segment `Δt_t2t1 = (t2 - t1)`, of fields `f1` and `f2`, with `t2 > t1`. - -# Arguments -- `f1`::FT # first value to be interpolated (`f(t1) = f1`). -- `f2`::FT # second value to be interpolated. -- `Δt_tt1`::FT # time between `t1` and some `t` (`Δt_tt1 = (t - t1)`). -- `Δt_t2t1`::FT # time between `t1` and `t2`. - -# Returns -- FT -""" -function interpol(f1::FT, f2::FT, Δt_tt1::FT, Δt_t2t1::FT) where {FT} - interp_fraction = Δt_tt1 / Δt_t2t1 - return f1 * (FT(1) - interp_fraction) + f2 * (interp_fraction) -end - -""" - to_datetime(date) - -Convert a DateTime-like object (e.g. DateTimeNoLeap) to a DateTime, -using CFTime.jl. We need this since the CESM2 albedo file contains -DateTimeNoLeap objects for dates, which can't be used for math with DateTimes. - -Note that this conversion may fail if the date to convert doesn't -exist in the DateTime calendar. 
- -# Arguments -- `date`: object to be converted to DateTime -""" -to_datetime(date) = CFTime.reinterpret(DateTime, date) - -end diff --git a/src/shared_utilities/FileReaders.jl b/src/shared_utilities/FileReaders.jl new file mode 100644 index 0000000000..a809b811ca --- /dev/null +++ b/src/shared_utilities/FileReaders.jl @@ -0,0 +1,164 @@ +""" + FileReaders + +The `FileReaders` module implements backends to read and process input files. + +Given that reading from disk can be an expensive operation, this module provides a pathway +to optimize the performance (if needed). + +The FileReaders module contains a global cache of all the NCDatasets that are currently open. +This allows multiple NCFileReaders to share the underlying file without overhead. +""" +module FileReaders + +import Dates +import NCDatasets + +abstract type AbstractFileReader end + +# We allow multiple NCFileReaders to share the same underlying NCDataset. For this, we put +# all the NCDatasets into a dictionary where we keep track of them. OPEN_NCFILES is a +# dictionary that maps file paths to a Tuple with the first element being the NCDataset and +# the second element being a Set of Strings, the variables that are being read from that +# file. Every time an NCFileReader is created or closed, this Set is modified by adding or +# removing a varname. +const OPEN_NCFILES = Dict{String, Tuple{NCDatasets.NCDataset, Set{String}}}() + +""" + NCFileReader + +A struct to read and process NetCDF files. + +NCFileReader is designed to leave room for optimizations, e.g., caching reads, or spinning +the I/O off to a different thread (not implemented yet). +""" +struct NCFileReader{ + STR1 <: AbstractString, + STR2 <: AbstractString, + DIMS <: Tuple, + NC <: NCDatasets.NCDataset, + DATES <: AbstractArray{<:Dates.DateTime}, + CACHE, + PREP <: Function, +} <: AbstractFileReader + """Path of the NetCDF file""" + file_path::STR1 + + """Name of the dataset in the NetCDF file""" + varname::STR2 + + """A tuple of arrays with the various physical dimensions where the data is defined + (e.g., lon/lat)""" + dimensions::DIMS + + """A vector of DateTime collecting all the available dates in the file""" + available_dates::DATES + + """NetCDF file opened by NCDataset""" + dataset::NC + + """Optional function that is applied to the read dataset. Useful to do unit-conversion + or remove NaNs. Not suitable for anything more complicated than that.""" + preprocess_func::PREP + + """A place where to store values that have been already read. Currently not implemented.""" + _cached_reads::CACHE +end + +""" + NCFileReader(file_path::AbstractString, varname::AbstractString) + +Construct an `NCFileReader` to efficiently read and process the variable `varname` from the +NetCDF file at `file_path`. +""" +function NCFileReader( + file_path::AbstractString, + varname::AbstractString; + preprocess_func = identity, +) + + # Get dataset from global dictionary. If not available, open new file and add entry to + # global dictionary. The do-block form of get! ensures the file is opened only when + # the entry does not already exist. + dataset, open_varnames = get!(OPEN_NCFILES, file_path) do + (NCDatasets.NCDataset(file_path), Set{String}()) + end + push!(open_varnames, varname) + + # NOTE: We are hardcoding the dimension name.
# At the moment, this is required by TempestRemap + dim_names = ["lon", "lat"] + if all(d in keys(dataset) for d in dim_names) + dimensions = Tuple(dataset[d] for d in dim_names) + else + @warn "Dimensions not found in $file_path" + # If lat and lon, we can try deducing from the number of points + if dim_names == ["lon", "lat"] || dim_names == ["long", "lat"] + @warn "Assuming cell-centered lon/lat (-180/180, -90/90)" + lon_edges = range(-180, 180, dataset.dim[dim_names[1]] + 1) + lon = (lon_edges[1:(end - 1)] .+ lon_edges[2:end]) ./ 2.0 + + lat_edges = range(-90, 90, dataset.dim[dim_names[2]] + 1) + lat = (lat_edges[1:(end - 1)] .+ lat_edges[2:end]) ./ 2.0 + dimensions = (lon, lat) + else + error("$file_path does not contain information about dimensions") + end + end + available_dates = read_available_dates(dataset) + + if !isempty(available_dates) + issorted(available_dates) || + error("Cannot process files that are not sorted in time") + end + + # NOTE: This is not concretely typed. + _cached_reads = Dict{Dates.DateTime, Array}() + + return NCFileReader( + file_path, + varname, + dimensions, + available_dates, + dataset, + preprocess_func, + _cached_reads, + ) +end + +""" + close(file_reader::NCFileReader) + +Close the `NCFileReader`. If no other `NCFileReader` is using the same file, close the NetCDF file. +""" +function Base.close(file_reader::NCFileReader) + open_variables = OPEN_NCFILES[file_reader.file_path][end] + pop!(open_variables, file_reader.varname) + if isempty(open_variables) + NCDatasets.close(file_reader.dataset) + delete!(OPEN_NCFILES, file_reader.file_path) + end + return nothing +end + + +""" + read_available_dates(ds::NCDatasets.NCDataset) + +Return all the dates in the given NCDataset. The dates are read from the "time" +or "date" variables. If neither is available, return an empty vector. +""" +function read_available_dates(ds::NCDatasets.NCDataset) + if "time" in keys(ds.dim) + return Dates.DateTime.( + reinterpret.(Ref(NCDatasets.DateTimeStandard), ds["time"][:]) + ) + elseif "date" in keys(ds.dim) + return strdate_to_datetime.(string.(ds["date"][:])) + else + return Dates.DateTime[] + end +end + +end diff --git a/src/shared_utilities/Regridder.jl b/src/shared_utilities/Regridders.jl similarity index 67% rename from src/shared_utilities/Regridder.jl rename to src/shared_utilities/Regridders.jl index a06ae4f802..c768c70b68 100644 --- a/src/shared_utilities/Regridder.jl +++ b/src/shared_utilities/Regridders.jl @@ -1,11 +1,145 @@ -# Code from ClimaCoupler Regridder.jl -module Regridder -using ClimaCore -using ClimaComms -using NCDatasets -using ClimaCoreTempestRemap +""" + Regridders + +The `Regridders` module implements structs and functions to remap datasets to simulation +grids. + +Currently, the only scheme implemented is `TempestRegridder`, which uses +`ClimaCoreTempestRemap`. + +The key function exposed by `Regridders` is the `regrid` method. +""" +module Regridders + using Dates -using DocStringExtensions + +import ClimaComms +import ClimaCore +import ClimaCore: Spaces +import ClimaCoreTempestRemap +import ..FileReaders: read_available_dates + +using NCDatasets + +# When adding a new regridder, you also have to change some functions in the DataHandler +# module. Find where :TempestRegridder is used. +abstract type AbstractRegridder end + +const AVAILABLE_REGRIDDERS = [:TempestRegridder] + +""" + TempestRegridder + +A struct to resample a given NetCDF file to the `target_space`. + +Currently, this works with ClimaCoreTempestRemap.
`ClimaCoreTempestRemap` uses TempestRemap, +a command-line utility. Hence, this regridder has to process files directly, so +we cannot really split file processing from remapping. + +TempestRegridder only works on CPU and on a single process. + +Implicit assumptions in the code: +- input_file contains "lat" and "lon" +- the time dimension, if available, is called "time" or "date" +- the name of the variable in the NetCDF file is `varname` +- all the data is in one single file +""" +struct TempestRegridder{ + SPACE <: ClimaCore.Spaces.AbstractSpace, + STR1 <: AbstractString, + STR2 <: AbstractString, + STR3 <: AbstractString, +} <: AbstractRegridder + + target_space::SPACE + regrid_dir::STR1 + outfile_root::STR2 + varname::STR3 + mono::Bool +end + +""" + TempestRegridder(target_space::ClimaCore.Spaces.AbstractSpace, + regrid_dir::AbstractString, + varname::AbstractString, + input_file::AbstractString; + mono::Bool = true) + +Set up a `TempestRegridder` object to regrid the variable in the given `input_file` to the +`target_space`. `TempestRegridder` works only on CPU and on a single process. + +Positional arguments +===================== + +- `target_space`: the ClimaCore Space where the simulation is being performed. +- `regrid_dir`: a folder where regridded Fields are saved as HDF5 files. +- `varname`: the name of the variable in the NetCDF file. +- `input_file`: the path of the NetCDF file that has to be read and processed. + +Keyword arguments +================== + +- `mono`: Whether remapping has to be monotonic or not. +""" +function TempestRegridder( + target_space::ClimaCore.Spaces.AbstractSpace, + regrid_dir::AbstractString, + varname::AbstractString, + input_file::AbstractString; + mono::Bool = true, +) + space = target_space + comms_ctx = ClimaComms.context(space) + # `outfile_root` is needed by every process in the return below, so it is defined + # outside of the root-only block + outfile_root = varname + + if ClimaComms.iamroot(comms_ctx) + @info "Saving TempestRegrid files to $regrid_dir" + + FT = Spaces.undertype(space) + varnames = [varname] + + hdwrite_regridfile_rll_to_cgll( + FT, + regrid_dir, + input_file, + varnames, + target_space, + outfile_root; + mono, + ) + end + # We have to make sure that all the processes wait on the (expensive) regridding, + # otherwise they won't have access to the regridded fields + ClimaComms.barrier(comms_ctx) + + return TempestRegridder{ + typeof(target_space), + typeof(regrid_dir), + typeof(outfile_root), + typeof(varname), + }( + target_space, + regrid_dir, + outfile_root, + varname, + mono, + ) +end + +""" + regrid(regridder::TempestRegridder, date::Dates.DateTime) + +Return the field associated with the `regridder` at the given `date`. +""" +function regrid(regridder::TempestRegridder, date::Dates.DateTime) + return read_from_hdf5( + regridder.regrid_dir, + regridder.outfile_root, + date, + regridder.varname, + regridder.target_space, + ) +end export hdwrite_regridfile_rll_to_cgll @@ -238,22 +372,26 @@ function hdwrite_regridfile_rll_to_cgll( if isfile(datafile_cgll) == false isdir(REGRID_DIR) ?
nothing : mkpath(REGRID_DIR) - nlat, nlon = NCDataset(datafile_rll) do ds + nlat, nlon = NCDatasets.NCDataset(datafile_rll) do ds (ds.dim["lat"], ds.dim["lon"]) end # write lat-lon mesh - rll_mesh(meshfile_rll; nlat = nlat, nlon = nlon) + ClimaCoreTempestRemap.rll_mesh(meshfile_rll; nlat = nlat, nlon = nlon) # write cgll mesh, overlap mesh and weight file - write_exodus(meshfile_cgll, topology) - overlap_mesh(meshfile_overlap, meshfile_rll, meshfile_cgll) + ClimaCoreTempestRemap.write_exodus(meshfile_cgll, topology) + ClimaCoreTempestRemap.overlap_mesh( + meshfile_overlap, + meshfile_rll, + meshfile_cgll, + ) # 'in_np = 1' and 'mono = true' arguments ensure mapping is conservative and monotone # Note: for a kwarg not followed by a value, set it to true here (i.e. pass 'mono = true' to produce '--mono') # Note: out_np = degrees of freedom = polynomial degree + 1 kwargs = (; out_type = out_type, out_np = Nq) kwargs = mono ? (; (kwargs)..., in_np = 1, mono = mono) : kwargs - remap_weights( + ClimaCoreTempestRemap.remap_weights( weightfile, meshfile_rll, meshfile_cgll, @@ -261,28 +399,16 @@ function hdwrite_regridfile_rll_to_cgll( kwargs..., ) - apply_remap(datafile_cgll, datafile_rll, weightfile, varnames) + ClimaCoreTempestRemap.apply_remap( + datafile_cgll, + datafile_rll, + weightfile, + varnames, + ) else @warn "Using the existing $datafile_cgll : check topology is consistent" end - function get_time(ds) - if "time" in keys(ds.dim) - data_dates = - Dates.DateTime.( - reinterpret.( - Ref(NCDatasets.DateTimeStandard), - Array(ds["time"]), - ) - ) - elseif "date" in keys(ds.dim) - data_dates = strdate_to_datetime.(string.(Array(ds["date"]))) - else - @warn "No dates available in file $datafile_rll" - data_dates = [Dates.DateTime(0)] - end - end - # weightfile info needed to populate all nodes and save into fields with # sparse matrices _, _, row_indices = NCDataset(weightfile, "r") do ds_wt @@ -309,13 +435,17 @@ function hdwrite_regridfile_rll_to_cgll( # Save regridded HDF5 file for each variable in `varnames` for varname in varnames # read the remapped file with sparse matrices - offline_outvector, times = NCDataset(datafile_cgll, "r") do ds_wt - ( - # read the data in, and remove missing type (will error if missing data is present) - offline_outvector = nomissing(Array(ds_wt[varname])[:, :]), # ncol, times - times = get_time(ds_wt), - ) - end + offline_outvector, times = + NCDatasets.NCDataset(datafile_cgll, "r") do ds_wt + ( + # read the data in, and remove missing type (will error if missing data is present) + offline_outvector = nomissing(Array(ds_wt[varname])[:, :]), # ncol, times + times = read_available_dates(ds_wt), + ) + end + + # Use dummy date when there are no real dates + isempty(times) && (times = [DateTime(0)]) # Convert input data float type if needed if eltype(offline_outvector) <: AbstractFloat && @@ -323,7 +453,9 @@ function hdwrite_regridfile_rll_to_cgll( @warn "Converting $varname data in $datafile_cgll from $(eltype(offline_outvector)) to $FT" offline_outvector = Array{FT}(offline_outvector) end - @assert length(times) == size(offline_outvector, 2) "Inconsistent time dimension in $datafile_cgll for $varname" + length_times = length(times) + size_outvector = size(offline_outvector, 2) + @assert length_times == size_outvector "Inconsistent time dimension in $datafile_cgll for $varname ($length_times != $size_outvector)" offline_fields = ntuple(x -> similar(offline_field), length(times)) ntuple( diff --git a/src/shared_utilities/SpaceVaryingInputs.jl 
b/src/shared_utilities/SpaceVaryingInputs.jl index 843012df5e..4ae0127d28 100644 --- a/src/shared_utilities/SpaceVaryingInputs.jl +++ b/src/shared_utilities/SpaceVaryingInputs.jl @@ -20,22 +20,22 @@ using ClimaComms using DocStringExtensions import ..searchsortednearest import ..linear_interpolation -using ClimaLand.FileReader +import ..DataHandling: DataHandler, regridded_snapshot export SpaceVaryingInput # Analytic case """ - SpaceVaryingInput(data_function::Function, space::ClimaCore.Spaces.AbstractSpace) - + SpaceVaryingInput(data_function::Function, space::ClimaCore.Spaces.AbstractSpace) + Returns the parameter field to be used in the model; appropriate when a parameter is defined using a function of the coordinates of the space. -Pass the ``data" as a function `data_function` which takes coordinates as arguments, +Pass the "data" as a function `data_function` which takes coordinates as arguments, and the ClimaCore space of the model simulation. This returns a scalar field. Note that data_function is broadcasted over the coordinate field. Internally, inside -your function, this must be unpacked (coords.lat, coords.lon, e.g.) for +your function, this must be unpacked (coords.lat, coords.lon, e.g.) for use of the coordinate values directly. """ function SpaceVaryingInput( @@ -55,7 +55,7 @@ end space::S, ) where {S <: ClimaCore.Spaces.CenterFiniteDifferenceSpace} -Given a set of depths `data_z` and the observed values `data_values` +Given a set of depths `data_z` and the observed values `data_values` at those depths, create an interpolated field of values at each value of z in the model grid - defined implicitly by `space`. @@ -92,8 +92,8 @@ end DT, } -Returns a field of parameter structs to be used in the model; -appropriate when the parameter struct values vary in depth; +Returns a field of parameter structs to be used in the model; +appropriate when the parameter struct values vary in depth; the `dest_type` argument is the struct type - we assumed that your struct as a constructor which accepts the values of its arguments by kwarg, @@ -137,29 +137,21 @@ function SpaceVaryingInput( end """ - SpaceVaryingInput(data::PDS, varname::N; space::S) - where {PDS <: FileReader.PrescribedDataStatic, N <: String, S <: ClimaCore.Spaces.SpectralElement2D} - + SpaceVaryingInput(data_handler::DataHandler) + SpaceVaryingInput(file_path::AbstractString, varname::AbstractString, target_space::Spaces.AbstractSpace) + Returns the parameter field to be used in the model; appropriate when a parameter is defined on the surface of the Earth. -Pass the data as a `FileReader.PrescribedDataStatic` object -as well as the variable name of the variable in the data file, and the ClimaCore space -of the model simulation. - Returns a ClimaCore.Fields.Field of scalars; analogous to the 1D case which also returns a ClimaCore.Fields.Field of scalars.
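+
+A minimal sketch of both forms (the file name `static_albedo.nc` and variable name `sw_alb`
+are illustrative, and `surface_space` is assumed to be the simulation's surface space):
+
+```julia
+albedo = SpaceVaryingInput("static_albedo.nc", "sw_alb", surface_space)
+# Equivalently, with an explicit DataHandler:
+albedo = SpaceVaryingInput(DataHandler("static_albedo.nc", "sw_alb", surface_space))
+```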
""" -function SpaceVaryingInput( - data::PDS, - varname::N, - space::S, -) where { - PDS <: FileReader.PrescribedDataStatic, - N <: String, - S <: ClimaCore.Spaces.SpectralElementSpace2D, -} - return FileReader.get_data_at_date(data, space, varname) +function SpaceVaryingInput(data_handler::DataHandler) + return regridded_snapshot(data_handler) +end + +function SpaceVaryingInput(file_path, varname, target_space) + return SpaceVaryingInput(DataHandler(file_path, varname, target_space)) end end diff --git a/src/shared_utilities/TimeVaryingInputs.jl b/src/shared_utilities/TimeVaryingInputs.jl index 528787f91f..6a0a6db994 100644 --- a/src/shared_utilities/TimeVaryingInputs.jl +++ b/src/shared_utilities/TimeVaryingInputs.jl @@ -81,19 +81,54 @@ When passing single-site data When a `times` and `vals` are passed, `times` have to be sorted and the two arrays have to have the same length. +======= +When the input is a function, the signature of the function can be `func(time, args...; +kwargs...)`. The function will be called with the additional arguments and keyword arguments +passed to `evaluate!`. This can be used to access the state and the cache and use those to +set the output field. + +For example: +```julia +CO2fromp(time, Y, p) = p.atmos.co2 +input = TimeVaryingInput(CO2fromY) +evaluate!(dest, input, t, Y, p) +``` """ function TimeVaryingInput end """ - evaluate!(dest, input, time) + evaluate!(dest, input, time, args...; kwargs...) Evaluate the `input` at the given `time`, writing the output in-place to `dest`. Depending on the details of `input`, this function might do I/O and communication. + +Extra arguments +================ + +`args` and `kwargs` are used only when the `input` is a non-interpolating function, e.g., +an analytic one. In that case, `args` and `kwargs` are passed down to the function itself. """ function evaluate! end include("analytic_time_varying_input.jl") + +""" + NearestNeighbor + +Return the value corresponding to the point closest to the input time. +""" +struct NearestNeighbor <: AbstractInterpolationMethod end + +""" + LinearInterpolation + +Perform linear interpolation between the two neighboring points. +""" +struct LinearInterpolation <: AbstractInterpolationMethod end + include("interpolating_time_varying_input0d.jl") +include("interpolating_time_varying_input2d.jl") +include("interpolating_time_varying_inputs.jl") # Shared stuff end diff --git a/src/shared_utilities/analytic_time_varying_input.jl b/src/shared_utilities/analytic_time_varying_input.jl index acd7458d6c..228867c9fa 100644 --- a/src/shared_utilities/analytic_time_varying_input.jl +++ b/src/shared_utilities/analytic_time_varying_input.jl @@ -1,15 +1,23 @@ struct AnalyticTimeVaryingInput{F <: Function} <: AbstractTimeVaryingInput - # func here as to be GPU-compatible (e.g., splines are not) + # func here has to be GPU-compatible (e.g., splines are not) and reasonably fast (e.g., + # no large allocations) func::F end -function TimeVaryingInput(input::Function; method = nothing, device = nothing) +# _kwargs... is needed to seamlessly support the other TimeVaryingInputs. +function TimeVaryingInput(input::Function; method = nothing, _kwargs...) 
    isnothing(method) ||
        @warn "Interpolation method is ignored for analytical functions"
     return AnalyticTimeVaryingInput(input)
 end

-function evaluate!(dest, input::AnalyticTimeVaryingInput, time)
-    dest .= input.func(time)
+function evaluate!(
+    dest,
+    input::AnalyticTimeVaryingInput,
+    time,
+    args...;
+    kwargs...,
+)
+    dest .= input.func(time, args...; kwargs...)
     return nothing
 end
diff --git a/src/shared_utilities/interpolating_time_varying_input0d.jl b/src/shared_utilities/interpolating_time_varying_input0d.jl
index 9dde4b3910..412a754f96 100644
--- a/src/shared_utilities/interpolating_time_varying_input0d.jl
+++ b/src/shared_utilities/interpolating_time_varying_input0d.jl
@@ -1,19 +1,3 @@
-import CUDA
-
-"""
-    NearestNeighbor
-
-Return the value corresponding to the point closest to the input time.
-"""
-struct NearestNeighbor <: AbstractInterpolationMethod end
-
-"""
-    LinearInterpolation
-
-Perform linear interpolation between the two neighboring points.
-"""
-struct LinearInterpolation <: AbstractInterpolationMethod end
-
 """
     InterpolatingTimeVaryingInput0D
@@ -67,7 +51,13 @@ function Adapt.adapt_structure(to, itp::InterpolatingTimeVaryingInput0D)
     )
 end

-function evaluate!(destination, itp::InterpolatingTimeVaryingInput0D, time)
+function evaluate!(
+    destination,
+    itp::InterpolatingTimeVaryingInput0D,
+    time,
+    args...;
+    kwargs...,
+)
     time in itp || error("TimeVaryingInput does not cover time $time")
     if ClimaComms.device(itp.context) isa ClimaComms.CUDADevice
         CUDA.@cuda evaluate!(parent(destination), itp, time, itp.method)
@@ -101,16 +91,6 @@ function TimeVaryingInput(
     )
 end

-"""
-    in(time, itp::InterpolatingTimeVaryingInput0D)
-
-Check if the given `time` is in the range of definition for `itp`.
-"""
-function Base.in(time, itp::InterpolatingTimeVaryingInput0D)
-    return itp.range[1] <= time <= itp.range[2]
-end
-
-
 function evaluate!(
     dest,
     itp::InterpolatingTimeVaryingInput0D,
diff --git a/src/shared_utilities/interpolating_time_varying_input2d.jl b/src/shared_utilities/interpolating_time_varying_input2d.jl
new file mode 100644
index 0000000000..56c22e0ffb
--- /dev/null
+++ b/src/shared_utilities/interpolating_time_varying_input2d.jl
@@ -0,0 +1,100 @@
+import ..DataHandling
+import ..DataHandling:
+    DataHandler, previous_time, next_time, regridded_snapshot, available_times
+
+"""
+    InterpolatingTimeVaryingInput2D
+
+The constructor for InterpolatingTimeVaryingInput2D is not supposed to be used directly, unless you
+know what you are doing. The constructor does not perform any checks and does not take care of
+GPU compatibility. It is the responsibility of the user-facing constructor `TimeVaryingInput()` to do so.
+"""
+struct InterpolatingTimeVaryingInput2D{
+    DH <: DataHandler,
+    M <: AbstractInterpolationMethod,
+    CC <: ClimaComms.AbstractCommsContext,
+    R <: Tuple,
+} <: AbstractTimeVaryingInput
+    """Object that has all the information on how to deal with files, data, and so on.
+       Having to deal with files, it lives on the CPU."""
+    data_handler::DH
+
+    """Interpolation method"""
+    method::M
+
+    """ClimaComms context"""
+    context::CC
+
+    """Range of times over which the interpolator is defined. `range` is always defined on the
+    CPU.
Used by the `in` function."""
+    range::R
+end
+
+function TimeVaryingInput(
+    data_handler::DataHandler;
+    method = LinearInterpolation(),
+    context = ClimaComms.context(),
+)
+    available_times = DataHandling.available_times(data_handler)
+    isempty(available_times) &&
+        error("DataHandler does not contain temporal data")
+    issorted(available_times) || error("Can only interpolate with sorted times")
+    range = (available_times[begin], available_times[end])
+    return InterpolatingTimeVaryingInput2D(data_handler, method, context, range)
+end
+
+function evaluate!(
+    dest,
+    itp::InterpolatingTimeVaryingInput2D,
+    time,
+    args...;
+    kwargs...,
+)
+    time in itp || error("TimeVaryingInput does not cover time $time")
+    evaluate!(dest, itp, time, itp.method)
+    return nothing
+end
+
+function evaluate!(
+    dest,
+    itp::InterpolatingTimeVaryingInput2D,
+    time,
+    ::NearestNeighbor,
+    args...;
+    kwargs...,
+)
+    t0, t1 =
+        previous_time(itp.data_handler, time), next_time(itp.data_handler, time)
+
+    # The closest regridded_snapshot could be either the previous or the next one
+    if (time - t0) <= (t1 - time)
+        dest .= regridded_snapshot(itp.data_handler, t0)
+    else
+        dest .= regridded_snapshot(itp.data_handler, t1)
+    end
+    return nothing
+end
+
+function evaluate!(
+    dest,
+    itp::InterpolatingTimeVaryingInput2D,
+    time,
+    ::LinearInterpolation,
+    args...;
+    kwargs...,
+)
+    # Linear interpolation is:
+    # y = y0 + (y1 - y0) * (time - t0) / (t1 - t0)
+    #
+    # Define coeff = (time - t0) / (t1 - t0)
+    #
+    # y = (1 - coeff) * y0 + coeff * y1
+
+    t0, t1 =
+        previous_time(itp.data_handler, time), next_time(itp.data_handler, time)
+    coeff = (time - t0) / (t1 - t0)
+    dest .=
+        (1 - coeff) .* regridded_snapshot(itp.data_handler, t0) .+
+        coeff .* regridded_snapshot(itp.data_handler, t1)
+    return nothing
+end
diff --git a/src/shared_utilities/interpolating_time_varying_inputs.jl b/src/shared_utilities/interpolating_time_varying_inputs.jl
index f220071d28..1c0e273166 100644
--- a/src/shared_utilities/interpolating_time_varying_inputs.jl
+++ b/src/shared_utilities/interpolating_time_varying_inputs.jl
@@ -2,24 +2,10 @@ InterpolatingTimeVaryingInput =
     Union{InterpolatingTimeVaryingInput0D, InterpolatingTimeVaryingInput2D}

 """
-    NearestNeighbor
-
-Return the value corresponding to the point closest to the input time.
-"""
-struct NearestNeighbor <: AbstractInterpolationMethod end
-
-"""
-    LinearInterpolation
-
-Perform linear interpolation between the two neighboring points.
-"""
-struct LinearInterpolation <: AbstractInterpolationMethod end
-
-"""
-    in(time, itp::InterpolatingTimeVaryingInput0D)
+    in(time, itp::InterpolatingTimeVaryingInput)

 Check if the given `time` is in the range of definition for `itp`.
""" -function Base.in(time, itp::InterpolatingTimeVaryingInput0D) +function Base.in(time, itp::InterpolatingTimeVaryingInput) return itp.range[1] <= time <= itp.range[2] end diff --git a/src/standalone/Bucket/Bucket.jl b/src/standalone/Bucket/Bucket.jl index ce3cc77e6a..64efd7ba36 100644 --- a/src/standalone/Bucket/Bucket.jl +++ b/src/standalone/Bucket/Bucket.jl @@ -11,12 +11,12 @@ using ClimaComms using ClimaLand -using ClimaLand.FileReader import ..Parameters as LP import ClimaLand.Domains: coordinates, SphericalShell using ClimaLand: AbstractAtmosphericDrivers, AbstractRadiativeDrivers, + AbstractTimeVaryingInput, liquid_precipitation, snow_precipitation, turbulent_fluxes, @@ -27,6 +27,7 @@ using ClimaLand: heaviside, PrescribedAtmosphere, add_dss_buffer_to_aux +import ClimaLand: DataHandling, TimeVaryingInputs import ClimaLand: make_update_aux, make_compute_exp_tendency, @@ -103,10 +104,9 @@ end """ PrescribedBaregroundAlbedo{FT}(α_snow::FT, - regrid_dirpath::String, surface_space::ClimaCore.Spaces.AbstractSpace; varnames = ["sw_alb"], - get_infile::Function = Bucket.bareground_albedo_dataset_path, + albedo_file_path::AbstractString = Bucket.bareground_albedo_dataset_path(), ) where{FT} An outer constructor for the PrescribedBaregroundAlbedo model which uses data @@ -116,24 +116,16 @@ This particular method can only be used with global runs. """ function PrescribedBaregroundAlbedo{FT}( α_snow::FT, - regrid_dirpath::String, surface_space::ClimaCore.Spaces.AbstractSpace; varnames = ["sw_alb"], - get_infile::Function = Bucket.bareground_albedo_dataset_path, + albedo_file_path::AbstractString = Bucket.bareground_albedo_dataset_path(), ) where {FT} if surface_space isa ClimaCore.Spaces.PointSpace error("Using an albedo map requires a global run.") end - α_bareground_data = PrescribedDataStatic{FT}( - get_infile, - regrid_dirpath, - varnames, - surface_space, - ) - # Albedo file only has one variable, so access first `varname` - varname = varnames[1] - α_bareground = SpaceVaryingInput(α_bareground_data, varname, surface_space) + α_bareground = + SpaceVaryingInput(albedo_file_path, varnames[begin], surface_space) return PrescribedBaregroundAlbedo{FT, typeof(α_bareground)}( α_snow, α_bareground, @@ -141,7 +133,7 @@ function PrescribedBaregroundAlbedo{FT}( end """ - PrescribedSurfaceAlbedo{FT, FR <: FileReader.PrescribedDataTemporal} + PrescribedSurfaceAlbedo{FT, TV <: AbstractTimeVaryingInput} <: AbstractBucketAlbedoModel An albedo model where the albedo of different surface types @@ -152,14 +144,13 @@ This albedo type changes over time according to the input file. Note that this option should only be used with global simulations, i.e. with a `ClimaLand.LSMSphericalShellDomain.` """ -struct PrescribedSurfaceAlbedo{FT, FR <: FileReader.PrescribedDataTemporal} <: +struct PrescribedSurfaceAlbedo{FT, TV <: AbstractTimeVaryingInput} <: AbstractBucketAlbedoModel{FT} - albedo_info::FR + albedo::TV end """ PrescribedSurfaceAlbedo{FT}( - regrid_dirpath::String, date_ref::Union{DateTime, DateTimeNoLeap}, t_start, Space::ClimaCore.Spaces.AbstractSpace; @@ -175,11 +166,10 @@ and download the data if it doesn't already exist on the machine. The input data file must have a time component. 
""" function PrescribedSurfaceAlbedo{FT}( - regrid_dirpath::String, date_ref::Union{DateTime, DateTimeNoLeap}, t_start, space::ClimaCore.Spaces.AbstractSpace; - get_infile = Bucket.cesm2_albedo_dataset_path, + albedo_file_path = Bucket.cesm2_albedo_dataset_path(), varname = "sw_alb", ) where {FT} # Verify inputs @@ -187,16 +177,17 @@ function PrescribedSurfaceAlbedo{FT}( error("Using an albedo map requires a global run.") end - # Construct object containing info to read in surface albedo over time - data_info = PrescribedDataTemporal{FT}( - regrid_dirpath, - get_infile, - [varname], - date_ref, + data_handler = DataHandling.DataHandler( + albedo_file_path, + varname, + space; + reference_date = date_ref, t_start, - space, ) - return PrescribedSurfaceAlbedo{FT, typeof(data_info)}(data_info) + + # Construct object containing info to read in surface albedo over time + albedo = TimeVaryingInput(data_handler) + return PrescribedSurfaceAlbedo{FT, typeof(albedo)}(albedo) end @@ -460,16 +451,24 @@ function make_update_aux(model::BucketModel{FT}) where {FT} p.bucket.R_n .= net_radiation(model.radiation, model, Y, p, t) # Surface albedo - p.bucket.α_sfc .= - next_albedo(model.parameters.albedo, model.parameters, Y, p, t) + next_albedo!( + p.bucket.α_sfc, + model.parameters.albedo, + model.parameters, + Y, + p, + t, + ) end return update_aux! end """ - next_albedo(model_albedo::PrescribedBaregroundAlbedo{FT}, parameters, Y, p, t) where {FT} + next_albedo!(next_α_sfc, + model_albedo::PrescribedBaregroundAlbedo{FT}, + parameters, Y, p, t) -Update the surface albedo for time `t`: the albedo is calculated by +Update the surface albedo for time `t`: the albedo is calculated by linearly interpolating between the albedo of snow and of the bareground surface, based on the snow water equivalent `S` relative to the parameter `S_c`. The linear interpolation is taken from Lague et al 2019. @@ -480,7 +479,8 @@ fractions that are not a heaviside function, we have a small inconsistency for 0 < σS < eps(FT) where the snow cover fraction is zero, but there is a small contribution of snow to the albedo. """ -function next_albedo( +function next_albedo!( + next_α_sfc, model_albedo::PrescribedBaregroundAlbedo{FT}, parameters, Y, @@ -490,42 +490,25 @@ function next_albedo( (; α_snow, α_bareground) = model_albedo (; σS_c) = parameters σS = max.(Y.bucket.σS, FT(0))# if σS is small and negative, clip to zero for albedo estimation - return @. ( - (1 - σS / (σS + σS_c)) * α_bareground + σS / (σS + σS_c) * α_snow - ) + @. next_α_sfc = + ((1 - σS / (σS + σS_c)) * α_bareground + σS / (σS + σS_c) * α_snow) end """ - next_albedo(model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t) where {FT} + next_albedo!(next_α_sfc, model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t) Update the surface albedo for time `t`: for a file containing surface albedo information over time, this reads in the value for time t. 
""" -function next_albedo( +function next_albedo!( + next_α_sfc, model_albedo::PrescribedSurfaceAlbedo{FT}, parameters, Y, p, t, ) where {FT} - # Get the current date from `t` - sim_info = model_albedo.albedo_info.sim_info - sim_date = to_datetime( - sim_info.date_ref + Second(round(sim_info.t_start)) + Second(round(t)), - ) - # Read next data fields if initializing or next date is closest to current time - # This maintains `all_dates[date_idx]` <= `sim_date` < `all_dates[date_idx + 1]` - if t == sim_info.t_start || - sim_date >= to_datetime(next_date_in_file(model_albedo.albedo_info)) - read_data_fields!(model_albedo.albedo_info, sim_date, axes(Y.bucket.W)) - end - # Interpolate data value to current time - return get_data_at_date( - model_albedo.albedo_info, - axes(Y.bucket.W), - "sw_alb", - sim_date, - ) + TimeVaryingInputs.evaluate!(next_α_sfc, model_albedo.albedo, t) end function ClimaLand.get_drivers(model::BucketModel) diff --git a/test/runtests.jl b/test/runtests.jl index 06f322e97e..9c4f166b89 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,8 +14,11 @@ end @safetestset "Domains module tests" begin include("shared_utilities/domains.jl") end -@safetestset "FileReader module tests" begin - include("shared_utilities/file_reader.jl") +@safetestset "FileReaders module tests" begin + include("shared_utilities/file_readers.jl") +end +@safetestset "DataHandling module tests" begin + include("shared_utilities/data_handling.jl") end @safetestset "SpaceVaryingInput module tests" begin include("shared_utilities/space_varying_inputs.jl") diff --git a/test/shared_utilities/data_handling.jl b/test/shared_utilities/data_handling.jl new file mode 100644 index 0000000000..da384f38db --- /dev/null +++ b/test/shared_utilities/data_handling.jl @@ -0,0 +1,142 @@ +#= + Unit tests for ClimaLand DataHandling module +=# + +using Dates +using Test + +import ClimaCore: Fields +import ClimaLand +import ClimaLand: DataHandling +using NCDatasets + +include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) +temporal_data_path = era5_t2m_sp_u10n_dataset_path() +static_data_path = era5_t2m_sp_u10n_static_dataset_path() + +@testset "DataHandler, TempestRegridder, static" begin + PATH = static_data_path + varname = "sp" + for FT in (Float32, Float64) + target_space = + ClimaLand.Domains.SphericalShell(; + radius = FT(6731e3), + depth = FT(1.0), + nelements = (4, 4), + npolynomial = 4, + ).space.surface + + data_handler = DataHandling.DataHandler(PATH, varname, target_space) + + @test DataHandling.available_dates(data_handler) == DateTime[] + + field = DataHandling.regridded_snapshot(data_handler) + # Most basic testing, to get a sense that we are doing things right + @test field isa Fields.Field + @test axes(field) == target_space + @test eltype(field) == FT + @test minimum(field) >= + minimum(data_handler.file_reader.dataset[varname][:, :]) + @test maximum(field) <= + maximum(data_handler.file_reader.dataset[varname][:, :]) + + close(data_handler) + end +end + +@testset "DataHandler, TempestRegridder, time data" begin + for FT in (Float32, Float64) + target_space = + ClimaLand.Domains.SphericalShell(; + radius = FT(6731e3), + depth = FT(1.0), + nelements = (4, 4), + npolynomial = 4, + ).space.surface + PATH = temporal_data_path + varname = "sp" + data_handler = DataHandling.DataHandler( + PATH, + varname, + target_space, + reference_date = Dates.DateTime(2000, 1, 1), + t_start = 0.0, + ) + + @test DataHandling.available_dates(data_handler) == + data_handler.file_reader.available_dates + 
@test data_handler.reference_date .+ + Second.(DataHandling.available_times(data_handler)) == + DataHandling.available_dates(data_handler) + + available_times = DataHandling.available_times(data_handler) + available_dates = DataHandling.available_dates(data_handler) + + # Previous time with time + @test DataHandling.previous_time( + data_handler, + available_times[10] + 1, + ) == available_times[10] + @test DataHandling.previous_time( + data_handler, + available_times[1] + 1, + ) == available_times[1] + # Previous time with date + @test DataHandling.previous_time( + data_handler, + available_dates[10] + Second(1), + ) == available_times[10] + @test DataHandling.previous_time(data_handler, available_dates[10]) == + available_times[10] + + # On node + @test DataHandling.previous_time(data_handler, available_times[10]) == + available_times[10] + @test DataHandling.previous_time(data_handler, available_times[1]) == + available_times[1] + @test DataHandling.previous_time(data_handler, available_dates[10]) == + available_times[10] + + # Next time with time + @test DataHandling.next_time(data_handler, available_times[10] + 1) == + available_times[11] + @test DataHandling.next_time(data_handler, available_times[1] + 1) == + available_times[2] + # Next time with date + @test DataHandling.next_time( + data_handler, + available_dates[10] + Second(1), + ) == available_times[11] + + # On node + @test DataHandling.next_time(data_handler, available_times[10]) == + available_times[11] + @test DataHandling.next_time(data_handler, available_dates[10]) == + available_times[11] + + # Asking for a regridded_snapshot without specifying the time + @test_throws ErrorException DataHandling.regridded_snapshot( + data_handler, + ) + + # Asking for a regridded_snapshot with time that does not exist + @test_throws ErrorException DataHandling.regridded_snapshot( + data_handler, + -1234.0, + ) + + field = + DataHandling.regridded_snapshot(data_handler, available_times[10]) + + # Most basic testing, to get a sense that we are doing things right + @test field isa Fields.Field + @test axes(field) == target_space + @test eltype(field) == FT + @test minimum(field) >= + minimum(data_handler.file_reader.dataset[varname][:, :, 10]) + @test maximum(field) <= + maximum(data_handler.file_reader.dataset[varname][:, :, 10]) + + close(data_handler) + end +end diff --git a/test/shared_utilities/file_reader.jl b/test/shared_utilities/file_reader.jl deleted file mode 100644 index 0960f9fdab..0000000000 --- a/test/shared_utilities/file_reader.jl +++ /dev/null @@ -1,716 +0,0 @@ -#= - Unit tests for ClimaLand FileReader module -=# - -using ClimaLand -using ClimaLand: Bucket, Regridder, FileReader -using ClimaCore -using ClimaCore: Fields, Meshes, Domains, Topologies, Spaces -using ClimaComms -using Test -using Dates -using NCDatasets - -# Include testing artifacts from Bucket -albedo_temporal_data = Bucket.cesm2_albedo_dataset_path -albedo_bareground_data = Bucket.bareground_albedo_dataset_path - -# Include testing artifacts from test directory -include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) -era5_t2m_sp_u10n_data = era5_t2m_sp_u10n_dataset_path -era5_t2m_sp_u10n_static_data = era5_t2m_sp_u10n_static_dataset_path - -comms_ctx = ClimaComms.SingletonCommsContext() - -# Use two separate regrid dirs to avoid duplicate filenames since files have same varname -regrid_dir_static = joinpath(pkgdir(ClimaLand), "test", "regridding") -regrid_dir_temporal = - joinpath(pkgdir(ClimaLand), "test", "regridding", "temporal") - 
-""" -Set NaN and Missing to zero (GPU compatible) -""" -function replace_nan_missing!(field::Fields.Field) - # For GPU runs, we perform the substitution on the CPU and move back to GPU - parent_without_NaN_missing = - replace(Array(parent(field)), NaN => 0, missing => 0) - ArrayType = ClimaComms.array_type(ClimaComms.device()) - parent(field) .= ArrayType(parent_without_NaN_missing) -end - -FT = Float32 -@testset "test interpol, FT = $FT" begin - # Setup - x1 = FT(0) - x2 = FT(10) - - f1 = FT(-1) - f2 = FT(1) - - # Case 1: x1 < x < x2 - x = FT(5) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == 0 - - # Case 2: x1 = x < x2 - x = FT(0) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == f1 - - # Case 3: x1 < x = x2 - x = FT(10) - @test FileReader.interpol(f1, f2, x - x1, x2 - x1) == f2 -end - -@testset "test to_datetime, FT = $FT" begin - year = 2000 - dt_noleap = DateTimeNoLeap(year) - @test FileReader.to_datetime(dt_noleap) == DateTime(year) -end - -# Add tests which use TempestRemap here - -# TempestRemap is not built on Windows because of NetCDF support limitations -# `PrescribedData` constructors use TR via a call to `hdwrite_regridfile_rll_to_cgll` -if !Sys.iswindows() - @testset "test PrescribedDataStatic construction, FT = $FT" begin - # setup for test - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - # Loop over files with one and multiple variables - for (get_infile, varnames) in [ - (albedo_bareground_data, ["sw_alb"]), - (era5_t2m_sp_u10n_static_data, ["t2m", "sp", "u10n"]), - ] - # reset regrid directory between files - isdir(regrid_dir_static) ? nothing : mkpath(regrid_dir_static) - - ps_data_spatial = FileReader.PrescribedDataStatic{FT}( - get_infile, - regrid_dir_static, - varnames, - surface_space_t, - ) - - # test that created object exists - @test @isdefined(ps_data_spatial) - @test ps_data_spatial isa FileReader.AbstractPrescribedData - - # test fields that we've passed into constructor as args - @test ps_data_spatial.file_info.infile_path == get_infile() - @test ps_data_spatial.file_info.regrid_dirpath == regrid_dir_static - @test ps_data_spatial.file_info.varnames == varnames - - # test fields which are set internally by constructor - @test ps_data_spatial.file_info.outfile_root == "static_data_cgll" - @test ps_data_spatial.file_info.all_dates == [] - @test ps_data_spatial.file_info.date_idx0 == [] - rm(regrid_dir_static; recursive = true, force = true) - end - end - - @testset "test get_data_at_date for PrescribedDataStatic, FT = $FT" begin - isdir(regrid_dir_static) ? 
nothing : mkpath(regrid_dir_static) - # test `get_data_at_date` with interpolation (default) - dummy_dates = [DateTime(1999, 1, 1)] - date_idx0 = Int[1] - - # construct space and dummy field - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain_sphere = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain_sphere, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - data_field = ones(surface_space_t) .* FT(0.5) - - # Add 2 variables to `FileInfo`, but only read/write `var1` - varnames = ["var1", "var2"] - outfile_root = "static_data_cgll" - - # write data to dummy HDF5 file, which gets read in by `get_data_at_date` - field = Regridder.write_to_hdf5( - regrid_dir_static, - outfile_root, - Dates.DateTime(0), # dummy date - data_field, - varnames[1], - comms_ctx, - ) - - # create structs manually since we aren't reading from a data file - file_info = FileReader.FileInfo( - "", # infile_path - regrid_dir_static, # regrid_dirpath - varnames, # varnames - outfile_root, # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - - prescribed_data_static = - FileReader.PrescribedDataStatic{typeof(file_info)}(file_info) - - # Read in dummy data that has been written by `write_to_hdf5` - field_out = FileReader.get_data_at_date( - prescribed_data_static, - surface_space_t, - varnames[1], - ) - @test field_out == data_field - rm(regrid_dir_static; recursive = true, force = true) - end - - @testset "test PrescribedDataTemporal construction, FT = $FT" begin - # setup for test - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - date_idx0 = Int[1] - date_ref = DateTime(1800, 1, 1) - t_start = Float64(0) - - # Loop over files with one and multiple variables - for (get_infile, varnames) in [ - (albedo_temporal_data, ["sw_alb"]), - (era5_t2m_sp_u10n_data, ["t2m", "sp", "u10n"]), - ] - # reset regrid directory between files - isdir(regrid_dir_temporal) ? 
nothing : mkpath(regrid_dir_temporal) - - ps_data_temp = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # test that created object exists - @test @isdefined(ps_data_temp) - @test ps_data_temp isa FileReader.AbstractPrescribedData - - # test fields that we've passed into constructor as args - @test ps_data_temp.file_info.regrid_dirpath == regrid_dir_temporal - @test ps_data_temp.file_info.varnames == varnames - @test ps_data_temp.file_info.date_idx0 == date_idx0 - @test ps_data_temp.sim_info.date_ref == date_ref - @test ps_data_temp.sim_info.t_start == t_start - - # test fields which are set internally by constructor - @test ps_data_temp.file_info.outfile_root == "temporal_data_cgll" - @test !isnothing(ps_data_temp.file_info.all_dates) - @test sort(collect(keys(ps_data_temp.file_states))) == - sort(varnames) - - # check varnames individually - for varname in varnames - @test ps_data_temp.file_states[varname].date_idx == date_idx0 - @test ps_data_temp.file_states[varname].date_idx == date_idx0 - @test !isnothing(ps_data_temp.file_states[varname].data_fields) - @test !isnothing( - ps_data_temp.file_states[varname].segment_length, - ) - end - rm(regrid_dir_temporal; recursive = true, force = true) - end - end - - @testset "test get_data_at_date for PrescribedDataTemporal, FT = $FT" begin - # test `get_data_at_date` with interpolation (default) - dummy_dates = - Vector(range(DateTime(1999, 1, 1); step = Day(1), length = 100)) - date_idx0 = Int[1] - date_ref = dummy_dates[Int(date_idx0[1]) + 1] - t_start = Float64(0) - date0 = date_ref + Dates.Second(t_start) - - # these values give an `interp_fraction` of 0.5 in `interpol` for ease of testing - segment_length = - [Int(2) * ((date_ref - dummy_dates[Int(date_idx0[1])]).value)] - - # construct space and dummy field - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain_sphere = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain_sphere, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - data_fields = (zeros(surface_space_t), ones(surface_space_t)) - - # Add 2 variables to `FileInfo`, but only read/write `var1` - varnames = ["var1", "var2"] - - # create structs manually since we aren't reading from a data file - file_info = FileReader.FileInfo( - "", # infile_path - "", # regrid_dirpath - varnames, # varnames - "", # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - file_state1 = FileReader.FileState( - data_fields, # data_fields - copy(date_idx0), # date_idx - segment_length, # segment_length - ) - file_state2 = FileReader.FileState( - data_fields, # data_fields - copy(date_idx0), # date_idx - segment_length, # segment_length - ) - file_states = - Dict(varnames[1] => file_state1, varnames[2] => file_state2) - sim_info = FileReader.SimInfo( - date_ref, # date_ref - t_start, # t_start - ) - - pd_args = (file_info, file_states, sim_info) - pd_type_args = ( - typeof(file_info), - typeof(varnames[1]), - typeof(file_states[varnames[1]]), - typeof(sim_info), - ) - prescribed_data = - FileReader.PrescribedDataTemporal{pd_type_args...}(pd_args...) - - # Note: we expect this to give a warning "No dates available in file..." 
- @test FileReader.get_data_at_date( - prescribed_data, - surface_space_t, - varnames[1], - date0, - ) == ones(surface_space_t) .* FT(0.5) - end - - @testset "test next_date_in_file with PrescribedDataTemporal, FT = $FT" begin - dummy_dates = - Vector(range(DateTime(1999, 1, 1); step = Day(1), length = 10)) - date_ref = dummy_dates[1] - t_start = Float64(0) - date0 = date_ref + Dates.Second(t_start) - date_idx0 = - [argmin(abs.(Dates.value(date0) .- Dates.value.(dummy_dates[:])))] - varname = "var" - - # manually create structs to avoid creating space for outer constructor - file_info = FileReader.FileInfo( - "", # infile_path - "", # regrid_dirpath - [varname], # varnames - "", # outfile_root - dummy_dates, # all_dates - date_idx0, # date_idx0 - ) - file_state = FileReader.FileState( - nothing, # data_fields - copy(date_idx0), # date_idx - Int[], # segment_length - ) - file_states = Dict(varname => file_state) - sim_info = nothing - - pd_args = (file_info, file_states, sim_info) - pd_type_args = ( - typeof(file_info), - typeof(varname), - typeof(file_states[varname]), - typeof(sim_info), - ) - prescribed_data = - FileReader.PrescribedDataTemporal{pd_type_args...}(pd_args...) - - # read in next dates, manually compare to `dummy_dates` array - idx = date_idx0[1] - next_date = date0 - for i in 1:(length(dummy_dates) - 1) - current_date = next_date - next_date = FileReader.next_date_in_file(prescribed_data) - - @test next_date == dummy_dates[idx + 1] - - prescribed_data.file_states[varname].date_idx[1] += 1 - idx = date_idx0[1] + i - end - end - - @testset "test read_data_fields! for single variable, FT = $FT" begin - # Create regridding directory - isdir(regrid_dir_temporal) ? nothing : mkpath(regrid_dir_temporal) - - get_infile = albedo_temporal_data - infile_path = get_infile() - varname = "sw_alb" - varnames = [varname] - - # Start with first date in data file - date0 = NCDataset(infile_path) do ds - ds["time"][1] - end - date0 = FileReader.to_datetime(date0) - dates = collect(date0:Day(10):(date0 + Day(100))) # includes both endpoints - date_ref = date0 - t_start = Float64(0) - - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - prescribed_data = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # Test each case (1-4) - current_fields = - Fields.zeros(FT, surface_space_t), Fields.zeros(FT, surface_space_t) - - # Use this function to reset values between test cases - function reset_ps_data(ps_data) - ps_data.file_states[varname].data_fields[1] .= current_fields[1] - ps_data.file_states[varname].data_fields[2] .= current_fields[2] - ps_data.file_states[varname].segment_length[1] = - ps_data.file_states[varname].date_idx[1] = - ps_data.file_info.date_idx0[1] = Int(1) - end - - # Case 1: test date before first date of file, and date aligned with first date of file - for date in [date0 - Day(1), date0] - reset_ps_data(prescribed_data) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!(prescribed_data, date, surface_space_t) - - # Test that both data fields store the same data - # Remove NaNs and missings before comparison - (; data_fields) = prescribed_data.file_states[varname] - - 
foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - # date_idx and date_idx0 should be unchanged - @test prescribed_data.file_states[varname].segment_length == Int[0] - @test prescribed_data.file_states[varname].date_idx[1] == - prescribed_data_copy.file_states[varname].date_idx[1] - @test prescribed_data.file_info.date_idx0[1] == - prescribed_data_copy.file_info.date_idx0[1] - end - - # Case 2: test date after dates in file - # This case should print 1 warning "this time period is after input data..." - reset_ps_data(prescribed_data) - date_after = prescribed_data.file_info.all_dates[end] + Dates.Day(1) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!( - prescribed_data, - date_after, - surface_space_t, - ) - - # Test that both data fields store the same data - @test prescribed_data.file_states[varname].segment_length == Int[0] - - # Remove NaNs and missings before comparison - (; data_fields) = prescribed_data.file_states[varname] - foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - - # Case 3: loop over simulation dates (general case) - # This case should print 3 warnings "updating data files: ..." - data_saved = [] - updating_dates = [] - - # Read in data fields over dummy times - # With the current test setup, read_data_fields! should get called 3 times - for date in dates - callback_date = FileReader.next_date_in_file(prescribed_data) - - if (date >= callback_date) - FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - push!( - data_saved, - deepcopy( - prescribed_data.file_states[varname].data_fields[1], - ), - ) - push!(updating_dates, deepcopy(date)) - end - end - - # Replace NaNs and missings for testing - (; data_fields) = prescribed_data.file_states[varname] - foreach(replace_nan_missing!, data_saved) - - # Manually read in data from HDF5 - f = prescribed_data.file_states[varname].data_fields[1] - data_manual = [similar(f), similar(f), similar(f)] - (; regrid_dirpath, outfile_root, varnames, all_dates) = - prescribed_data.file_info - for i in eachindex(data_saved) - data_manual[i] = Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - all_dates[i + 1], - varnames[1], - surface_space_t, - ) - # Replace NaNs and missings for testing comparison - replace_nan_missing!(data_manual[i]) - - @test parent(data_saved[i]) == parent(data_manual[i]) - end - - # Test that the data_saved array was modified - @test length(data_saved) > 0 - # Check that the final saved date is as expected (midmonth day of last month) - midmonth_end = - DateTime(year(dates[end]), month(dates[end]), 15, hour(dates[end])) - @test updating_dates[end] == midmonth_end - - # Case 4: everything else - reset_ps_data(prescribed_data) - prescribed_data.file_states[varname].date_idx[1] = - prescribed_data.file_info.date_idx0[1] + Int(1) - date = - prescribed_data.file_info.all_dates[prescribed_data.file_states[varname].date_idx[1]] - - Dates.Day(1) - - @test_throws ErrorException FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - # Delete regridding directory and files - rm(regrid_dir_temporal; recursive = true, force = true) - end - - @testset "test read_data_fields! for multiple variables, FT = $FT" begin - # Create regridding directory - isdir(regrid_dir_temporal) ? 
nothing : mkpath(regrid_dir_temporal) - - get_infile = era5_t2m_sp_u10n_data - infile_path = get_infile() - varnames = ["t2m", "sp", "u10n"] - - # Start with first date in data file - date0 = NCDataset(infile_path) do ds - ds["time"][1] - end - date0 = FileReader.to_datetime(date0) - # Gives dates [00:45, 01:30, 02:15, 03:00, 03:45, 04:30, 05:15, 06:00, 06:45, 07:30] - dates = collect(date0:Minute(45):(date0 + Minute(450))) # includes both endpoints - date_ref = date0 - t_start = Float64(0) - - comms_ctx = ClimaComms.SingletonCommsContext() - radius = FT(6731e3) - Nq = 4 - domain = ClimaCore.Domains.SphereDomain(radius) - mesh = Meshes.EquiangularCubedSphere(domain, 4) - topology = Topologies.Topology2D(comms_ctx, mesh) - quad = Spaces.Quadratures.GLL{Nq}() - surface_space_t = Spaces.SpectralElementSpace2D(topology, quad) - - prescribed_data = FileReader.PrescribedDataTemporal{FT}( - regrid_dir_temporal, - get_infile, - varnames, - date_ref, - t_start, - surface_space_t, - ) - - # Test each case (1-4) - current_fields = - Fields.zeros(FT, surface_space_t), Fields.zeros(FT, surface_space_t) - - # Use this function to reset values between test cases - function reset_ps_data(ps_data) - for varname in varnames - ps_data.file_states[varname].data_fields[1] .= current_fields[1] - ps_data.file_states[varname].data_fields[2] .= current_fields[2] - ps_data.file_states[varname].segment_length[1] = - ps_data.file_states[varname].date_idx[1] = - ps_data.file_info.date_idx0[1] = Int(1) - end - end - - # Case 1: test date before first date of file, and date aligned with first date of file - for date in [date0 - Day(1), date0] - reset_ps_data(prescribed_data) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!(prescribed_data, date, surface_space_t) - - # Test that both data fields store the same data - # Remove NaNs and missings before comparison - for varname in varnames - (; data_fields) = prescribed_data.file_states[varname] - - foreach(replace_nan_missing!, data_fields) - - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - # date_idx and date_idx0 should be unchanged - @test prescribed_data.file_states[varname].segment_length == - Int[0] - @test prescribed_data.file_states[varname].date_idx[1] == - prescribed_data_copy.file_states[varname].date_idx[1] - @test prescribed_data.file_info.date_idx0[1] == - prescribed_data_copy.file_info.date_idx0[1] - end - end - - # Case 2: test date after dates in file - # This case should print 1 warning "this time period is after input data..." - reset_ps_data(prescribed_data) - date_after = prescribed_data.file_info.all_dates[end] + Dates.Day(1) - prescribed_data_copy = prescribed_data - FileReader.read_data_fields!( - prescribed_data, - date_after, - surface_space_t, - ) - - for varname in varnames - # Test that both data fields store the same data - @test prescribed_data.file_states[varname].segment_length == Int[0] - @test prescribed_data.file_states[varname].data_fields[1] == - prescribed_data.file_states[varname].data_fields[2] - end - - # Case 3: loop over simulation dates (general case) - # This case should print 3 warnings "updating data files: ..." - reset_ps_data(prescribed_data) - - # Read in data fields over dummy times - # With the current test setup, read_data_fields! 
should get called 3 times - data_saved = Dict{String, Vector{Fields.Field}}( - varnames[1] => [], - varnames[2] => [], - varnames[3] => [], - ) - updating_dates = [] - for date in dates - callback_date = FileReader.next_date_in_file(prescribed_data) - - if (date >= callback_date) - FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - for varname in varnames - push!( - data_saved[varname], - deepcopy( - prescribed_data.file_states[varname].data_fields[1], - ), - ) - # Replace NaNs and missings for testing - replace_nan_missing!(data_saved[varname][end]) - end - - push!(updating_dates, callback_date) - end - end - - # Manually read in data from HDF5 - data_manual = Dict{String, Vector{Fields.Field}}( - varnames[1] => [], - varnames[2] => [], - varnames[3] => [], - ) - (; regrid_dirpath, outfile_root, varnames, all_dates) = - prescribed_data.file_info - - for i in eachindex(updating_dates) - for varname in varnames - push!( - data_manual[varname], - Regridder.read_from_hdf5( - regrid_dirpath, - outfile_root, - updating_dates[i], - varname, - surface_space_t, - ), - ) - # Replace NaNs and missings for testing comparison - replace_nan_missing!(data_manual[varname][end]) - end - end - - # Test read_data_fields and manually read data are the same - for varname in varnames - @test all( - parent(data_saved[varname]) .== parent(data_manual[varname]), - ) - end - - # Test that the data_saved array was modified - @test length(data_saved) > 0 - # Check that the final saved date is as expected (midmonth day of last month) - midmonth_end = DateTime( - year(dates[end]), - month(dates[end]), - day(dates[end]), - hour(dates[end]), - ) - @test updating_dates[end] == midmonth_end - - # Case 4: everything else - reset_ps_data(prescribed_data) - for varname in varnames - prescribed_data.file_states[varname].date_idx[1] = - prescribed_data.file_info.date_idx0[1] + Int(1) - date = - prescribed_data.file_info.all_dates[prescribed_data.file_states[varname].date_idx[1]] - - Dates.Day(1) - - @test_throws ErrorException FileReader.read_data_fields!( - prescribed_data, - date, - surface_space_t, - ) - end - # Delete regridding directory and files - rm(regrid_dir_temporal; recursive = true, force = true) - end -end - -# Delete testing directory and files -rm(regrid_dir_static; recursive = true, force = true) -rm(regrid_dir_temporal; recursive = true, force = true) diff --git a/test/shared_utilities/file_readers.jl b/test/shared_utilities/file_readers.jl new file mode 100644 index 0000000000..bbfc5cf212 --- /dev/null +++ b/test/shared_utilities/file_readers.jl @@ -0,0 +1,64 @@ +#= + Unit tests for ClimaLand FileReaders module +=# + +using Dates +using Test + +import ClimaLand +import ClimaLand: FileReaders +using NCDatasets + +# Include testing artifacts from Bucket +albedo_temporal_path = ClimaLand.Bucket.cesm2_albedo_dataset_path() +albedo_bareground_path = ClimaLand.Bucket.bareground_albedo_dataset_path() +include(joinpath(pkgdir(ClimaLand), "test", "artifacts", "artifacts.jl")) +multiple_variables_path = era5_t2m_sp_u10n_dataset_path() + +@testset "NCFileReader with time" begin + PATH = albedo_temporal_path + NCDataset(PATH) do nc + available_dates = FileReaders.read_available_dates(nc) + @test length(available_dates) == 1980 + @test available_dates[2] == DateTime(1850, 02, 14) + + ncreader = FileReaders.NCFileReader(PATH, "sw_alb") + + @test ncreader.dimensions[1] == nc["lon"][:] + @test ncreader.dimensions[2] == nc["lat"][:] + + close(ncreader) + + @test 
isempty(FileReaders.OPEN_NCFILES)
+    end
+
+    PATH = multiple_variables_path
+    NCDataset(PATH) do nc
+        ncreader_sp = FileReaders.NCFileReader(PATH, "sp")
+        ncreader_u = FileReaders.NCFileReader(PATH, "u10n")
+
+        # Test that the underlying dataset is the same
+        @test ncreader_u.dataset === ncreader_sp.dataset
+
+        # Test that we need to close all the variables to close the file
+        close(ncreader_sp)
+        @test !isempty(FileReaders.OPEN_NCFILES)
+        close(ncreader_u)
+        @test isempty(FileReaders.OPEN_NCFILES)
+    end
+end
+
+@testset "NCFileReader without time" begin
+    PATH = albedo_bareground_path
+    NCDataset(albedo_bareground_path) do nc
+        available_dates = FileReaders.read_available_dates(nc)
+        @test isempty(available_dates)
+
+        ncreader = FileReaders.NCFileReader(PATH, "sw_alb")
+
+        @test ncreader.dimensions[1] == nc["lon"][:]
+        @test ncreader.dimensions[2] == nc["lat"][:]
+
+        close(ncreader)
+    end
+end
diff --git a/test/shared_utilities/time_varying_inputs.jl b/test/shared_utilities/time_varying_inputs.jl
index 973cc51fd3..f798808e56 100644
--- a/test/shared_utilities/time_varying_inputs.jl
+++ b/test/shared_utilities/time_varying_inputs.jl
@@ -1,5 +1,8 @@
 using Test
+using Dates
+
 import ClimaLand
+import ClimaLand: DataHandling
 import ClimaLand: TimeVaryingInputs

 import ClimaCore: Domains, Geometry, Fields, Meshes, Topologies, Spaces
@@ -28,9 +31,16 @@ context = ClimaComms.SingletonCommsContext(device)

     TimeVaryingInputs.evaluate!(column_field, input, 10.0)
     @test Array(parent(column_field))[1] == fun(10.0)
+
+    # Check with args and kwargs
+    fun2 = (x, y; z) -> 2x * y * z
+    input2 = TimeVaryingInputs.TimeVaryingInput(fun2)
+
+    TimeVaryingInputs.evaluate!(column_field, input2, 10.0, 20.0; z = 30.0)
+    @test Array(parent(column_field))[1] == fun2(10.0, 20.0; z = 30.0)
 end

-@testset "Temporal TimeVaryingInput 1D" begin
+@testset "InterpolatingTimeVaryingInput0D" begin
     # Check times not sorted
     xs = [1.0, 0.0]
     ys = [1.0, 2.0]
@@ -102,3 +112,106 @@ end
         end
     end
 end
+
+@testset "InterpolatingTimeVaryingInput2D" begin
+    PATH = ClimaLand.Bucket.cesm2_albedo_dataset_path()
+    for FT in (Float32, Float64)
+        target_space =
+            ClimaLand.Domains.SphericalShell(;
+                radius = FT(6731e3),
+                depth = FT(1.0),
+                nelements = (4, 4),
+                npolynomial = 4,
+            ).space.surface
+        data_handler = DataHandling.DataHandler(
+            PATH,
+            "sw_alb",
+            target_space,
+            reference_date = Dates.DateTime(2000, 1, 1),
+            t_start = 0.0,
+        )
+
+        input_nearest = TimeVaryingInputs.TimeVaryingInput(
+            data_handler,
+            method = TimeVaryingInputs.NearestNeighbor(),
+        )
+
+        @test 0.0 in input_nearest
+        @test !(1e23 in input_nearest)
+
+        dest = Fields.zeros(target_space)
+
+        available_times = DataHandling.available_times(data_handler)
+
+        # We are testing NearestNeighbor, so we just have to check that the fields agree
+
+        # Left nearest point
+        target_time = available_times[10] + 1
+        TimeVaryingInputs.evaluate!(dest, input_nearest, target_time)
+
+        # We use isequal to handle NaNs
+        @test isequal(
+            Array(parent(dest)),
+            Array(
+                parent(
+                    DataHandling.regridded_snapshot(
+                        data_handler,
+                        available_times[10],
+                    ),
+                ),
+            ),
+        )
+
+        # Right nearest point
+        target_time = available_times[9] - 1
+        TimeVaryingInputs.evaluate!(dest, input_nearest, target_time)
+
+        @test isequal(
+            Array(parent(dest)),
+            Array(
+                parent(
+                    DataHandling.regridded_snapshot(
+                        data_handler,
+                        available_times[9],
+                    ),
+                ),
+            ),
+        )
+
+        # On node
+        target_time = available_times[11]
+        TimeVaryingInputs.evaluate!(dest, input_nearest, target_time)
+
+        @test isequal(
+            parent(dest),
parent( + DataHandling.regridded_snapshot( + data_handler, + available_times[11], + ), + ), + ) + + # Now testing LinearInterpolation + input_linear = TimeVaryingInputs.TimeVaryingInput(data_handler) + + left_value = + DataHandling.regridded_snapshot(data_handler, available_times[10]) + right_value = + DataHandling.regridded_snapshot(data_handler, available_times[11]) + + target_time = available_times[10] + 30 + left_time = available_times[10] + right_time = available_times[11] + + TimeVaryingInputs.evaluate!(dest, input_linear, target_time) + + expected = Fields.zeros(target_space) + expected .= + left_value .+ + (target_time - left_time) / (right_time - left_time) .* + (right_value .- left_value) + + @test isapprox(parent(dest), parent(expected), nans = true) + end +end diff --git a/test/standalone/Bucket/albedo_types.jl b/test/standalone/Bucket/albedo_types.jl index 93a54d18a1..cdc3b5580f 100644 --- a/test/standalone/Bucket/albedo_types.jl +++ b/test/standalone/Bucket/albedo_types.jl @@ -7,8 +7,7 @@ import ClimaParams as CP using Dates using NCDatasets -using ClimaLand.Regridder: read_from_hdf5 -using ClimaLand.FileReader: next_date_in_file, to_datetime, get_data_at_date +using ClimaLand.Regridders: read_from_hdf5 using ClimaLand.Bucket: BucketModel, BucketModelParameters, @@ -16,7 +15,7 @@ using ClimaLand.Bucket: PrescribedSurfaceAlbedo, bareground_albedo_dataset_path, cesm2_albedo_dataset_path, - next_albedo + next_albedo! using ClimaLand.Domains: coordinates, Column, SphericalShell using ClimaLand: initialize, @@ -24,7 +23,11 @@ using ClimaLand: make_set_initial_cache, PrescribedAtmosphere, PrescribedRadiativeFluxes, - TimeVaryingInput + DataHandling, + Regridders, + TimeVaryingInput, + SpaceVaryingInput + # Bucket model parameters import ClimaLand @@ -55,11 +58,6 @@ function replace_nan_missing!(field::Fields.Field) end FT = Float32 -# Use two separate regrid dirs to avoid duplicate filenames since files have same varname -regrid_dir_static = joinpath(pkgdir(ClimaLand), "test", "static") -regrid_dir_temporal = joinpath(pkgdir(ClimaLand), "test", "temporal") -isdir(regrid_dir_static) ? nothing : mkpath(regrid_dir_static) -isdir(regrid_dir_temporal) ? nothing : mkpath(regrid_dir_temporal) @testset "Test next_albedo for PrescribedBaregroundAlbedo, from a function, FT = $FT" begin # set up each argument for function call @@ -88,7 +86,9 @@ isdir(regrid_dir_temporal) ? 
nothing : mkpath(regrid_dir_temporal) Y_σS / (Y_σS + param_σS_c) * a_α_snow ) - @test next_albedo(albedo, parameters, Y, p, FT(0)) == next_alb_manual + next_albedo!(p.bucket.α_sfc, albedo, parameters, Y, p, FT(0)) + + @test p.bucket.α_sfc == next_alb_manual end # Add tests which use TempestRemap here - @@ -104,8 +104,7 @@ if !Sys.iswindows() # set up each argument for function call α_snow = FT(0.8) - albedo = - PrescribedBaregroundAlbedo{FT}(α_snow, regrid_dir_static, space) + albedo = PrescribedBaregroundAlbedo{FT}(α_snow, space) σS_c = FT(0.2) parameters = (; σS_c = σS_c) @@ -126,7 +125,8 @@ if !Sys.iswindows() Y_σS / (Y_σS + param_σS_c) * a_α_snow ) - @test next_albedo(albedo, parameters, Y, p, FT(0)) == next_alb_manual + next_albedo!(p.bucket.α_sfc, albedo, parameters, Y, p, FT(0)) + @test p.bucket.α_sfc == next_alb_manual end @testset "Test next_albedo for PrescribedSurfaceAlbedo, FT = $FT" begin @@ -136,24 +136,20 @@ if !Sys.iswindows() surface_coords = Fields.coordinate_field(space) infile_path = cesm2_albedo_dataset_path() - date_ref = to_datetime(NCDataset(infile_path, "r") do ds + date_ref_noleap = NCDataset(infile_path, "r") do ds ds["time"][1] - end) + end + date_ref = CFTime.reinterpret(DateTime, date_ref_noleap) t_start = Float64(0) - albedo = PrescribedSurfaceAlbedo{FT}( - regrid_dir_temporal, - date_ref, - t_start, - space, - ) + albedo = PrescribedSurfaceAlbedo{FT}(date_ref, t_start, space) Y = (; bucket = (; W = Fields.zeros(space))) p = (; bucket = (; α_sfc = Fields.zeros(space))) # set up for manual data reading varname = "sw_alb" - file_dates = albedo.albedo_info.file_info.all_dates + file_dates = DataHandling.available_dates(albedo.albedo.data_handler) new_date = date_ref + Second(t_start) t_curr = t_start @@ -161,12 +157,13 @@ if !Sys.iswindows() @assert new_date == file_dates[i] # manually read in data at this date (not testing interpolation) - field = - get_data_at_date(albedo.albedo_info, space, varname, new_date) - albedo_next = next_albedo(albedo, (;), Y, p, t_curr) - replace_nan_missing!(albedo_next) - replace_nan_missing!(field) - @test albedo_next == field + field = DataHandling.regridded_snapshot( + albedo.albedo.data_handler, + new_date, + ) + + next_albedo!(p.bucket.α_sfc, albedo, (;), Y, p, t_curr) + @test p.bucket.α_sfc == field # Update manual date to match next date in file dt = Second(file_dates[i + 1] - file_dates[i]) @@ -179,8 +176,6 @@ if !Sys.iswindows() earth_param_set = LP.LandParameters(FT) varname = "sw_alb" path = bareground_albedo_dataset_path() - regrid_dirpath = joinpath(pkgdir(ClimaLand), "test/albedo_tmpfiles/") - mkpath(regrid_dirpath) α_snow = FT(0.8) σS_c = FT(0.2) W_f = FT(0.15) @@ -203,14 +198,10 @@ if !Sys.iswindows() for bucket_domain in bucket_domains if bucket_domain isa SphericalShell surface_space = bucket_domain.space.surface - albedo = PrescribedBaregroundAlbedo{FT}( - α_snow, - regrid_dirpath, - surface_space, - ) + albedo = PrescribedBaregroundAlbedo{FT}(α_snow, surface_space) # Radiation - ref_time = DateTime(2005) + ref_time = DateTime(2005, 1, 15, 12) SW_d = (t) -> 0.0 LW_d = (t) -> 5.67e-8 * 280.0^4.0 bucket_rad = PrescribedRadiativeFluxes( @@ -256,59 +247,25 @@ if !Sys.iswindows() set_initial_cache!(p, Y, FT(0.0)) # Read in data manually - outfile_root = "static_data_cgll" comms_ctx = ClimaComms.context(surface_space) - field = read_from_hdf5( - regrid_dirpath, - outfile_root, - Dates.DateTime(0), # dummy date - varname, - surface_space, - ) - replace_nan_missing!(field) - data_manual = field - - @test p.bucket.α_sfc 
== data_manual + α_bareground = SpaceVaryingInput(path, varname, surface_space) + @test p.bucket.α_sfc == α_bareground else surface_space = bucket_domain.space.surface @test_throws "Using an albedo map requires a global run." PrescribedBaregroundAlbedo{ FT, }( α_snow, - regrid_dirpath, surface_space, ) end end - rm(regrid_dirpath, recursive = true) - end - - @testset "Test PrescribedSurfaceAlbedo error with static map, FT = $FT" begin - get_infile = bareground_albedo_dataset_path - date_ref = Dates.DateTime(1900, 1, 1) - t_start = Float64(0) - - # Test error for non-time varying data - domain = create_domain_2d(FT) - space = domain.space.surface - - @test_throws "Using a temporal albedo map requires data with time dimension." PrescribedSurfaceAlbedo{ - FT, - }( - regrid_dir_temporal, - date_ref, - t_start, - space, - get_infile = get_infile, - ) end @testset "Test PrescribedSurfaceAlbedo - albedo from map over time, FT = $FT" begin earth_param_set = LP.LandParameters(FT) varname = "sw_alb" infile_path = cesm2_albedo_dataset_path() - regrid_dirpath = joinpath(pkgdir(ClimaLand), "test/albedo_tmpfiles/") - mkpath(regrid_dirpath) σS_c = FT(0.2) W_f = FT(0.15) @@ -319,9 +276,10 @@ if !Sys.iswindows() init_temp(z::FT, value::FT) where {FT} = FT(value) t_start = Float64(0) - file_dates = to_datetime(NCDataset(infile_path, "r") do ds + file_dates_noleap = NCDataset(infile_path, "r") do ds ds["time"][:] - end) + end + file_dates = CFTime.reinterpret.(Ref(DateTime), file_dates_noleap) date_ref = file_dates[1] bucket_domains = [ @@ -337,14 +295,10 @@ if !Sys.iswindows() for bucket_domain in bucket_domains space = bucket_domain.space.surface if bucket_domain isa SphericalShell - albedo_model = PrescribedSurfaceAlbedo{FT}( - regrid_dirpath, - date_ref, - t_start, - space, - ) + albedo_model = + PrescribedSurfaceAlbedo{FT}(date_ref, t_start, space) # Radiation - ref_time = DateTime(2005) + ref_time = DateTime(2005, 1, 15, 12) SW_d = (t) -> 0 LW_d = (t) -> 5.67e-8 * 280.0^4.0 bucket_rad = PrescribedRadiativeFluxes( @@ -360,7 +314,7 @@ if !Sys.iswindows() q_atmos = (t) -> 0.0 # no atmos water h_atmos = FT(1e-8) P_atmos = (t) -> 101325 - ref_time = DateTime(2005) + ref_time = DateTime(2005, 1, 15, 12) bucket_atmos = PrescribedAtmosphere( TimeVaryingInput(precip), TimeVaryingInput(precip), @@ -394,15 +348,11 @@ if !Sys.iswindows() Y.bucket.σS .= 0.0 set_initial_cache! = make_set_initial_cache(model) set_initial_cache!(p, Y, FT(0.0)) - data_manual = get_data_at_date( - albedo_model.albedo_info, - model.domain.space.surface, - varname, + data_manual = DataHandling.regridded_snapshot( + albedo_model.albedo.data_handler, date_ref, ) - # If there are any NaNs in the input data, replace them so we can compare results - replace_nan_missing!(p.bucket.α_sfc) - replace_nan_missing!(data_manual) + @test p.bucket.α_sfc == data_manual update_aux! = make_update_aux(model) @@ -412,14 +362,11 @@ if !Sys.iswindows() @assert new_date == file_dates[i] update_aux!(p, Y, t_curr) - data_manual = get_data_at_date( - albedo_model.albedo_info, - model.domain.space.surface, - varname, - file_dates[i], + data_manual = DataHandling.regridded_snapshot( + albedo_model.albedo.data_handler, + new_date, ) - replace_nan_missing!(p.bucket.α_sfc) - replace_nan_missing!(data_manual) + @test p.bucket.α_sfc == data_manual # Update manual date to match next date in file @@ -431,17 +378,11 @@ if !Sys.iswindows() @test_throws "Using an albedo map requires a global run." 
PrescribedSurfaceAlbedo{ FT, }( - regrid_dirpath, date_ref, t_start, space, ) end end - rm(regrid_dirpath, recursive = true) end end - -# Delete testing directory and files -rm(regrid_dir_static; recursive = true, force = true) -rm(regrid_dir_temporal; recursive = true, force = true)
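
For reference, a minimal end-to-end sketch of the input pipeline introduced by this diff (illustrative only, not part of the patch; `path` and `space` are placeholders for a NetCDF file path and a ClimaCore surface space):

```julia
import ClimaLand: DataHandling, TimeVaryingInputs, SpaceVaryingInput
import ClimaCore: Fields
using Dates

# Static (time-independent) data: read, regrid, and return a Field in one call.
α_bareground = SpaceVaryingInput(path, "sw_alb", space)

# Time-dependent data: wrap a DataHandler, then interpolate in time on demand.
data_handler = DataHandling.DataHandler(
    path,
    "sw_alb",
    space;
    reference_date = DateTime(2000, 1, 1),
    t_start = 0.0,
)
input = TimeVaryingInputs.TimeVaryingInput(
    data_handler;
    method = TimeVaryingInputs.LinearInterpolation(),
)

# Fill `dest` in place with the field interpolated to simulation time t = 86400 s.
dest = Fields.zeros(space)
TimeVaryingInputs.evaluate!(dest, input, 86400.0)
```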