From 3f5daf6b7f9bc89d335523efa88ebcc1958e7922 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Thu, 18 Apr 2024 18:17:28 -0700 Subject: [PATCH] Use ClimaDiagnostics.jl --- .buildkite/pipeline.yml | 8 - Project.toml | 2 + docs/Manifest.toml | 30 +- docs/Project.toml | 1 + docs/src/diagnostics.md | 101 +-- examples/Manifest.toml | 40 +- examples/Project.toml | 3 +- perf/Manifest.toml | 30 +- perf/Project.toml | 1 + perf/benchmark_netcdf_io.jl | 123 --- perf/flame.jl | 12 +- post_processing/ci_plots.jl | 20 +- src/callbacks/callback_helpers.jl | 8 +- src/callbacks/get_callbacks.jl | 94 +-- src/diagnostics/Diagnostics.jl | 14 +- src/diagnostics/default_diagnostics.jl | 125 ++- src/diagnostics/diagnostic.jl | 738 +---------------- src/diagnostics/diagnostics_utils.jl | 134 --- src/diagnostics/hdf5_writer.jl | 71 -- src/diagnostics/netcdf_writer.jl | 784 ------------------ src/diagnostics/reduction_identities.jl | 19 - .../standard_diagnostic_frequencies.jl | 188 +++-- src/diagnostics/writers.jl | 11 - src/solver/solve.jl | 2 +- src/solver/type_getters.jl | 81 +- 25 files changed, 348 insertions(+), 2292 deletions(-) delete mode 100644 perf/benchmark_netcdf_io.jl delete mode 100644 src/diagnostics/diagnostics_utils.jl delete mode 100644 src/diagnostics/hdf5_writer.jl delete mode 100644 src/diagnostics/netcdf_writer.jl delete mode 100644 src/diagnostics/reduction_identities.jl delete mode 100644 src/diagnostics/writers.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0a78975de36..bd25a1d37aa 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -820,14 +820,6 @@ steps: slurm_mem: 24GB slurm_cpus_per_task: 8 - # Flame graphs - - label: ":fire: Flame graph: perf target (IO)" - command: > - julia --color=yes --project=perf perf/benchmark_netcdf_io.jl - artifact_paths: "flame_perf_io/*.html" - agents: - slurm_gpus: 1 - - label: ":fire: Flame graph: perf target (default)" command: > julia --color=yes --project=perf perf/flame.jl diff --git a/Project.toml b/Project.toml index 12971725efe..64f32111ec3 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" AtmosphericProfilesLibrary = "86bc3604-9858-485a-bdbe-831ec50de11d" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" +ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" @@ -53,6 +54,7 @@ AtmosphericProfilesLibrary = "0.1" CPUSummary = "0.2" ClimaComms = "0.5" ClimaCore = "0.13" +ClimaDiagnostics = "0.1" ClimaParams = "0.10.4" ClimaTimeSteppers = "0.7.18" ClimaUtilities = "0.1.3" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index fbf51b8f85a..d40ec2d5ff9 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.2" manifest_format = "2.0" -project_hash = "de545a9981288a0945908b937b51bff80bfd4b8f" +project_hash = "64afcb8dacf7a673d0068ba1a72602801e8d529a" [[deps.ADTypes]] git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24" @@ -90,14 +90,15 @@ version = "1.1.1" [[deps.ArrayInterface]] deps = ["Adapt", "LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "44691067188f6bd1b2289552a23e4b7572f4528d" +git-tree-sha1 = "133a240faec6e074e07c31ee75619c90544179cf" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "7.9.0" +version = "7.10.0" [deps.ArrayInterface.extensions] ArrayInterfaceBandedMatricesExt = "BandedMatrices" ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" ArrayInterfaceChainRulesExt = "ChainRules" ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" ArrayInterfaceReverseDiffExt = "ReverseDiff" @@ -108,6 +109,7 @@ version = "7.9.0" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -257,9 +259,9 @@ version = "1.0.1+0" [[deps.CUDA]] deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics"] -git-tree-sha1 = "3dcab8a2c18ca319ea15a41d90e9528b8e93894a" +git-tree-sha1 = "dd1c682b372b6791b69f6823afe364fc92a0146c" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "5.3.0" +version = "5.3.1" weakdeps = ["ChainRulesCore", "SpecialFunctions"] [deps.CUDA.extensions] @@ -319,7 +321,7 @@ weakdeps = ["SparseArrays"] ChainRulesCoreSparseArraysExt = "SparseArrays" [[deps.ClimaAtmos]] -deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] +deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" version = "0.23.0" @@ -340,6 +342,12 @@ weakdeps = ["Krylov"] [deps.ClimaCore.extensions] KrylovExt = "Krylov" +[[deps.ClimaDiagnostics]] +deps = ["Accessors", "ClimaComms", "ClimaCore", "Dates", "NCDatasets", "SciMLBase"] +git-tree-sha1 = "fee70c0ba0e056565e8dd71cc4ac55f25e606a74" +uuid = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" +version = "0.1.1" + [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] git-tree-sha1 = "528aadfaae6f447df3879eab224625ecceec5982" @@ -353,10 +361,10 @@ uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" version = "0.7.19" [[deps.ClimaUtilities]] -deps = ["Artifacts", "CFTime", "Dates"] -git-tree-sha1 = "d1b30a07829248b325ffffbb739d0f6fe79dfe7c" +deps = ["Artifacts", "Dates"] +git-tree-sha1 = "66b13d461cabc5e9265c9715dba512aea0e1e67f" uuid = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" -version = "0.1.3" +version = "0.1.4" [deps.ClimaUtilities.extensions] DataHandlingExt = ["ClimaCore", "NCDatasets"] @@ -1765,9 +1773,9 @@ version = "0.5.12" [[deps.Pango_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl"] -git-tree-sha1 = "526f5a03792669e4187e584e8ec9d534248ca765" +git-tree-sha1 = "cb5a2ab6763464ae0f19c86c56c63d4a2b0f5bda" uuid = "36c8627f-9965-5494-a995-c6b170f724f3" -version = "1.52.1+0" +version = "1.52.2+0" [[deps.Parameters]] deps = ["OrderedCollections", "UnPack"] diff --git a/docs/Project.toml b/docs/Project.toml index 232eb5d8b84..adb8c18ff48 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,6 +3,7 @@ ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717" ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c" +ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" diff --git a/docs/src/diagnostics.md b/docs/src/diagnostics.md index 979aa6954b8..6eccb7bccbd 100644 --- a/docs/src/diagnostics.md +++ b/docs/src/diagnostics.md @@ -84,105 +84,8 @@ writers. ### The low-level interface -Diagnostics are computed and output through callbacks to the main integrator. -`ClimaAtmos` produces the list of callbacks from a ordered list of -`ScheduledDiagnostic`s. These callbacks are orchestrated by a callback -`orchestrate_diagnostics` that runs at the end of every step and calls all the -diagnostic callbacks that are scheduled to be run at that step. - -A `ScheduledDiagnostic` is an instruction on how to compute and output a given -`DiagnosticVariable` (see below), along with specific choices regarding -reductions, compute/output frequencies, and so on. It can be point-wise in space -and time, or can be the result of a reduction in a period that is defined by -`output_every` (e.g., the daily average temperature). - -More specifically, a `ScheduledDiagnostic` contains the following pieces of data - -- `variable`: The diagnostic variable that has to be computed and output. -- `output_every`: Frequency of how often to save the results to disk. If - `output_every` is non-positive, only output at the first time step. -- `output_writer`: Function that controls out to save the computed diagnostic - variable to disk. -- `reduction_time_func`: If not `nothing`, the `ScheduledDiagnostic` receives an - area of scratch space `acc` where to accumulate partial results. Then, at - every `compute_every`, `reduction_time_func` is computed between the - previously stored value in `acc` and the new value. This implements a running - reduction. For example, if `reduction_time_func = max`, the space `acc` will - hold the running maxima of the diagnostic. `acc` is reset after output. -- `reduction_space_func`: NOT IMPLEMENTED YET -- `compute_every`: Run the computations every `compute_every`. This is not - particularly useful for point-wise diagnostics, where we enforce that - `compute_every` = `output_every`. `compute_every` has to evenly divide - `output_every`. -- `pre_output_hook!`: Function that has to be run before saving to disk for - reductions (mostly used to implement averages). The function - `pre_output_hook!` is called with two arguments: the value accumulated during - the reduction, and the number of times the diagnostic was computed from the - last time it was output. -- `output_short_name`: A descriptive name that can be used by the - `output_writer`. If not provided, a default one is generated. It has to be - unique. -- `output_long_name`: A descriptive name that can be used by the `output_writer` - as attribute. If not provided, a default one is generated. - -To implement operations like the arithmetic average, the `reduction_time_func` -has to be chosen as `+`, and a `pre_output_hook!` that renormalize `acc` by the -number of samples has to be provided. `pre_output_hook!` should mutate the -accumulator in place. The return value of `pre_output_hook!` is discarded. An -example of `pre_output_hook!` to compute the arithmetic average is -`pre_output_hook!(acc, N) = @. acc = acc / N`. `ClimaAtmos` provides an alias to -the function needed to compute averages `ClimaAtmos.average_pre_output_hook!`. - -For custom reductions, it is necessary to also specify the identity of operation -by defining a new method to `identity_of_reduction`. `identity_of_reduction` is -a function that takes a `op` argument, where `op` is the operation for which we -want to define the identity. For instance, for the `max`, -`identity_of_reduction` would be `identity_of_reduction(::typeof{max}) = -Inf`. -The identities known to `ClimaAtmos` are defined in the -`diagnostics/reduction_identities.jl` file. The identity is needed to ensure -that we have a neutral state for the accumulators that are used in the -reductions. - -A key entry in a `ScheduledDiagnostic` is the `output_writer`, the function -responsible for saving the output to disk. `output_writer` is called with three -arguments: the value that has to be output, the `ScheduledDiagnostic`, and the -integrator. Internally, the integrator contains extra information (such as the -current timestep), so the `output_writer` can implement arbitrarily complex -behaviors. It is responsibility of the `output_writer` to properly use the -provided information for meaningful output. `ClimaAtmos` provides functions that -return `output_writer`s for standard operations. The main one is currently -`HDF5Writer`, which should be enough for most use cases. To use it, just -initialize a `ClimaAtmos.HDF5Writer` object with your choice of configuration -and pass it as a `output_writer` argument to the `ScheduledDiagnostic`. More -information about the options supported by `ClimaAtmos.HDF5Writer` is available -in its constructor. - -There are two flavors of `ScheduledDiagnostic`s: -`ScheduledDiagnosticIterations`, and `ScheduledDiagnosticTime`. The main -difference between the two is the domain of definition of their frequencies, -which is measured in timesteps for the first one, and in seconds for the other -one. `ScheduledDiagnosticTime`s offer a more natural way to set up physically -meaningful reductions (e.g., we want a daily average). However, it is not clear -what to do when the period does not line up with the timestep. What is the -expected behavior when we want a daily average but our timestep is of 10 hours? -There are multiple possible answer to this question. In `ClimaAtmos`, we enforce -that all the periods are multiples of the timestep. With this restriction, a -`ScheduledDiagnosticTime` can be translated to a -`ScheduledDiagnosticIterations`, where the problem is perfectly represented (in -this sense, one can think of `ScheduledDiagnosticIterations` as being as -internal representation and as `ScheduledDiagnosticTime` being the user-facing -one). - -`ScheduledDiagnosticTime` behave like as `ScheduledDiagnosticIterations`, with -the exception that they can take a special value for `compute_every`, which can -be set to `:timestep` (the symbol) to ensure that the diagnostic is computed at -the end of every integration step. This is particularly convenient when defining -default diagnostics, as they should be largely independent on the choice of the -specific simulation being run (and its timestep). - -Given a timestep `dt`, a `ScheduledDiagnosticIterations` can be obtained from a -`ScheduledDiagnosticTime` `sd` simply by calling -``ScheduledDiagnosticIterations(sd, dt)`. +Check out the documentation for the `ClimaDiagnostics` to find more information +about the low level interface. ## The NetCDF output diff --git a/examples/Manifest.toml b/examples/Manifest.toml index 824442c83f1..5f480d59594 100644 --- a/examples/Manifest.toml +++ b/examples/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.2" manifest_format = "2.0" -project_hash = "60f53d07d79df5008ec331956e3787f67e499da0" +project_hash = "6787473a7399a92137a6bc5c4e0ab1f75dd24605" [[deps.ADTypes]] git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24" @@ -85,14 +85,15 @@ version = "1.1.1" [[deps.ArrayInterface]] deps = ["Adapt", "LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "44691067188f6bd1b2289552a23e4b7572f4528d" +git-tree-sha1 = "133a240faec6e074e07c31ee75619c90544179cf" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "7.9.0" +version = "7.10.0" [deps.ArrayInterface.extensions] ArrayInterfaceBandedMatricesExt = "BandedMatrices" ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" ArrayInterfaceChainRulesExt = "ChainRules" ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" ArrayInterfaceReverseDiffExt = "ReverseDiff" @@ -103,6 +104,7 @@ version = "7.9.0" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -235,9 +237,9 @@ version = "1.0.1+0" [[deps.CUDA]] deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics"] -git-tree-sha1 = "3dcab8a2c18ca319ea15a41d90e9528b8e93894a" +git-tree-sha1 = "dd1c682b372b6791b69f6823afe364fc92a0146c" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "5.3.0" +version = "5.3.1" weakdeps = ["ChainRulesCore", "SpecialFunctions"] [deps.CUDA.extensions] @@ -298,16 +300,20 @@ weakdeps = ["SparseArrays"] [[deps.ClimaAnalysis]] deps = ["NCDatasets", "OrderedCollections", "Statistics"] -git-tree-sha1 = "13af105a344bb87c6747e57ce0abc55294906fbd" +git-tree-sha1 = "1cc42fc6f284dd721051aca33efdba8676ab2617" uuid = "29b5916a-a76c-4e73-9657-3c8fd22e65e6" -version = "0.4.1" -weakdeps = ["CairoMakie"] +version = "0.5.1" [deps.ClimaAnalysis.extensions] CairoMakieExt = "CairoMakie" + GeoMakieExt = "GeoMakie" + + [deps.ClimaAnalysis.weakdeps] + CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" + GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" [[deps.ClimaAtmos]] -deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] +deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" version = "0.23.0" @@ -358,6 +364,12 @@ git-tree-sha1 = "76e6513438a5a888cf548f78b6e71311e370d626" uuid = "c8b6d40d-e815-466f-95ae-c48aefa668fa" version = "0.7.5" +[[deps.ClimaDiagnostics]] +deps = ["Accessors", "ClimaComms", "ClimaCore", "Dates", "NCDatasets", "SciMLBase"] +git-tree-sha1 = "fee70c0ba0e056565e8dd71cc4ac55f25e606a74" +uuid = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" +version = "0.1.1" + [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] git-tree-sha1 = "528aadfaae6f447df3879eab224625ecceec5982" @@ -371,10 +383,10 @@ uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" version = "0.7.19" [[deps.ClimaUtilities]] -deps = ["Artifacts", "CFTime", "Dates"] -git-tree-sha1 = "d1b30a07829248b325ffffbb739d0f6fe79dfe7c" +deps = ["Artifacts", "Dates"] +git-tree-sha1 = "66b13d461cabc5e9265c9715dba512aea0e1e67f" uuid = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" -version = "0.1.3" +version = "0.1.4" weakdeps = ["Adapt", "ClimaComms", "ClimaCore", "ClimaCoreTempestRemap", "Interpolations", "NCDatasets"] [deps.ClimaUtilities.extensions] @@ -1788,9 +1800,9 @@ version = "0.5.12" [[deps.Pango_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl"] -git-tree-sha1 = "526f5a03792669e4187e584e8ec9d534248ca765" +git-tree-sha1 = "cb5a2ab6763464ae0f19c86c56c63d4a2b0f5bda" uuid = "36c8627f-9965-5494-a995-c6b170f724f3" -version = "1.52.1+0" +version = "1.52.2+0" [[deps.Parameters]] deps = ["OrderedCollections", "UnPack"] diff --git a/examples/Project.toml b/examples/Project.toml index 5d1443c8896..350680e24a7 100644 --- a/examples/Project.toml +++ b/examples/Project.toml @@ -15,6 +15,7 @@ ClimaCorePlots = "cf7c7e5a-b407-4c48-9047-11a94a308626" ClimaCoreSpectra = "c2caaa1d-32ae-4754-ba0d-80e7561362e9" ClimaCoreTempestRemap = "d934ef94-cdd4-4710-83d6-720549644b70" ClimaCoreVTK = "c8b6d40d-e815-466f-95ae-c48aefa668fa" +ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c" ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" @@ -67,7 +68,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] BlockArrays = "0.16" CairoMakie = "0.10, 0.11" -ClimaAnalysis = "0.4" +ClimaAnalysis = "0.5.1" ClimaCoreMakie = "0.3, 0.4" ClimaCorePlots = "0.2" ClimaCoreSpectra = "0.1" diff --git a/perf/Manifest.toml b/perf/Manifest.toml index 19a7196d1a2..71798b9d210 100644 --- a/perf/Manifest.toml +++ b/perf/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.2" manifest_format = "2.0" -project_hash = "3c0445472b963384eef9349d23a8bbaa8b8241df" +project_hash = "2650859be5f5154a5419155572d18ca45a89637f" [[deps.ADTypes]] git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24" @@ -85,14 +85,15 @@ version = "1.1.1" [[deps.ArrayInterface]] deps = ["Adapt", "LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "44691067188f6bd1b2289552a23e4b7572f4528d" +git-tree-sha1 = "133a240faec6e074e07c31ee75619c90544179cf" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "7.9.0" +version = "7.10.0" [deps.ArrayInterface.extensions] ArrayInterfaceBandedMatricesExt = "BandedMatrices" ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" ArrayInterfaceChainRulesExt = "ChainRules" ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" ArrayInterfaceReverseDiffExt = "ReverseDiff" @@ -103,6 +104,7 @@ version = "7.9.0" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -246,9 +248,9 @@ version = "1.0.1+0" [[deps.CUDA]] deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics"] -git-tree-sha1 = "3dcab8a2c18ca319ea15a41d90e9528b8e93894a" +git-tree-sha1 = "dd1c682b372b6791b69f6823afe364fc92a0146c" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "5.3.0" +version = "5.3.1" weakdeps = ["ChainRulesCore", "SpecialFunctions"] [deps.CUDA.extensions] @@ -322,7 +324,7 @@ version = "0.5.1" GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" [[deps.ClimaAtmos]] -deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] +deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "DiffEqCallbacks", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" version = "0.23.0" @@ -373,6 +375,12 @@ git-tree-sha1 = "76e6513438a5a888cf548f78b6e71311e370d626" uuid = "c8b6d40d-e815-466f-95ae-c48aefa668fa" version = "0.7.5" +[[deps.ClimaDiagnostics]] +deps = ["Accessors", "ClimaComms", "ClimaCore", "Dates", "NCDatasets", "SciMLBase"] +git-tree-sha1 = "fee70c0ba0e056565e8dd71cc4ac55f25e606a74" +uuid = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" +version = "0.1.1" + [[deps.ClimaParams]] deps = ["DocStringExtensions", "TOML", "Test"] git-tree-sha1 = "528aadfaae6f447df3879eab224625ecceec5982" @@ -386,10 +394,10 @@ uuid = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" version = "0.7.19" [[deps.ClimaUtilities]] -deps = ["Artifacts", "CFTime", "Dates"] -git-tree-sha1 = "d1b30a07829248b325ffffbb739d0f6fe79dfe7c" +deps = ["Artifacts", "Dates"] +git-tree-sha1 = "66b13d461cabc5e9265c9715dba512aea0e1e67f" uuid = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513" -version = "0.1.3" +version = "0.1.4" weakdeps = ["Adapt", "ClimaComms", "ClimaCore", "ClimaCoreTempestRemap", "Interpolations", "NCDatasets"] [deps.ClimaUtilities.extensions] @@ -1850,9 +1858,9 @@ version = "0.5.12" [[deps.Pango_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl"] -git-tree-sha1 = "526f5a03792669e4187e584e8ec9d534248ca765" +git-tree-sha1 = "cb5a2ab6763464ae0f19c86c56c63d4a2b0f5bda" uuid = "36c8627f-9965-5494-a995-c6b170f724f3" -version = "1.52.1+0" +version = "1.52.2+0" [[deps.Parameters]] deps = ["OrderedCollections", "UnPack"] diff --git a/perf/Project.toml b/perf/Project.toml index b3e8aa8ac69..5c5c969a055 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -11,6 +11,7 @@ ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6" ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" +ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" ClimaCoreMakie = "908f55d8-4145-4867-9c14-5dad1a479e4d" ClimaCorePlots = "cf7c7e5a-b407-4c48-9047-11a94a308626" ClimaCoreSpectra = "c2caaa1d-32ae-4754-ba0d-80e7561362e9" diff --git a/perf/benchmark_netcdf_io.jl b/perf/benchmark_netcdf_io.jl deleted file mode 100644 index 6beca1185a3..00000000000 --- a/perf/benchmark_netcdf_io.jl +++ /dev/null @@ -1,123 +0,0 @@ -import ClimaAtmos as CA -import ClimaAtmos.Diagnostics as CAD -import ClimaComms -import Profile, ProfileCanvas -import NCDatasets -import Base: rm -using BenchmarkTools - -# This script runs our NetCDF writer and compares its performance with -# NCDatasets. It also produces a flamegraph for IO. - -# Number of target interpolation point along each dimension -const NUM = 100 - -timings = Dict{ClimaComms.AbstractDevice, Any}( - ClimaComms.CPUSingleThreaded() => missing, -) -# If a GPU is available, runs on CPU and GPU -if ClimaComms.device() isa ClimaComms.CUDADevice - timings[ClimaComms.CUDADevice()] = missing -end - -timings_ncdataset = copy(timings) - -function add_nc(nc, outarray) - v = nc["rhoa"] - temporal_size, spatial_size... = size(v) - time_index = temporal_size + 1 - nc["time"][time_index] = 10.0 * time_index - v[time_index, :, :, :] = outarray -end - -for device in keys(timings) - device_name = nameof(typeof(device)) - config = CA.AtmosConfig(; comms_ctx = ClimaComms.context(device)) - config.parsed_args["diagnostics"] = - [Dict("short_name" => "rhoa", "period" => config.parsed_args["dt"])] - config.parsed_args["netcdf_interpolation_num_points"] = [NUM, NUM, NUM] - config.parsed_args["job_id"] = "flame_perf_io" - config.parsed_args["netcdf_output_at_levels"] = false - - simulation = CA.get_simulation(config) - - # Cleanup pre-existing NC files - foreach( - rm, - filter(endswith(".nc"), readdir(simulation.output_dir, join = true)), - ) - - atmos_model = simulation.integrator.p.atmos - cspace = axes(simulation.integrator.u.c) - diagnostics, _ = CA.get_diagnostics(config.parsed_args, atmos_model, cspace) - rhoa_diag = diagnostics[end] - - netcdf_writer = simulation.output_writers[2] - field = simulation.integrator.u.c.ρ - - integrator = simulation.integrator - - # Run once to create the file - CAD.write_field!( - netcdf_writer, - field, - rhoa_diag, - integrator.u, - integrator.p, - integrator.t, - simulation.output_dir, - ) - output_path = CAD.outpath_name(simulation.output_dir, rhoa_diag) - NCDatasets.sync(netcdf_writer.open_files[output_path]) - # Now, profile - @info "Profiling ($device_name)" - prof = Profile.@profile CAD.save_diagnostic_to_disk!( - netcdf_writer, - field, - rhoa_diag, - integrator.u, - integrator.p, - integrator.t, - simulation.output_dir, - ) - results = Profile.fetch() - flame_path = joinpath(simulation.output_dir, "flame_$device_name.html") - ProfileCanvas.html_file(flame_path, results) - @info "Flame saved in $flame_path" - - @info "Benchmarking our NetCDF writer (only IO) ($device_name)" - timings[device] = @benchmark CAD.save_diagnostic_to_disk!( - $netcdf_writer, - $field, - $rhoa_diag, - $(integrator.u), - $(integrator.p), - $(integrator.t), - $(simulation.output_dir), - ) - - @info "Benchmarking NCDatasets ($device_name)" - - output_path = joinpath(simulation.output_dir, "clean_netcdf.nc") - nc = NCDatasets.NCDataset(output_path, "c") - NCDatasets.defDim(nc, "time", Inf) - NCDatasets.defVar(nc, "time", Float32, ("time",)) - NCDatasets.defDim(nc, "x", NUM) - NCDatasets.defDim(nc, "y", NUM) - NCDatasets.defDim(nc, "z", NUM) - v = NCDatasets.defVar(nc, "rhoa", Float64, ("time", "x", "y", "z")) - outarray = Array(netcdf_writer.remappers["rhoa"]._interpolated_values) - v[1, :, :, :] = outarray - - timings_ncdataset[device] = @benchmark $add_nc($nc, $outarray) -end - -for device in keys(timings) - println("DEVICE: ", device) - println("Our writer") - show(stdout, MIME"text/plain"(), timings[device]) - println() - println("NCDatasets") - show(stdout, MIME"text/plain"(), timings_ncdataset[device]) - println() -end diff --git a/perf/flame.jl b/perf/flame.jl index 1d3cb535ad9..bbc173eafde 100644 --- a/perf/flame.jl +++ b/perf/flame.jl @@ -38,18 +38,18 @@ ProfileCanvas.html_file(joinpath(output_dir, "flame.html"), results) ##### allocs_limit = Dict() -allocs_limit["flame_perf_target"] = 51_600 -allocs_limit["flame_perf_target_tracers"] = 81_576 -allocs_limit["flame_perf_target_edmfx"] = 86_608 -allocs_limit["flame_perf_diagnostics"] = 10_876_900 +allocs_limit["flame_perf_target"] = 841_104 +allocs_limit["flame_perf_target_tracers"] = 870_888 +allocs_limit["flame_perf_target_edmfx"] = 1_383_200 +allocs_limit["flame_perf_diagnostics"] = 21_359_336 allocs_limit["flame_perf_target_diagnostic_edmfx"] = 86_608 allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] = 4_018_252_656 allocs_limit["flame_perf_target_frierson"] = 4_015_547_056 allocs_limit["flame_perf_target_threaded"] = 1_276_864 -allocs_limit["flame_perf_target_callbacks"] = 184_448 +allocs_limit["flame_perf_target_callbacks"] = 988_816 allocs_limit["flame_perf_gw"] = 3_268_961_856 -allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 2_770_296 +allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 4_000_488 allocs_limit["flame_gpu_implicit_barowave_moist"] = 336_378 # Ideally, we would like to track all the allocations, but this becomes too # expensive there is too many of them. Here, we set the default sample rate to diff --git a/post_processing/ci_plots.jl b/post_processing/ci_plots.jl index 322250793ab..bcfd3992976 100644 --- a/post_processing/ci_plots.jl +++ b/post_processing/ci_plots.jl @@ -42,7 +42,7 @@ import CairoMakie import CairoMakie.Makie import ClimaAnalysis import ClimaAnalysis: Visualize as viz -import ClimaAnalysis: SimDir, slice_time, slice +import ClimaAnalysis: SimDir, slice import ClimaAnalysis.Utils: kwargs as ca_kwargs import ClimaCoreSpectra: power_spectrum_2d @@ -508,7 +508,11 @@ function make_plots( ) simdirs = SimDir.(output_paths) short_names = ["thetaa"] - vars = map_comparison(get, simdirs, short_names) + period = "10s" + reduction = "inst" + vars = map_comparison(simdirs, short_names) do simdir, short_name + return get(simdir; short_name, reduction, period) + end make_plots_generic(output_paths, vars, y = 0.0, time = LAST_SNAP) end @@ -1066,7 +1070,17 @@ function make_plots( precip_names..., ] reduction = "inst" - period = "30m" + + available_periods = ClimaAnalysis.available_periods( + simdirs[1]; + short_name = short_names[1], + reduction, + ) + if "10m" in available_periods + period = "10m" + elseif "30m" in available_periods + period = "30m" + end short_name_tuples = pair_edmf_names(short_names) var_groups_zt = diff --git a/src/callbacks/callback_helpers.jl b/src/callbacks/callback_helpers.jl index 681bb7943b6..347e6ed9dd0 100644 --- a/src/callbacks/callback_helpers.jl +++ b/src/callbacks/callback_helpers.jl @@ -59,18 +59,16 @@ callback_from_affect(x::AtmosCallback) = x function callback_from_affect(affect!) for p in propertynames(affect!) x = getproperty(affect!, p) - if x isa AtmosCallback - return x - elseif x isa DECB.SavedValues + if x isa AtmosCallback || x isa DECB.SavedValues return x end end - error("Callback not found in $(affect!)") + return nothing end function atmos_callbacks(cbs::SciMLBase.CallbackSet) all_cbs = [cbs.continuous_callbacks..., cbs.discrete_callbacks...] callback_objs = map(cb -> callback_from_affect(cb.affect!), all_cbs) - filter!(x -> !(x isa DECB.SavedValues), callback_objs) + filter!(x -> (x isa AtmosCallback), callback_objs) return callback_objs end diff --git a/src/callbacks/get_callbacks.jl b/src/callbacks/get_callbacks.jl index d8cb3c9b655..47868ec1fb6 100644 --- a/src/callbacks/get_callbacks.jl +++ b/src/callbacks/get_callbacks.jl @@ -1,71 +1,6 @@ -function init_diagnostics!( - diagnostics_iterations, - diagnostic_storage, - diagnostic_accumulators, - diagnostic_counters, - output_dir, - Y, - p, - t; - warn_allocations, -) - for diag in diagnostics_iterations - variable = diag.variable - try - # The first time we call compute! we use its return value. All - # the subsequent times (in the callbacks), we will write the - # result in place - diagnostic_storage[diag] = variable.compute!(nothing, Y, p, t) - diagnostic_counters[diag] = 1 - # If it is not a reduction, call the output writer as well - if isnothing(diag.reduction_time_func) - writer = diag.output_writer - CAD.write_field!( - writer, - diagnostic_storage[diag], - diag, - Y, - p, - t, - output_dir, - ) - if writer isa CAD.NetCDFWriter && - ClimaComms.iamroot(ClimaComms.context(Y.c)) - output_path = CAD.outpath_name(output_dir, diag) - NCDatasets.sync(writer.open_files[output_path]) - end - else - # Add to the accumulator - - # We use similar + .= instead of copy because CUDA 5.2 does - # not supported nested wrappers with view(reshape(view)) - # objects. See discussion in - # https://github.com/CliMA/ClimaAtmos.jl/pull/2579 and - # https://github.com/JuliaGPU/Adapt.jl/issues/21 - diagnostic_accumulators[diag] = - similar(diagnostic_storage[diag]) - diagnostic_accumulators[diag] .= diagnostic_storage[diag] - end - catch e - error("Could not compute diagnostic $(variable.long_name): $e") - end - end - if warn_allocations - for diag in diagnostics_iterations - # We need to hoist these variables/functions to avoid measuring - # allocations due to these variables/functions not being type-stable. - dstorage = diagnostic_storage[diag] - compute! = diag.variable.compute! - # We write over the storage space we have already prepared (and filled) before - allocs = @allocated compute!(dstorage, Y, p, t) - if allocs > 10 * 1024 - @warn "Diagnostics $(diag.output_short_name) allocates $allocs bytes" - end - end - end -end +function get_diagnostics(parsed_args, atmos_model, Y, p, t_start, dt) -function get_diagnostics(parsed_args, atmos_model, cspace) + FT = Spaces.undertype(axes(Y.c)) # We either get the diagnostics section in the YAML file, or we return an empty list # (which will result in an empty list being created by the map below) @@ -88,7 +23,7 @@ function get_diagnostics(parsed_args, atmos_model, cspace) "average" => ((+), CAD.average_pre_output_hook!), ) - hdf5_writer = CAD.HDF5Writer() + hdf5_writer = CAD.HDF5Writer(p.output_dir) if !isnothing(parsed_args["netcdf_interpolation_num_points"]) num_netcdf_points = @@ -99,8 +34,9 @@ function get_diagnostics(parsed_args, atmos_model, cspace) num_netcdf_points = (180, 90, 50) end - netcdf_writer = CAD.NetCDFWriter(; - cspace, + netcdf_writer = CAD.NetCDFWriter( + axes(Y.c), + p.output_dir, num_points = num_netcdf_points, disable_vertical_interpolation = parsed_args["netcdf_output_at_levels"], ) @@ -153,27 +89,30 @@ function get_diagnostics(parsed_args, atmos_model, cspace) haskey(yaml_diag, "period") || error("period keyword required for diagnostics") - period_seconds = time_to_seconds(yaml_diag["period"]) + period_seconds = FT(time_to_seconds(yaml_diag["period"])) if isnothing(output_name) output_short_name = CAD.descriptive_short_name( CAD.get_diagnostic_variable(short_name), - period_seconds, + CAD.EveryDtSchedule(period_seconds; t_start), reduction_time_func, pre_output_hook!, ) end if isnothing(reduction_time_func) - compute_every = period_seconds + compute_every = CAD.EveryDtSchedule(period_seconds; t_start) else - compute_every = :timestep + compute_every = CAD.EveryStepSchedule() end - CAD.ScheduledDiagnosticTime( + CAD.ScheduledDiagnostic( variable = CAD.get_diagnostic_variable(short_name), - output_every = period_seconds, - compute_every = compute_every, + output_schedule_func = CAD.EveryDtSchedule( + period_seconds; + t_start, + ), + compute_schedule_func = compute_every, reduction_time_func = reduction_time_func, pre_output_hook! = pre_output_hook!, output_writer = writer, @@ -189,6 +128,7 @@ function get_diagnostics(parsed_args, atmos_model, cspace) diagnostics = [ CAD.default_diagnostics( atmos_model, + t_start, time_to_seconds(parsed_args["t_end"]); output_writer = netcdf_writer, )..., diff --git a/src/diagnostics/Diagnostics.jl b/src/diagnostics/Diagnostics.jl index d3807b07cdc..6ba8aa44cc5 100644 --- a/src/diagnostics/Diagnostics.jl +++ b/src/diagnostics/Diagnostics.jl @@ -44,7 +44,19 @@ import ..compute_gm_mixing_length! # We need the abbreviations for symbols like curl, grad, and so on include(joinpath("..", "utils", "abbreviations.jl")) +import ClimaDiagnostics: + DiagnosticVariable, + ScheduledDiagnostic, + average_pre_output_hook!, + DiagnosticsCallback + +import ClimaDiagnostics.DiagnosticVariables: descriptive_short_name + +import ClimaDiagnostics.Schedules: EveryStepSchedule, EveryDtSchedule + + +import ClimaDiagnostics.Writers: HDF5Writer, NetCDFWriter, write_field! + include("diagnostic.jl") -include("writers.jl") end diff --git a/src/diagnostics/default_diagnostics.jl b/src/diagnostics/default_diagnostics.jl index 9e90291fb34..b7651a302ab 100644 --- a/src/diagnostics/default_diagnostics.jl +++ b/src/diagnostics/default_diagnostics.jl @@ -5,7 +5,7 @@ # level interfaces, add them here. Feel free to include extra files. """ - default_diagnostics(model, t_end; output_writer) + default_diagnostics(model, t_start, t_end; output_writer) Return a list of `ScheduledDiagnostic`s associated with the given `model` that use `output_write` to write to disk. `t_end` is the expected simulation end time and it is used @@ -20,23 +20,34 @@ If `t_end >= 120 year` take monthly means. One month is defined as 30 days. """ -function default_diagnostics(model::AtmosModel, t_end::Real; output_writer) +function default_diagnostics( + model::AtmosModel, + t_start::Real, + t_end::Real; + output_writer, +) # Unfortunately, [] is not treated nicely in a map (we would like it to be "excluded"), # so we need to manually filter out the submodels that don't have defaults associated # to non_empty_fields = filter( x -> - default_diagnostics(getfield(model, x), t_end; output_writer) != [], + default_diagnostics( + getfield(model, x), + t_start, + t_end; + output_writer, + ) != [], fieldnames(AtmosModel), ) # We use a map because we want to ensure that diagnostics is a well defined type, not # Any. This reduces latency. return vcat( - core_default_diagnostics(output_writer, t_end), + core_default_diagnostics(output_writer, t_start, t_end), map(non_empty_fields) do field default_diagnostics( getfield(model, field), + t_start, t_end; output_writer, ) @@ -49,7 +60,7 @@ end # that all the default_diagnostics return the same type). This is used by # default_diagnostics(model::AtmosModel; output_writer), so that we can ignore defaults for # submodels that have no given defaults. -default_diagnostics(submodel, t_end; output_writer) = [] +default_diagnostics(submodel, t_start, t_end; output_writer) = [] """ produce_common_diagnostic_function(period, reduction) @@ -60,14 +71,15 @@ function common_diagnostics( period, reduction, output_writer, + t_start, short_names...; pre_output_hook! = nothing, ) return [ - ScheduledDiagnosticTime( + ScheduledDiagnostic( variable = get_diagnostic_variable(short_name), - compute_every = :timestep, - output_every = period, # seconds + compute_schedule_func = EveryStepSchedule(), + output_schedule_func = EveryDtSchedule(period; t_start), reduction_time_func = reduction, output_writer = output_writer, pre_output_hook! = pre_output_hook!, @@ -75,15 +87,10 @@ function common_diagnostics( ] end -function average_pre_output_hook!(accum, counter) - @. accum = accum / counter - nothing -end - include("standard_diagnostic_frequencies.jl") """ - frequency_averages(t_end::Real) + frequency_averages(t_start::Real, t_end::Real) Return the correct averaging function depending on the total simulation time. @@ -94,7 +101,7 @@ If `t_end >= 120 year` take monthly means. One month is defined as 30 days. """ -function frequency_averages(t_end::Real) +function frequency_averages(t_start::Real, t_end::Real) if t_end > 120 * 86400 return monthly_averages elseif t_end >= 30 * 86400 @@ -111,11 +118,11 @@ end ######## # Core # ######## -function core_default_diagnostics(output_writer, t_end) +function core_default_diagnostics(output_writer, t_start, t_end) core_diagnostics = ["ts", "ta", "thetaa", "ha", "pfull", "rhoa", "ua", "va", "wa", "hfes"] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) if t_end > 120 * 86400 min_func = monthly_min @@ -133,15 +140,18 @@ function core_default_diagnostics(output_writer, t_end) return [ # We need to compute the topography at the beginning of the simulation (and only at - # the beginning), so we set output_every = 0 (it still called at the first timestep) - ScheduledDiagnosticIterations(; + # the beginning), so we set output/compute_schedule_func to false. It is still + # computed at the very beginning + ScheduledDiagnostic(; variable = get_diagnostic_variable("orog"), - output_every = 0, + output_schedule_func = (integrator) -> false, + compute_schedule_func = (integrator) -> false, output_writer, + output_short_name = "orog_inst", ), - average_func(core_diagnostics...; output_writer)..., - min_func("ts"; output_writer), - max_func("ts"; output_writer), + average_func(core_diagnostics...; output_writer, t_start)..., + min_func("ts"; output_writer, t_start), + max_func("ts"; output_writer, t_start), ] end @@ -150,38 +160,54 @@ end ################## function default_diagnostics( ::T, + t_start, t_end; output_writer, ) where {T <: Union{EquilMoistModel, NonEquilMoistModel}} moist_diagnostics = ["hur", "hus", "cl", "clw", "cli", "hussfc", "evspsbl"] - average_func = frequency_averages(t_end) - return [average_func(moist_diagnostics...; output_writer)...] + average_func = frequency_averages(t_start, t_end) + return [average_func(moist_diagnostics...; output_writer, t_start)...] end ####################### # Precipitation model # ####################### -function default_diagnostics(::Microphysics0Moment, t_end; output_writer) +function default_diagnostics( + ::Microphysics0Moment, + t_start, + t_end; + output_writer, +) precip_diagnostics = ["pr"] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) - return [average_func(precip_diagnostics...; output_writer)...] + return [average_func(precip_diagnostics...; output_writer, t_start)...] end -function default_diagnostics(::Microphysics1Moment, t_end; output_writer) +function default_diagnostics( + ::Microphysics1Moment, + t_start, + t_end; + output_writer, +) precip_diagnostics = ["husra", "hussn"] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) - return [average_func(precip_diagnostics...; output_writer)...] + return [average_func(precip_diagnostics...; output_writer, t_start)...] end ################## # Radiation mode # ################## -function default_diagnostics(::RRTMGPI.AbstractRRTMGPMode, t_end; output_writer) +function default_diagnostics( + ::RRTMGPI.AbstractRRTMGPMode, + t_start, + t_end; + output_writer, +) rad_diagnostics = [ "rsd", "rsdt", @@ -196,14 +222,15 @@ function default_diagnostics(::RRTMGPI.AbstractRRTMGPMode, t_end; output_writer) "rlus", ] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) - return [average_func(rad_diagnostics...; output_writer)...] + return [average_func(rad_diagnostics...; output_writer, t_start)...] end function default_diagnostics( ::RRTMGPI.AllSkyRadiationWithClearSkyDiagnostics, + t_start, t_end; output_writer, ) @@ -232,18 +259,18 @@ function default_diagnostics( "rlutcs", ] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) return [ - average_func(rad_diagnostics...; output_writer)..., - average_func(rad_clearsky_diagnostics...; output_writer)..., + average_func(rad_diagnostics...; output_writer, t_start)..., + average_func(rad_clearsky_diagnostics...; output_writer, t_start)..., ] end ################## # Turbconv model # ################## -function default_diagnostics(::PrognosticEDMFX, t_end; output_writer) +function default_diagnostics(::PrognosticEDMFX, t_start, t_end; output_writer) edmfx_draft_diagnostics = [ "arup", "rhoaup", @@ -271,16 +298,16 @@ function default_diagnostics(::PrognosticEDMFX, t_end; output_writer) "lmix", ] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) return [ - average_func(edmfx_draft_diagnostics...; output_writer)..., - average_func(edmfx_env_diagnostics...; output_writer)..., + average_func(edmfx_draft_diagnostics...; output_writer, t_start)..., + average_func(edmfx_env_diagnostics...; output_writer, t_start)..., ] end -function default_diagnostics(::DiagnosticEDMFX, t_end; output_writer) +function default_diagnostics(::DiagnosticEDMFX, t_start, t_end; output_writer) diagnostic_edmfx_draft_diagnostics = [ "arup", "rhoaup", @@ -295,10 +322,18 @@ function default_diagnostics(::DiagnosticEDMFX, t_end; output_writer) ] diagnostic_edmfx_env_diagnostics = ["waen", "tke", "lmix"] - average_func = frequency_averages(t_end) + average_func = frequency_averages(t_start, t_end) return [ - average_func(diagnostic_edmfx_draft_diagnostics...; output_writer)..., - average_func(diagnostic_edmfx_env_diagnostics...; output_writer)..., + average_func( + diagnostic_edmfx_draft_diagnostics...; + output_writer, + t_start, + )..., + average_func( + diagnostic_edmfx_env_diagnostics...; + output_writer, + t_start, + )..., ] end diff --git a/src/diagnostics/diagnostic.jl b/src/diagnostics/diagnostic.jl index 1a5d7de9dcb..b03bb8e8656 100644 --- a/src/diagnostics/diagnostic.jl +++ b/src/diagnostics/diagnostic.jl @@ -1,103 +1,11 @@ -# diagnostic.jl -# -# This file contains: -# -# - The definition of what a DiagnosticVariable is. Conceptually, a DiagnosticVariable is a -# variable we know how to compute from the state. We attach more information to it for -# documentation and to reference to it with its short name. DiagnosticVariables can exist -# irrespective of the existence of an actual simulation that is being run. ClimaAtmos -# comes with several diagnostics already defined (in the `ALL_DIAGNOSTICS` dictionary). -# +# ClimaAtmos diagnostics + # - A dictionary `ALL_DIAGNOSTICS` with all the diagnostics we know how to compute, keyed # over their short name. If you want to add more diagnostics, look at the included files. # You can add your own file if you want to define several new diagnostics that are # conceptually related. The dictionary `ALL_DIAGNOSTICS` should be considered an # implementation detail. -# -# - The definition of what a ScheduledDiagnostics is. Conceptually, a ScheduledDiagnostics is a -# DiagnosticVariable we want to compute in a given simulation. For example, it could be -# the temperature averaged over a day. We can have multiple ScheduledDiagnostics for the -# same DiagnosticVariable (e.g., daily and monthly average temperatures). -# -# We provide two types of ScheduledDiagnostics: ScheduledDiagnosticIterations and -# ScheduledDiagnosticTime, with the difference being only in what domain the recurrence -# time is defined (are we doing something at every N timesteps or every T seconds?). It is -# much cleaner and simpler to work with ScheduledDiagnosticIterations because iterations -# are well defined and consistent. On the other hand, working in the time domain requires -# dealing with what happens when the timestep is not lined up with the output period. -# Possible solutions to this problem include: uneven output, interpolation, or restricting -# the user from picking specific combinations of timestep/output period. In the current -# implementation, we choose the third option. So, ScheduledDiagnosticTime is provided -# because it is the physically interesting quantity. If we know what is the timestep, we -# can convert between the two and check if the diagnostics are well-posed in terms of the -# relationship between the periods and the timesteps. In some sense, you can think of -# ScheduledDiagnosticIterations as an internal representation and ScheduledDiagnosticTime -# as the external interface. -# -# - A function to convert a list of ScheduledDiagnosticIterations into a list of -# AtmosCallbacks. This function takes three arguments: the list of diagnostics and two -# dictionaries that map each scheduled diagnostic to an area of memory where to save the -# result and where to keep track of how many times the function was called (so that we -# can compute stuff like averages). -# -# - This file also also include several other files, including (but not limited to): -# - core_diagnostics.jl, radiation_diagnostics.jl -# - default_diagnostics.jl (which defines all the higher-level interfaces and defaults) -# - reduction_identities.jl -# - diagnostic_utils.jl - - -""" - DiagnosticVariable - - -A recipe to compute a diagnostic variable from the state, along with some useful metadata. - -The primary use for `DiagnosticVariable`s is to be embedded in a `ScheduledDiagnostic` to -compute diagnostics while the simulation is running. - -The metadata is used exclusively by the `output_writer` in the `ScheduledDiagnostic`. It is -responsibility of the `output_writer` to follow the conventions about the meaning of the -metadata and their use. - -In `ClimaAtmos`, we roughly follow the naming conventions listed in this file: -https://airtable.com/appYNLuWqAgzLbhSq/shrKcLEdssxb8Yvcp/tblL7dJkC3vl5zQLb -Keyword arguments -================= - -- `short_name`: Name used to identify the variable in the output files and in the file - names. Short but descriptive. `ClimaAtmos` follows the CMIP conventions and - the diagnostics are identified by the short name. - -- `long_name`: Name used to describe the variable in the output files. - -- `standard_name`: Standard name, as in - http://cfconventions.org/Data/cf-standard-names/71/build/cf-standard-name-table.html - -- `units`: Physical units of the variable. - -- `comments`: More verbose explanation of what the variable is, or comments related to how - it is defined or computed. - -- `compute!`: Function that compute the diagnostic variable from the state. - It has to take two arguments: the `integrator`, and a - pre-allocated area of memory where to write the result of the - computation. It the no pre-allocated area is available, a new - one will be allocated. To avoid extra allocations, this - function should perform the calculation in-place (i.e., using - `.=`). -""" -Base.@kwdef struct DiagnosticVariable{T} - short_name::String - long_name::String - standard_name::String - units::String - comments::String - compute!::T -end - -# ClimaAtmos diagnostics const ALL_DIAGNOSTICS = Dict{String, DiagnosticVariable}() @@ -192,645 +100,3 @@ include("edmfx_diagnostics.jl") # Default diagnostics and higher level interfaces include("default_diagnostics.jl") - -# Helper functions -include("diagnostics_utils.jl") - -# ScheduledDiagnostics - -# NOTE: The definitions of ScheduledDiagnosticTime and ScheduledDiagnosticIterations are -# nearly identical except for the fact that one is assumed to use units of seconds the other -# units of integration steps. However, we allow for this little repetition of code to avoid -# adding an extra layer of abstraction just to deal with these two objects (some people say -# that "duplication is better than over-abstraction"). Most users will only work with -# ScheduledDiagnosticTime. (It would be nice to have defaults fields in abstract types, as -# proposed in 2013 in https://github.com/JuliaLang/julia/issues/4935) Having two distinct -# types allow us to implement different checks and behaviors (e.g., we allow -# ScheduledDiagnosticTime to have placeholders values for {compute, output}_every so that we -# can plug the timestep in it). - -# Design note: pre_output_hook! -# -# One of our key requirements is to be able to compute arithmetic averages. Unfortunately, -# computing an arithmetic average requires keeping track of how many elements we are summing -# up. pre_output_hook! was introduced so that we can think of an average as a sum coupled -# with division, and perform the division (by the number of elements) before output. -# pre_output_hook! could be used for other operations, but we decided to keep it simple and -# target directly the most important use case for us. -# -# This choice restricts what reductions can be performed. For example, it is not possible to -# have a geometric average. If more complex reduction are needed, this mechanism has to be -# changed. - -struct ScheduledDiagnosticIterations{T1, T2, OW, F1, F2, PO, T} - variable::DiagnosticVariable{T} - output_every::T1 - output_writer::OW - reduction_time_func::F1 - reduction_space_func::F2 - compute_every::T2 - pre_output_hook!::PO - output_short_name::String - output_long_name::String - - """ - ScheduledDiagnosticIterations(; variable::DiagnosticVariable, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isa_reduction ? 1 : output_every, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name(self), - output_short_name = descriptive_long_name(self)) - - - A `DiagnosticVariable` that has to be computed and output during a simulation with a cadence - defined by the number of iterations, with an optional reduction applied to it (e.g., compute - the maximum temperature over the course of every 10 timesteps). This object is turned into - two callbacks (one for computing and the other for output) and executed by the integrator. - - Keyword arguments - ================= - - - `variable`: The diagnostic variable that has to be computed and output. - - - `output_every`: Save the results to disk every `output_every` iterations. If `output_every` - is non-positive, only output at the first time step. - - - `output_writer`: Function that controls out to save the computed diagnostic variable to - disk. `output_writer` has to take three arguments: the value that has to - be output, the `ScheduledDiagnostic`, and the integrator. Internally, the - integrator contains extra information (such as the current timestep). It - is responsibility of the `output_writer` to properly use the provided - information for meaningful output. - - - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of - scratch space `acc` where to accumulate partial results. Then, at - every `compute_every`, `reduction_time_func` is computed between - the previously stored value in `acc` and the new value. This - implements a running reduction. For example, if - `reduction_time_func = max`, the space `acc` will hold the running - maxima of the diagnostic. To implement operations like the - arithmetic average, the `reduction_time_func` has to be chosen as - `sum`, and a `pre_output_hook!` that renormalize `acc` by the - number of samples has to be provided. For custom reductions, it is - necessary to also specify the identity of operation by defining a - new method to `identity_of_reduction`. - - - `reduction_space_func`: NOT IMPLEMENTED YET - - - `compute_every`: Run the computations every `compute_every` iterations. This is not - particularly useful for point-wise diagnostics, where we enforce that - `compute_every` = `output_every`. For time reductions, `compute_every` is - set to 1 (compute at every timestep) by default. `compute_every` has to - evenly divide `output_every`. - - - `pre_output_hook!`: Function that has to be run before saving to disk for reductions - (mostly used to implement averages). The function `pre_output_hook!` - is called with two arguments: the value accumulated during the - reduction, and the number of times the diagnostic was computed from - the last time it was output. `pre_output_hook!` should mutate the - accumulator in place. The return value of `pre_output_hook!` is - discarded. An example of `pre_output_hook!` to compute the arithmetic - average is `pre_output_hook!(acc, N) = @. acc = acc / N`. - - - `output_short_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP - conventions for this. - - - `output_long_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - - """ - function ScheduledDiagnosticIterations(; - variable::DiagnosticVariable{T}, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isnothing(reduction_time_func) ? output_every : 1, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = false, - ), - output_long_name = descriptive_long_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = false, - ), - ) where {T} - - # We provide an inner constructor to enforce some constraints - - (output_every <= 0 || output_every % compute_every == 0) || error( - "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", - ) - - isa_reduction = !isnothing(reduction_time_func) - - # If it is not a reduction, we compute only when we output - if !isa_reduction && compute_every != output_every - @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" - compute_every = output_every - end - - # pre_output_hook! has to be a function, but it is much more intuitive to specify - # `nothing` when we want nothing to happen. Here, we convert the nothing keyword - # into a function that does nothing - if isnothing(pre_output_hook!) - pre_output_hook! = (accum, count) -> nothing - end - - T1 = typeof(output_every) - T2 = typeof(compute_every) - OW = typeof(output_writer) - F1 = typeof(reduction_time_func) - F2 = typeof(reduction_space_func) - PO = typeof(pre_output_hook!) - - new{T1, T2, OW, F1, F2, PO, T}( - variable, - output_every, - output_writer, - reduction_time_func, - reduction_space_func, - compute_every, - pre_output_hook!, - output_short_name, - output_long_name, - ) - end -end - - -struct ScheduledDiagnosticTime{T1, T2, OW, F1, F2, PO} - variable::DiagnosticVariable - output_every::T1 - output_writer::OW - reduction_time_func::F1 - reduction_space_func::F2 - compute_every::T2 - pre_output_hook!::PO - output_short_name::String - output_long_name::String - - """ - ScheduledDiagnosticTime(; variable::DiagnosticVariable, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isa_reduction ? :timestep : output_every, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name(self), - output_long_name = descriptive_long_name(self), - ) - - - A `DiagnosticVariable` that has to be computed and output during a simulation with a - cadence defined by how many seconds in simulation time, with an optional reduction - applied to it (e.g., compute the maximum temperature over the course of every day). This - object is turned into a `ScheduledDiagnosticIterations`, which is turned into two - callbacks (one for computing and the other for output) and executed by the integrator. - - Keyword arguments - ================= - - - `variable`: The diagnostic variable that has to be computed and output. - - - `output_every`: Save the results to disk every `output_every` seconds. If `output_every` - is non-positive, only output at the first time step. - - - `output_writer`: Function that controls out to save the computed diagnostic variable to - disk. `output_writer` has to take three arguments: the value that has to - be output, the `ScheduledDiagnostic`, and the integrator. Internally, the - integrator contains extra information (such as the current timestep). It - is responsibility of the `output_writer` to properly use the provided - information for meaningful output. - - - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of - scratch space `acc` where to accumulate partial results. Then, at - every `compute_every`, `reduction_time_func` is computed between - the previously stored value in `acc` and the new value. This - implements a running reduction. For example, if - `reduction_time_func = max`, the space `acc` will hold the running - maxima of the diagnostic. To implement operations like the - arithmetic average, the `reduction_time_func` has to be chosen as - `sum`, and a `pre_output_hook!` that renormalize `acc` by the - number of samples has to be provided. For custom reductions, it is - necessary to also specify the identity of operation by defining a - new method to `identity_of_reduction`. - - - `reduction_space_func`: NOT IMPLEMENTED YET - - - `compute_every`: Run the computations every `compute_every` seconds. This is not - particularly useful for point-wise diagnostics, where we enforce that - `compute_every` = `output_every`. For time reductions, - `compute_every` is set to `:timestep` (compute at every timestep) by - default. `compute_every` has to evenly divide `output_every`. - `compute_every` can take the special symbol `:timestep` which is a - placeholder for the timestep of the simulation to which this - `ScheduledDiagnostic` is attached. - - - `pre_output_hook!`: Function that has to be run before saving to disk for reductions - (mostly used to implement averages). The function `pre_output_hook!` - is called with two arguments: the value accumulated during the - reduction, and the number of times the diagnostic was computed from - the last time it was output. `pre_output_hook!` should mutate the - accumulator in place. The return value of `pre_output_hook!` is - discarded. An example of `pre_output_hook!` to compute the arithmetic - average is `pre_output_hook!(acc, N) = @. acc = acc / N`. - - - `output_short_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP - conventions for this. - - - `output_long_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - """ - function ScheduledDiagnosticTime(; - variable::DiagnosticVariable, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isnothing(reduction_time_func) ? output_every : - :timestep, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, - ), - output_long_name = descriptive_long_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, - ), - ) - - # We provide an inner constructor to enforce some constraints - - # compute_every could be a Symbol (:timestep). We process this that when we process - # the list of diagnostics - if !isa(compute_every, Symbol) - (output_every <= 0 || output_every % compute_every == 0) || error( - "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", - ) - end - - isa_reduction = !isnothing(reduction_time_func) - - # If it is not a reduction, we compute only when we output - if !isa_reduction && compute_every != output_every - @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" - compute_every = output_every - end - - # pre_output_hook! has to be a function, but it is much more intuitive to specify - # `nothing` when we want nothing to happen. Here, we convert the nothing keyword - # into a function that does nothing - if isnothing(pre_output_hook!) - pre_output_hook! = (accum, count) -> nothing - end - - T1 = typeof(output_every) - T2 = typeof(compute_every) - OW = typeof(output_writer) - F1 = typeof(reduction_time_func) - F2 = typeof(reduction_space_func) - PO = typeof(pre_output_hook!) - - new{T1, T2, OW, F1, F2, PO}( - variable, - output_every, - output_writer, - reduction_time_func, - reduction_space_func, - compute_every, - pre_output_hook!, - output_short_name, - output_long_name, - ) - end -end - -""" - ScheduledDiagnosticIterations(sd_time::ScheduledDiagnosticTime, Δt) - - -Create a `ScheduledDiagnosticIterations` given a `ScheduledDiagnosticTime` and a timestep -`Δt`. In this, ensure that `compute_every` and `output_every` are meaningful for the given -timestep. - -""" -function ScheduledDiagnosticIterations( - sd_time::ScheduledDiagnosticTime, - Δt::T, -) where {T} - - # If we have the timestep, we can convert time in seconds into iterations - - # if compute_every is :timestep, then we want to compute after every iterations - compute_every = - sd_time.compute_every == :timestep ? 1 : sd_time.compute_every / Δt - output_every = sd_time.output_every / Δt - - # When Δt is a Float32, loss of precision might lead to spurious results (e.g., 1. / - # 0.1f0 = 9.99999985098839). So, we round to the number of significant digits that we - # expect from the float type. - # - # FIXME: eps(typeof(Δt)) is not the best value to pick the number of significant digits - # because it makes sense only for values of order unity. - sigdigits = eps(typeof(Δt)) |> log10 |> abs |> round |> Int - - output_every = round(output_every; sigdigits) - compute_every = round(compute_every; sigdigits) - - isinteger(output_every) || error( - "output_every ($(sd_time.output_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", - ) - isinteger(compute_every) || error( - "compute_every ($(sd_time.compute_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", - ) - - ScheduledDiagnosticIterations(; - sd_time.variable, - output_every = convert(Int, output_every), - sd_time.output_writer, - sd_time.reduction_time_func, - sd_time.reduction_space_func, - compute_every = convert(Int, compute_every), - sd_time.pre_output_hook!, - sd_time.output_short_name, - sd_time.output_long_name, - ) -end - -""" - ScheduledDiagnosticTime(sd_time::ScheduledDiagnosticIterations, Δt) - - -Create a `ScheduledDiagnosticTime` given a `ScheduledDiagnosticIterations` and a timestep -`Δt`. - -""" -function ScheduledDiagnosticTime( - sd_time::ScheduledDiagnosticIterations, - Δt::T, -) where {T} - - # If we have the timestep, we can convert time in iterations to seconds - - # if compute_every is :timestep, then we want to compute after every iterations - compute_every = - sd_time.compute_every == 1 ? :timestep : sd_time.compute_every * Δt - output_every = sd_time.output_every * Δt - - ScheduledDiagnosticTime(; - sd_time.variable, - output_every, - sd_time.output_writer, - sd_time.reduction_time_func, - sd_time.reduction_space_func, - compute_every, - sd_time.pre_output_hook!, - sd_time.output_short_name, - sd_time.output_long_name, - ) -end - -# We provide also a companion constructor for ScheduledDiagnosticIterations which returns -# itself (without copy) when called with a timestep. -# -# This is so that we can assume that -# ScheduledDiagnosticIterations(ScheduledDiagnostic{Time, Iterations}, Δt) -# always returns a valid ScheduledDiagnosticIterations -ScheduledDiagnosticIterations( - sd::ScheduledDiagnosticIterations, - _Δt::T, -) where {T} = sd - - -# We define all the known identities in reduction_identities.jl -include("reduction_identities.jl") - -# Helper functions for the callbacks: -# - reset_accumulator! -# - accumulate! - -# When the reduction is nothing, do nothing -reset_accumulator!(_, reduction_time_func::Nothing) = nothing - -# If we have a reduction, we have to reset the accumulator to its neutral state. (If we -# don't have a reduction, we don't have to do anything) -# -# ClimaAtmos defines methods for identity_of_reduction for standard reduction_time_func in -# reduction_identities.jl -function reset_accumulator!(diag_accumulator, reduction_time_func) - # identity_of_reduction works by dispatching over operation - identity = identity_of_reduction(reduction_time_func) - float_type = Spaces.undertype(axes((diag_accumulator))) - identity_ft = convert(float_type, identity) - fill!(parent(diag_accumulator), identity_ft) -end - -# When the reduction is nothing, we do not need to accumulate anything -accumulate!(_, _, reduction_time_func::Nothing) = nothing - -# When we have a reduction, apply it between the accumulated value one -function accumulate!(diag_accumulator, diag_storage, reduction_time_func) - diag_accumulator .= reduction_time_func.(diag_accumulator, diag_storage) - return nothing -end - -any_sync_needed(step, diagnostics) = - any(diag -> sync_needed(step, diag), diagnostics) - -# sync_needed(step, diag) = diag.cbf.n > 0 && step % diag.cbf.n == 0 -sync_needed(step, diag) = diag.output_every > 0 && step % diag.output_every == 0 - -import NVTX -NVTX.@annotate function sync_nc_datasets(diagnostics, output_dir, step, context) - any(diag -> diag.output_writer isa NetCDFWriter, diagnostics) || return - any_sync_needed(step, diagnostics) || return - ClimaComms.iamroot(context) || return - tasks = map(diagnostics) do diag - writer = diag.output_writer - if writer isa NetCDFWriter && sync_needed(step, diag) - output_path = outpath_name(output_dir, diag) - # TODO: find out why Threads.@spawn segfaults in multithreaded IO test - # Threads.@spawn NCDatasets.sync(writer.open_files[output_path]) - NCDatasets.sync(writer.open_files[output_path]) - else - nothing - end - end - isempty(tasks) && return nothing - # I don't think this is needed, but - # leaving here in case: - # for t in tasks - # isnothing(t) && continue - # Threads.wait(t) - # end - return nothing -end - -import NVTX -NVTX.@annotate function compute_callback!( - integrator, - accumulators, - storage, - diag, - counters, - compute!, -) - compute!(storage, integrator.u, integrator.p, integrator.t) - - # accumulator[diag] is not defined for non-reductions - diag_accumulator = get(accumulators, diag, nothing) - - accumulate!(diag_accumulator, storage, diag.reduction_time_func) - counters[diag] += 1 - return nothing -end - -NVTX.@annotate function output_callback!( - integrator, - accumulators, - storage, - diag, - counters, - output_dir, -) - # Move accumulated value to storage so that we can output it (for - # reductions). This provides a unified interface to pre_output_hook! and - # output, at the cost of an additional copy. If this copy turns out to be - # too expensive, we can move the if statement below. - isnothing(diag.reduction_time_func) || (storage .= accumulators[diag]) - - # Any operations we have to perform before writing to output? - # Here is where we would divide by N to obtain an arithmetic average - diag.pre_output_hook!(storage, counters[diag]) - - # Write to disk - write_field!( - diag.output_writer, - storage, - diag, - integrator.u, - integrator.p, - integrator.t, - output_dir, - ) - - # accumulator[diag] is not defined for non-reductions - diag_accumulator = get(accumulators, diag, nothing) - - reset_accumulator!(diag_accumulator, diag.reduction_time_func) - counters[diag] = 0 - return nothing -end - -function orchestrate_diagnostics(integrator, diagnostics_functions) - for d in diagnostics_functions - n = d.cbf.n - if n > 0 && integrator.step % n == 0 - d.f!(integrator) - end - end -end - - -""" - get_callbacks_from_diagnostics(diagnostics, storage, counters) - - -Translate a list of diagnostics into a list of callbacks. - -Positional arguments -===================== - -- `diagnostics`: List of `ScheduledDiagnosticIterations` that have to be converted to - callbacks. We want to have `ScheduledDiagnosticIterations` here so that we - can define callbacks that occur at the end of every N integration steps. - -- `storage`: Dictionary that maps a given `ScheduledDiagnosticIterations` to a potentially - pre-allocated area of memory where to save the newly computed results. - -- `accumulator`: Dictionary that maps a given `ScheduledDiagnosticIterations` to a potentially - pre-allocated area of memory where to accumulate results. - -- `counters`: Dictionary that maps a given `ScheduledDiagnosticIterations` to the counter - that tracks how many times the given diagnostics was computed from the last - time it was output to disk. - -""" -function get_callbacks_from_diagnostics( - diagnostics, - storage, - accumulators, - counters, - output_dir, -) - # We have two types of callbacks: to compute and accumulate diagnostics, and to dump - # them to disk. Note that our callbacks do not contain any branching - - # storage is used to pre-allocate memory and to accumulate partial results for those - # diagnostics that perform reductions. - - callback_arrays = map(diagnostics) do diag - compute_callback = - integrator -> begin - compute_callback!( - integrator, - accumulators, - storage[diag], - diag, - counters, - diag.variable.compute!, - ) - end - output_callback = - integrator -> begin - output_callback!( - integrator, - accumulators, - storage[diag], - diag, - counters, - output_dir, - ) - end - [ - AtmosCallback(compute_callback, EveryNSteps(diag.compute_every)), - AtmosCallback(output_callback, EveryNSteps(diag.output_every)), - ] - end - nc_sync(integrator) = sync_nc_datasets( - diagnostics, - output_dir, - integrator.step, - ClimaComms.context(axes(integrator.u.c)), - ) - - # We need to flatten to tuples - return vcat(callback_arrays..., AtmosCallback(nc_sync, EveryNSteps(1))) -end diff --git a/src/diagnostics/diagnostics_utils.jl b/src/diagnostics/diagnostics_utils.jl deleted file mode 100644 index ad2878c336a..00000000000 --- a/src/diagnostics/diagnostics_utils.jl +++ /dev/null @@ -1,134 +0,0 @@ -# diagnostic_utils.jl -# -# This file contains: -# - descriptive_short_name: to condense ScheduledDiagnostic information into few characters. -# - descriptive_long_name: to produce full names that are clearly human-understandable - - -""" - descriptive_short_name(variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true) - - -Return a compact, unique-ish, identifier generated from the given information. - -`output_every` is interpreted as in seconds if `units_are_seconds` is `true`. Otherwise, it -is interpreted as in units of number of iterations. - -This function is useful for filenames and error messages. - -""" -function descriptive_short_name( - variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, -) - var = "$(variable.short_name)" - isa_reduction = !isnothing(reduction_time_func) - - - if isa_reduction - red = "$(reduction_time_func)" - - # Let's check if we are computing the average. Note that this might slip under the - # radar if the user passes their own pre_output_hook!. - if reduction_time_func == (+) && - pre_output_hook! == average_pre_output_hook! - red = "average" - end - - if units_are_seconds - - # Convert period from seconds to days, hours, minutes, seconds - period = "" - - days, rem_seconds = divrem(output_every, 24 * 60 * 60) - hours, rem_seconds = divrem(rem_seconds, 60 * 60) - minutes, seconds = divrem(rem_seconds, 60) - - # At this point, days, hours, minutes, seconds have to be integers. - # Let us force them to be such so that we can have a consistent string output. - - days, hours, minutes, seconds = - map(Int, (days, hours, minutes, seconds)) - - days > 0 && (period *= "$(days)d_") - hours > 0 && (period *= "$(hours)h_") - minutes > 0 && (period *= "$(minutes)m_") - seconds > 0 && (period *= "$(seconds)s_") - - suffix = period * red - else - suffix = "$(output_every)it_$(red)" - end - else - suffix = "inst" - end - return "$(var)_$(suffix)" -end - -""" - descriptive_long_name(variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true) - - -Return a verbose description of the given output variable. - -`output_every` is interpreted as in seconds if `units_are_seconds` is `true`. Otherwise, it -is interpreted as in units of number of iterations. - -This function is useful for attributes in output files. - -""" -function descriptive_long_name( - variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, -) - var = "$(variable.long_name)" - isa_reduction = !isnothing(reduction_time_func) - - if isa_reduction - red = "$(reduction_time_func)" - - # Let's check if we are computing the average. Note that this might slip under the - # radar if the user passes their own pre_output_hook!. - if reduction_time_func == (+) && - pre_output_hook! == average_pre_output_hook! - red = "average" - end - - if units_are_seconds - - # Convert period from seconds to days, hours, minutes, seconds - period = "" - - days, rem_seconds = divrem(output_every, 24 * 60 * 60) - hours, rem_seconds = divrem(rem_seconds, 60 * 60) - minutes, seconds = divrem(rem_seconds, 60) - - days > 0 && (period *= "$(days) Day(s)") - hours > 0 && (period *= "$(hours) Hour(s)") - minutes > 0 && (period *= "$(minutes) Minute(s)") - seconds > 0 && (period *= "$(seconds) Second(s)") - - period_str = period - else - period_str = "$(output_every) Iterations" - end - suffix = "$(red) within $(period_str)" - else - suffix = "Instantaneous" - end - return "$(var), $(suffix)" -end diff --git a/src/diagnostics/hdf5_writer.jl b/src/diagnostics/hdf5_writer.jl deleted file mode 100644 index bf822b7ccf8..00000000000 --- a/src/diagnostics/hdf5_writer.jl +++ /dev/null @@ -1,71 +0,0 @@ -import ClimaComms - -############## -# HDF5Writer # -############## - -""" - HDF5Writer() - - -Save a `ScheduledDiagnostic` to a HDF5 file inside the `output_dir` of the simulation. - - -TODO: This is a very barebone HDF5Writer. Do not consider this implementation as the "final -word". - -We need to implement the following features/options: -- Toggle for write new files/append -- Checks for existing files -- Check for new subfolders that have to be created -- More meaningful naming conventions (keeping in mind that we can have multiple variables - with different reductions) -- All variables in one file/each variable in its own file -- All timesteps in one file/each timestep in its own file -- Writing the correct attributes -- Overriding simulation.output_dir (e.g., if the path starts with /) -- ...more features/options - -""" -struct HDF5Writer end - - -""" - close(writer::HDF5Writer) - -Close all the files open in `writer`. (Currently no-op.) -""" -close(writer::HDF5Writer) = nothing - -function write_field!( - writer::HDF5Writer, - field, - diagnostic, - u, - p, - t, - output_dir, -) - var = diagnostic.variable - time = t - - output_path = - joinpath(output_dir, "$(diagnostic.output_short_name)_$(time).h5") - - comms_ctx = ClimaComms.context(u.c) - hdfwriter = InputOutput.HDF5Writer(output_path, comms_ctx) - InputOutput.write!(hdfwriter, field, "$(diagnostic.output_short_name)") - attributes = Dict( - "time" => time, - "long_name" => diagnostic.output_long_name, - "variable_units" => var.units, - "standard_variable_name" => var.standard_name, - ) - - for (k, v) in attributes - InputOutput.HDF5.write_attribute(hdfwriter.file, k, v) - end - - Base.close(hdfwriter) - return nothing -end diff --git a/src/diagnostics/netcdf_writer.jl b/src/diagnostics/netcdf_writer.jl deleted file mode 100644 index b0b403f4633..00000000000 --- a/src/diagnostics/netcdf_writer.jl +++ /dev/null @@ -1,784 +0,0 @@ -# netcdf_writer.jl -# -# The flow of the code is: -# -# - We define a generic NetCDF struct with the NetCDF constructor. In doing this, we check -# what we have to do for topography. We might have to interpolate it or not so that we can -# later provide the correct elevation profile. - -# - The first time write_fields! is called with the writer on a new field, we add the -# dimensions to the NetCDF file. This requires understanding what coordinates we have to -# interpolate on and potentially handle topography. -# -# The functions to do so are for the most part all methods of the same functions, so that -# we can achieve the same behavior for different types of configurations (planes/spheres, -# ...), but also different types of fields (horizontal ones, 3D ones, ...) -# - -################### -# NetCDFWriter # -################## -""" - add_dimension!(nc::NCDatasets.NCDataset, - name::String, - points; - kwargs...) - - -Add dimension identified by `name` in the given `nc` file and fill it with the given -`points`. -""" -function add_dimension!( - nc::NCDatasets.NCDataset, - name::String, - points; - kwargs..., -) - FT = eltype(points) - - NCDatasets.defDim(nc, name, size(points)[end]) - - dim = NCDatasets.defVar(nc, name, FT, (name,)) - for (k, v) in kwargs - dim.attrib[String(k)] = v - end - - dim[:] = points - - return nothing -end - -function dimension_exists( - nc::NCDatasets.NCDataset, - name::String, - expected_size::Tuple, -) - if haskey(nc, name) - if size(nc[name]) != expected_size - error("Incompatible $name dimension already exists in file") - else - return true - end - else - return false - end -end - -""" - add_time_maybe!(nc::NCDatasets.NCDataset, - float_type::DataType; - kwargs...) - - -Add the `time` dimension (with infinite size) to the given NetCDF file if not already there. -Optionally, add all the keyword arguments as attributes. -""" -function add_time_maybe!( - nc::NCDatasets.NCDataset, - float_type::DataType; - kwargs..., -) - - # If we already have time, do nothing - haskey(nc, "time") && return nothing - - NCDatasets.defDim(nc, "time", Inf) - dim = NCDatasets.defVar(nc, "time", float_type, ("time",)) - for (k, v) in kwargs - dim.attrib[String(k)] = v - end - return nothing -end - -""" - add_space_coordinates_maybe!(nc::NCDatasets.NCDataset, - space::Spaces.AbstractSpace, - num_points; - names) - -Add dimensions relevant to the `space` to the given `nc` NetCDF file. The range is -automatically determined and the number of points is set with `num_points`, which has to be -an iterable of size N, where N is the number of dimensions of the space. For instance, 3 for -a cubed sphere, 2 for a surface, 1 for a column. - -The function returns an array with the names of the relevant dimensions. (We want arrays -because we want to preserve the order to match the one in num_points). - -In some cases, the names are adjustable passing the keyword `names`. -""" -function add_space_coordinates_maybe! end - -""" - target_coordinates!(space::Spaces.AbstractSpace, - num_points) - -Return the range of interpolation coordinates. The range is automatically determined and the -number of points is set with `num_points`, which has to be an iterable of size N, where N is -the number of dimensions of the space. For instance, 3 for a cubed sphere, 2 for a surface, -1 for a column. -""" -function target_coordinates(space, num_points) end - -function target_coordinates( - space::S, - num_points, -) where { - S <: - Union{Spaces.CenterFiniteDifferenceSpace, Spaces.FaceFiniteDifferenceSpace}, -} - # Exponentially spaced with base e - # - # We mimic something that looks like pressure levels - # - # p ~ p₀ exp(-z/H) - # - # We assume H to be 7000, which is a good scale height for the Earth atmosphere - H_EARTH = 7000 - - num_points_z = num_points[] - FT = Spaces.undertype(space) - topology = Spaces.topology(space) - vert_domain = topology.mesh.domain - z_min, z_max = FT(vert_domain.coord_min.z), FT(vert_domain.coord_max.z) - # We floor z_min to avoid having to deal with the singular value z = 0. - z_min = max(z_min, 100) - exp_z_min = exp(-z_min / H_EARTH) - exp_z_max = exp(-z_max / H_EARTH) - return collect(-H_EARTH * log.(range(exp_z_min, exp_z_max, num_points_z))) -end - -# Column -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.FiniteDifferenceSpace, - num_points_z; - names = ("z",), -) - name, _... = names - z_dimension_exists = dimension_exists(nc, name, (num_points_z,)) - if !z_dimension_exists - zpts = target_coordinates(space, num_points_z) - add_dimension!(nc, name, zpts, units = "m", axis = "Z") - end - return [name] -end - -add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.AbstractSpectralElementSpace, - num_points; -) = add_space_coordinates_maybe!( - nc, - space, - num_points, - Meshes.domain(Spaces.topology(space)); -) - - -# For the horizontal space, we also have to look at the domain, so we define another set of -# functions that dispatches over the domain -target_coordinates(space::Spaces.AbstractSpectralElementSpace, num_points) = - target_coordinates(space, num_points, Meshes.domain(Spaces.topology(space))) - -# Box -function target_coordinates( - space::Spaces.SpectralElementSpace2D, - num_points, - domain::Domains.RectangleDomain, -) - num_points_x, num_points_y = num_points - FT = Spaces.undertype(space) - xmin = FT(domain.interval1.coord_min.x) - xmax = FT(domain.interval1.coord_max.x) - ymin = FT(domain.interval2.coord_min.y) - ymax = FT(domain.interval2.coord_max.y) - xpts = collect(range(xmin, xmax, num_points_x)) - ypts = collect(range(ymin, ymax, num_points_y)) - return (xpts, ypts) -end - -# Plane -function target_coordinates( - space::Spaces.SpectralElementSpace1D, - num_points, - domain::Domains.IntervalDomain, -) - num_points_x, _... = num_points - FT = Spaces.undertype(space) - xmin = FT(domain.coord_min.x) - xmax = FT(domain.coord_max.x) - xpts = collect(range(xmin, xmax, num_points_x)) - return (xpts) -end - -# Cubed sphere -function target_coordinates( - space::Spaces.SpectralElementSpace2D, - num_points, - ::Domains.SphereDomain, -) - num_points_long, num_points_lat = num_points - FT = Spaces.undertype(space) - longpts = collect(range(FT(-180), FT(180), num_points_long)) - latpts = collect(range(FT(-90), FT(90), num_points_lat)) - - return (longpts, latpts) -end - -# Box -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.SpectralElementSpace2D, - num_points, - ::Domains.RectangleDomain; - names = ("x", "y"), -) - xname, yname = names - num_points_x, num_points_y = num_points - x_dimension_exists = dimension_exists(nc, xname, (num_points_x,)) - y_dimension_exists = dimension_exists(nc, yname, (num_points_y,)) - - if !x_dimension_exists && !y_dimension_exists - xpts, ypts = target_coordinates(space, num_points) - add_dimension!(nc, xname, xpts; units = "m", axis = "X") - add_dimension!(nc, yname, ypts; units = "m", axis = "Y") - end - return [xname, yname] -end - -# Plane -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.SpectralElementSpace1D, - num_points, - ::Domains.IntervalDomain; - names = ("x",), -) - xname, _... = names - num_points_x, = num_points - x_dimension_exists = dimension_exists(nc, xname, (num_points_x,)) - - if !x_dimension_exists - xpts = target_coordinates(space, num_points) - add_dimension!(nc, xname, xpts; units = "m", axis = "X") - end - return [xname] -end - -# Cubed sphere -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.SpectralElementSpace2D, - num_points, - ::Domains.SphereDomain; - names = ("lon", "lat"), -) - longname, latname = names - num_points_long, num_points_lat = num_points - - long_dimension_exists = dimension_exists(nc, longname, (num_points_long,)) - lat_dimension_exists = dimension_exists(nc, latname, (num_points_lat,)) - - if !long_dimension_exists && !lat_dimension_exists - longpts, latpts = target_coordinates(space, num_points) - add_dimension!( - nc, - longname, - longpts; - units = "degrees_east", - axis = "X", - ) - add_dimension!(nc, latname, latpts; units = "degrees_north", axis = "Y") - end - - return [longname, latname] -end - -# General hybrid space. This calls both the vertical and horizontal add_space_coordinates_maybe! -# and combines the resulting dictionaries -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.ExtrudedFiniteDifferenceSpace, - num_points; - interpolated_physical_z = nothing, - disable_vertical_interpolation = false, -) - - hdims_names = vdims_names = [] - - num_points_horiz..., num_points_vertic = num_points - - # Being an Extruded space, we can assume that we have an horizontal and a vertical space. - # We can also assume that the vertical space has dimension 1 - horizontal_space = Spaces.horizontal_space(space) - - hdims_names = - add_space_coordinates_maybe!(nc, horizontal_space, num_points_horiz) - - vertical_space = Spaces.FiniteDifferenceSpace( - Spaces.vertical_topology(space), - Spaces.staggering(space), - ) - - no_topography = Spaces.grid(space).hypsography isa Grids.Flat - - if disable_vertical_interpolation - zpts = Array( - parent(space.grid.vertical_grid.center_local_geometry.coordinates), - ) - name = no_topography ? "z" : "z_reference" - if !dimension_exists(nc, name, (num_points_vertic,)) - zpts = Array( - parent( - space.grid.vertical_grid.center_local_geometry.coordinates, - ), - ) - add_dimension!(nc, name, zpts[:, 1], units = "m", axis = "Z") - end - vdims_names = [name] - else - if no_topography - vdims_names = add_space_coordinates_maybe!( - nc, - vertical_space, - num_points_vertic, - ) - else - disable_vertical_interpolation && error("Not implemented") - vdims_names = add_space_coordinates_maybe!( - nc, - vertical_space, - num_points_vertic, - interpolated_physical_z; - names = ("z_reference",), - depending_on_dimensions = hdims_names, - ) - end - end - - return (hdims_names..., vdims_names...) -end - -# Ignore the interpolated_physical_z/disable_vertical_interpolation keywords in the general -# case (we only case about the specialized one for extruded spaces) -add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space, - num_points; - interpolated_physical_z = nothing, - disable_vertical_interpolation = false, -) = add_space_coordinates_maybe!(nc::NCDatasets.NCDataset, space, num_points) - -# Elevation with topography - -# `depending_on_dimensions` identifies the dimensions upon which the current one depends on -# (excluding itself). In pretty much all cases, the dimensions depend only on themselves -# (e.g., `lat` is a variable only defined on the latitudes.), and `depending_on_dimensions` -# should be an empty tuple. The only case in which this is not what happens is with `z` with -# topography. With topography, the altitude will depend on the spatial coordinates. So, -# `depending_on_dimensions` might be `("lon", "lat)`, or similar. -function add_space_coordinates_maybe!( - nc::NCDatasets.NCDataset, - space::Spaces.FiniteDifferenceSpace, - num_points, - interpolated_physical_z; - names = ("z_reference",), - depending_on_dimensions, -) - num_points_z = num_points - name, _... = names - - # Add z_reference - z_reference_dimension_dimension_exists = - dimension_exists(nc, name, (num_points_z,)) - - if !z_reference_dimension_dimension_exists - reference_altitudes = target_coordinates(space, num_points_z) - add_dimension!(nc, name, reference_altitudes; units = "m", axis = "Z") - end - - # We also have to add an extra variable with the physical altitudes - physical_name = "z_physical" - z_physical_dimension_dimension_exists = - dimension_exists(nc, physical_name, size(interpolated_physical_z)) - - if !z_physical_dimension_dimension_exists - FT = eltype(interpolated_physical_z) - dim = NCDatasets.defVar( - nc, - physical_name, - FT, - (depending_on_dimensions..., name), - ) - dim.attrib["units"] = "m" - if length(depending_on_dimensions) == 2 - dim[:, :, :] = interpolated_physical_z - elseif length(depending_on_dimensions) == 1 - dim[:, :] = interpolated_physical_z - else - error("Error in calculating z_physical") - end - end - # We do not output this name because it is not an axis - - return [name] -end - -# General hybrid space. This calls both the vertical and horizontal add_space_coordinates_maybe! -# and combines the resulting dictionaries -function target_coordinates( - space::Spaces.ExtrudedFiniteDifferenceSpace, - num_points, -) - - hcoords = vcoords = () - - num_points_horiz..., num_points_vertic = num_points - - hcoords = - target_coordinates(Spaces.horizontal_space(space), num_points_horiz) - - vertical_space = Spaces.FiniteDifferenceSpace( - Spaces.vertical_topology(space), - Spaces.staggering(space), - ) - vcoords = target_coordinates(vertical_space, num_points_vertic) - - hcoords == vcoords == () && error("Found empty space") - - return hcoords, vcoords -end - -function hcoords_from_horizontal_space( - space::Spaces.SpectralElementSpace2D, - domain::Domains.SphereDomain, - hpts, -) - # Notice LatLong not LongLat! - return [Geometry.LatLongPoint(hc2, hc1) for hc1 in hpts[1], hc2 in hpts[2]] -end - -function hcoords_from_horizontal_space( - space::Spaces.SpectralElementSpace2D, - domain::Domains.RectangleDomain, - hpts, -) - return [Geometry.XYPoint(hc1, hc2) for hc1 in hpts[1], hc2 in hpts[2]] -end - -function hcoords_from_horizontal_space( - space::Spaces.SpectralElementSpace1D, - domain::Domains.IntervalDomain, - hpts, -) - return [Geometry.XPoint(hc1) for hc1 in hpts] -end - -""" - hcoords_from_horizontal_space(space, domain, hpts) - -Prepare the matrix of horizontal coordinates with the correct type according to the given `space` -and `domain` (e.g., `ClimaCore.Geometry.LatLongPoint`s). -""" -function hcoords_from_horizontal_space(space, domain, hpts) end - -struct NetCDFWriter{T, TS, DI} - - # TODO: At the moment, each variable gets its remapper. This is a little bit of a waste - # because we probably only need a handful of remappers since the same remapper can be - # used for multiple fields as long as they are all defined on the same space. We need - # just a few remappers because realistically we need to support fields defined on the - # entire space and fields defined on 2D slices. However, handling this distinction at - # construction time is quite difficult. - remappers::Dict{String, Remapper} - - # Tuple/Array of integers that identifies how many points to use for interpolation along - # the various dimensions. It has to have the same size as the target interpolation - # space. - num_points::T - - # How much to compress the data in the final NetCDF file: 0 no compression, 9 max - # compression. - compression_level::Int - - # An array with size num_points with the physical altitude of any given target point. - interpolated_physical_z::TS - - # NetCDF files that are currently open. Only the root process uses this field. - open_files::Dict{String, NCDatasets.NCDataset} - - # Do not interpolate on the z direction, instead evaluate on the levels. - # When disable_vertical_interpolation is true, the num_points on the vertical direction - # is ignored. - disable_vertical_interpolation::Bool - - # Areas of memory preallocated where the interpolation output is saved. Only the root - # process uses this - preallocated_output_arrays::DI -end - -""" - close(writer::NetCDFWriter) - - -Close all the files open in `writer`. -""" -close(writer::NetCDFWriter) = map(NCDatasets.close, values(writer.open_files)) - -""" - NetCDFWriter() - - -Save a `ScheduledDiagnostic` to a NetCDF file inside the `output_dir` of the simulation by -performing a pointwise (non-conservative) remapping first. - -Keyword arguments -================== - -- `cspace`: Center space of fields. -- `num_points`: How many points to use along the different dimensions to interpolate the - fields. This is a tuple of integers, typically having meaning Long-Lat-Z, - or X-Y-Z (the details depend on the configuration being simulated). -- `disable_vertical_interpolation`: Do not interpolate on the z direction, instead evaluate - at on levels. When disable_vertical_interpolation is true, - the num_points on the vertical direction is ignored. -- `compression_level`: How much to compress the output NetCDF file (0 is no compression, 9 - is maximum compression). - -""" -function NetCDFWriter(; - cspace, - num_points = (180, 90, 50), - disable_vertical_interpolation = false, - compression_level = 0, -) - space = cspace - horizontal_space = Spaces.horizontal_space(space) - is_horizontal_space = horizontal_space == space - - if disable_vertical_interpolation - # It is a little tricky to override the number of vertical points because we don't - # know if the vertical direction is the 2nd (as in a plane) or 3rd index (as in a - # box or sphere). To set this value, we check if we are on a plane or not - - # TODO: Get the number of dimensions directly from the space - num_horiz_dimensions = - Spaces.horizontal_space(space) isa Spaces.SpectralElementSpace1D ? - 1 : 2 - - num_vpts = Meshes.nelements(Grids.vertical_topology(space).mesh) - - @warn "Disabling vertical interpolation, the provided number of points is ignored (using $num_vpts)" - num_points = Tuple([num_points[1:num_horiz_dimensions]..., num_vpts]) - end - - # Interpolate physical zs - if is_horizontal_space - hpts = target_coordinates(space, num_points) - vpts = [] - else - hpts, vpts = target_coordinates(space, num_points) - end - - hcoords = hcoords_from_horizontal_space( - horizontal_space, - Meshes.domain(Spaces.topology(horizontal_space)), - hpts, - ) - zcoords = Geometry.ZPoint.(vpts) - - remapper = Remapper(space, hcoords, zcoords) - - interpolated_physical_z = - interpolate(remapper, Fields.coordinate_field(space).z) - - preallocated_arrays = - ClimaComms.iamroot(ClimaComms.context(space)) ? Dict{String, Array}() : - Dict{String, Nothing}() - - return NetCDFWriter{ - typeof(num_points), - typeof(interpolated_physical_z), - typeof(preallocated_arrays), - }( - Dict{String, Remapper}(), - num_points, - compression_level, - interpolated_physical_z, - Dict{String, NCDatasets.NCDataset}(), - disable_vertical_interpolation, - preallocated_arrays, - ) -end - -function interpolate_field!(writer::NetCDFWriter, field, diagnostic, u, p, t) - - var = diagnostic.variable - - space = axes(field) - - horizontal_space = Spaces.horizontal_space(space) - - # We have to deal with to cases: when we have an horizontal slice (e.g., the - # surface), and when we have a full space. We distinguish these cases by checking if - # the given space has the horizontal_space attribute. If not, it is going to be a - # SpectralElementSpace2D and we don't have to deal with the z coordinates. - is_horizontal_space = horizontal_space == space - - # Prepare the remapper if we don't have one for the given variable. We need one remapper - # per variable (not one per diagnostic since all the time reductions return the same - # type of space). - - # TODO: Expand this once we support spatial reductions - if !haskey(writer.remappers, var.short_name) - - # hpts, vpts are ranges of numbers - # hcoords, zcoords are ranges of Geometry.Points - - zcoords = [] - - if is_horizontal_space - hpts = target_coordinates(space, writer.num_points) - vpts = [] - else - hpts, vpts = target_coordinates(space, writer.num_points) - end - - hcoords = hcoords_from_horizontal_space( - horizontal_space, - Meshes.domain(Spaces.topology(horizontal_space)), - hpts, - ) - - # When we disable vertical_interpolation, we override the vertical points with - # the reference values for the vertical space. - if writer.disable_vertical_interpolation && !is_horizontal_space - # We need Array(parent()) because we want an array of values, not a DataLayout - # of Points - vpts = Array( - parent( - space.grid.vertical_grid.center_local_geometry.coordinates, - ), - ) - end - - zcoords = [Geometry.ZPoint(p) for p in vpts] - - writer.remappers[var.short_name] = Remapper(space, hcoords, zcoords) - end - - remapper = writer.remappers[var.short_name] - - # Now we can interpolate onto the target points - # There's an MPI call in here (to aggregate the results) - # - # The first time we call this, we call interpolate and allocate a new array. - # Future calls are in-place - if haskey(writer.preallocated_output_arrays, var.short_name) - interpolate!( - writer.preallocated_output_arrays[var.short_name], - remapper, - field, - ) - else - writer.preallocated_output_arrays[var.short_name] = - interpolate(remapper, field) - end - return nothing -end - -function outpath_name(output_dir, diagnostic) - joinpath(output_dir, "$(diagnostic.output_short_name).nc") -end - -function save_diagnostic_to_disk!( - writer::NetCDFWriter, - field, - diagnostic, - u, - p, - t, - output_dir, -) - # Only the root process has to write - ClimaComms.iamroot(ClimaComms.context(field)) || return nothing - - var = diagnostic.variable - interpolated_field = writer.preallocated_output_arrays[var.short_name] - space = axes(field) - FT = Spaces.undertype(space) - - output_path = outpath_name(output_dir, diagnostic) - - if !haskey(writer.open_files, output_path) - # Append or write a new file - open_mode = isfile(output_path) ? "a" : "c" - writer.open_files[output_path] = - NCDatasets.Dataset(output_path, open_mode) - end - - nc = writer.open_files[output_path] - - # Define time coordinate - add_time_maybe!(nc, FT; units = "s", axis = "T") - - dim_names = add_space_coordinates_maybe!( - nc, - space, - writer.num_points; - writer.interpolated_physical_z, - writer.disable_vertical_interpolation, - ) - - if haskey(nc, "$(var.short_name)") - # We already have something in the file - v = nc["$(var.short_name)"] - temporal_size, spatial_size... = size(v) - spatial_size == size(interpolated_field) || - error("incompatible dimensions for $(var.short_name)") - else - v = NCDatasets.defVar( - nc, - "$(var.short_name)", - FT, - ("time", dim_names...), - deflatelevel = writer.compression_level, - ) - v.attrib["short_name"] = var.short_name - v.attrib["long_name"] = diagnostic.output_long_name - v.attrib["units"] = var.units - v.attrib["comments"] = var.comments - v.attrib["start_date"] = string(p.start_date) - temporal_size = 0 - end - - # We need to write to the next position after what we read from the data (or the first - # position ever if we are writing the file for the first time) - time_index = temporal_size + 1 - - nc["time"][time_index] = t - - # TODO: It would be nice to find a cleaner way to do this - if length(dim_names) == 3 - v[time_index, :, :, :] = interpolated_field - elseif length(dim_names) == 2 - v[time_index, :, :] = interpolated_field - elseif length(dim_names) == 1 - v[time_index, :] = interpolated_field - end - return nothing -end - -function write_field!( - writer::NetCDFWriter, - field, - diagnostic, - u, - p, - t, - output_dir, -) - interpolate_field!(writer, field, diagnostic, u, p, t) - save_diagnostic_to_disk!(writer, field, diagnostic, u, p, t, output_dir) - return nothing -end diff --git a/src/diagnostics/reduction_identities.jl b/src/diagnostics/reduction_identities.jl deleted file mode 100644 index 0fce5e58845..00000000000 --- a/src/diagnostics/reduction_identities.jl +++ /dev/null @@ -1,19 +0,0 @@ -# This file defines group identities (technically _monoid identities_) and is included in -# Diagnostics.jl. -# -# Several diagnostics require performing reductions, such as taking the maximum or the -# average. Since it is not feasible to store all the lists of all the intermediate values, -# we aggregate the results in specific storage areas (e.g., we take -# max(max(max(max(t1, t2), t3), t4), t5) instead of max(t1, t2, t3, t4, t5) -# In this, it is convenient to preallocate the space where we want to accumulate the -# intermediate. However, we have to fill the space with something that does not affect the -# reduction. This, by definition, is the identity of the operation. The identity of the operation -# + is 0 because x + 0 = x for every x. -# -# We have to know the identity for every operation we want to support. Of course, users are -# welcome to define their own by adding new methods to identity_of_reduction. - -identity_of_reduction(::typeof(max)) = -Inf -identity_of_reduction(::typeof(min)) = +Inf -identity_of_reduction(::typeof(+)) = 0 -identity_of_reduction(::typeof(*)) = 1 diff --git a/src/diagnostics/standard_diagnostic_frequencies.jl b/src/diagnostics/standard_diagnostic_frequencies.jl index 9167ee958e4..3dc11d73bcc 100644 --- a/src/diagnostics/standard_diagnostic_frequencies.jl +++ b/src/diagnostics/standard_diagnostic_frequencies.jl @@ -1,220 +1,264 @@ """ - monthly_maxs(short_names...; output_writer) + monthly_maxs(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the monthly max for the given variables. A month is defined as 30 days. """ -monthly_maxs(short_names...; output_writer) = - common_diagnostics(30 * 24 * 60 * 60, max, output_writer, short_names...) +monthly_maxs(short_names...; output_writer, t_start) = common_diagnostics( + 30 * 24 * 60 * 60 * one(t_start), + max, + output_writer, + t_start, + short_names..., +) """ - monthly_max(short_names; output_writer) + monthly_max(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the monthly max for the given variable. A month is defined as 30 days. """ -monthly_max(short_names; output_writer) = - monthly_maxs(short_names; output_writer)[1] +monthly_max(short_names; output_writer, t_start) = + monthly_maxs(short_names; output_writer, t_start)[1] """ - monthly_mins(short_names...; output_writer) + monthly_mins(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the monthly min for the given variables. """ -monthly_mins(short_names...; output_writer) = - common_diagnostics(30 * 24 * 60 * 60, min, output_writer, short_names...) +monthly_mins(short_names...; output_writer, t_start) = common_diagnostics( + 30 * 24 * 60 * 60 * one(t_start), + min, + output_writer, + t_start, + short_names..., +) """ - monthly_min(short_names; output_writer) + monthly_min(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the monthly min for the given variable. A month is defined as 30 days. """ -monthly_min(short_names; output_writer) = - monthly_mins(short_names; output_writer)[1] +monthly_min(short_names; output_writer, t_start) = + monthly_mins(short_names; output_writer, t_start)[1] """ - monthly_averages(short_names...; output_writer) + monthly_averages(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the monthly average for the given variables. A month is defined as 30 days. """ # An average is just a sum with a normalization before output -monthly_averages(short_names...; output_writer) = common_diagnostics( - 30 * 24 * 60 * 60, +monthly_averages(short_names...; output_writer, t_start) = common_diagnostics( + 30 * 24 * 60 * 60 * one(t_start), (+), output_writer, + t_start, short_names...; pre_output_hook! = average_pre_output_hook!, ) """ - monthly_average(short_names; output_writer) + monthly_average(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that compute the monthly average for the given variable. A month is defined as 30 days. """ # An average is just a sum with a normalization before output -monthly_average(short_names; output_writer) = - monthly_averages(short_names; output_writer)[1] +monthly_average(short_names; output_writer, t_start) = + monthly_averages(short_names; output_writer, t_start)[1] """ - tendaily_maxs(short_names...; output_writer) + tendaily_maxs(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the max over ten days for the given variables. """ -tendaily_maxs(short_names...; output_writer) = - common_diagnostics(10 * 24 * 60 * 60, max, output_writer, short_names...) +tendaily_maxs(short_names...; output_writer, t_start) = common_diagnostics( + 10 * 24 * 60 * 60 * one(t_start), + max, + output_writer, + t_start, + short_names..., +) """ - tendaily_max(short_names; output_writer) + tendaily_max(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the max over ten days for the given variable. """ -tendaily_max(short_names; output_writer) = - tendaily_maxs(short_names; output_writer)[1] +tendaily_max(short_names; output_writer, t_start) = + tendaily_maxs(short_names; output_writer, t_start)[1] """ - tendaily_mins(short_names...; output_writer) + tendaily_mins(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the min over ten days for the given variables. """ -tendaily_mins(short_names...; output_writer) = - common_diagnostics(10 * 24 * 60 * 60, min, output_writer, short_names...) +tendaily_mins(short_names...; output_writer, t_start) = common_diagnostics( + 10 * 24 * 60 * 60 * one(t_start), + min, + output_writer, + t_start, + short_names..., +) """ - tendaily_min(short_names; output_writer) + tendaily_min(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the min over ten days for the given variable. """ -tendaily_min(short_names; output_writer) = - tendaily_mins(short_names; output_writer)[1] +tendaily_min(short_names; output_writer, t_start) = + tendaily_mins(short_names; output_writer, t_start)[1] """ - tendaily_averages(short_names...; output_writer) + tendaily_averages(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the average over ten days for the given variables. """ # An average is just a sum with a normalization before output -tendaily_averages(short_names...; output_writer) = common_diagnostics( - 10 * 24 * 60 * 60, +tendaily_averages(short_names...; output_writer, t_start) = common_diagnostics( + 10 * 24 * 60 * 60 * one(t_start), (+), output_writer, + t_start, short_names...; pre_output_hook! = average_pre_output_hook!, ) """ - tendaily_average(short_names; output_writer) + tendaily_average(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that compute the average over ten days for the given variable. """ # An average is just a sum with a normalization before output -tendaily_average(short_names; output_writer) = - tendaily_averages(short_names; output_writer)[1] +tendaily_average(short_names; output_writer, t_start) = + tendaily_averages(short_names; output_writer, t_start)[1] """ - daily_maxs(short_names...; output_writer) + daily_maxs(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the daily max for the given variables. """ -daily_maxs(short_names...; output_writer) = - common_diagnostics(24 * 60 * 60, max, output_writer, short_names...) +daily_maxs(short_names...; output_writer, t_start) = common_diagnostics( + 24 * 60 * 60 * one(t_start), + max, + output_writer, + t_start, + short_names..., +) """ - daily_max(short_names; output_writer) + daily_max(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the daily max for the given variable. """ -daily_max(short_names; output_writer) = - daily_maxs(short_names; output_writer)[1] +daily_max(short_names; output_writer, t_start) = + daily_maxs(short_names; output_writer, t_start)[1] """ - daily_mins(short_names...; output_writer) + daily_mins(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the daily min for the given variables. """ -daily_mins(short_names...; output_writer) = - common_diagnostics(24 * 60 * 60, min, output_writer, short_names...) +daily_mins(short_names...; output_writer, t_start) = common_diagnostics( + 24 * 60 * 60 * one(t_start), + min, + output_writer, + t_start, + short_names..., +) """ - daily_min(short_names; output_writer) + daily_min(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the daily min for the given variable. """ -daily_min(short_names; output_writer) = - daily_mins(short_names; output_writer)[1] +daily_min(short_names; output_writer, t_start) = + daily_mins(short_names; output_writer, t_start)[1] """ - daily_averages(short_names...; output_writer) + daily_averages(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the daily average for the given variables. """ # An average is just a sum with a normalization before output -daily_averages(short_names...; output_writer) = common_diagnostics( - 24 * 60 * 60, +daily_averages(short_names...; output_writer, t_start) = common_diagnostics( + 24 * 60 * 60 * one(t_start), (+), output_writer, + t_start, short_names...; pre_output_hook! = average_pre_output_hook!, ) """ - daily_average(short_names; output_writer) + daily_average(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that compute the daily average for the given variable. """ # An average is just a sum with a normalization before output -daily_average(short_names; output_writer) = - daily_averages(short_names; output_writer)[1] +daily_average(short_names; output_writer, t_start) = + daily_averages(short_names; output_writer, t_start)[1] """ - hourly_maxs(short_names...; output_writer) + hourly_maxs(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the hourly max for the given variables. """ -hourly_maxs(short_names...; output_writer) = - common_diagnostics(60 * 60, max, output_writer, short_names...) +hourly_maxs(short_names...; output_writer, t_start) = common_diagnostics( + 60 * 60 * one(t_start), + max, + output_writer, + t_start, + short_names..., +) """ - hourly_max(short_names; output_writer) + hourly_max(short_names; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the hourly max for the given variable. """ -hourly_max(short_names; output_writer) = - hourly_maxs(short_names; output_writer)[1] +hourly_max(short_names; output_writer, t_start) = + hourly_maxs(short_names; output_writer, t_start)[1] """ - hourly_mins(short_names...; output_writer) + hourly_mins(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the hourly min for the given variables. """ -hourly_mins(short_names...; output_writer) = - common_diagnostics(60 * 60, min, output_writer, short_names...) +hourly_mins(short_names...; output_writer, t_start) = common_diagnostics( + 60 * 60 * one(t_start), + min, + output_writer, + t_start, + short_names..., +) """ - hourly_mins(short_names...; output_writer) + hourly_mins(short_names...; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the hourly min for the given variable. """ -hourly_min(short_names; output_writer) = - hourly_mins(short_names; output_writer)[1] +hourly_min(short_names; output_writer, t_start) = + hourly_mins(short_names; output_writer, t_start)[1] # An average is just a sum with a normalization before output """ - hourly_averages(short_names...; output_writer) + hourly_averages(short_names...; output_writer, t_start) Return a list of `ScheduledDiagnostics` that compute the hourly average for the given variables. """ -hourly_averages(short_names...; output_writer) = common_diagnostics( - 60 * 60, +hourly_averages(short_names...; output_writer, t_start) = common_diagnostics( + 60 * 60 * one(t_start), (+), output_writer, + t_start, short_names...; pre_output_hook! = average_pre_output_hook!, ) """ - hourly_average(short_names...; output_writer) + hourly_average(short_names...; output_writer, t_start) Return a `ScheduledDiagnostics` that computes the hourly average for the given variable. """ -hourly_average(short_names; output_writer) = - hourly_averages(short_names; output_writer)[1] +hourly_average(short_names; output_writer, t_start) = + hourly_averages(short_names; output_writer, t_start)[1] diff --git a/src/diagnostics/writers.jl b/src/diagnostics/writers.jl deleted file mode 100644 index 2b26356dfeb..00000000000 --- a/src/diagnostics/writers.jl +++ /dev/null @@ -1,11 +0,0 @@ -# Writers.jl -# -# This file defines generic writers for diagnostics with opinionated defaults. - -import ClimaCore: Hypsography -import ClimaCore.Remapping: Remapper, interpolate, interpolate! - -import NCDatasets - -include("hdf5_writer.jl") -include("netcdf_writer.jl") diff --git a/src/solver/solve.jl b/src/solver/solve.jl index 984914df86f..fdaaf8e6b91 100644 --- a/src/solver/solve.jl +++ b/src/solver/solve.jl @@ -93,7 +93,7 @@ function solve_atmos!(simulation) maxrss_str = prettymemory(maxrss()) @info "Memory currently used (after solve!) by the process (RSS): $maxrss_str" - foreach(CAD.close, output_writers) + foreach(close, output_writers) end end diff --git a/src/solver/type_getters.jl b/src/solver/type_getters.jl index 229bbb18d5e..5689fe3ba49 100644 --- a/src/solver/type_getters.jl +++ b/src/solver/type_getters.jl @@ -12,6 +12,8 @@ import ClimaCore.Fields import ClimaTimeSteppers as CTS import DiffEqCallbacks as DECB +import ClimaDiagnostics + function get_atmos(config::AtmosConfig, params) (; turbconv_params) = params (; parsed_args) = config @@ -653,53 +655,19 @@ function get_simulation(config::AtmosConfig) # Initialize diagnostics s = @timed_str begin - diagnostics, writers = - get_diagnostics(config.parsed_args, atmos, Spaces.axes(Y.c)) - end - @info "initializing diagnostics: $s" - - length(diagnostics) > 0 && @info "Computing diagnostics:" - - # First, we convert all the ScheduledDiagnosticTime into ScheduledDiagnosticIteration, - # ensuring that there is consistency in the timestep and the periods and translating - # those periods that depended on the timestep - diagnostics_iterations = - [CAD.ScheduledDiagnosticIterations(d, sim_info.dt) for d in diagnostics] - - # For diagnostics that perform reductions, the storage is used for the values computed - # at each call. Reductions also save the accumulated value in diagnostic_accumulators. - diagnostic_storage = Dict() - diagnostic_accumulators = Dict() - diagnostic_counters = Dict() - - s = @timed_str begin - diagnostics_functions = CAD.get_callbacks_from_diagnostics( - diagnostics_iterations, - diagnostic_storage, - diagnostic_accumulators, - diagnostic_counters, - output_dir, + scheduled_diagnostics, writers = get_diagnostics( + config.parsed_args, + atmos, + Y, + p, + t_start, + sim_info.dt, ) end - @info "Prepared diagnostic callbacks: $s" - - # It would be nice to just pass the callbacks to the integrator. However, this leads to - # a significant increase in compile time for reasons that are not known. For this - # reason, we only add one callback to the integrator, and this function takes care of - # executing the other callbacks. This single function is orchestrate_diagnostics - - orchestrate_diagnostics(integrator) = - CAD.orchestrate_diagnostics(integrator, diagnostics_functions) - - diagnostic_callbacks = - call_every_n_steps(orchestrate_diagnostics, skip_first = true) + @info "initializing diagnostics: $s" - # The generic constructor for SciMLBase.CallbackSet has to split callbacks into discrete - # and continuous. This is not hard, but can introduce significant latency. However, all - # the callbacks in ClimaAtmos are discrete_callbacks, so we directly pass this - # information to the constructor continuous_callbacks = tuple() - discrete_callbacks = (callback..., diagnostic_callbacks) + discrete_callbacks = callback s = @timed_str begin all_callbacks = @@ -707,11 +675,8 @@ function get_simulation(config::AtmosConfig) end @info "Prepared SciMLBase.CallbackSet callbacks: $s" steps_cycle_non_diag = n_steps_per_cycle_per_cb(all_callbacks, sim_info.dt) - steps_cycle_diag = - n_steps_per_cycle_per_cb_diagnostic(diagnostics_functions) - steps_cycle = lcm([steps_cycle_non_diag..., steps_cycle_diag...]) + steps_cycle = lcm(steps_cycle_non_diag) @info "n_steps_per_cycle_per_cb (non diagnostics): $steps_cycle_non_diag" - @info "n_steps_per_cycle_per_cb_diagnostic: $steps_cycle_diag" @info "n_steps_per_cycle (non diagnostics): $steps_cycle" tspan = (t_start, sim_info.t_end) @@ -730,20 +695,16 @@ function get_simulation(config::AtmosConfig) integrator = SciMLBase.init(integrator_args...; integrator_kwargs...) end @info "init integrator: $s" - reset_graceful_exit(output_dir) - s = @timed_str init_diagnostics!( - diagnostics_iterations, - diagnostic_storage, - diagnostic_accumulators, - diagnostic_counters, - output_dir, - Y, - p, - t_start; - warn_allocations = config.parsed_args["warn_allocations_diagnostics"], - ) - @info "Init diagnostics: $s" + s = @timed_str begin + integrator = ClimaDiagnostics.IntegratorWithDiagnostics( + integrator, + scheduled_diagnostics, + ) + end + @info "Added diagnostics: $s" + + reset_graceful_exit(output_dir) return AtmosSimulation( job_id,