Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

look into allocations #691

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
767 changes: 397 additions & 370 deletions .buildkite/longruns/pipeline.yml

Large diffs are not rendered by default.

749 changes: 382 additions & 367 deletions .buildkite/pipeline.yml

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion config/longrun_configs/gpu_dyamond_target.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ land_albedo_type: "map_temporal"
mode_name: "amip"
mono_surface: false
monthly_checkpoint: false
output_default_diagnostics: false
run_name: "gpu_dyamond_target"
start_date: "19790301"
t_end: "1days"
t_end: "4hours"
turb_flux_partition: "CombinedStateFluxes"
13 changes: 13 additions & 0 deletions config/model_configs/gpu_dyamond_nodiags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
anim: false
atmos_config_file: "config/gpu_configs/gpu_aquaplanet_dyamond.yml"
dt_cpl: 100
energy_check: false
job_id: "gpu_dyamond_target_nodiags"
land_albedo_type: "map_temporal"
mode_name: "amip"
mono_surface: false
monthly_checkpoint: false
run_name: "gpu_dyamond_target_nodiags"
start_date: "19790301"
t_end: "12hours"
turb_flux_partition: "CombinedStateFluxes"
2 changes: 1 addition & 1 deletion experiments/AMIP/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.2"
manifest_format = "2.0"
project_hash = "284db48dcd39d58ad8518daa0c1caded1552fa4a"
project_hash = "3a9507a7771542aa4f88e9c7e77e8f21b149253b"

[[deps.ADTypes]]
git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24"
Expand Down
1 change: 1 addition & 0 deletions experiments/AMIP/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
ArtifactWrappers = "a14bc488-3040-4b00-9dc1-f6467924858a"
AtmosphericProfilesLibrary = "86bc3604-9858-485a-bdbe-831ec50de11d"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717"
Expand Down
67 changes: 66 additions & 1 deletion experiments/AMIP/coupler_driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,36 @@ Given that ClimaCore objects are heavily parametrized, non-abbreviated stacktrac
so we force abbreviated stacktraces even in non-interactive runs.
(See also `Base.type_limited_string_from_context()`)
=#
using CUDA

"""
show_memory_usage(comms_ctx, objects)

Display the current memory footprint of the simulation, using an appropriate
method based on the device being used.

In the GPU case, show the memory usage of the GPU.
In the CPU case, show the memory footprint of the provided object(s).
Note that these two cases provide different information, and should not be
directly compared.

# Arguments
`comms_ctx`: the communication context being used to run the model
`objects`: Dict mapping objects whose memory footprint is displayed in the CPU case to their names
"""
function show_memory_usage(comms_ctx, objects)
    # Bind the device once: every branch needs it, including the fallback
    # warning, which previously interpolated an undefined `device` variable
    # and would have thrown `UndefVarError` instead of logging.
    device = comms_ctx.device
    if device isa ClimaComms.CUDADevice
        # `CUDA.memory_status` writes its report to an IO stream instead of
        # returning it, so capture the text with `sprint`; interpolating the
        # call's return value would log "nothing".
        @info "Memory usage: $(sprint(CUDA.memory_status))"
    elseif device isa ClimaComms.AbstractCPUDevice
        # Only the root process reports the per-object footprint.
        if ClimaComms.iamroot(comms_ctx)
            for (obj, name) in objects
                @info "Memory footprint of `$(name)` in bytes: $(Base.summarysize(obj))"
            end
        end
    else
        @warn "Invalid device type $(typeof(device)); cannot show memory usage."
    end
    return nothing
end

redirect_stderr(IOContext(stderr, :stacktrace_types_limited => Ref(false)))

Expand Down Expand Up @@ -190,8 +220,12 @@ returns a `ComponentModelSimulation` object (see `Interfacer` docs for more deta
This uses the `ClimaAtmos.jl` model, with parameterization options specified in the `config_dict_atmos` dictionary.
=#

show_memory_usage(comms_ctx, Dict())

## init atmos model component
atmos_sim = atmos_init(FT, config_dict_atmos);
show_memory_usage(comms_ctx, Dict(atmos_sim => name(atmos_sim)))

thermo_params = get_thermo_params(atmos_sim) # TODO: this should be shared by all models #610

#=
Expand Down Expand Up @@ -222,6 +256,7 @@ land_fraction =
mono = mono_surface,
)
)
show_memory_usage(comms_ctx, Dict(land_fraction => "land_fraction"))

#=
### Surface Models: AMIP and SlabPlanet Modes
Expand Down Expand Up @@ -332,6 +367,17 @@ if mode_name == "amip"
update_field!(atmos_sim, Val(:co2), CO2_init)

mode_specifics = (; name = mode_name, SST_info = SST_info, SIC_info = SIC_info, CO2_info = CO2_info)
show_memory_usage(
comms_ctx,
Dict(
land_sim => name(land_sim),
ocean_sim => name(ocean_sim),
ice_sim => name(ice_sim),
SST_info => "SST_info",
SIC_info => "SIC_info",
CO2_info => "CO2_info",
),
)

elseif mode_name in ("slabplanet", "slabplanet_aqua", "slabplanet_terra")

Expand Down Expand Up @@ -381,6 +427,10 @@ elseif mode_name in ("slabplanet", "slabplanet_aqua", "slabplanet_terra")
))

mode_specifics = (; name = mode_name, SST_info = nothing, SIC_info = nothing)
show_memory_usage(
comms_ctx,
Dict(land_sim => name(land_sim), ocean_sim => name(ocean_sim), ice_sim => name(ice_sim)),
)

elseif mode_name == "slabplanet_eisenman"

Expand Down Expand Up @@ -423,8 +473,13 @@ elseif mode_name == "slabplanet_eisenman"
)

mode_specifics = (; name = mode_name, SST_info = nothing, SIC_info = nothing)
show_memory_usage(
comms_ctx,
Dict(land_sim => name(land_sim), ocean_sim => name(ocean_sim), ice_sim => name(ice_sim)),
)
end


#=
## Coupler Initialization
The coupler needs to contain exchange information, manage the calendar and be able to access all component models. It can also optionally
Expand Down Expand Up @@ -452,6 +507,7 @@ coupler_field_names = (
)
coupler_fields =
NamedTuple{coupler_field_names}(ntuple(i -> ClimaCore.Fields.zeros(boundary_space), length(coupler_field_names)))
show_memory_usage(comms_ctx, Dict(coupler_fields => "coupler_fields"))

## model simulations
model_sims = (atmos_sim = atmos_sim, ice_sim = ice_sim, land_sim = land_sim, ocean_sim = ocean_sim);
Expand Down Expand Up @@ -484,7 +540,8 @@ monthly_2d_diags = init_diagnostics(
name_tag = "monthly_mean_2d_",
)

diagnostics = (monthly_3d_diags, monthly_2d_diags)
diagnostics = () #(monthly_3d_diags, monthly_2d_diags)
show_memory_usage(comms_ctx, Dict(diagnostics => "diagnostics"))

#=
## Initialize Conservation Checks
Expand All @@ -503,6 +560,8 @@ if energy_check
)
conservation_checks = (; energy = EnergyConservationCheck(model_sims), water = WaterConservationCheck(model_sims))
end
show_memory_usage(comms_ctx, Dict(conservation_checks => "conservation_checks"))


#=
## Initialize Callbacks
Expand All @@ -523,6 +582,7 @@ checkpoint_cb =
update_firstdayofmonth!_cb =
MonthlyCallback(dt = FT(1), func = update_firstdayofmonth!, ref_date = [dates.date1[1]], active = true)
callbacks = (; checkpoint = checkpoint_cb, update_firstdayofmonth! = update_firstdayofmonth!_cb)
show_memory_usage(comms_ctx, Dict(callbacks => "callbacks"))

#=
## Initialize Coupled Simulation
Expand Down Expand Up @@ -550,6 +610,7 @@ cs = CoupledSimulation{FT}(
callbacks,
dir_paths,
);
show_memory_usage(comms_ctx, Dict(cs => "cs"))

#=
## Restart component model states if specified
Expand Down Expand Up @@ -626,6 +687,8 @@ reinit_model_sims!(cs.model_sims)
import_atmos_fields!(cs.fields, cs.model_sims, cs.boundary_space, turbulent_fluxes)
update_model_sims!(cs.model_sims, cs.fields, turbulent_fluxes)

show_memory_usage(comms_ctx, Dict(cs => "cs"))

#=
## Coupling Loop

Expand Down Expand Up @@ -726,6 +789,7 @@ end #hide

## run the coupled simulation
solve_coupler!(cs);
show_memory_usage(comms_ctx, Dict(cs => "cs"))

#=
## Postprocessing
Expand Down Expand Up @@ -825,6 +889,7 @@ if ClimaComms.iamroot(comms_ctx)
Leaderboard.plot_biases(atmos_sim.integrator.p.output_dir, compare_vars, cs.dates.date; output_path)
end
end
show_memory_usage(comms_ctx, Dict(cs => "cs"))

if isinteractive()
## clean up for interactive runs, retain all output otherwise
Expand Down
4 changes: 2 additions & 2 deletions perf/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.2"
manifest_format = "2.0"
project_hash = "ed46c461d51248cd9cbdd956368072e478bae235"
project_hash = "b45e4e9d311af3db6eb01b9c25b9517209fbcc29"

[[deps.ADTypes]]
git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24"
Expand Down Expand Up @@ -307,7 +307,7 @@ uuid = "d934ef94-cdd4-4710-83d6-720549644b70"
version = "0.3.14"

[[deps.ClimaCoupler]]
deps = ["ClimaAtmos", "ClimaComms", "ClimaCore", "ClimaCoreTempestRemap", "ClimaLand", "ClimaParams", "Dates", "DocStringExtensions", "Insolation", "JLD2", "NCDatasets", "Plots", "SciMLBase", "StaticArrays", "Statistics", "SurfaceFluxes", "TempestRemap_jll", "Thermodynamics"]
deps = ["CUDA", "ClimaAtmos", "ClimaComms", "ClimaCore", "ClimaCoreTempestRemap", "ClimaLand", "ClimaParams", "Dates", "DocStringExtensions", "Insolation", "JLD2", "NCDatasets", "Plots", "SciMLBase", "StaticArrays", "Statistics", "SurfaceFluxes", "TempestRemap_jll", "Thermodynamics"]
path = ".."
uuid = "4ade58fe-a8da-486c-bd89-46df092ec0c7"
version = "0.1.0"
Expand Down
1 change: 1 addition & 0 deletions perf/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
ArtifactWrappers = "a14bc488-3040-4b00-9dc1-f6467924858a"
AtmosphericProfilesLibrary = "86bc3604-9858-485a-bdbe-831ec50de11d"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
Expand Down
14 changes: 7 additions & 7 deletions src/Utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ function swap_space!(field_out, field_in::Fields.Field)
end

"""
get_device(parsed_args)
get_device(parsed_args)

Returns the device on which the model is being run
Returns the device on which the model is being run

# Arguments
- `parsed_args`: dictionary containing a "device" flag which decides which device to run on
# Arguments
- `parsed_args`: dictionary containing a "device" flag which decides which device to run on
"""
function get_device(parsed_args)
if parsed_args["device"] == "auto"
Expand All @@ -46,11 +46,11 @@ end


"""
get_comms_context(parsed_args)
get_comms_context(parsed_args)

Sets up the appropriate ClimaComms context for the device the model is to be run on
Sets up the appropriate ClimaComms context for the device the model is to be run on

# Arguments
# Arguments
`parsed_args`: dictionary containing a "device" flag which decides which device context is needed
"""
function get_comms_context(parsed_args)
Expand Down
12 changes: 11 additions & 1 deletion test/component_model_tests/climaatmos_standalone/atmos_driver.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
using ClimaComms
using Logging
using ClimaAtmos

redirect_stderr(IOContext(stderr, :stacktrace_types_limited => Ref(false)))
import ClimaAtmos as CA
import Random
using ClimaCoupler
Random.seed!(1234)

pkg_dir = pkgdir(ClimaCoupler)
atmos_config_file = joinpath(pkg_dir, "test/component_model_tests/climaatmos_standalone/longrun_aquaplanet_dyamond.yml")
comms_ctx = ClimaComms.context(ClimaComms.device())
@show typeof(comms_ctx)

config = CA.AtmosConfig(atmos_config_file; comms_ctx = comms_ctx)

OUTPUT_DIR = joinpath(pkg_dir, "test/component_model_tests/climaatmos_standalone/output/longrun_aquaplanet_dyamond_artifacts/")
mkpath(OUTPUT_DIR)

if !(@isdefined config)
config = CA.AtmosConfig()
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
h_elem: 60
z_max: 55000.0
z_elem: 63
dz_bottom: 30.0
dz_top: 3000.0
moist: "equil"
precip_model: "0M"
cloud_model: "grid_scale"
rad: "allskywithclear"
idealized_insolation: false
dt_rad: "1hours"
vert_diff: "true"
surface_setup: "DefaultMoninObukhov"
rayleigh_sponge: true
dt_save_state_to_disk: "3hours"
dt: "50secs"
t_end: "4hours"
job_id: "longrun_aquaplanet_dyamond"
toml: [toml/longrun_aquaplanet_dyamond.toml]
6 changes: 6 additions & 0 deletions toml/longrun_aquaplanet_dyamond.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[zd_rayleigh]
value = 35000.0

[alpha_rayleigh_uh]
alias = "alpha_rayleigh_uh"
value = 0.0