Skip to content

Commit

Permalink
Merge pull request #2408 from CliMA/gb/perf_1
Browse files Browse the repository at this point in the history
Increase sampling rate for allocations to 1
  • Loading branch information
Sbozzolo authored Dec 4, 2023
2 parents d6a7e79 + b96f1ea commit a1de1c0
Showing 1 changed file with 28 additions and 14 deletions.
42 changes: 28 additions & 14 deletions perf/flame.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,36 @@ ProfileCanvas.html_file(joinpath(output_dir, "flame.html"), results)
##### Allocation tests
#####

# Expected allocations for each tracked job, keyed by job id. The values are
# compared against the result of `@allocated SciMLBase.step!(integrator)` and
# are also used to pick the sampling rate of the allocation profiler.
# The value type is spelled out as `Int64` because the largest limit does not
# fit in 32 bits.
allocs_limit = Dict{String, Int64}(
    "flame_perf_target" => 4656,
    "flame_perf_target_tracers" => 204288,
    "flame_perf_target_edmfx" => 253440,
    "flame_perf_diagnostics" => 3016328,
    "flame_perf_target_diagnostic_edmfx" => 893504,
    "flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff" => 67443909648,
    "flame_perf_target_threaded" => 5857808,
    "flame_perf_target_callbacks" => 46407936,
    "flame_perf_gw" => 4868951088,
    "flame_perf_target_prognostic_edmfx_aquaplanet" => 898768,
)


# Ideally, we would like to track all the allocations, but this becomes too
# expensive when there are too many of them. Here, we set the default sample
# rate to 1, but lower it to a smaller value when we expect the job to produce
# lots of allocations. Empirically, we find that on the Caltech cluster the
# limit is about 10 million (in the units of `allocs_limit`).
# Largest expected allocation figure for which every allocation is sampled.
max_allocs_for_full_sampling = 10e6

# Jobs without an entry in `allocs_limit` fall back to the threshold itself,
# which selects a sampling rate of 1.
expected_allocs = if haskey(allocs_limit, job_id)
    allocs_limit[job_id]
else
    max_allocs_for_full_sampling
end

# Sample everything up to the threshold; beyond it, sample 1% of allocations.
sampling_rate = if expected_allocs > max_allocs_for_full_sampling
    0.01
else
    1
end

# Use the allocation profiler (`Profile.Allocs`) to record the allocations made
# while stepping the integrator, then hand the samples to ProfileCanvas.
# NOTE(review): the log line and the `@profile` line each appear twice below,
# once with the fixed 0.01 rate and once with `sampling_rate`. This looks like
# the removed and added sides of the diff shown together; confirm that only
# the `sampling_rate` versions are meant to remain.
@info "collecting allocations"
@info "collecting allocations with sampling rate $sampling_rate"
# Start from a clean slate: drop any allocation samples recorded earlier.
Profile.Allocs.clear()
Profile.Allocs.@profile sample_rate = 0.01 SciMLBase.step!(integrator)
Profile.Allocs.@profile sample_rate = sampling_rate SciMLBase.step!(integrator)
# Retrieve the recorded samples, then clear the profiler again.
results = Profile.Allocs.fetch()
Profile.Allocs.clear()
# Build the ProfileCanvas view of the allocation samples.
profile = ProfileCanvas.view_allocs(results)
Expand All @@ -58,18 +84,6 @@ allocs = @allocated SciMLBase.step!(integrator)
# Run one more integrator step under `@timev`, then log the value of `allocs`
# (measured with `@allocated` just before) together with the job id.
@timev SciMLBase.step!(integrator)
@info "`allocs ($job_id)`: $(allocs)"

# Per-job allocation limits, keyed by job id; looked up afterwards as
# `allocs_limit[job_id]` and compared with the measured `allocs`.
# NOTE(review): this table has the same entries as the `allocs_limit` defined
# before the allocation-profiling section. It is presumably the removed side
# of the diff; confirm that only one definition is meant to remain.
allocs_limit = Dict()
allocs_limit["flame_perf_target"] = 4656
allocs_limit["flame_perf_target_tracers"] = 204288
allocs_limit["flame_perf_target_edmfx"] = 253440
allocs_limit["flame_perf_diagnostics"] = 3016328
allocs_limit["flame_perf_target_diagnostic_edmfx"] = 893504
allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] =
67443909648
allocs_limit["flame_perf_target_threaded"] = 5857808
allocs_limit["flame_perf_target_callbacks"] = 46407936
allocs_limit["flame_perf_gw"] = 4868951088
allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 898768

if allocs < allocs_limit[job_id] * buffer
@info "TODO: lower `allocs_limit[$job_id]` to: $(allocs)"
Expand Down

0 comments on commit a1de1c0

Please sign in to comment.