Merge pull request #2408 from CliMA/gb/perf_1

Increase sampling rate for allocations to 1
CliMA · Dec 4, 2023 · a1de1c0 · a1de1c0
2 parents d6a7e79 + b96f1ea
commit a1de1c0
Showing 1 changed file with 28 additions and 14 deletions.
diff --git a/perf/flame.jl b/perf/flame.jl
@@ -34,10 +34,36 @@ ProfileCanvas.html_file(joinpath(output_dir, "flame.html"), results)
 ##### Allocation tests
 #####
 
+allocs_limit = Dict()
+allocs_limit["flame_perf_target"] = 4656
+allocs_limit["flame_perf_target_tracers"] = 204288
+allocs_limit["flame_perf_target_edmfx"] = 253440
+allocs_limit["flame_perf_diagnostics"] = 3016328
+allocs_limit["flame_perf_target_diagnostic_edmfx"] = 893504
+allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] =
+    67443909648
+allocs_limit["flame_perf_target_threaded"] = 5857808
+allocs_limit["flame_perf_target_callbacks"] = 46407936
+allocs_limit["flame_perf_gw"] = 4868951088
+allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 898768
+
+
+# Ideally, we would like to track all the allocations, but this becomes too
+# expensive when there are too many of them. Here, we set the default sample
+# rate to 1, but lower it to a smaller value when we expect the job to produce
+# lots of allocations. Empirically, we find that on the Caltech cluster the
+# limit is 10 M (compared against the job's entry in `allocs_limit`).
+max_allocs_for_full_sampling = 10e6
+
+# For jobs that we don't track, we set an expected_allocs of
+# max_allocs_for_full_sampling, which leads to a sampling rate of 1
+expected_allocs = get(allocs_limit, job_id, max_allocs_for_full_sampling)
+sampling_rate = expected_allocs <= max_allocs_for_full_sampling ? 1 : 0.01
+
 # use new allocation profiler
-@info "collecting allocations"
+@info "collecting allocations with sampling rate $sampling_rate"
 Profile.Allocs.clear()
-Profile.Allocs.@profile sample_rate = 0.01 SciMLBase.step!(integrator)
+Profile.Allocs.@profile sample_rate = sampling_rate SciMLBase.step!(integrator)
 results = Profile.Allocs.fetch()
 Profile.Allocs.clear()
 profile = ProfileCanvas.view_allocs(results)
@@ -58,18 +84,6 @@ allocs = @allocated SciMLBase.step!(integrator)
 @timev SciMLBase.step!(integrator)
 @info "`allocs ($job_id)`: $(allocs)"
 
-allocs_limit = Dict()
-allocs_limit["flame_perf_target"] = 4656
-allocs_limit["flame_perf_target_tracers"] = 204288
-allocs_limit["flame_perf_target_edmfx"] = 253440
-allocs_limit["flame_perf_diagnostics"] = 3016328
-allocs_limit["flame_perf_target_diagnostic_edmfx"] = 893504
-allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] =
-    67443909648
-allocs_limit["flame_perf_target_threaded"] = 5857808
-allocs_limit["flame_perf_target_callbacks"] = 46407936
-allocs_limit["flame_perf_gw"] = 4868951088
-allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 898768
 
 if allocs < allocs_limit[job_id] * buffer
     @info "TODO: lower `allocs_limit[$job_id]` to: $(allocs)"