From b96f1ea37ef32f1969c8a70408653882141df2d7 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola
Date: Mon, 4 Dec 2023 11:04:53 -0800
Subject: [PATCH] Dynamically choose the sampling rate for allocations

---
 perf/flame.jl | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/perf/flame.jl b/perf/flame.jl
index 72e626875c..6bd8da3b48 100644
--- a/perf/flame.jl
+++ b/perf/flame.jl
@@ -34,10 +34,36 @@ ProfileCanvas.html_file(joinpath(output_dir, "flame.html"), results)
 ##### Allocation tests
 #####
 
+allocs_limit = Dict()
+allocs_limit["flame_perf_target"] = 4656
+allocs_limit["flame_perf_target_tracers"] = 204288
+allocs_limit["flame_perf_target_edmfx"] = 253440
+allocs_limit["flame_perf_diagnostics"] = 3016328
+allocs_limit["flame_perf_target_diagnostic_edmfx"] = 893504
+allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] =
+    67443909648
+allocs_limit["flame_perf_target_threaded"] = 5857808
+allocs_limit["flame_perf_target_callbacks"] = 46407936
+allocs_limit["flame_perf_gw"] = 4868951088
+allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 898768
+
+
+# Ideally, we would like to track all the allocations, but this becomes too
+# expensive when there are too many of them. Here, we set the default sample
+# rate to 1, but lower it to a smaller value when we expect the job to produce
+# a lot of allocations. Empirically, we find that on the Caltech cluster the
+# limit is about 10 million allocations.
+max_allocs_for_full_sampling = 10e6
+
+# For jobs that we don't track, we set an expected_allocs of
+# max_allocs_for_full_sampling, which leads to a sampling rate of 1.
+expected_allocs = get(allocs_limit, job_id, max_allocs_for_full_sampling)
+sampling_rate = expected_allocs <= max_allocs_for_full_sampling ? 1 : 0.01
+
 # use new allocation profiler
-@info "collecting allocations"
+@info "collecting allocations with sampling rate $sampling_rate"
 Profile.Allocs.clear()
-Profile.Allocs.@profile sample_rate = 0.01 SciMLBase.step!(integrator)
+Profile.Allocs.@profile sample_rate = sampling_rate SciMLBase.step!(integrator)
 results = Profile.Allocs.fetch()
 Profile.Allocs.clear()
 profile = ProfileCanvas.view_allocs(results)
@@ -58,18 +84,6 @@ allocs = @allocated SciMLBase.step!(integrator)
 @timev SciMLBase.step!(integrator)
 @info "`allocs ($job_id)`: $(allocs)"
 
-allocs_limit = Dict()
-allocs_limit["flame_perf_target"] = 4656
-allocs_limit["flame_perf_target_tracers"] = 204288
-allocs_limit["flame_perf_target_edmfx"] = 253440
-allocs_limit["flame_perf_diagnostics"] = 3016328
-allocs_limit["flame_perf_target_diagnostic_edmfx"] = 893504
-allocs_limit["flame_sphere_baroclinic_wave_rhoe_equilmoist_expvdiff"] =
-    67443909648
-allocs_limit["flame_perf_target_threaded"] = 5857808
-allocs_limit["flame_perf_target_callbacks"] = 46407936
-allocs_limit["flame_perf_gw"] = 4868951088
-allocs_limit["flame_perf_target_prognostic_edmfx_aquaplanet"] = 898768
 
 if allocs < allocs_limit[job_id] * buffer
     @info "TODO: lower `allocs_limit[$job_id]` to: $(allocs)"
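
Note: the snippet below is a minimal, self-contained sketch of the sampling-rate
selection introduced by this patch, runnable outside ClimaAtmos with only the
stdlib Profile.Allocs API. The `workload()` function and the "demo_job" limit
are hypothetical stand-ins for `SciMLBase.step!(integrator)` and the real
`allocs_limit` entries; it is not part of the patch itself.

    using Profile

    # Hypothetical per-job allocation limits; in the patch these come from CI jobs.
    allocs_limit = Dict("demo_job" => 5_000_000_000)
    job_id = "demo_job"

    # Sample every allocation (rate 1) unless the job is expected to allocate
    # more than ~10 million objects, in which case sample only 1% of them.
    max_allocs_for_full_sampling = 10e6
    expected_allocs = get(allocs_limit, job_id, max_allocs_for_full_sampling)
    sampling_rate = expected_allocs <= max_allocs_for_full_sampling ? 1 : 0.01

    # Stand-in for SciMLBase.step!(integrator): an allocation-heavy workload.
    workload() = [rand(1_000) for _ in 1:1_000]

    Profile.Allocs.clear()
    Profile.Allocs.@profile sample_rate = sampling_rate workload()
    results = Profile.Allocs.fetch()
    Profile.Allocs.clear()
    @info "collected $(length(results.allocs)) allocation samples at rate $sampling_rate"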