From 7f075e43f599fb7404c0252460624df5595cf186 Mon Sep 17 00:00:00 2001
From: Charles Kawczynski
Date: Wed, 8 May 2024 11:39:31 -0400
Subject: [PATCH] Cleanup old folders on central

---
Review notes (ignored by `git am`):
- The reporting loop indexed `paths_to_delete` with `1:N`, which throws a
  `BoundsError` whenever at least one folder is kept; it now iterates the
  candidates directly (same for the disabled `rm` loop).
- `paths_to_delete` is created with the element type of `sorted_paths`.
- `== nothing` replaced by `=== nothing`; docstrings added to new functions.
- Whitespace was reconstructed assuming 4-space indentation; re-check the
  context lines against the target files before applying.

 regression_tests/move_output.jl            | 62 +++++++++++++++++++++-
 regression_tests/self_reference_or_path.jl | 34 ++++++++----
 2 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/regression_tests/move_output.jl b/regression_tests/move_output.jl
index c629effc906..fa7530938d6 100644
--- a/regression_tests/move_output.jl
+++ b/regression_tests/move_output.jl
@@ -9,11 +9,11 @@ job_ids = getindex.(split.(lines, "\""), 2)
 @assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
 @assert length(job_ids) ≠ 0 # safety net
 
+# Note: cluster_data_prefix is also defined in compute_mse.jl
+cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
     commit = ENV["BUILDKITE_COMMIT"]
     branch = ENV["BUILDKITE_BRANCH"]
-    # Note: cluster_data_prefix is also defined in compute_mse.jl
-    cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
 
     @info "pwd() = $(pwd())"
     @info "branch = $(branch)"
@@ -46,3 +46,61 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
 else
     @info "ENV keys: $(keys(ENV))"
 end
+
+"""
+    cleanup_central(cluster_data_prefix)
+
+Report the dataset folders under `cluster_data_prefix` that are candidates for
+deletion: folders without a `ref_counter.jl` file, and folders whose reference
+counter is more than `keep_latest_ref_counters` increments behind the largest
+reference counter found. Deletion itself is left disabled, so this currently
+only logs the candidates.
+"""
+function cleanup_central(cluster_data_prefix)
+    @warn "Cleaning up old files on central"
+    # Get (sorted) array of paths, `pop!(sorted_paths)`
+    # is the most recent merged folder.
+    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
+    keep_latest_n = 0
+    keep_latest_ref_counters = 5
+    if !isempty(sorted_paths)
+        N = length(sorted_paths) - keep_latest_n
+        # Empty vector with the same element type as `sorted_paths`.
+        paths_to_delete = similar(sorted_paths, 0)
+        ref_counters_main = ref_counters_per_path(sorted_paths)
+        i_largest_reference = argmax(ref_counters_main)
+        path = sorted_paths[i_largest_reference]
+        ref_counter_file_main = joinpath(path, "ref_counter.jl")
+        @assert isfile(ref_counter_file_main)
+        ref_counter_main = parse(Int, first(readlines(ref_counter_file_main)))
+
+        for i in 1:N
+            path = sorted_paths[i]
+            ref_counter_file = joinpath(path, "ref_counter.jl")
+            if !isfile(ref_counter_file)
+                push!(paths_to_delete, path)
+            else
+                ref_counter = parse(Int, first(readlines(ref_counter_file)))
+                # Just to be safe, let's also make sure that we don't delete
+                # any paths with recent (let's say 5) ref counter increments ago.
+                if ref_counter + keep_latest_ref_counters < ref_counter_main
+                    push!(paths_to_delete, path)
+                end
+            end
+        end
+        @show length(sorted_paths)
+        @show length(paths_to_delete)
+        @info "Deleting files:"
+        # Iterate over the candidates themselves: `paths_to_delete` is shorter
+        # than `N` whenever a folder is kept, so indexing it with `1:N` throws
+        # a `BoundsError`.
+        for f in paths_to_delete
+            @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime)))"
+        end
+        # Deletion is intentionally disabled for now:
+        # for f in paths_to_delete
+        #     rm(f; recursive = true)
+        # end
+    end
+end
+
+cleanup_central(cluster_data_prefix)
diff --git a/regression_tests/self_reference_or_path.jl b/regression_tests/self_reference_or_path.jl
index 6ccf48aafd5..06ac599044a 100644
--- a/regression_tests/self_reference_or_path.jl
+++ b/regression_tests/self_reference_or_path.jl
@@ -13,6 +13,23 @@ function sorted_dataset_folder(; dir = pwd())
     return sorted_paths
 end
 
+"""
+    ref_counters_per_path(paths)
+
+Return a `Vector{Int}` holding, for each entry of `paths`, the integer parsed
+from the first line of its `ref_counter.jl` file, or `-1` when that file does
+not exist.
+"""
+function ref_counters_per_path(paths)
+    ref_counters_in_path = fill(-1, length(paths))
+    for (i, path) in enumerate(paths)
+        ref_counter_file = joinpath(path, "ref_counter.jl")
+        !isfile(ref_counter_file) && continue
+        ref_counters_in_path[i] = parse(Int, first(readlines(ref_counter_file)))
+    end
+    return ref_counters_in_path
+end
+
 function self_reference_or_path()
     if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"
         return :self_reference
@@ -36,24 +53,17 @@ function self_reference_or_path()
     @assert isfile(ref_counter_file_PR)
     ref_counter_PR = parse(Int, first(readlines(ref_counter_file_PR)))
 
-    ref_counters_main = Vector{Int}(undef, length(sorted_paths))
-    ref_counters_main .= -1
-    for (i, path) in enumerate(sorted_paths)
-        ref_counter_file_main = joinpath(path, "ref_counter.jl")
-        !isfile(ref_counter_file_main) && continue
-        ref_counters_main[i] =
-            parse(Int, first(readlines(ref_counter_file_main)))
-    end
-    i_oldest_reference = findfirst(ref_counters_main) do ref_counter_main
+    ref_counters_main = ref_counters_per_path(sorted_paths)
+    i_largest_reference = findfirst(ref_counters_main) do ref_counter_main
         ref_counter_main == ref_counter_PR
     end
-    if i_oldest_reference == nothing
+    if i_largest_reference === nothing
         @warn "`ref_counter.jl` not found on main, assuming self-reference"
         @info "Please review output results before merging."
         return :self_reference
     end
-    # Oldest reference path:
-    path = sorted_paths[i_oldest_reference]
+    # Largest ref-counter reference path:
+    path = sorted_paths[i_largest_reference]
     ref_counter_file_main = joinpath(path, "ref_counter.jl")
 
     @info "Files on main:" # for debugging