Skip to content

Commit

Permalink
Cleanup old folders on central
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Jul 6, 2024
1 parent 98e60b9 commit 1ff1069
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 16 deletions.
69 changes: 65 additions & 4 deletions regression_tests/move_output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@ job_ids = getindex.(split.(lines, "\""), 2)
@assert count(x -> occursin("OrderedDict", x), all_lines) == length(job_ids) + 1
# Safety net: refuse to continue when no job ids were extracted.
@assert length(job_ids) > 0 # safety net

if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"
# Whether this script is running inside the `climaatmos-ci` Buildkite pipeline.
buildkite_ci = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
# Read the branch name straight from the environment: in the visible code,
# `branch` is first assigned inside the `if buildkite_ci` block below, so
# referencing it here would raise an `UndefVarError`. When the variable is
# unset the empty-string default makes this `false`.
in_merge_queue =
    startswith(get(ENV, "BUILDKITE_BRANCH", ""), "gh-readonly-queue/main/")
if buildkite_ci
commit = ENV["BUILDKITE_COMMIT"]
branch = ENV["BUILDKITE_BRANCH"]
# Note: cluster_data_prefix is also defined in compute_mse.jl
cluster_data_prefix = "/central/scratch/esm/slurm-buildkite/climaatmos-main"

@info "pwd() = $(pwd())"
@info "branch = $(branch)"
@info "commit = $(commit)"

using Glob
@show readdir(joinpath(@__DIR__, ".."))
if startswith(branch, "gh-readonly-queue/main/")
if in_merge_queue
commit_sha = commit[1:7]
mkpath(cluster_data_prefix)
path = joinpath(cluster_data_prefix, commit_sha)
Expand All @@ -46,3 +48,62 @@ if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) == "climaatmos-ci"
else
@info "ENV keys: $(keys(ENV))"
end

"""
    reason(path)

Return a short description of the ref-counter state of the folder `path`,
used when logging which folders are being deleted.

If `path` contains no `ref_counter.jl`, the message says so; otherwise the
first line of that file is parsed as an `Int` and included in the message.
"""
function reason(path)
    counter_file = joinpath(path, "ref_counter.jl")
    # Guard clause: nothing to parse when the counter file is absent.
    isfile(counter_file) || return "ref_counter.jl does not exist"
    counter_value = parse(Int, first(readlines(counter_file)))
    return string("ref_counter: ", counter_value)
end

"""
    cleanup_central(cluster_data_prefix; keep_latest_n = 0, keep_latest_ref_counters = 5)

Delete stale dataset folders located under `cluster_data_prefix`.

The candidate folders come from `sorted_dataset_folder(; dir = cluster_data_prefix)`.
A candidate is deleted when either

- it contains no `ref_counter.jl`, or
- its ref counter plus `keep_latest_ref_counters` is still smaller than the
  largest ref counter found across all folders.

# Keywords
- `keep_latest_n`: number of entries at the end of the sorted folder list that
  are never considered for deletion.
- `keep_latest_ref_counters`: how many ref-counter increments behind the
  largest one a folder may be before it becomes eligible for deletion.

Every folder scheduled for deletion is logged (with its modification time and
the reason) before anything is removed. Returns `nothing`.

# Throws
- `ErrorException` if the folder holding the largest ref counter has no
  `ref_counter.jl` (i.e. no folder provides a reference to compare against).
"""
function cleanup_central(
    cluster_data_prefix;
    keep_latest_n = 0,
    keep_latest_ref_counters = 5,
)
    @warn "Cleaning up old files on central"
    # Get (sorted) array of paths, `pop!(sorted_paths)`
    # is the most recent merged folder.
    sorted_paths = sorted_dataset_folder(; dir = cluster_data_prefix)
    isempty(sorted_paths) && return nothing

    # Read every ref counter once; folders without `ref_counter.jl` get `-1`.
    ref_counters_main = ref_counters_per_path(sorted_paths)
    i_largest_reference = argmax(ref_counters_main)
    ref_counter_file_main =
        joinpath(sorted_paths[i_largest_reference], "ref_counter.jl")
    # Explicit check instead of `@assert`: this is a runtime condition on the
    # file system, not an internal invariant.
    isfile(ref_counter_file_main) ||
        error("Expected `ref_counter.jl` in $(sorted_paths[i_largest_reference])")
    ref_counter_main = ref_counters_main[i_largest_reference]

    # Only the leading entries are candidates; the trailing `keep_latest_n`
    # ones are always kept. Clamp so unusual keyword values cannot index
    # out of bounds.
    n_candidates =
        clamp(length(sorted_paths) - keep_latest_n, 0, length(sorted_paths))
    # Same element type as `sorted_paths` rather than an untyped `[]`.
    paths_to_delete = similar(sorted_paths, 0)
    candidates =
        Iterators.take(zip(sorted_paths, ref_counters_main), n_candidates)
    for (path, ref_counter) in candidates
        has_no_counter = !isfile(joinpath(path, "ref_counter.jl"))
        # Just to be safe, also keep any path whose ref counter is within
        # `keep_latest_ref_counters` increments of the largest one.
        is_outdated = ref_counter + keep_latest_ref_counters < ref_counter_main
        if has_no_counter || is_outdated
            push!(paths_to_delete, path)
        end
    end

    @show ref_counter_main
    @show length(sorted_paths)
    @show length(paths_to_delete)
    @info "Deleting files:"
    # Log the complete list first so the record exists before any removal.
    for f in paths_to_delete
        @info " (File, date): ($(f), $(Dates.unix2datetime(stat(f).mtime))). Reason: $(reason(f))"
    end
    for f in paths_to_delete
        rm(f; recursive = true, force = true)
    end
    return nothing
end

# Run the cleanup only when both flags are set (Buildkite CI and merge queue).
buildkite_ci && in_merge_queue && cleanup_central(cluster_data_prefix)
28 changes: 16 additions & 12 deletions regression_tests/self_reference_or_path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@ function sorted_dataset_folder(; dir = pwd())
return sorted_paths
end

"""
    ref_counters_per_path(paths)

Return a `Vector{Int}` with one entry per element of `paths`.

Each entry is the integer parsed from the first line of the `ref_counter.jl`
file inside the corresponding folder, or `-1` when that folder has no
`ref_counter.jl`.
"""
function ref_counters_per_path(paths)
    # Start from the "no counter file" sentinel and overwrite where a file exists.
    counters = fill(-1, length(paths))
    for (idx, folder) in enumerate(paths)
        counter_file = joinpath(folder, "ref_counter.jl")
        if isfile(counter_file)
            counters[idx] = parse(Int, first(readlines(counter_file)))
        end
    end
    return counters
end

function self_reference_or_path()
if get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci"
return :self_reference
Expand All @@ -36,24 +47,17 @@ function self_reference_or_path()
@assert isfile(ref_counter_file_PR)
ref_counter_PR = parse(Int, first(readlines(ref_counter_file_PR)))

ref_counters_main = Vector{Int}(undef, length(sorted_paths))
ref_counters_main .= -1
for (i, path) in enumerate(sorted_paths)
ref_counter_file_main = joinpath(path, "ref_counter.jl")
!isfile(ref_counter_file_main) && continue
ref_counters_main[i] =
parse(Int, first(readlines(ref_counter_file_main)))
end
i_oldest_reference = findfirst(ref_counters_main) do ref_counter_main
ref_counters_main = ref_counters_per_path(sorted_paths)
i_largest_reference = findfirst(ref_counters_main) do ref_counter_main
ref_counter_main == ref_counter_PR
end
if i_oldest_reference == nothing
if i_largest_reference == nothing
@warn "`ref_counter.jl` not found on main, assuming self-reference"
@info "Please review output results before merging."
return :self_reference
end
# Oldest reference path:
path = sorted_paths[i_oldest_reference]
# Largest ref-counter reference path:
path = sorted_paths[i_largest_reference]
ref_counter_file_main = joinpath(path, "ref_counter.jl")

@info "Files on main:" # for debugging
Expand Down

0 comments on commit 1ff1069

Please sign in to comment.