Skip to content

Commit

Permalink
Add test to specifically see if we delete the _right_ files
Browse files Browse the repository at this point in the history
  • Loading branch information
JBorrow committed Nov 15, 2024
1 parent 20563cf commit 28d1e3f
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 5 deletions.
10 changes: 8 additions & 2 deletions librarian_background/rolling_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,16 @@ def core(self, session: Session):

# Get the instances that are older than the age

logger.info(
"Querying for created_times later than {} UTC ({} local)",
age_cutoff,
age_cutoff.astimezone(),
)

query_begin = time.perf_counter()
stmt = select(Instance).filter(
Instance.store_id == store.id,
Instance.created_time < age_cutoff,
Instance.created_time < age_cutoff.astimezone(timezone.utc),
Instance.available == True,
)

Expand All @@ -92,7 +98,7 @@ def core(self, session: Session):
# TODO: Soft timeout
# Check that we got what we wanted.
try:
# assert instance.created_time < age_cutoff
assert instance.created_time.replace(tzinfo=timezone.utc) < age_cutoff
assert instance.store_id == store.id
assert instance.available
except AssertionError:
Expand Down
73 changes: 70 additions & 3 deletions tests/background_unit_test/test_rolling_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,18 @@
"""

import shutil
from datetime import datetime, timedelta
from pathlib import Path

from hera_librarian.deletion import DeletionPolicy


def prep_file(garbage_file, test_orm, session):
def prep_file(garbage_file, test_orm, session, FILE_NAME="path/for/rolling/deletion"):

store = session.query(test_orm.StoreMetadata).filter_by(ingestable=True).first()

info = store.store_manager.path_info(garbage_file)

FILE_NAME = "path/for/rolling/deletion"

store_path = store.store_manager.store(Path(FILE_NAME))

shutil.copy(garbage_file, store_path)
Expand Down Expand Up @@ -135,3 +134,71 @@ def test_rolling_deletion_with_single_instance_unavailable(
)

return


def test_rolling_deletion_with_multiple_files_age_out(
    test_client, test_server, test_orm, garbage_file
):
    """
    See if we correctly age out several files.

    Nine files are created and their instances are back-dated by 1..9 days.
    The rolling deletion task is then run with ``age_in_days=5.0`` and we
    check that only the instances back-dated by five days or more are no
    longer available, i.e. that the task acted on the _right_ files.
    """
    from librarian_background.rolling_deletion import RollingDeletion

    _, get_session, _ = test_server

    session = get_session()

    # One (file name, age in days, instance id) record per file we create.
    created = []

    for age_in_days in range(1, 10):
        store, file, instance = prep_file(
            garbage_file, test_orm, session, f"TEST_FILE/{age_in_days}.txt"
        )

        # Back-date both the file and its instance so they appear old.
        file.create_time = file.create_time - timedelta(days=age_in_days)
        instance.created_time = file.create_time

        created.append((file.name, age_in_days, instance.id))

    session.commit()

    # Run the task
    task = RollingDeletion(
        name="Rolling deletion",
        soft_timeout="6:00:00",
        store_name=store.name,
        age_in_days=5.0,
        number_of_remote_copies=0,
        verify_downstream_checksums=False,
        mark_unavailable=True,
        force_deletion=False,
    )()

    assert task

    session.close()

    # Use a fresh session so we read what the task actually persisted.
    session = get_session()

    try:
        for name, age_in_days, instance_id in created:
            instance = (
                session.query(test_orm.Instance)
                .filter_by(id=instance_id)
                .one_or_none()
            )

            # Fail with a clear message rather than an AttributeError below.
            assert instance is not None, f"Instance row for {name} is missing"

            # Check that the older instances are gone.
            # NOTE(review): the 5-day-old file is presumably just past the
            # cutoff because it was created before the task ran -- confirm
            # against how the task computes its age cutoff.
            if age_in_days >= 5:
                assert not instance.available, f"{name} should have aged out"
            else:
                assert instance.available, f"{name} should still be available"

            # Delete the file we created
            session.get(test_orm.File, name).delete(
                session=session, commit=True, force=True
            )
    finally:
        session.close()
23 changes: 23 additions & 0 deletions tests/integration_test/test_send_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,29 @@ def test_send_from_existing_file_row(
checksums_from_validations = {x.current_checksum for x in instance_validations}
assert len(checksums_from_validations) == 1 # Same file

# Ok, now try the deletion task.
from librarian_background.rolling_deletion import RollingDeletion

task = RollingDeletion(
name="rolling_deletion",
store_name="local_store",
age_in_days=0.0000000000000000001,
number_of_remote_copies=1,
verify_downstream_checksums=True,
mark_unavailable=True,
force_deletion=False,
)

with source_session_maker() as session:
task.core(session=session)

# Check that the instance is gone
with source_session_maker() as session:
for file_name in copied_files:
file = session.get(test_orm.File, file_name)
for instance in file.instances:
assert instance.available == False

# Remove the librarians we added.
assert mocked_admin_client.remove_librarian(name="live_server")

Expand Down

0 comments on commit 28d1e3f

Please sign in to comment.