From 1f586a6ffba210bd32555fc0ba921fb09933fc6b Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Fri, 9 Sep 2022 17:18:07 +0100 Subject: [PATCH 01/10] Cleanup script to search and remove applications that we do not want. --- .gitignore | 1 + scripts/cleanup.py | 164 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100755 scripts/cleanup.py diff --git a/.gitignore b/.gitignore index a969af7..1df0203 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea /bin /bugs /pkg diff --git a/scripts/cleanup.py b/scripts/cleanup.py new file mode 100755 index 0000000..f4cbe06 --- /dev/null +++ b/scripts/cleanup.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +import argparse +import glob +import gzip +import os +from datetime import datetime, timedelta +from typing import Dict, Iterable, List + +# Cleanup for rageshake server output files +# +# Example usage: +# +# ./cleanup.py --dry-run --path /home/rageshakes/store --max-days 100 element-auto-uisi:90 +# +# No dependencies required beyond a modern python3. + + +class Cleanup(object): + def __init__( + self, + limits: Dict[str, int], + days_to_check: List[int], + dry_run: bool, + root_path: str, + ): + self.dry_run = dry_run + self.days_to_check = days_to_check + self.limits = limits + self.root_path = root_path + self.deleted = 0 + self.checked = 0 + self.disk_saved = 0 + + def check_date(self, folder_name: str, applications_to_delete: List[str]) -> None: + if len(applications_to_delete) == 0: + print(f"W Not checking {folder_name}, no applications would be removed") + return + + # list folder_name for rageshakes: + # foreach: + files = glob.iglob(folder_name + "/[0-9]*") + + checked = 0 + deleted = 0 + for rageshake_name in files: + checked = checked + 1 + if self.check_rageshake(rageshake_name, applications_to_delete): + deleted = deleted + 1 + + print( + f"I Checked {folder_name} for {applications_to_delete}, deleted {deleted}/{checked} rageshakes" + ) + + self.deleted = self.deleted + deleted + self.checked = self.checked + checked + # optionally delete folder if we deleted 100% of rageshakes, but for now it' s fine. + + def check_rageshake( + self, rageshake_folder_path: str, applications_to_delete: List[str] + ) -> bool: + try: + + with gzip.open(rageshake_folder_path + "/details.log.gz") as details: + for line in details.readlines(): + parts = line.decode("utf-8").split(":", 2) + if ( + parts[0] == "Application" + and parts[1].strip() in applications_to_delete + ): + self.delete(rageshake_folder_path) + return True + + except FileNotFoundError as e: + print( + f"W Unable to open {e.filename} to check for application name. Ignoring this folder." + ) + + return False + + def delete(self, rageshake_folder_path: str) -> None: + files = glob.glob(rageshake_folder_path + "/*") + for file in files: + self.disk_saved += os.stat(file).st_size + if self.dry_run: + print(f"I would delete {file}") + else: + print(f"I deleted {file}") + os.unlink(file) + + if self.dry_run: + print(f"I would remove directory {rageshake_folder_path}") + else: + print(f"I removing directory {rageshake_folder_path}") + os.rmdir(rageshake_folder_path) + + def cleanup(self) -> None: + today = datetime.today() + for days_ago in self.days_to_check: + target = today - timedelta(days=days_ago) + folder_name = target.strftime("%Y%m%d") + applications = [] + for name in self.limits.keys(): + if self.limits[name] < days_ago: + applications.append(name) + self.check_date(self.root_path + "/" + folder_name, applications) + pass + + +def main(): + parser = argparse.ArgumentParser(description="Cleanup rageshake files on disk") + parser.add_argument( + "limits", + metavar="LIMIT", + type=str, + nargs="+", + help="application_name retention limits in days (each formatted app-name:10)", + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--max-days", + dest="max_days", + type=int, + help="Search all days until this maximum", + ) + group.add_argument( + "--days-to-check", + dest="days_to_check", + type=str, + help="Explicitly supply days in the past to check for deletion, eg '1,2,3,5'", + ) + + parser.add_argument( + "--dry-run", dest="dry_run", action="store_true", help="Dry run (do not delete)" + ) + parser.add_argument( + "--path", + dest="path", + type=str, + required=True, + help="Root path of rageshakes (eg /home/rageshakes/bugs/)", + ) + + args = parser.parse_args() + application_limits: Dict[str, int] = {} + for x in args.limits: + application_limits[x.split(":")[0]] = int(x.split(":")[1]) + + days_to_check: Iterable[int] = [] + if args.max_days: + days_to_check = range(args.max_days) + if args.days_to_check: + days_to_check = map(lambda x: int(x), args.days_to_check.split(",")) + + cleanup = Cleanup(application_limits, days_to_check, args.dry_run, args.path) + + cleanup.cleanup() + print( + f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes. " + f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup.dry_run}" + ) + + +if __name__ == "__main__": + main() From 3f57c181f6e6ec9f5f9895ee51530c2d949026e6 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:18:26 +0000 Subject: [PATCH 02/10] Extend cleanup to allow a list of mxids to be excluded. For example, testing/demo mxids may want to be retained for longer. --- scripts/cleanup.py | 59 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index f4cbe06..48de07d 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -22,6 +22,7 @@ def __init__( days_to_check: List[int], dry_run: bool, root_path: str, + mxids_to_exclude: List[str], ): self.dry_run = dry_run self.days_to_check = days_to_check @@ -30,6 +31,10 @@ def __init__( self.deleted = 0 self.checked = 0 self.disk_saved = 0 + self.mxids_to_exclude = mxids_to_exclude + self.excluded_by_user = { + mxids_to_exclude[i]: 0 for i in range(len(mxids_to_exclude)) + } def check_date(self, folder_name: str, applications_to_delete: List[str]) -> None: if len(applications_to_delete) == 0: @@ -59,16 +64,23 @@ def check_rageshake( self, rageshake_folder_path: str, applications_to_delete: List[str] ) -> bool: try: - + app_name = None + mxid = None with gzip.open(rageshake_folder_path + "/details.log.gz") as details: for line in details.readlines(): - parts = line.decode("utf-8").split(":", 2) - if ( - parts[0] == "Application" - and parts[1].strip() in applications_to_delete - ): - self.delete(rageshake_folder_path) - return True + parts = line.decode("utf-8").split(":", maxsplit=1) + if parts[0] == "Application": + app_name = parts[1].strip() + if parts[0] == "user_id": + mxid = parts[1].strip() + print(f"app_name {app_name} user_id {mxid}") + if app_name in applications_to_delete: + if mxid in self.mxids_to_exclude: + self.excluded_by_user[mxid] = self.excluded_by_user[mxid] + 1 + else: + self.delete(rageshake_folder_path) + return True + return False except FileNotFoundError as e: print( @@ -128,7 +140,12 @@ def main(): type=str, help="Explicitly supply days in the past to check for deletion, eg '1,2,3,5'", ) - + parser.add_argument( + "--exclude-mxids", + dest="exclude_mxids", + type=str, + help="Supply a text file containing one mxid per line to exclude from cleanup. Blank lines and lines starting # are ignored.", + ) parser.add_argument( "--dry-run", dest="dry_run", action="store_true", help="Dry run (do not delete)" ) @@ -151,13 +168,35 @@ def main(): if args.days_to_check: days_to_check = map(lambda x: int(x), args.days_to_check.split(",")) - cleanup = Cleanup(application_limits, days_to_check, args.dry_run, args.path) + mxids_to_exclude = [] + if args.exclude_mxids: + with open(args.exclude_mxids) as file: + for lineno, data in enumerate(file): + data = data.strip() + if len(data) == 0: + # blank line, ignore + pass + elif data[0] == "#": + # comment, ignore + pass + elif data[0] == "@": + # mxid + mxids_to_exclude.append(data) + else: + raise Exception( + f"Unable to parse --exclude-mxids file on line {lineno + 1}: {data}" + ) + + cleanup = Cleanup( + application_limits, days_to_check, args.dry_run, args.path, mxids_to_exclude + ) cleanup.cleanup() print( f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes. " f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup.dry_run}" ) + print(f"I excluded count by user {cleanup.excluded_by_user}") if __name__ == "__main__": From f6d5868567fd17240b5a5bce45c740fe2ce9e6d1 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:31:17 +0000 Subject: [PATCH 03/10] Add changelog.d entry for cleanup script. --- changelog.d/61.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/61.feature diff --git a/changelog.d/61.feature b/changelog.d/61.feature new file mode 100644 index 0000000..396d25a --- /dev/null +++ b/changelog.d/61.feature @@ -0,0 +1 @@ +zero-dependency python script to cleanup old rageshakes. From 1a8b7c72a8b7ddf9113b1d19162f6e1ff089d537 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Mon, 9 Jan 2023 11:49:32 +0000 Subject: [PATCH 04/10] Correct date format --- scripts/cleanup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index 48de07d..d3ec6ce 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -109,7 +109,7 @@ def cleanup(self) -> None: today = datetime.today() for days_ago in self.days_to_check: target = today - timedelta(days=days_ago) - folder_name = target.strftime("%Y%m%d") + folder_name = target.strftime("%Y-%m-%d") applications = [] for name in self.limits.keys(): if self.limits[name] < days_ago: From df8bde04846bbcfb2fb797af523c3ba7f88281fd Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 12 Jan 2023 14:21:18 +0000 Subject: [PATCH 05/10] Apply suggestions from code review Simple changes from codereview Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- .gitignore | 2 +- changelog.d/61.feature | 2 +- scripts/cleanup.py | 25 +++++++++++-------------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 1df0203..265a15a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.idea +/.idea /bin /bugs /pkg diff --git a/changelog.d/61.feature b/changelog.d/61.feature index 396d25a..7a1cfcc 100644 --- a/changelog.d/61.feature +++ b/changelog.d/61.feature @@ -1 +1 @@ -zero-dependency python script to cleanup old rageshakes. +Add a zero-dependency python script to cleanup old rageshakes. diff --git a/scripts/cleanup.py b/scripts/cleanup.py index d3ec6ce..500e0b6 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -33,7 +33,7 @@ def __init__( self.disk_saved = 0 self.mxids_to_exclude = mxids_to_exclude self.excluded_by_user = { - mxids_to_exclude[i]: 0 for i in range(len(mxids_to_exclude)) + mxid: 0 for mxid in mxids_to_exclude } def check_date(self, folder_name: str, applications_to_delete: List[str]) -> None: @@ -42,22 +42,21 @@ def check_date(self, folder_name: str, applications_to_delete: List[str]) -> Non return # list folder_name for rageshakes: - # foreach: files = glob.iglob(folder_name + "/[0-9]*") checked = 0 deleted = 0 for rageshake_name in files: - checked = checked + 1 + checked += 1 if self.check_rageshake(rageshake_name, applications_to_delete): - deleted = deleted + 1 + deleted += 1 print( - f"I Checked {folder_name} for {applications_to_delete}, deleted {deleted}/{checked} rageshakes" + f"I Checked {folder_name} for {applications_to_delete}, {'would delete' if self.dryrun else 'deleted'} {deleted}/{checked} rageshakes" ) - self.deleted = self.deleted + deleted - self.checked = self.checked + checked + self.deleted += deleted + self.checked += checked # optionally delete folder if we deleted 100% of rageshakes, but for now it' s fine. def check_rageshake( @@ -76,11 +75,10 @@ def check_rageshake( print(f"app_name {app_name} user_id {mxid}") if app_name in applications_to_delete: if mxid in self.mxids_to_exclude: - self.excluded_by_user[mxid] = self.excluded_by_user[mxid] + 1 + self.excluded_by_user[mxid] += 1 else: self.delete(rageshake_folder_path) return True - return False except FileNotFoundError as e: print( @@ -96,7 +94,7 @@ def delete(self, rageshake_folder_path: str) -> None: if self.dry_run: print(f"I would delete {file}") else: - print(f"I deleted {file}") + print(f"I deleting {file}") os.unlink(file) if self.dry_run: @@ -110,12 +108,11 @@ def cleanup(self) -> None: for days_ago in self.days_to_check: target = today - timedelta(days=days_ago) folder_name = target.strftime("%Y-%m-%d") - applications = [] + applications = set() for name in self.limits.keys(): if self.limits[name] < days_ago: - applications.append(name) + applications.add(name) self.check_date(self.root_path + "/" + folder_name, applications) - pass def main(): @@ -193,7 +190,7 @@ def main(): cleanup.cleanup() print( - f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes. " + f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes, " f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup.dry_run}" ) print(f"I excluded count by user {cleanup.excluded_by_user}") From 3981d909675c0c8a8ac8094f3748504e8b609b5c Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 12 Jan 2023 14:41:19 +0000 Subject: [PATCH 06/10] Fixes from review --- scripts/cleanup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index 500e0b6..d8cd3c4 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -4,7 +4,7 @@ import gzip import os from datetime import datetime, timedelta -from typing import Dict, Iterable, List +from typing import Dict, Iterable, List, Set # Cleanup for rageshake server output files # @@ -36,7 +36,7 @@ def __init__( mxid: 0 for mxid in mxids_to_exclude } - def check_date(self, folder_name: str, applications_to_delete: List[str]) -> None: + def check_date(self, folder_name: str, applications_to_delete: Set[str]) -> None: if len(applications_to_delete) == 0: print(f"W Not checking {folder_name}, no applications would be removed") return @@ -52,7 +52,7 @@ def check_date(self, folder_name: str, applications_to_delete: List[str]) -> Non deleted += 1 print( - f"I Checked {folder_name} for {applications_to_delete}, {'would delete' if self.dryrun else 'deleted'} {deleted}/{checked} rageshakes" + f"I Checked {folder_name} for {applications_to_delete}, {'would delete' if self.dry_run else 'deleted'} {deleted}/{checked} rageshakes" ) self.deleted += deleted @@ -60,7 +60,7 @@ def check_date(self, folder_name: str, applications_to_delete: List[str]) -> Non # optionally delete folder if we deleted 100% of rageshakes, but for now it' s fine. def check_rageshake( - self, rageshake_folder_path: str, applications_to_delete: List[str] + self, rageshake_folder_path: str, applications_to_delete: Set[str] ) -> bool: try: app_name = None From 6285f58694753c75849f9131889c0453be37f411 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 12 Jan 2023 15:45:21 +0000 Subject: [PATCH 07/10] Address code review comments --- scripts/cleanup.py | 107 ++++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index d8cd3c4..bdc1cb5 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -3,9 +3,11 @@ import glob import gzip import os +import sys from datetime import datetime, timedelta from typing import Dict, Iterable, List, Set + # Cleanup for rageshake server output files # # Example usage: @@ -19,24 +21,43 @@ class Cleanup(object): def __init__( self, limits: Dict[str, int], - days_to_check: List[int], + days_to_check: Iterable[int], dry_run: bool, root_path: str, mxids_to_exclude: List[str], ): - self.dry_run = dry_run - self.days_to_check = days_to_check - self.limits = limits - self.root_path = root_path + self._limits = limits + self._days_to_check = days_to_check + self._dry_run = dry_run + self._root_path = root_path + self._mxids_to_exclude = mxids_to_exclude + # Count of files we deleted or would delete (dry-run) self.deleted = 0 + # Count of files we checked self.checked = 0 + # Sum of bytes in files we deleted or would delete (dry-run) self.disk_saved = 0 - self.mxids_to_exclude = mxids_to_exclude - self.excluded_by_user = { - mxid: 0 for mxid in mxids_to_exclude - } + # History of how many times a given mxid saved a file. + self.excluded_count_by_user = {mxid: 0 for mxid in mxids_to_exclude} - def check_date(self, folder_name: str, applications_to_delete: Set[str]) -> None: + def cleanup(self) -> None: + """ + Check for rageshakes to remove according to settings + """ + today = datetime.today() + for days_ago in self._days_to_check: + target = today - timedelta(days=days_ago) + folder_name = target.strftime("%Y-%m-%d") + applications = set() + for name in self._limits.keys(): + if self._limits[name] < days_ago: + applications.add(name) + self._check_date(self._root_path + "/" + folder_name, applications) + + def _check_date(self, folder_name: str, applications_to_delete: Set[str]) -> None: + """ + Check all rageshakes on a given date (folder) + """ if len(applications_to_delete) == 0: print(f"W Not checking {folder_name}, no applications would be removed") return @@ -48,20 +69,24 @@ def check_date(self, folder_name: str, applications_to_delete: Set[str]) -> None deleted = 0 for rageshake_name in files: checked += 1 - if self.check_rageshake(rageshake_name, applications_to_delete): + if self._check_rageshake(rageshake_name, applications_to_delete): deleted += 1 print( - f"I Checked {folder_name} for {applications_to_delete}, {'would delete' if self.dry_run else 'deleted'} {deleted}/{checked} rageshakes" + f"I Checked {folder_name} for {applications_to_delete}, " + f"{'would delete' if self._dry_run else 'deleted'} {deleted}/{checked} rageshakes" ) self.deleted += deleted self.checked += checked # optionally delete folder if we deleted 100% of rageshakes, but for now it' s fine. - def check_rageshake( + def _check_rageshake( self, rageshake_folder_path: str, applications_to_delete: Set[str] ) -> bool: + """ + Checks a given rageshake folder, returning True if the rageshake was deleted + """ try: app_name = None mxid = None @@ -72,12 +97,11 @@ def check_rageshake( app_name = parts[1].strip() if parts[0] == "user_id": mxid = parts[1].strip() - print(f"app_name {app_name} user_id {mxid}") if app_name in applications_to_delete: - if mxid in self.mxids_to_exclude: - self.excluded_by_user[mxid] += 1 + if mxid in self._mxids_to_exclude: + self.excluded_count_by_user[mxid] += 1 else: - self.delete(rageshake_folder_path) + self._delete(rageshake_folder_path) return True except FileNotFoundError as e: @@ -87,33 +111,25 @@ def check_rageshake( return False - def delete(self, rageshake_folder_path: str) -> None: + def _delete(self, rageshake_folder_path: str) -> None: + """ + Delete a given rageshake folder + """ files = glob.glob(rageshake_folder_path + "/*") for file in files: self.disk_saved += os.stat(file).st_size - if self.dry_run: + if self._dry_run: print(f"I would delete {file}") else: print(f"I deleting {file}") os.unlink(file) - if self.dry_run: + if self._dry_run: print(f"I would remove directory {rageshake_folder_path}") else: print(f"I removing directory {rageshake_folder_path}") os.rmdir(rageshake_folder_path) - def cleanup(self) -> None: - today = datetime.today() - for days_ago in self.days_to_check: - target = today - timedelta(days=days_ago) - folder_name = target.strftime("%Y-%m-%d") - applications = set() - for name in self.limits.keys(): - if self.limits[name] < days_ago: - applications.add(name) - self.check_date(self.root_path + "/" + folder_name, applications) - def main(): parser = argparse.ArgumentParser(description="Cleanup rageshake files on disk") @@ -138,8 +154,8 @@ def main(): help="Explicitly supply days in the past to check for deletion, eg '1,2,3,5'", ) parser.add_argument( - "--exclude-mxids", - dest="exclude_mxids", + "--exclude-mxids-file", + dest="exclude_mxids_file", type=str, help="Supply a text file containing one mxid per line to exclude from cleanup. Blank lines and lines starting # are ignored.", ) @@ -157,7 +173,16 @@ def main(): args = parser.parse_args() application_limits: Dict[str, int] = {} for x in args.limits: - application_limits[x.split(":")[0]] = int(x.split(":")[1]) + parts = x.rsplit(":", 1) + try: + if len(parts) < 2: + raise ValueError("missing :") + limit = int(parts[1]) + except ValueError as e: + print(f"E Malformed --limits argument: {e}", file=sys.stderr) + sys.exit(1) + + application_limits[parts[0]] = limit days_to_check: Iterable[int] = [] if args.max_days: @@ -166,8 +191,8 @@ def main(): days_to_check = map(lambda x: int(x), args.days_to_check.split(",")) mxids_to_exclude = [] - if args.exclude_mxids: - with open(args.exclude_mxids) as file: + if args.exclude_mxids_file: + with open(args.exclude_mxids_file) as file: for lineno, data in enumerate(file): data = data.strip() if len(data) == 0: @@ -180,9 +205,11 @@ def main(): # mxid mxids_to_exclude.append(data) else: - raise Exception( - f"Unable to parse --exclude-mxids file on line {lineno + 1}: {data}" + print( + f"E Unable to parse --exclude-mxids-file on line {lineno + 1}: {data}", + file=sys.stderr, ) + sys.exit(1) cleanup = Cleanup( application_limits, days_to_check, args.dry_run, args.path, mxids_to_exclude @@ -191,9 +218,9 @@ def main(): cleanup.cleanup() print( f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes, " - f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup.dry_run}" + f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup._dry_run}" ) - print(f"I excluded count by user {cleanup.excluded_by_user}") + print(f"I excluded count by user {cleanup.excluded_count_by_user}") if __name__ == "__main__": From 7b23c895dc55689528f99c870e7aebc1434787f6 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 12 Jan 2023 16:06:36 +0000 Subject: [PATCH 08/10] Move to os.scandir() to improve speed. --- scripts/cleanup.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index bdc1cb5..b0d3675 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -62,15 +62,23 @@ def _check_date(self, folder_name: str, applications_to_delete: Set[str]) -> Non print(f"W Not checking {folder_name}, no applications would be removed") return - # list folder_name for rageshakes: - files = glob.iglob(folder_name + "/[0-9]*") + if not os.path.exists(folder_name): + print(f"W Not checking {folder_name}, not present or not a directory") + return checked = 0 deleted = 0 - for rageshake_name in files: - checked += 1 - if self._check_rageshake(rageshake_name, applications_to_delete): - deleted += 1 + with os.scandir(folder_name) as rageshakes: + for rageshake in rageshakes: + rageshake_path = folder_name + os.pathsep + rageshake.name + if rageshake.is_dir(): + checked += 1 + if self._check_rageshake(rageshake_path, applications_to_delete): + deleted += 1 + else: + print( + f"W File in rageshake tree {rageshake_path} is not a directory" + ) print( f"I Checked {folder_name} for {applications_to_delete}, " From 1383654b2fd5e169886f147a046dd51aef09b0fc Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Fri, 13 Jan 2023 14:17:25 +0000 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- scripts/cleanup.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index b0d3675..d759715 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -17,7 +17,7 @@ # No dependencies required beyond a modern python3. -class Cleanup(object): +class Cleanup: def __init__( self, limits: Dict[str, int], @@ -93,7 +93,11 @@ def _check_rageshake( self, rageshake_folder_path: str, applications_to_delete: Set[str] ) -> bool: """ - Checks a given rageshake folder, returning True if the rageshake was deleted + Checks a given rageshake folder against the application and userid lists. + + If the folder matches, and dryrun mode is disabled, the folder is deleted. + + @returns: True if the rageshake matched, False if it was skipped. """ try: app_name = None @@ -121,7 +125,7 @@ def _check_rageshake( def _delete(self, rageshake_folder_path: str) -> None: """ - Delete a given rageshake folder + Delete a given rageshake folder, unless dryrun mode is enabled """ files = glob.glob(rageshake_folder_path + "/*") for file in files: From a4b6a07b3fe7eb0038a33c65556dde534d6e6672 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Fri, 13 Jan 2023 16:36:25 +0000 Subject: [PATCH 10/10] Reduce code within try block; add documentation. --- scripts/cleanup.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/scripts/cleanup.py b/scripts/cleanup.py index d759715..5f4355d 100755 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -18,6 +18,11 @@ class Cleanup: + """ + Cleanup a rageshake bug repository. + + Once created, call cleanup() to begin the actual operation. Statistics are available after cleanup completes. + """ def __init__( self, limits: Dict[str, int], @@ -26,6 +31,15 @@ def __init__( root_path: str, mxids_to_exclude: List[str], ): + """ + Set options for a cleanup run of a rageshake repository. + + @param limits: Map of app name to integer number of days that application's rageshakes should be retained + @param days_to_check: List of ints each representing "days ago" that should be checked for rageshakes to delete + @param dry_run: If set, perform all actions but do not complete deletion of files + @param root_path: Base path to rageshake bug repository + @param mxids_to_exclude: Rageshakes sent by this list of mxids should always be preserved. + """ self._limits = limits self._days_to_check = days_to_check self._dry_run = dry_run @@ -42,7 +56,9 @@ def __init__( def cleanup(self) -> None: """ - Check for rageshakes to remove according to settings + Check for rageshakes to remove according to settings. + + Do not run multiple times as statistics are generated internally during each call. """ today = datetime.today() for days_ago in self._days_to_check: @@ -99,9 +115,10 @@ def _check_rageshake( @returns: True if the rageshake matched, False if it was skipped. """ + app_name = None + mxid = None + try: - app_name = None - mxid = None with gzip.open(rageshake_folder_path + "/details.log.gz") as details: for line in details.readlines(): parts = line.decode("utf-8").split(":", maxsplit=1) @@ -109,17 +126,18 @@ def _check_rageshake( app_name = parts[1].strip() if parts[0] == "user_id": mxid = parts[1].strip() - if app_name in applications_to_delete: - if mxid in self._mxids_to_exclude: - self.excluded_count_by_user[mxid] += 1 - else: - self._delete(rageshake_folder_path) - return True - except FileNotFoundError as e: print( f"W Unable to open {e.filename} to check for application name. Ignoring this folder." ) + return False + + if app_name in applications_to_delete: + if mxid in self._mxids_to_exclude: + self.excluded_count_by_user[mxid] += 1 + else: + self._delete(rageshake_folder_path) + return True return False @@ -184,8 +202,8 @@ def main(): args = parser.parse_args() application_limits: Dict[str, int] = {} - for x in args.limits: - parts = x.rsplit(":", 1) + for l in args.limits: + parts = l.rsplit(":", 1) try: if len(parts) < 2: raise ValueError("missing :")