From 15f22fb29bb7d054ef3e328f9f2938ecfe959a86 Mon Sep 17 00:00:00 2001 From: mikedarcy Date: Mon, 4 Mar 2024 20:27:31 -0800 Subject: [PATCH] Don't try to instantiate a Bag instance on non-directories in `is_bag`. Don't preserve `mtime` for payload files in idempotent mode. There are arguments for/against this but there is ultimately more broad utility in eliminating it. At some point there must be a tradeoff; if this information is important to preserve, idempotent mode should not be used. Similarly, when extracting a tar-based archive, if `mtime` is set to zero (i.e. epoch), use the tarfile extraction filter to set `mtime` to None which will cause tarfile to suppress preserving the mtime in the extracted file. This should work in every version that has the back-ported extraction filter support. Python versions less than 3.8 won't be able to do this but those are deprecated and are going to be unsupported by bdbag soon. Add Python 3.12 into GH Actions tests. Remove travis.yml. --- .github/workflows/bdbag.yml | 2 +- .travis.yml | 24 ------------------------ bdbag/bdbag_api.py | 23 ++++++++++++++--------- setup.py | 4 ++-- 4 files changed, 17 insertions(+), 36 deletions(-) delete mode 100644 .travis.yml diff --git a/.github/workflows/bdbag.yml b/.github/workflows/bdbag.yml index 4eaeaef..825ee6d 100644 --- a/.github/workflows/bdbag.yml +++ b/.github/workflows/bdbag.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 426746f..0000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -language: python -python: - - "2.7" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9" -os: - - linux -# - osx -# - windows -install: - - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]]; then pip install --upgrade importlib_metadata; fi - - pip install coveralls coverage flake8 - - python setup.py install -before_script: - # Break build in case of serious syntax errors or missing imports - - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics -script: coverage run --include bdbag/*,bdbag/fetch/* --omit bdbag/bdbag_cli.py,bdbag/bdbag_ro.py,bdbag/bdbagit_profile.py,bdbag/fetch/transports/fetch_globus.py,bdbag/fetch/transports/fetch_boto3.py setup.py test -after_success: coveralls -after_failure: - - cat test.log - - cat test_cli.log diff --git a/bdbag/bdbag_api.py b/bdbag/bdbag_api.py index 285c1aa..f5b9fc9 100644 --- a/bdbag/bdbag_api.py +++ b/bdbag/bdbag_api.py @@ -130,9 +130,10 @@ def prune_bag_manifests(bag): def is_bag(bag_path): bag = None try: - bag = bdbagit.BDBag(bag_path) + if os.path.isdir(bag_path): + bag = bdbagit.BDBag(bag_path) except (bdbagit.BagError, bdbagit.BagValidationError) as e: # pragma: no cover - logger.warning("Exception while checking if %s is a bag: %s" % (bag_path, e)) + logger.warning("Exception while checking if directory %s is a bag: %s" % (bag_path, e)) return True if bag else False @@ -382,10 +383,8 @@ def archive_bag(bag_path, bag_archiver, config_file=None, idempotent=None): def tar_bag_dir(bag_path, tar_file_path, tarmode, idempotent=False): def filter_mtime(tarinfo): - # a fixed mtime is a core requirement for a reproducible archive, but we should preserve payload file mtimes - if not (tarinfo.name.startswith(os.path.basename(bag_path) + "/data/") and - os.path.isfile(os.path.dirname(bag_path) + "/" + tarinfo.name)): - tarinfo.mtime = 0 + # a fixed mtime is a core requirement for a reproducible archive + tarinfo.mtime = 0 return tarinfo is_idempotent_tgz = False @@ -434,8 +433,8 @@ def zip_bag_dir(bag_path, zip_file_path, idempotent=False): for e in entries: filepath = os.path.join(os.path.dirname(bag_path), e) payload_dir = os.path.join(os.path.basename(bag_path), "data", "") - # a fixed mtime is a core requirement for a reproducible archive, but we should preserve payload file mtimes - if idempotent and not (e.startswith(payload_dir) and os.path.isfile(filepath)): + # a fixed mtime is a core requirement for a reproducible archive + if idempotent: date_time = (1980, 1, 1, 0, 0, 0) else: st = os.stat(filepath) @@ -506,7 +505,13 @@ def extract_bag(bag_path, output_path=None, temp=False, config_file=None): try: if isinstance(archive, tarfile.TarFile): if hasattr(tarfile, 'data_filter'): - archive.extractall(base_path, filter='data') + # customize tarfile 'data' filter: if we encounter a tarinfo entry with a mtime of 0 (epoch), then + # set mtime to None which will cause tarfile to suppress preserving the mtime for the extracted file + def tar_data_filter(entry, path): + if entry.mtime == 0: + entry.mtime = None + return tarfile.data_filter(entry, path) + archive.extractall(base_path, filter=tar_data_filter) else: if isinstance(archive, tarfile.TarFile): logger.warning('SECURITY WARNING: TAR extraction may be unsafe; consider updating Python to a ' diff --git a/setup.py b/setup.py index a2bec3b..fed552b 100644 --- a/setup.py +++ b/setup.py @@ -84,11 +84,11 @@ "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11' + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12' ] )