Skip to content

Commit

Permalink
Lazy load hashlib.md5 file system cache (#373)
Browse files Browse the repository at this point in the history
* lazy load md5

* add changelog entry

* extend tests
  • Loading branch information
northernSage authored Apr 13, 2024
1 parent 9a63146 commit 083a652
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 4 deletions.
11 changes: 11 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
Version 0.13.0
--------------

Unreleased

- The default ``hashlib.md5`` may not be available in FIPS builds.
  ``FileSystemCache`` no longer accesses it at import time, so developers
  have a chance to change the default. ``hashlib.md5`` is lazily loaded
  only when no other hash method is provided.


Version 0.12.0
--------------

Expand Down
19 changes: 16 additions & 3 deletions src/cachelib/file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import errno
import hashlib
import logging
import os
import platform
Expand All @@ -7,7 +8,6 @@
import tempfile
import typing as _t
from contextlib import contextmanager
from hashlib import md5
from pathlib import Path
from time import sleep
from time import time
Expand All @@ -16,6 +16,14 @@
from cachelib.serializers import FileSystemSerializer


def _lazy_md5(string: bytes = b"") -> _t.Any:
    """Return a new ``hashlib.md5`` hash object seeded with ``string``.

    ``hashlib.md5`` is looked up when this function is called rather than
    when the module is imported. FIPS builds may not include md5, in which
    case importing it and using it as a default would fail before the
    developer can configure something else.

    :param string: initial bytes fed to the hash object. Defaults to
        ``b""``.
    :return: the object returned by ``hashlib.md5(string)``.
    """
    # Attribute access on ``hashlib`` happens here, at call time, on purpose.
    return hashlib.md5(string)


class FileSystemCache(BaseCache):
"""A cache that stores the items on the file system. This cache depends
on being the only user of the `cache_dir`. Make absolutely sure that
Expand All @@ -38,6 +46,8 @@ class FileSystemCache(BaseCache):
_fs_transaction_suffix = ".__wz_cache"
#: keep amount of files in a cache element
_fs_count_file = "__wz_cache_count"
#: default file name hashing method
_default_hash_method = staticmethod(_lazy_md5)

serializer = FileSystemSerializer()

Expand All @@ -47,12 +57,15 @@ def __init__(
threshold: int = 500,
default_timeout: int = 300,
mode: _t.Optional[int] = None,
hash_method: _t.Any = md5,
hash_method: _t.Any = None,
):
BaseCache.__init__(self, default_timeout)
self._path = cache_dir
self._threshold = threshold
self._hash_method = hash_method

self._hash_method = self._default_hash_method
if hash_method is not None:
self._hash_method = hash_method

# Mode set by user takes precedence. If no mode has
# been given, we need to set the correct default based
Expand Down
14 changes: 13 additions & 1 deletion tests/test_file_system_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,21 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, hash_method=hashlib.sha256, **kwargs)


class CustomDefaultHashingMethodCache(FileSystemCache):
    """``FileSystemCache`` subclass that overrides the class-level default hash.

    Unlike a subclass that passes ``hash_method`` to the constructor, this
    one does not inject a ``hash_method`` argument itself; it only replaces
    the ``_default_hash_method`` class attribute with ``hashlib.sha256``.
    """

    #: hashing callable used in place of the inherited default
    _default_hash_method = hashlib.sha256

    def __init__(self, *args, **kwargs):
        # Forward everything untouched to the parent constructor.
        super().__init__(*args, **kwargs)


@pytest.fixture(
autouse=True,
params=[FileSystemCache, CustomSerializerCache, CustomHashingMethodCache],
params=[
FileSystemCache,
CustomSerializerCache,
CustomHashingMethodCache,
CustomDefaultHashingMethodCache,
],
)
def cache_factory(request, tmpdir):
def _factory(self, *args, **kwargs):
Expand Down

0 comments on commit 083a652

Please sign in to comment.