Skip to content

Commit

Permalink
minor bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Apr 9, 2024
1 parent 31368ff commit 70336e3
Show file tree
Hide file tree
Showing 12 changed files with 330 additions and 762 deletions.
139 changes: 66 additions & 73 deletions docs/userguide/database/hdfDB.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion h5rdmtoolbox/_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def is_valid_logger_level(level: Union[str, int]):
'expose_user_prop_to_attrs': True,
'add_provenance': False,
'ignore_set_std_attr_err': False,
'auto_create_h5tbx_version': True, # automatically creates the group h5rdmtoolbox with the version attribute
'auto_create_h5tbx_version': False, # automatically creates the group h5rdmtoolbox with the version attribute
'uuid_name': 'uuid', # attribute name used for UUIDs
# if a standard attribute is defined and cannot be retrieved because the value is invalid, ignore it:
'ignore_get_std_attr_err': False,
Expand Down
23 changes: 17 additions & 6 deletions h5rdmtoolbox/_user.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import appdirs
import importlib_resources
import pathlib
import shutil
import time
from itertools import count
from typing import Tuple

import appdirs
import importlib_resources

_filecounter = count()
_dircounter = count()

Expand Down Expand Up @@ -89,16 +88,28 @@ def _get_dir(self, name: str) -> pathlib.Path:

return self.user_dirs[name]

def clear_cache(self, delta_days: int):
def clear_cache(self, delta_days: int, utime: bool = False):
"""Clear the cache directory. The delta_days arguments will be used
to delete files older than delta_days days. This is only applied to files"""
to delete files older than delta_days days. This is only applied to files
Parameters
----------
delta_days : int
The number of days to keep the files in the cache.
utime : bool
If True, the file access time will be used to determine the age of the file.
Otherwise, the file creation time will be used.
"""
if delta_days == 0:
shutil.rmtree(self.user_dirs['cache'])
return
if self.user_dirs['cache'].exists():
for f in self.user_dirs['cache'].iterdir():
# get the file creation time
fct = f.stat().st_ctime
if utime:
fct = f.stat().st_atime
else:
fct = f.stat().st_ctime
dt = _now - fct
if dt > delta_days * 86400:
f.unlink()
Expand Down
19 changes: 8 additions & 11 deletions h5rdmtoolbox/database/hdfdb/filedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import pathlib
from typing import Union, Generator, List

from .objdb import ObjDB
from .nonsearchable import NonInsertableDatabaseInterface
from .objdb import ObjDB
from .. import lazy
from ..template import HDF5DBInterface

Expand All @@ -12,16 +12,14 @@ class FileDB(NonInsertableDatabaseInterface, HDF5DBInterface):
"""A database interface for an HDF5 file, where the filename is given."""

def __init__(self, filename: Union[str, pathlib.Path]):
self.filename = pathlib.Path(filename)
self.filename: str = str(filename)
self.find = self._instance_find # allow `find` to be a static method and instance method
self.find_one = self._instance_find_one # allow `find_one` to be a static method and instance method

@staticmethod
def find_one(file_or_filename, *args, **kwargs) -> lazy.LHDFObject:
def find_one(filename: Union[str, pathlib.Path], *args, **kwargs) -> lazy.LHDFObject:
"""Please refer to the docstring of the find_one method of the ObjDB class"""
if isinstance(file_or_filename, (h5py.Group, h5py.Dataset)):
return ObjDB(file_or_filename).find_one(*args, **kwargs)
with h5py.File(file_or_filename, 'r') as h5:
with h5py.File(str(filename), 'r') as h5:
return ObjDB(h5).find_one(*args, **kwargs)

def _instance_find(self, *args, **kwargs):
Expand Down Expand Up @@ -81,16 +79,15 @@ def find_one(self, *args, **kwargs) -> lazy.LHDFObject:
contains the object, the first one is returned. If you want to find one per file,
call find_one_per_file instead."""
for filename in self.filenames:
with h5py.File(filename, 'r') as h5:
with h5py.File(filename, mode='r') as h5:
ret = ObjDB(h5).find_one(*args, **kwargs)
if ret:
return ret
return

def find(self, *args, **kwargs) -> Generator[lazy.LHDFObject, None, None]:
all_results = []
"""Call find on all the files"""
for filename in self.filenames:
with h5py.File(filename, 'r') as h5:
ret = ObjDB(h5).find(*args, **kwargs)
all_results.extend(ret)
return all_results
for r in ret:
yield r
19 changes: 15 additions & 4 deletions h5rdmtoolbox/database/hdfdb/objdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ def _h5find(h5obj: Union[h5py.Group, h5py.Dataset], qk, qv, recursive, objfilter
"""
found_objs = []

if qk in query.value_operator:
# user wants to compare qv to the value of the object

Expand Down Expand Up @@ -301,7 +300,7 @@ def _h5find(h5obj: Union[h5py.Group, h5py.Dataset], qk, qv, recursive, objfilter
if query.operator[ok](objattr, ov):
found_objs.append(hv)
except Exception as e:
raise Exception(f'Error while filtering for "{qk}" with "{ok}" and "{ov}"') from e
raise Exception(f'Error while filtering for "{qk}" with "{ok}" and "{ov}": {e}')
return found_objs


Expand Down Expand Up @@ -420,8 +419,20 @@ def __init__(self, obj: Union[h5py.Dataset, h5py.Group]):
self.src_obj = h5py.Dataset(obj.id)
else:
raise TypeError(f'Unexpected type: {type(obj)}')
self.find = self._instance_find # allow `find` to be a static method and instance method
self.find_one = self._instance_find_one # allow `find_one` to be a static method and instance method

@staticmethod
def find_one(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDFObject:
    """Static variant of ``find_one``: wraps *obj* in an ``ObjDB`` and returns the
    first matching object. Please refer to the docstring of the ``find_one`` method
    of the ``ObjDB`` class for the accepted filter arguments."""
    return ObjDB(obj).find_one(*args, **kwargs)

@staticmethod
def find(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDFObject:
    """Static variant of ``find``: wraps *obj* in an ``ObjDB`` and delegates to its
    ``find`` method. Please refer to the docstring of the ``find`` method of the
    ``ObjDB`` class for the accepted filter arguments."""
    return ObjDB(obj).find(*args, **kwargs)

def find_one(self,
def _instance_find_one(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
Expand Down Expand Up @@ -452,7 +463,7 @@ def find_one(self,
ignore_attribute_error=ignore_attribute_error)
)

def find(self,
def _instance_find(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
Expand Down
4 changes: 4 additions & 0 deletions h5rdmtoolbox/database/hdfdb/query.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""query module"""
import logging
import numpy as np
import re
Expand Down Expand Up @@ -59,9 +60,12 @@ def _regex(value, pattern) -> bool:
except UnicodeDecodeError:
warnings.warn(f'could not decode {value}', UserWarning)
return False

if isinstance(value, bytes):
value = value.decode()

value = str(value)

match = re.search(pattern, value)
if match is None:
return False
Expand Down
14 changes: 7 additions & 7 deletions h5rdmtoolbox/layout/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class SpecificationResult:

def __init__(self, target):
self.target = target
self.target_name = target.name
self.target_name = target if isinstance(target, str) else target.name
self.target_type = 'Dataset' if isinstance(target, h5py.Dataset) else 'Group'
self.validation_flag = VALIDATION_FLAGS.UNCALLED.value
self.res = []
Expand Down Expand Up @@ -575,13 +575,13 @@ def validate(self, filename_or_root_group: Union[str, pathlib.Path, h5py.Group])
"""Validate the layout by passing a filename or an opened root group"""
self.reset()

if isinstance(filename_or_root_group, (str, pathlib.Path)):
with h5tbx.File(filename_or_root_group, mode='r') as h5:
return self.validate(h5)
# if isinstance(filename_or_root_group, (str, pathlib.Path)):
# with h5tbx.File(filename_or_root_group, mode='r') as h5:
# return self.validate(h5)

if isinstance(filename_or_root_group, h5py.Group):
if not filename_or_root_group.name == '/':
raise ValueError('If passing an HDF5 group, a root group must be passed')
# if isinstance(filename_or_root_group, h5py.Group):
# if not filename_or_root_group.name == '/':
# raise ValueError('If passing an HDF5 group, a root group must be passed')

# first reset all specs (n_calls = 0, _n_fails = 0, failed = None)
for spec in self.specifications:
Expand Down
Loading

0 comments on commit 70336e3

Please sign in to comment.