Skip to content

Commit

Permalink
caching: handle maximum size in bytes
Browse files Browse the repository at this point in the history
Considering the items can have varying sizes, having a maximum size expressed in `bytes` makes more sense. At the end of the day, this is what would impact the user's experience the most.
  • Loading branch information
JoepVanlier committed Aug 16, 2024
1 parent 65c1226 commit 49f077a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
17 changes: 13 additions & 4 deletions lumicks/pylake/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,35 @@

import numbers
from typing import Union
from functools import lru_cache

import numpy as np
import numpy.typing as npt
from cachetools import LRUCache, cached

from .detail.timeindex import to_timestamp
from .detail.utilities import downsample
from .nb_widgets.range_selector import SliceRangeSelectorWidget


@cached(LRUCache(maxsize=1 << 30, getsizeof=lambda x: x.nbytes), info=True)  # 1 GB of cache
def _get_array(cache_object):
    """Read and globally cache the array backing ``cache_object``.

    The cache is bounded by total size in *bytes* (1 GB) rather than by item
    count, since cached arrays vary widely in size; ``getsizeof`` weighs each
    entry via its ``nbytes`` attribute. The stale ``@lru_cache(maxsize=100)``
    decorator has been removed: stacking it on top of ``@cached`` would add a
    redundant 100-entry LRU layer that defeats the byte-size bound.
    """
    return cache_object.read_array()


class LazyCache:
def __init__(self, location, dset):
def __init__(self, location, dset, nbytes):
"""A lazy globally cached wrapper around an object that is convertible to a numpy array"""
self._location = location
self._dset = dset
self._nbytes = nbytes

    def __len__(self):
        # Number of elements in the wrapped dataset; delegates to the dataset
        # itself, so no array data is read here.
        return len(self._dset)

    @property
    def nbytes(self):
        # Total size of the underlying data in bytes, as supplied at
        # construction; used by the byte-bounded global cache's getsizeof
        # to weigh this entry.
        return self._nbytes

    def __hash__(self):
        # Identity is the storage location string, so wrappers for the same
        # location hash alike and share a global cache entry.
        return hash(self._location)

Expand All @@ -35,7 +40,11 @@ def from_h5py_dset(dset, field=None):
if field:
location = f"{location}.{field}"
dset = dset.fields(field)
return LazyCache(location, dset)
item_size = dset.read_dtype.itemsize
else:
item_size = dset.dtype.itemsize

return LazyCache(location, dset, nbytes=item_size * len(dset))

def read_array(self):
# Note, we deliberately do _not_ allow additional arguments to asarray since we would
Expand Down
4 changes: 4 additions & 0 deletions lumicks/pylake/tests/test_file/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_global_cache_continuous(h5_file):
# These should point to the same data
assert id(f1x1.data) == id(f1x2.data)
assert _get_array.cache_info().hits == 1
assert _get_array.cache_info().currsize == 40

with pytest.raises(ValueError, match="assignment destination is read-only"):
f1x1.data[5:100] = 3
Expand All @@ -33,8 +34,10 @@ def test_global_cache_timeseries(h5_file):
# These should point to the same data
assert id(f1x1.data) == id(f1x2.data)
assert _get_array.cache_info().hits == 1
assert _get_array.cache_info().currsize == 16
assert id(f1x1.timestamps) == id(f1x2.timestamps)
assert _get_array.cache_info().hits == 2
assert _get_array.cache_info().currsize == 32

with pytest.raises(ValueError, match="assignment destination is read-only"):
f1x1.data[5:100] = 3
Expand All @@ -53,6 +56,7 @@ def test_global_cache_timetags(h5_file):
# These should point to the same data
assert id(tags1.data) == id(tags2.data)
assert _get_array.cache_info().hits == 1
assert _get_array.cache_info().currsize == 72

with pytest.raises(ValueError, match="assignment destination is read-only"):
tags1.data[5:100] = 3

0 comments on commit 49f077a

Please sign in to comment.