From ae9b0834d4866f892d6317a3692759937fb3daa3 Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Tue, 20 Dec 2022 16:00:19 -0500 Subject: [PATCH 1/4] fix: match cython types to C ones in dataset.pxd --- cryosparc/dataset.pxd | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/cryosparc/dataset.pxd b/cryosparc/dataset.pxd index f1207a0f..7b585de4 100644 --- a/cryosparc/dataset.pxd +++ b/cryosparc/dataset.pxd @@ -1,4 +1,5 @@ -ctypedef Py_ssize_t Dset +from libc.stdint cimport uint64_t, uint32_t +ctypedef uint64_t Dset cdef extern from "cryosparc-tools/dataset.h": @@ -7,18 +8,18 @@ cdef extern from "cryosparc-tools/dataset.h": Dset dset_innerjoin(const char *key, Dset dset_r, Dset dset_s) nogil void dset_del(Dset dset) nogil - Py_ssize_t dset_totalsz(Dset dset) nogil - long dset_ncol(Dset dset) nogil - Py_ssize_t dset_nrow(Dset dset) nogil - const char *dset_key(Dset dset, Py_ssize_t index) nogil + uint64_t dset_totalsz(Dset dset) nogil + uint32_t dset_ncol(Dset dset) nogil + uint64_t dset_nrow(Dset dset) nogil + const char *dset_key(Dset dset, uint64_t index) nogil int dset_type(Dset dset, const char *colkey) nogil void *dset_get(Dset dset, const char *colkey) nogil - Py_ssize_t dset_getsz(Dset dset, const char *colkey) nogil - bint dset_setstr(Dset dset, const char *colkey, Py_ssize_t index, const char *value) nogil - const char *dset_getstr(Dset dset, const char *colkey, Py_ssize_t index) nogil - long dset_getshp(Dset dset, const char *colkey) nogil + uint64_t dset_getsz(Dset dset, const char *colkey) nogil + bint dset_setstr(Dset dset, const char *colkey, uint64_t index, const char *value) nogil + const char *dset_getstr(Dset dset, const char *colkey, uint64_t index) nogil + uint32_t dset_getshp(Dset dset, const char *colkey) nogil - bint dset_addrows(Dset dset, long num) nogil + bint dset_addrows(Dset dset, uint32_t num) nogil bint dset_addcol_scalar(Dset dset, const char *key, int type) nogil bint dset_addcol_array(Dset dset, const char *key, int type, int shape0, int shape1, int shape2) nogil bint dset_changecol(Dset dset, const char *key, int type) nogil From 938754632e4ecbf27c3ddb619ee9adee96ff4c89 Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Tue, 20 Dec 2022 16:08:28 -0500 Subject: [PATCH 2/4] fix: use correct numpy object type --- cryosparc/dtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cryosparc/dtype.py b/cryosparc/dtype.py index 5f4c33f7..9e5eef2d 100644 --- a/cryosparc/dtype.py +++ b/cryosparc/dtype.py @@ -62,7 +62,7 @@ class DatasetHeader(TypedDict): DsetType.T_U32: n.uint32, DsetType.T_STR: n.uint64, # Note: Prefer T_OBJ when working in Python DsetType.T_U64: n.uint64, - DsetType.T_OBJ: n.object0, + DsetType.T_OBJ: n.object_, } TYPE_TO_DSET_MAP = { From 1817d8e183cf386e752378616da0a8313255f2e6 Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Wed, 21 Dec 2022 11:35:55 -0500 Subject: [PATCH 3/4] fix: correct MASK_IDX bitshift in C dataset (#9) Without this we are limited to ~32k datasets in memory --- cryosparc/include/cryosparc-tools/dataset.h | 2 +- tests/test_dataset.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cryosparc/include/cryosparc-tools/dataset.h b/cryosparc/include/cryosparc-tools/dataset.h index 9662af29..95021562 100755 --- a/cryosparc/include/cryosparc-tools/dataset.h +++ b/cryosparc/include/cryosparc-tools/dataset.h @@ -363,7 +363,7 @@ moreslots (void) { } #define SHIFT_GEN (64-15) -#define MASK_IDX (0xffffffffffffffff >> SHIFT_GEN) +#define MASK_IDX (0xffffffffffffffff >> 15) static inline uint64_t roundup(uint64_t value, uint64_t to) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 68c15768..2587c5d0 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -265,3 +265,13 @@ def test_append_many_empty(): def test_union_many_empty(): assert len(Dataset.union_many().rows()) == 0 + + +def test_allocate_many(): + # Checks for logic issues when allocating a lot of datasets + for _ in range(3): + allocated = [] + for _ in range(33_000): + allocated.append(Dataset(1)) + assert len(allocated) == 33_000 + del allocated From a90631eb46191beb407c4dfa547caf301a4531ea Mon Sep 17 00:00:00 2001 From: Nick Frasser Date: Wed, 21 Dec 2022 16:02:42 -0500 Subject: [PATCH 4/4] v4.1.1 --- CHANGELOG.md | 6 ++++++ cryosparc/__init__.py | 2 +- pyproject.toml | 2 +- setup.py | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f93d990e..6d14236d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v4.1.1 + +- Use correct numpy object type for newer versions of Numpy +- Fix limit on number of active datasets +- Use correct C types in Cython header definition + ## v4.1.0 - Initial release diff --git a/cryosparc/__init__.py b/cryosparc/__init__.py index eab54346..be07e000 100644 --- a/cryosparc/__init__.py +++ b/cryosparc/__init__.py @@ -1,4 +1,4 @@ -__version__ = "4.1.0" +__version__ = "4.1.1" def get_include(): diff --git a/pyproject.toml b/pyproject.toml index 4b476704..7f55cf21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cryosparc-tools" -version = "4.1.0" +version = "4.1.1" description = "Toolkit for interfacing with CryoSPARC" readme = "README.md" requires-python = ">=3.7" diff --git a/setup.py b/setup.py index 4d872482..c2fe7a8f 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name="cryosparc_tools", - version="4.1.0", + version="4.1.1", description="Toolkit for interfacing with CryoSPARC", headers=["cryosparc/include/cryosparc-tools/dataset.h"], ext_modules=cythonize(