Merge pull request #132 from int-brain-lab/v2.10.0
V2.10.0
k1o0 authored Oct 24, 2024
2 parents 26bea63 + 1efa119 commit c83b7e6
Showing 20 changed files with 720 additions and 276 deletions.
25 changes: 23 additions & 2 deletions CHANGELOG.md
@@ -1,13 +1,34 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.9.1]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.10.0]
+This version improves the behaviour of loading revisions and of loading datasets from list_datasets output.
+
+### Modified
+
+- sub-collections are no longer captured when filtering with a filename that starts with a wildcard in wildcard mode
+- bugfix: a spurious error was raised when loading a dataset with a revision provided
+- the default_revisions_only parameter in One.list_datasets filters out non-default datasets
+- permit data frame input to One.load_datasets and load the exact relative paths provided (instead of default revisions)
+- the redundant session_path column has been dropped from the datasets cache table
+- bugfix in one.params.setup: suggest the previous cache dir if available instead of always the default
+- bugfix in one.params.setup: remove all extraneous parameters (e.g. TOKEN) when running setup in silent mode
+- warn the user to re-authenticate when the password is None in silent mode
+- always force authentication when a password is passed, even when a token is cached
+- bugfix: negative indexing of paginated response objects now functions correctly
+- deprecate one.util.ensure_list; moved to iblutil.util.ensure_list
+
+### Added
+
+- the one.alf.exceptions.ALFWarning category allows users to filter warnings relating to mixed revisions
+
+## [2.9.1]
 
 ### Modified
 
 - HOTFIX: When downloading the cache, only authenticate Alyx when necessary
 - HOTFIX: Ensure the HTTP data server URL does not end in a slash
 - HOTFIX: Handle public aggregate dataset relative paths
 - HOTFIX: No longer warns in silent mode when no param conflicts are present
-- Explicit kwargs in load_* methods to avoid user confusion (e.g. no 'namespace' kwarg for `load_dataset`)
+- explicit kwargs in load_* methods to avoid user confusion (e.g. no 'namespace' kwarg for `load_dataset`)
 
 ## [2.9.0]
 This version adds a couple of new ALF functions.
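The entries above change how datasets are listed and loaded. Below is a minimal sketch of the new flow, assuming a configured ONE instance; the session identifier is hypothetical, and `details=True` is used to obtain a data frame that the new `default_revisions_only` parameter filters and that `load_datasets` now accepts directly.

```python
from one.api import ONE

one = ONE()
eid = 'KS023/2019-12-10/001'  # hypothetical session identifier

# List only datasets marked as the default revision; details=True returns
# a data frame rather than a list of dataset names
datasets = one.list_datasets(eid, details=True, default_revisions_only=True)

# New in 2.10.0: the data frame can be passed straight to load_datasets,
# which loads the exact relative paths it contains (revisions included)
data, records = one.load_datasets(eid, datasets)
```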
2 changes: 1 addition & 1 deletion docs/one_installation.md
@@ -61,7 +61,7 @@ one = ONE()
 To change your default database, or re-run the setup for a given database, you can use the following
 
 ```python
-ONE._setup(base_url='https://test.alyx.internationalbrainlab.org', make_default=True)
+ONE.setup(base_url='https://test.alyx.internationalbrainlab.org', make_default=True)
 ```
 
 ## 4. Update
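As a usage note for the corrected call above, a short sketch of running setup and then connecting; this assumes the documented `base_url` and `make_default` arguments and an interactive credential prompt.

```python
from one.api import ONE

# Re-run setup for the test database and store it as the default
ONE.setup(base_url='https://test.alyx.internationalbrainlab.org', make_default=True)

# Later no-argument instantiations connect to the new default database
one = ONE()
```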
2 changes: 1 addition & 1 deletion one/__init__.py
@@ -1,2 +1,2 @@
"""The Open Neurophysiology Environment (ONE) API."""
__version__ = '2.9.1'
__version__ = '2.10.0'
30 changes: 20 additions & 10 deletions one/alf/cache.py
@@ -30,7 +30,7 @@
 from one.alf.io import iter_sessions, iter_datasets
 from one.alf.files import session_path_parts, get_alf_path
 from one.converters import session_record2path
-from one.util import QC_TYPE
+from one.util import QC_TYPE, patch_cache
 
 __all__ = ['make_parquet_db', 'remove_missing_datasets', 'DATASETS_COLUMNS', 'SESSIONS_COLUMNS']
 _logger = logging.getLogger(__name__)
@@ -52,7 +52,6 @@
 DATASETS_COLUMNS = (
     'id',            # int64
     'eid',           # int64
-    'session_path',  # relative to the root
     'rel_path',      # relative to the session path, includes the filename
     'file_size',     # file size in bytes
     'hash',          # sha1/md5, computed in load function
@@ -89,7 +88,6 @@ def _get_dataset_info(full_ses_path, rel_dset_path, ses_eid=None, compute_hash=F
     return {
         'id': Path(rel_ses_path, rel_dset_path).as_posix(),
         'eid': str(ses_eid),
-        'session_path': str(rel_ses_path),
         'rel_path': Path(rel_dset_path).as_posix(),
         'file_size': file_size,
         'hash': md5(full_dset_path) if compute_hash else None,
@@ -297,18 +295,30 @@ def remove_missing_datasets(cache_dir, tables=None, remove_empty_sessions=True,
     if tables is None:
         tables = {}
         for name in ('datasets', 'sessions'):
-            tables[name], _ = parquet.load(cache_dir / f'{name}.pqt')
-    to_delete = []
+            table, m = parquet.load(cache_dir / f'{name}.pqt')
+            tables[name] = patch_cache(table, m.get('min_api_version'), name)
+
+    INDEX_KEY = '.?id'
+    for name in tables:
+        # Set the appropriate index if none already set
+        if isinstance(tables[name].index, pd.RangeIndex):
+            idx_columns = sorted(tables[name].filter(regex=INDEX_KEY).columns)
+            tables[name].set_index(idx_columns, inplace=True)
+
+    to_delete = set()
     gen_path = partial(session_record2path, root_dir=cache_dir)
-    sessions = sorted(map(lambda x: gen_path(x[1]), tables['sessions'].iterrows()))
+    # map of session path to eid
+    sessions = {gen_path(rec): eid for eid, rec in tables['sessions'].iterrows()}
     for session_path in iter_sessions(cache_dir):
-        rel_session_path = session_path.relative_to(cache_dir).as_posix()
-        datasets = tables['datasets'][tables['datasets']['session_path'] == rel_session_path]
+        try:
+            datasets = tables['datasets'].loc[sessions[session_path]]
+        except KeyError:
+            datasets = tables['datasets'].iloc[0:0, :]
         for dataset in iter_datasets(session_path):
             if dataset.as_posix() not in datasets['rel_path']:
-                to_delete.append(session_path.joinpath(dataset))
+                to_delete.add(session_path.joinpath(dataset))
         if session_path not in sessions and remove_empty_sessions:
-            to_delete.append(session_path)
+            to_delete.add(session_path)
 
     if dry:
         print('The following session and datasets would be removed:', end='\n\t')
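The reworked `remove_missing_datasets` above now loads and patches the cache tables itself when none are passed, sets an id-based index, and collects paths to delete in a set. A minimal sketch of a dry run, assuming a `dry` keyword as used in the function body and a hypothetical cache location:

```python
from pathlib import Path
from one.alf.cache import remove_missing_datasets

cache_dir = Path.home() / 'Downloads' / 'ONE' / 'example_lab'  # hypothetical cache location

# With tables=None the .pqt files are loaded from cache_dir and patched to
# the current table format; dry=True only reports what would be removed
remove_missing_datasets(cache_dir, dry=True)
```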
7 changes: 6 additions & 1 deletion one/alf/exceptions.py
@@ -1,4 +1,4 @@
"""ALyx File related errors.
"""ALyx File related errors and warnings.
A set of Alyx and ALF related error classes which provide a more verbose description of the raised
issues.
@@ -82,3 +82,8 @@ class ALFMultipleRevisionsFound(ALFError):
     explanation = ('The matching object/file(s) belong to more than one revision. '
                    'Multiple datasets in different revision folders were found with no default '
                    'specified.')
+
+
+class ALFWarning(Warning):
+    """Cautions when loading ALF datasets."""
+    pass
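Because the new `ALFWarning` subclasses the built-in `Warning`, the standard warning filters apply to it; a minimal sketch of suppressing mixed-revision cautions (the commented load call is illustrative):

```python
import warnings
from one.alf.exceptions import ALFWarning

# Suppress cautions raised when loaded datasets span mixed revisions
with warnings.catch_warnings():
    warnings.simplefilter('ignore', ALFWarning)
    ...  # e.g. one.load_dataset(eid, 'spikes.times.npy', revision='2024-01-01')
```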