Skip to content

Commit

Permalink
add from_repo and fixes some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Dec 14, 2023
1 parent 085ee59 commit d2f628a
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 99 deletions.
4 changes: 2 additions & 2 deletions h5rdmtoolbox/conventions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@

logger = create_tbx_logger('conventions')

from .core import Convention, from_yaml, from_zenodo, get_current_convention, from_zenodo, get_registered_conventions
from .core import Convention, from_yaml, from_repo, from_zenodo, get_current_convention, from_zenodo, get_registered_conventions
from .standard_attributes import StandardAttribute
from . import standard_names
from . import _h5tbx as __h5tbx_convention

# Public API of the conventions package. Every exported name is listed exactly
# once (the previous list contained 'from_zenodo' twice).
__all__ = [
    'Convention',
    'StandardAttribute',
    'from_repo',
    'from_yaml',
    'from_zenodo',
    'get_current_convention',
    'get_registered_conventions',
]
40 changes: 37 additions & 3 deletions h5rdmtoolbox/conventions/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pydoc import locate
from typing import Union, List, Dict, Tuple

from h5rdmtoolbox.repository import RepositoryInterface
from . import cfg
from . import consts
from . import errors
Expand All @@ -20,9 +21,8 @@
from .utils import json2yaml
from .._repr import make_italic, make_bold
from .._user import UserDir
from ..repository.zenodo.utils import recid_from_doi_or_redid
from ..repository import zenodo

from ..repository.zenodo.utils import recid_from_doi_or_redid

CV_DIR = UserDir['conventions']

Expand Down Expand Up @@ -614,6 +614,16 @@ def delete(convention: Union[str, Convention]):
del sys.modules[convention_name]


def from_file(filename: Union[str, pathlib.Path]) -> Convention:
    """Load a convention from a file, dispatching on the file suffix.

    Currently YAML (``.yaml``) and JSON (``.json``) files are supported.

    Parameters
    ----------
    filename : Union[str, pathlib.Path]
        Path to the convention file. A plain string is accepted and converted
        to a ``pathlib.Path`` so that its suffix can be inspected.

    Returns
    -------
    Convention
        Whatever ``from_yaml`` or ``from_json`` returns for the file.

    Raises
    ------
    ValueError
        If the suffix is neither ``.yaml`` nor ``.json``.
    """
    # Coerce first: a str has no ``.suffix`` attribute.
    filename = pathlib.Path(filename)
    suffix = filename.suffix
    if suffix == '.yaml':
        return from_yaml(filename)
    if suffix == '.json':
        return from_json(filename)
    raise ValueError(f'File {filename} has an unknown suffix')


def from_yaml(filename: Union[str, pathlib.Path], overwrite: bool = False) -> Convention:
    """Create a convention from a YAML file.

    Module-level shortcut that forwards both arguments unchanged to
    ``Convention.from_yaml()``; see that method for details.
    """
    convention = Convention.from_yaml(filename, overwrite=overwrite)
    return convention
Expand All @@ -624,12 +634,36 @@ def from_json(filename: Union[str, pathlib.Path], overwrite: bool = False) -> Co
return Convention.from_json(filename, overwrite=overwrite)


def from_repo(repo_interface: RepositoryInterface,
              name: str,
              overwrite: bool = False,
              force_download: bool = False) -> Convention:
    """Load a convention from a file stored in a repository.

    The file is kept in the user cache directory, in a sub-folder named after
    the repository DOI. The file type (YAML or JSON) is resolved by
    ``from_file`` based on the suffix of `name`.

    Parameters
    ----------
    repo_interface : RepositoryInterface
        Interface to the repository. Its ``get_doi()`` and
        ``download_file(name)`` methods are used.
    name : str
        Name of the file inside the repository.
    overwrite : bool, optional
        If False (default) and the file already exists in the cache, a
        ``FileExistsError`` is raised.
    force_download : bool, optional
        If False (default) an existing cached file is loaded without
        downloading. If True the file is downloaded again and replaces the
        cached copy.

    Returns
    -------
    Convention
        The convention returned by ``from_file`` for the cached file.

    Raises
    ------
    FileExistsError
        If the file exists in the cache and `overwrite` is False.
    """
    import shutil  # local import: only needed to move the downloaded file

    # A DOI contains "/", which cannot be part of a single path component.
    path_compatible_doi = repo_interface.get_doi().replace('/', '_')
    cached_filename = UserDir['cache'] / path_compatible_doi / name
    cached_filename.parent.mkdir(parents=True, exist_ok=True)

    if cached_filename.exists():
        if not overwrite:
            raise FileExistsError(f'File {name} exists in cache and overwrite is set to False.')
        if not force_download:
            return from_file(cached_filename)

    # Download first and only then drop the cached copy, so that a failed
    # download leaves the existing cache entry intact.
    downloaded = pathlib.Path(repo_interface.download_file(name))
    if cached_filename.exists():
        cached_filename.unlink()
    # shutil.move also works when download folder and cache live on different
    # filesystems, where Path.rename() raises an OSError.
    shutil.move(str(downloaded), str(cached_filename))
    return from_file(cached_filename)


def from_zenodo(doi_or_recid: str,
name: str = None,
overwrite: bool = False,
force_download: bool = False) -> Convention:
"""Download a YAML file from a zenodo repository
Deprecated. Use `from_repo` instead.
Parameters
----------
doi_or_recid: str
Expand All @@ -649,7 +683,7 @@ def from_zenodo(doi_or_recid: str,
"""
# depending on the input, try to convert to a valid DOI:
# parse record id:

warnings.warn('Please use `from_repo` instead of from_zenodo', DeprecationWarning)
rec_id = recid_from_doi_or_redid(doi_or_recid)

if name is None:
Expand Down
2 changes: 1 addition & 1 deletion h5rdmtoolbox/conventions/standard_names/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ def from_zenodo(doi_or_recid: str) -> "StandardNameTable":
Example
-------
>>> snt = StandardNameTable.from_zenodo(doi="doi:10.5281/zenodo.8266929")
>>> snt = StandardNameTable.from_zenodo(doi_or_recid="doi:10.5281/zenodo.8266929")
Notes
-----
Expand Down
77 changes: 49 additions & 28 deletions h5rdmtoolbox/repository/zenodo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import appdirs
import pathlib
import requests
import time
import warnings
from typing import Union, List, Callable

Expand Down Expand Up @@ -62,9 +63,12 @@ def delete(self):
def get(self, raise_for_status: bool):
    """Get the deposit (json) data.

    Parameters
    ----------
    raise_for_status : bool
        NOTE(review): presumably asks the implementation to raise on an HTTP
        error status instead of returning the response - confirm against the
        concrete subclasses.

    Notes
    -----
    ``get_doi()`` calls ``self.get()`` without arguments and reads ``.json()``
    from the result, so implementations need a default for `raise_for_status`
    and must return an object offering ``json()``.
    """

def get_doi(self) -> str:
    """Get the DOI of the deposit.

    The deposit data is requested once via ``self.get()``. If its metadata
    contains a ``doi`` entry, that value is returned; otherwise the
    pre-reserved DOI (``metadata['prereserve_doi']['doi']``) is used.

    Returns
    -------
    str
        The DOI of the deposit.

    Raises
    ------
    KeyError
        If the metadata contains neither ``doi`` nor ``prereserve_doi``.
    """
    metadata = self.get().json()['metadata']
    doi = metadata.get('doi')
    if doi is None:
        # Fall back to the pre-reserved DOI without issuing a second request.
        return metadata['prereserve_doi']['doi']
    return doi

def exists(self) -> bool:
"""Check if the deposit exists on Zenodo."""
Expand Down Expand Up @@ -116,10 +120,24 @@ def access_token(self) -> str:

def get(self, raise_for_status: bool = False, max_retries: int = 5, retry_delay: float = 60):
    """Get the deposit (json) data.

    The request is repeated while the server answers with status 429 (too many
    requests) or 500 (internal error), waiting `retry_delay` seconds between
    attempts. At most `max_retries` repetitions are made, so a persistent
    server error cannot block the caller forever.

    Parameters
    ----------
    raise_for_status : bool, optional
        If True, ``raise_for_status()`` is called on the final response.
    max_retries : int, optional
        Maximum number of repeated requests after the first one, by default 5.
    retry_delay : float, optional
        Seconds to sleep before each repeated request, by default 60.

    Returns
    -------
    The response object of the last request.
    """
    url = f"{self.base_url}/{self.rec_id}"

    def _fetch():
        return requests.get(url, params={"access_token": self.access_token})

    r = _fetch()
    attempts = 0
    # Both status codes share one loop, so a 429 following a 500 (or the other
    # way round) is retried as well.
    while r.status_code in (429, 500) and attempts < max_retries:
        # Log the raw text: the body of an error response need not be JSON.
        logger.info("Request returned status %s: %s. Sleep for %s seconds and try again.",
                    r.status_code, r.text, retry_delay)
        time.sleep(retry_delay)
        r = _fetch()
        attempts += 1

    if raise_for_status:
        r.raise_for_status()
    return r
Expand Down Expand Up @@ -155,40 +173,43 @@ def upload_file(self, filename, overwrite: bool = False):
f"You can only modify metadata.")
r.raise_for_status()

def download_files(self,
                   target_folder: Union[str, pathlib.Path] = None,
                   suffix: Union[str, List[str], None] = None) -> List[pathlib.Path]:
    """Download all (!) files from Zenodo. You may specify one or multiple suffixes to only download certain files.

    Parameters
    ----------
    target_folder : str or pathlib.Path, optional
        The target folder, by default None. The value is forwarded unchanged
        to ``download_file`` for every file.
    suffix: Union[str, List[str], None], optional=None
        Specify a suffix (or a list of suffixes) to only download files whose
        name ends with one of them. None downloads every file.

    Returns
    -------
    List[pathlib.Path]
        A list of all downloaded files, i.e. the return values of
        ``download_file``.
    """
    filenames = self.get_filenames()
    if suffix is not None:
        # str.endswith accepts a tuple of alternatives
        suffixes = (suffix,) if isinstance(suffix, str) else tuple(suffix)
        filenames = [fname for fname in filenames if fname.endswith(suffixes)]
    # Pass target_folder on; otherwise the argument would silently be ignored.
    return [self.download_file(fname, target_folder=target_folder) for fname in filenames]

def download_file(self, filename, target_folder: Union[str, pathlib.Path] = None):
"""Download a single file from Zenodo."""
def download_file(self,
filename: str,
target_folder: Union[str, pathlib.Path] = None):
"""Download a single file from Zenodo.
Parameters
----------
filename : str
The filename to download
target_folder : Union[str, pathlib.Path], optional
The target folder, by default None
If None, the file will be downloaded to the default folder, which is in
the user data directory of the h5rdmtoolbox package.
"""
if target_folder is None:
target_folder = pathlib.Path(appdirs.user_data_dir('h5rdmtoolbox')) / 'zenodo_downloads' / str(
self.rec_id)
Expand Down
31 changes: 31 additions & 0 deletions tests/clean_zenodo_sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import requests

from h5rdmtoolbox.repository.zenodo.tokens import get_api_token


def delete_sandbox_deposits():
    """Delete all unpublished deposits in the sandbox account.

    All depositions of the account are listed first. Deposits that are not yet
    submitted are deleted; submitted (published) ones are only reported.
    The clean-up is best-effort: a deposit that cannot be processed is
    reported on stdout and the loop continues with the next one.

    Raises
    ------
    requests.HTTPError
        If listing the depositions fails.
    """
    depositions_url = 'https://sandbox.zenodo.org/api/deposit/depositions'
    # Resolve the token once instead of once per deposit.
    params = {'access_token': get_api_token(sandbox=True)}

    r = requests.get(depositions_url, params=params)
    r.raise_for_status()

    for deposit in r.json():
        try:
            submitted = deposit['submitted']
            title = deposit['title']
            deposit_id = deposit['id']
        except KeyError as e:
            print(f'Skipping deposit with unexpected structure (missing key {e}): {deposit}')
            continue

        if submitted:
            print(f'Cannot delete {title} with id {deposit_id} because it is already published.')
            continue

        print(f'deleting deposit {title} with id {deposit_id}')
        try:
            response = requests.delete(f'{depositions_url}/{deposit_id}', params=params)
            response.raise_for_status()
        except requests.RequestException as e:
            # Report the failure instead of hiding it, but keep cleaning up.
            print(f'Failed to delete deposit with id {deposit_id}: {e}')


if __name__ == '__main__':
    delete_sandbox_deposits()
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import unittest
import xarray as xr
from h5rdmtoolbox.repository import zenodo

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox.conventions.standard_names import HDF5StandardNameInterface
Expand All @@ -10,7 +11,11 @@
class TestStandardAttributes(unittest.TestCase):

def setUp(self) -> None:
cv = h5tbx.conventions.from_zenodo(doi_or_recid=10156750, overwrite=True)
# cv = h5tbx.conventions.from_zenodo(doi_or_recid=10156750, overwrite=True)
repo = zenodo.ZenodoRecord(10156750)
cv = h5tbx.conventions.from_repo(repo,
name='tutorial_convention.yaml',
overwrite=True)
cv.properties[h5tbx.File]['data_type'].make_optional()
cv.properties[h5tbx.File]['contact'].make_optional()
h5tbx.use(cv)
Expand Down
Loading

0 comments on commit d2f628a

Please sign in to comment.