Skip to content

Commit

Permalink
use download_url for standard name table attribute value
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Jun 26, 2024
1 parent 4bc2539 commit 7bd0fe7
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 17 deletions.
5 changes: 4 additions & 1 deletion docs/colab/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,11 @@
" h5.u.iri['units'].predicate = 'http://qudt.org/schema/qudt/Unit'\n",
" h5.u.iri['units'].object = QUDT_UNIT.M_PER_SEC\n",
" h5.iri['contact'].predicate = M4I.orcidId\n",
" h5.iri['standard_name_table'].predicate = 'https://matthiasprobst.github.io/ssno#standardNameTable'\n",
"\n",
" h5.rdf['data_type'].definition = 'The source type of the data' # define the meaning of the attribute with a custom text"
" h5.rdf['data_type'].definition = 'The source type of the data' # define the meaning of the attribute with a custom text\n",
"\n",
" h5.dump()"
]
},
{
Expand Down
12 changes: 10 additions & 2 deletions h5rdmtoolbox/_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,20 @@ def process_string_for_link(string: str) -> typing.Tuple[str, bool]:
"""
if 'zenodo.' in string:
print(string)
if re.match(r'10\.\d{4,9}/zenodo\.\d{4,9}', string):
zenodo_url = f'https://doi.org/{string}'
img_url = f'https://zenodo.org/badge/DOI/{string}.svg'
if string.startswith('https://zenodo.org/record/'):
elif string.startswith('https://zenodo.org/record/'):
zenodo_url = string
img_url = f'https://zenodo.org/badge/DOI/10.5281/zenodo.{string.split("/")[-1]}.svg'
elif string.startswith('https://zenodo.org/records/'):
rec_id = string.split('/')[4]
zenodo_url = f'https://doi.org/10.5281/zenodo.{rec_id}'
img_url = f'https://zenodo.org/badge/DOI/10.5281/zenodo.{rec_id}.svg'
else:
zenodo_url = string
img_url = ''
return make_href(url=zenodo_url, text=f'<img src="{img_url}" alt="DOI">'), True
for p in (r"(https?://\S+)", r"(ftp://\S+)", r"(www\.\S+)"):
urls = re.findall(p, string)
Expand Down Expand Up @@ -242,7 +250,7 @@ def __call__(self, group, indent=0, preamble=None, hide_uri: bool = False):
if predicate:
print(spaces + f'@predicate: {predicate}')
for attr_name in group.attrs.raw.keys():
if attr_name == RDF_SUBJECT_ATTR_NAME:
if attr_name == RDF_TYPE_ATTR_NAME:
print(spaces + f'@type: {group.attrs[attr_name]}')
else:
if not attr_name.isupper():
Expand Down
1 change: 1 addition & 0 deletions h5rdmtoolbox/convention/standard_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def set(self, parent, value, attrs=None):
f'Validation of "{value}" for standard attribute "{self.name}" failed.\n'
f'Expected fields: {self.validator.model_fields}\nPydantic error: {err}')
validated_value = getattr(_validated_value, key0)

return super(type(parent.attrs), parent.attrs).__setitem__(
self.name,
parse_object_for_attribute_setting(validated_value)
Expand Down
29 changes: 22 additions & 7 deletions h5rdmtoolbox/convention/standard_names/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,9 @@ def __init__(self,
version: str,
meta: Dict,
standard_names: Dict = None,
affixes: Dict = None):
affixes: Dict = None,
download_url: str=None):
self.download_url = download_url
self._name = name
if standard_names is None:
standard_names = {}
Expand Down Expand Up @@ -190,6 +192,12 @@ def __str__(self) -> str:
return str(zenodo_doi)
return self.to_json()

def __h5attr_repr__(self) -> str:
    """Return the value used to represent this object as an HDF5 attribute.

    The download URL is preferred whenever one is set (non-empty);
    otherwise the JSON serialization from ``to_json()`` is returned.
    """
    # `or` yields the first truthy operand, so to_json() is only evaluated
    # when no download URL is available.
    return self.download_url or self.to_json()

def __contains__(self, standard_name):
if isinstance(standard_name, StandardName):
standard_name = standard_name.name
Expand Down Expand Up @@ -708,24 +716,31 @@ def from_zenodo(source: str = None, doi_or_recid=None) -> "StandardNameTable":
# it is a file from zenodo, download it:
from ...repository.utils import download_file
cv_filename = download_file(source)
return StandardNameTable.from_yaml(cv_filename)
snt = StandardNameTable.from_yaml(cv_filename)
snt.download_url = source
return snt
if source.startswith('https://zenodo.org/record/') or source.startswith('https://doi.org/'):
from ...repository.zenodo import ZenodoRecord
z = ZenodoRecord(source)
for file in z.files:
if pathlib.Path(file.filename).suffix == '.yaml':
try:
return StandardNameTable.from_yaml(file.download())
snt = StandardNameTable.from_yaml(file.download())
if source.endswith('/'):
snt.download_url = f"{source}{file.filename}"
else:
snt.download_url = f"{source}/{file.filename}"
return snt
except Exception as e:
logger.error(f'Error while reading file {file.filename}: {e}')
continue
raise FileNotFoundError(f'No valid standard name found in Zenodo repo {source}')

if source.startswith('10.5281/zenodo.'):
doi = source.split('.')[-1]
if (UserDir['standard_name_tables'] / f'{doi}.yaml').exists():
return StandardNameTable.from_yaml(UserDir['standard_name_tables'] / f'{doi}.yaml')
return StandardNameTable.from_zenodo(doi)
rec_id = source.split('.')[-1]
if (UserDir['standard_name_tables'] / f'{rec_id}.yaml').exists():
return StandardNameTable.from_yaml(UserDir['standard_name_tables'] / f'{rec_id}.yaml')
return StandardNameTable.from_zenodo(int(rec_id))

# parse input:
rec_id = zenodo.utils.recid_from_doi_or_redid(source)
Expand Down
2 changes: 2 additions & 0 deletions h5rdmtoolbox/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ def parse_object_for_attribute_setting(value) -> Union[str, int, float, bool, Li
return value
if isinstance(value, (h5py.Dataset, h5py.Group)):
return value.name
if hasattr(value, '__h5attr_repr__'):
return value.__h5attr_repr__()
try:
return str(value) # try parsing to string
except TypeError:
Expand Down
13 changes: 6 additions & 7 deletions tests/conventions/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,11 @@ def test_upload_convention(self):

# download file from zenodo deposit:
self.assertEqual(1, len(zsr.get_filenames()))
zsr.download_files()
zsr.download_file('tutorial_convention.yaml')

filename = zsr.file('tutorial_convention.yaml').download()
self.assertTrue(filename.exists())
download_dir = pathlib.Path(appdirs.user_data_dir('h5rdmtoolbox')) / 'zenodo_downloads'
self.assertTrue(
(download_dir / f'{zsr.rec_id}' / 'tutorial_convention.yaml').exists()
)
self.assertEqual(download_dir, filename.parent.parent)
zsr.delete()

def test_delete(self):
Expand Down Expand Up @@ -217,8 +216,8 @@ def test_overload_standard_attributes(self):
def test_overwrite_existing_file(self):
if self.connected:
# delete an existing convention like this first:
h5tbx.convention.from_zenodo(doi_or_recid=TutorialConventionZenodoRecordID, overwrite=False)
# h5tbx.convention.from_yaml('test_convention.yaml')
cv = h5tbx.convention.from_zenodo(doi_or_recid=TutorialConventionZenodoRecordID, overwrite=False)
self.assertEqual(cv.name, 'h5rdmtoolbox-tutorial-convention')
h5tbx.use('h5rdmtoolbox-tutorial-convention')

with h5tbx.File(mode='w',
Expand Down

0 comments on commit 7bd0fe7

Please sign in to comment.