From 6b2ca154815eaa53deb8818aaadecfee69d1bb60 Mon Sep 17 00:00:00 2001 From: Matthias Probst Date: Mon, 18 Dec 2023 11:05:28 +0100 Subject: [PATCH] updating docs --- .github/workflows/tests.yml | 1 - docs/_static/h5tbx_modules.svg | 32 +- docs/_static/new_icon.svg | 16 +- docs/_static/new_icon_with_text.svg | 267 +++++---- docs/conf.py | 2 +- .../concept_of_std_attrs.png | Bin .../concept_of_std_attrs.svg | 0 .../example_convention.json | 0 .../example_convention.yaml | 0 .../examples/EngMeta.ipynb | 0 .../examples/EngMeta.yaml | 0 .../examples/Provenance.ipynb | 0 .../examples/index.rst | 0 .../examples/standard_name_convention.ipynb | 0 .../examples/standard_name_interface.ipynb | 0 docs/{conventions => convention}/index.rst | 0 .../ontologies.ipynb | 0 .../piv_specific.yaml | 0 .../standard_attributes_and_conventions.ipynb | 0 docs/{conventions => convention}/test.yaml | 0 .../units-convention.json | 0 .../units-convention.yaml | 0 docs/gettingstarted/index.rst | 1 - docs/gettingstarted/motivation.rst | 33 +- docs/glossary/index.rst | 43 +- docs/index.rst | 31 +- .../{layouts.ipynb => getting_started.ipynb} | 162 ++++-- docs/layout/index.rst | 14 +- docs/repository/index.rst | 7 +- h5rdmtoolbox/database/hdfdb/query.py | 11 +- h5rdmtoolbox/layout/core.py | 88 ++- h5rdmtoolbox/layout/test.hdf | Bin 6144 -> 0 bytes h5rdmtoolbox/wrapper/core.py | 3 - tests/database/test_filequery.py | 516 ------------------ tests/database/test_hdfDB.py | 66 +++ tests/layouts/test_core.py | 20 + tests/repository/test.ini | 4 + tests/repository/test_zenodo.py | 80 ++- 38 files changed, 640 insertions(+), 757 deletions(-) rename docs/{conventions => convention}/concept_of_std_attrs.png (100%) rename docs/{conventions => convention}/concept_of_std_attrs.svg (100%) rename docs/{conventions => convention}/example_convention.json (100%) rename docs/{conventions => convention}/example_convention.yaml (100%) rename docs/{conventions => convention}/examples/EngMeta.ipynb (100%) rename docs/{conventions => convention}/examples/EngMeta.yaml (100%) rename docs/{conventions => convention}/examples/Provenance.ipynb (100%) rename docs/{conventions => convention}/examples/index.rst (100%) rename docs/{conventions => convention}/examples/standard_name_convention.ipynb (100%) rename docs/{conventions => convention}/examples/standard_name_interface.ipynb (100%) rename docs/{conventions => convention}/index.rst (100%) rename docs/{conventions => convention}/ontologies.ipynb (100%) rename docs/{conventions => convention}/piv_specific.yaml (100%) rename docs/{conventions => convention}/standard_attributes_and_conventions.ipynb (100%) rename docs/{conventions => convention}/test.yaml (100%) rename docs/{conventions => convention}/units-convention.json (100%) rename docs/{conventions => convention}/units-convention.yaml (100%) rename docs/layout/{layouts.ipynb => getting_started.ipynb} (59%) delete mode 100644 h5rdmtoolbox/layout/test.hdf delete mode 100644 tests/database/test_filequery.py create mode 100644 tests/repository/test.ini diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a5f34921..cbf1dd2a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,6 @@ on: branches: - main - dev - - dev-repository jobs: test: diff --git a/docs/_static/h5tbx_modules.svg b/docs/_static/h5tbx_modules.svg index 99528ef4..6daa3347 100644 --- a/docs/_static/h5tbx_modules.svg +++ b/docs/_static/h5tbx_modules.svg @@ -25,14 +25,14 @@ inkscape:document-units="mm" showgrid="false" 
inkscape:zoom="2.4568452" - inkscape:cx="467.2659" - inkscape:cy="79.980618" + inkscape:cx="331.31921" + inkscape:cy="79.980619" inkscape:window-width="1920" inkscape:window-height="991" inkscape:window-x="-9" - inkscape:window-y="1341" + inkscape:window-y="-9" inkscape:window-maximized="1" - inkscape:current-layer="layer1"> + inkscape:current-layer="g13"> wrapper conventions + x="39.413193" + y="22.500298">convention databases + y="22.500298">database layouts + x="95.999214" + y="22.500298">layout repositories + x="119.85126" + y="22.500298">repository planning collecting sharing re-using analyzing diff --git a/docs/_static/new_icon_with_text.svg b/docs/_static/new_icon_with_text.svg index 2c888238..3316741c 100644 --- a/docs/_static/new_icon_with_text.svg +++ b/docs/_static/new_icon_with_text.svg @@ -25,12 +25,12 @@ inkscape:document-units="mm" showgrid="false" inkscape:zoom="1.7067197" - inkscape:cx="443.83387" - inkscape:cy="105.17251" - inkscape:window-width="2400" - inkscape:window-height="1261" + inkscape:cx="244.6213" + inkscape:cy="191.30265" + inkscape:window-width="1920" + inkscape:window-height="991" inkscape:window-x="-9" - inkscape:window-y="-9" + inkscape:window-y="1341" inkscape:window-maximized="1" inkscape:current-layer="layer2"> planning collecting sharing exploring analyzing Metadata convention + x="82.626175" + y="17.798597" + id="tspan1">Metadata Core File HDF5 - Standard Attributevalidation - HDF5-database + x="7.2187376" + y="25.85743" + id="tspan7">database Zenodo + x="1.6879052" + y="55.99271" + id="tspan11">repository Metadata-aware - processing - HDF5 + xarray + y="83.395844" + id="tspan3">HDF5 + xarray ORCID + x="4.5279565" + y="41.686359" + id="tspan14">Zenodo MongoDB + + + 5 + + 1 + + 2 + + 3 + + 4 + + convention + validation + layout diff --git a/docs/conf.py b/docs/conf.py index ba915146..c588676b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = 'h5RDMtoolbox' -copyright = '2022, Matthias Probst' +copyright = '2023, Matthias Probst' author = 'Matthias Probst' # -- General configuration --------------------------------------------------- diff --git a/docs/conventions/concept_of_std_attrs.png b/docs/convention/concept_of_std_attrs.png similarity index 100% rename from docs/conventions/concept_of_std_attrs.png rename to docs/convention/concept_of_std_attrs.png diff --git a/docs/conventions/concept_of_std_attrs.svg b/docs/convention/concept_of_std_attrs.svg similarity index 100% rename from docs/conventions/concept_of_std_attrs.svg rename to docs/convention/concept_of_std_attrs.svg diff --git a/docs/conventions/example_convention.json b/docs/convention/example_convention.json similarity index 100% rename from docs/conventions/example_convention.json rename to docs/convention/example_convention.json diff --git a/docs/conventions/example_convention.yaml b/docs/convention/example_convention.yaml similarity index 100% rename from docs/conventions/example_convention.yaml rename to docs/convention/example_convention.yaml diff --git a/docs/conventions/examples/EngMeta.ipynb b/docs/convention/examples/EngMeta.ipynb similarity index 100% rename from docs/conventions/examples/EngMeta.ipynb rename to docs/convention/examples/EngMeta.ipynb diff --git a/docs/conventions/examples/EngMeta.yaml b/docs/convention/examples/EngMeta.yaml similarity index 100% rename from docs/conventions/examples/EngMeta.yaml rename to docs/convention/examples/EngMeta.yaml diff --git 
a/docs/conventions/examples/Provenance.ipynb b/docs/convention/examples/Provenance.ipynb similarity index 100% rename from docs/conventions/examples/Provenance.ipynb rename to docs/convention/examples/Provenance.ipynb diff --git a/docs/conventions/examples/index.rst b/docs/convention/examples/index.rst similarity index 100% rename from docs/conventions/examples/index.rst rename to docs/convention/examples/index.rst diff --git a/docs/conventions/examples/standard_name_convention.ipynb b/docs/convention/examples/standard_name_convention.ipynb similarity index 100% rename from docs/conventions/examples/standard_name_convention.ipynb rename to docs/convention/examples/standard_name_convention.ipynb diff --git a/docs/conventions/examples/standard_name_interface.ipynb b/docs/convention/examples/standard_name_interface.ipynb similarity index 100% rename from docs/conventions/examples/standard_name_interface.ipynb rename to docs/convention/examples/standard_name_interface.ipynb diff --git a/docs/conventions/index.rst b/docs/convention/index.rst similarity index 100% rename from docs/conventions/index.rst rename to docs/convention/index.rst diff --git a/docs/conventions/ontologies.ipynb b/docs/convention/ontologies.ipynb similarity index 100% rename from docs/conventions/ontologies.ipynb rename to docs/convention/ontologies.ipynb diff --git a/docs/conventions/piv_specific.yaml b/docs/convention/piv_specific.yaml similarity index 100% rename from docs/conventions/piv_specific.yaml rename to docs/convention/piv_specific.yaml diff --git a/docs/conventions/standard_attributes_and_conventions.ipynb b/docs/convention/standard_attributes_and_conventions.ipynb similarity index 100% rename from docs/conventions/standard_attributes_and_conventions.ipynb rename to docs/convention/standard_attributes_and_conventions.ipynb diff --git a/docs/conventions/test.yaml b/docs/convention/test.yaml similarity index 100% rename from docs/conventions/test.yaml rename to docs/convention/test.yaml diff --git a/docs/conventions/units-convention.json b/docs/convention/units-convention.json similarity index 100% rename from docs/conventions/units-convention.json rename to docs/convention/units-convention.json diff --git a/docs/conventions/units-convention.yaml b/docs/convention/units-convention.yaml similarity index 100% rename from docs/conventions/units-convention.yaml rename to docs/convention/units-convention.yaml diff --git a/docs/gettingstarted/index.rst b/docs/gettingstarted/index.rst index 9dc6c47c..597717a3 100644 --- a/docs/gettingstarted/index.rst +++ b/docs/gettingstarted/index.rst @@ -10,5 +10,4 @@ Get a quick insight into the toolbox and an overview about the capabilities. motivation quickoverview.ipynb - command_line_interface diff --git a/docs/gettingstarted/motivation.rst b/docs/gettingstarted/motivation.rst index ee6857a1..4c10f3da 100644 --- a/docs/gettingstarted/motivation.rst +++ b/docs/gettingstarted/motivation.rst @@ -12,23 +12,38 @@ good practices to make data more reusable. The principles are described in detai This python package is designed as a toolbox, which assists users and even projects, communities or collaborations during data generation, processing and exploration. The package is based on the scientific file format -HDF5. The file format is chosen because it is a widely used, open and well-documented format, which allows data -to be stored in a self-describing way by using so-called attributes in addition to the data itself. 
HDF5 thus
-will be very suitable for the majority of scientific data.
+HDF5.
 
 Why HDF5?
 ---------
+The file format is chosen because it is a widely used, open and well-documented format, which allows data
+to be stored in a self-describing way by using so-called attributes in addition to the data itself. HDF5 thus
+will be very suitable for the majority of scientific data.
 
-HDF5 is selected as the file format around everything is built because...
+In short:
 
-- it allows storing heterogeneous data
-- the access is fast and efficient
-- allows storing metadata together with raw data (self-descriptiveness)
-- has a comprehensive file-system-like structure
+- It allows storing heterogeneous data,
+- the access is fast and efficient,
+- allows storing metadata together with raw data (self-descriptiveness),
+- has a comprehensive file-system-like structure,
 - has a large community.
 
+More information on HDF5 can be found `on the HDF Group website <https://www.hdfgroup.org/>`_.
+
+
+Working with HDF5
+-----------------
+
+The toolbox interface is based on the `h5py <https://www.h5py.org/>`_ package, which is a pythonic interface to the
+HDF5 binary data format. However, h5py returns `numpy <https://numpy.org/>`_ arrays, which are not self-descriptive (just
+data arrays). While efficient to work with, the original meta information from the HDF5 file is lost. The h5rdmtoolbox
+therefore provides a wrapper around h5py, which returns an `xarray <https://xarray.dev/>`_ object instead of a
+numpy array. This object is self-descriptive and allows attaching meta information to the data - just like HDF5 datasets.
+
 Using HDF5 in combination with xarray allows keeping track of the meta information also during data
 processing, as both, the file and the data object, allow attaching attributes to the data. This reduces processing
 errors, enhances interpretability and finally makes it easier to share.
 
-More information on HDF5 can be found `on the HDF Group website <https://www.hdfgroup.org/>`_.
\ No newline at end of file
+This is the very basic feature of the toolbox, which already enriches your daily work with HDF5 files. However, there
+are many more aspects implemented in the toolbox, which assist you in making your data FAIRer. Find out more in the
+following sections.
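+
+A minimal sketch of this idea (the dataset name and attribute are illustrative and assume
+that no convention is enabled):
+
+.. code-block:: python
+
+    import h5rdmtoolbox as h5tbx
+
+    with h5tbx.File() as h5:
+        h5.create_dataset('velocity', data=[1.2, 2.4, 3.1], attrs={'units': 'm/s'})
+        vel = h5['velocity'][()]  # an xarray.DataArray carrying the HDF5 attributes
+        print(vel.attrs['units'])  # -> 'm/s'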
diff --git a/docs/glossary/index.rst b/docs/glossary/index.rst
index 2f56db06..55c97347 100644
--- a/docs/glossary/index.rst
+++ b/docs/glossary/index.rst
@@ -10,26 +10,23 @@ Glossary
 **standard_name** are special meta data descriptors that follow a specific
 standard and allow automated exploration and analysis.
 
-    long_name
-        A human-readable string. Attribute of a dataset. Must be given if **standard_name** is not set.
-
-    Standard Name Table (SNT)
-        Tabular content, which contains the standard name and (at least) a description and a
-        canonical unit for a it. The respective python class `StandardNameTable` is linked to an HDF5 file and
-        can perform consistency checks (checks syntax, name and unit). A table as a file may be a XML document
-        or a YAML file.
-
-    standard_name
-        A string respecting more or less strict rules defined by a community and defined in a name table.
-        Attribute of a dataset. Must be given if **long_name** is not set.
-
-    units
-        Attribute of a dataset describing the physical units of the dataset. Dimensionless datasets
-        have units=''
-
-    Layout
-        A layout defines the structure of an HDF5 file. It may define exact content, e.g. attribute name and value or
-        define expected dataset dimensions or shape. Also some limited conditional layout definition is possible, e.g.
-        that dataset may be in another group if the expected does not exist. Layout definitions are attached to a wrapper
-        HDF file and especially assists during data collection as it defines the final content of a file which was prior
-        defined by a community or project.
\ No newline at end of file
+    convention
+        A set of "standard attributes" used to describe data. A convention can be enabled, which will
+        automatically add the standard attributes as parameters to methods like `create_dataset`.
+
+    standard attributes
+        Attributes that are used to describe data. Standard attributes are defined by a convention and
+        validate the user input, which is done using the `pydantic` package.
+
+    layout
+        A layout defines the structure of an HDF5 file. It may define exact content, e.g. an attribute name and
+        value, or define expected dataset dimensions or shape. It cannot specify the array data of datasets.
+
+    repository
+        A repository is a storage place for data, usually online, which assigns a unique identifier to the uploaded
+        data. A popular example is Zenodo. Typically, a repository can be queried for metadata such as author,
+        title, description or type of data, but not for the content of the data (see database).
+
+    database
+        A database hosts data and allows querying the content of the data. An example for a database in the
+        context of HDF5 is mongoDB.
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 4162c947..a90bdb54 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,19 +1,18 @@
 HDF5 Research Data Management Toolbox
 =====================================
 
-The "HDF5 Research Data Management Toolbox" (h5RDMtoolbox) is a python package supporting everyone who is working
+The "HDF5 Research Data Management Toolbox" (`h5rdmtoolbox`) is a python package supporting everyone who is working
 with HDF5 to achieve a sustainable data lifecycle which follows the `FAIR <https://www.go-fair.org/fair-principles/>`_
 (Findable, Accessible, Interoperable, Reusable) principles.
 It specifically supports the five main steps of
 
-    1. Planning (defining domain- or problem-specific metadata convention(s) and an internal layout(s) for HDF5 files)
+    1. Planning (defining a domain- or problem-specific metadata convention and a layout defining the internal structure of HDF5 files)
     2. Collecting data (creating HDF5 files from scratch or converting to HDF5 files from other sources)
     3. :doc:`Analyzing and processing data ` (Plotting, processing data while keeping the HDF5
        attributes by using `xarray <https://xarray.dev/>`_)
-    4. Sharing data (publishing, archiving, ... e.g. to databases like `mongoDB <https://www.mongodb.com/>`_ or repositories
-       like `Zenodo <https://zenodo.org/>`_
+    4. Sharing data (either into a repository like `Zenodo <https://zenodo.org/>`_ or into a database)
     5. Reusing data (Searching data in databases, local file structures or online repositories
-       like `Zenodo <https://zenodo.org/>`_.
+       like `Zenodo <https://zenodo.org/>`_).
 
 
 .. image:: _static/new_icon_with_text.svg
@@ -24,20 +23,30 @@ It specifically supports the five main steps of
 
 .. note::
 
-   This project is under current development and is happy to receive ideas as well as
+   This project is under active development and is happy to receive ideas, code contributions as well as
    `bug and issue reports `_. Thank you!
 
 
 Overview
 --------
 
-The packages come with three sub-packages, each covering a different aspect of efficient and sustainable work with
-HDF5 files:
+The `h5rdmtoolbox` is organized in five sub-packages corresponding to the main features, which are needed to achieve a
+sustainable data lifecycle. The sub-packages are:
 
-    - :doc:`convention `: Modular construction of conventions (metadata standardization for HDF files)
-    - :doc:`wrapper `: User-friendly wrapper around `h5py` implementation for efficient work with HDF5 files and above conventions
-    - :doc:`database `: Querying HDF5 files (A database for HDF5 files or interfacing with mongoDB)
+    - :doc:`wrapper `: User-friendly wrapper around the `h5py` implementation for efficient work with HDF5 files and conventions
+    - :doc:`convention `: Simple, user-definable construction of conventions (metadata standardization for HDF5 files)
+    - :doc:`database `: Querying HDF5 files (a database for HDF5 files or interfacing with dedicated solutions like mongoDB)
+    - :doc:`repository `: Upload to and download from repositories (currently, a Zenodo interface is implemented)
+    - :doc:`layout `: User-definable specifications for the layout of HDF5 files (attributes, datasets, groups and properties)
 
+Besides the wrapper, which uses the convention sub-package, all sub-packages are independent of each other and can be
+developed and used separately.
+
+
+.. image:: _static/h5tbx_modules.svg
+   :width: 500
+   :alt: The five sub-packages of the h5rdmtoolbox
+   :align: center
 
 Please navigate through the chapters on the left to learn more about the package. They are organized in the following:
diff --git a/docs/layout/layouts.ipynb b/docs/layout/getting_started.ipynb
similarity index 59%
rename from docs/layout/layouts.ipynb
rename to docs/layout/getting_started.ipynb
index f8c4637b..1e76b371 100644
--- a/docs/layout/layouts.ipynb
+++ b/docs/layout/getting_started.ipynb
@@ -5,9 +5,13 @@
   "id": "ee60b8cd-e14d-4607-9122-26c71e4e025c",
   "metadata": {},
   "source": [
-    "# Layouts\n",
+    "# Getting Started with \"Layouts\"\n",
     "\n",
-    "The `toolbox` provides a framework, which allows defining the layout (structure) of an HDF5 file. In contrast, *conventions* define which attributes are required for all datasets and provide feedback during file creation. *Layouts* define *which* dataset, groups, attributes and properties are expected. Let's learn about it by practical examples:"
+    "The `toolbox` provides a framework, which allows defining the layout (structure) of an HDF5 file. This means we can specify which datasets, groups, attributes and properties are expected.\n",
+    "\n",
+    "What's the difference to conventions? Conventions come into play during dataset or group creation. Conventions define which attributes could or must be provided during the creation. This is done through \"standard attributes\", which also validate the value. A layout, on the other hand, is used after a file is created and checks the full content (except array values) of a file. It should be used to check whether a file adheres to a project definition. If it does, it can be shared with others (other users, repositories, databases, ...).\n",
+    "\n",
+    "Let's learn about it by practical examples:"
   ]
  },
 {
@@ -25,7 +29,9 @@
   "id": "3a553a51-1573-4b28-9d91-26f8740b4c74",
   "metadata": {},
   "source": [
-    "Init a layout:"
+    "## 1. Create a layout\n",
+    "\n",
+    "Create a layout by instantiating the `Layout` class:"
   ]
  },
@@ -43,7 +49,7 @@
   "id": "90673c5c-bbd2-475a-b191-d40e5f2f2e5a",
   "metadata": {},
   "source": [
-    "A Layout(definition) consists of so-called \"layout specifications\", which we have to add to the layout object. Currently, there are no specifications:"
+    "A Layout consists of so-called \"layout specifications\". All specifications are stored in a list. So far we have not added any specification, so the list is empty:"
   ]
  },
  {
@@ -67,46 +73,25 @@
    "lay.specifications"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "id": "939c72a1-8a23-4114-9598-40a600c1c717",
-   "metadata": {},
-   "source": [
-    "## Example data\n",
-    "Let's create some thest data:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "b0ff9809-1c42-4987-83d7-e5ef7bcc2623",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import h5rdmtoolbox as h5tbx\n",
-    "with h5tbx.File() as h5:\n",
-    "    h5.create_dataset('u', shape=(3, 5), compression='lzf')\n",
-    "    h5.create_dataset('v', shape=(3, 5), compression='gzip')\n",
-    "    h5.create_group('instruments', attrs={'description': 'Instrument data'})"
-   ]
-  },
  {
   "cell_type": "markdown",
   "id": "95002eca-127d-45ad-89b1-1558acdc22ae",
   "metadata": {},
   "source": [
-    "## 1. Adding a specification\n",
+    "### 1.1 Adding a specification\n",
     "\n",
     "Let's add a specification. For this we call `.add()`. We will add information for a query request, which will be performed later when we validate a file (layout).\n",
     "\n",
-    "The first argument is the query method. We will use `find` from the database class `h5rdmtoolbox.database.hdfdb.FileDB`. Then we add keyword arguments to be passed to that method.\n",
+    "The first argument is the query method. We will use `find` from the database class [`h5rdmtoolbox.database.hdfdb.FileDB`](../database/hdfDB.ipynb). Then we add keyword arguments to be passed to that method.\n",
     "\n",
-    "As a first example, we request the following for all files to be validated with our layout: **all dataset must be compressed with \"gzip\"**"
+    "As a first example, we request the following for all files to be validated with our layout:\n",
+    "- all datasets must be compressed with \"gzip\"\n",
+    "- the dataset with name \"/u\" must exist"
   ]
  },
 {
  "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
  "id": "762c996d-af0c-4d7e-a794-31c9c788ae76",
  "metadata": {},
  "outputs": [],
@@ -116,37 +101,116 @@
 },
 {
  "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
  "id": "ba490b3c-820c-4dca-a94c-6e7f5f211dee",
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-       "[LayoutSpecification (kwargs={'flt': {}, 'objfilter': 'dataset'})]"
+       "[LayoutSpecification (kwargs={'flt': {}, 'objfilter': 'dataset'}),\n",
+       " LayoutSpecification (kwargs={'flt': {'$name': '/u'}, 'objfilter': 'dataset'})]"
    ]
   },
-   "execution_count": 6,
+   "execution_count": 5,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
+    "# the file must have datasets (this spec makes more sense with the following spec)\n",
     "spec_all_dataset = lay.add(\n",
     "    hdfdb.FileDB.find,  # query function\n",
     "    flt={},\n",
     "    objfilter='dataset'\n",
     ")\n",
     "\n",
+    "# all datasets must be compressed with gzip (conditional spec. only called if parent spec is successful)\n",
     "spec_compression = spec_all_dataset.add(\n",
     "    hdfdb.FileDB.find_one,  # query function\n",
     "    flt={'$compression': 'gzip'}  # query parameter\n",
     ")\n",
     "\n",
+    "# the file must have the dataset \"/u\"\n",
+    "spec_ds_u = lay.add(\n",
+    "    hdfdb.FileDB.find,  # query function\n",
+    "    flt={'$name': '/u'},\n",
+    "    objfilter='dataset'\n",
+    ")\n",
+    "\n",
     "# we added two specifications to the layout:\n",
     "lay.specifications"
    ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "54fcadf7-50c3-44ff-a610-6ab25c7d92d0",
+   "metadata": {},
+   "source": [
+    "**Note:** We added three specifications: The first (`spec_all_dataset`) and the last (`spec_ds_u`) were added to the layout object. The second specification (`spec_compression`) was added to the first specification and is therefore a *conditional specification*. This means that it is only called if the parent specification was successful. Also note that a child specification is called on all result objects of the parent specification. In our case, `spec_compression` is called on all dataset objects in the file."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc06152d-b350-47dd-bd9d-56b9ef275f94",
+   "metadata": {},
+   "source": [
+    "We can call `specifications` on the first specification and, indeed, see the specification defining the compression type:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "fdc586ea-4175-4c4b-a884-a2a16daebc01",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[LayoutSpecification (kwargs={'flt': {'$compression': 'gzip'}})]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "lay.specifications[0].specifications"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "939c72a1-8a23-4114-9598-40a600c1c717",
+   "metadata": {},
+   "source": [
+    "**Example data**\n",
+    "\n",
+    "To test our layout, we need some example data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b0ff9809-1c42-4987-83d7-e5ef7bcc2623",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import h5rdmtoolbox as h5tbx\n",
+    "with h5tbx.File() as h5:\n",
+    "    h5.create_dataset('u', shape=(3, 5), compression='lzf')\n",
+    "    h5.create_dataset('v', shape=(3, 5), compression='gzip')\n",
+    "    h5.create_group('instruments', attrs={'description': 'Instrument data'})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5303fc07-e8c6-4f0e-9abd-1def29794760",
+   "metadata": {},
+   "source": [
+    "## 2. Validate a file"
+   ]
+  },
 {
  "cell_type": "markdown",
  "id": "4a7c54bf-817a-4082-a87c-25fa3569dd10",
  "metadata": {},
@@ -157,7 +221,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "id": "a5b3846d-63f6-486a-8cb0-6bb12e307e2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "2023-12-17_18:29:56,398 ERROR [core.py:84] Applying spec. \"LayoutSpecification (kwargs={'flt': {'$compression': 'gzip'}})\" on \"\" failed.\n"
+     "2023-12-18_10:15:43,100 ERROR [core.py:117] Applying spec. \"LayoutSpecification (kwargs={'flt': {'$compression': 'gzip'}})\" on \"\" failed.\n"
\"LayoutSpecification (kwargs={'flt': {'$compression': 'gzip'}})\" on \"\" failed.\n" ] } ], @@ -183,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "de3744f2-e3d9-4388-852a-486601f88ade", "metadata": {}, "outputs": [ @@ -193,7 +257,7 @@ "False" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -204,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "3e98e206-0f47-4d82-a213-b4d4b7964b86", "metadata": {}, "outputs": [ @@ -214,7 +278,7 @@ "[]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -233,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "e371cada-69b2-45dc-a759-2537ed1b9218", "metadata": {}, "outputs": [ @@ -243,7 +307,7 @@ "(2, 1)" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -253,12 +317,16 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "24f5ac66-5d8f-4c0d-bf93-d320037f8a05", + "cell_type": "markdown", + "id": "67f421cd-a500-4e86-995f-df089990e13d", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## 3. Sharing layouts\n", + "\n", + "**This is work in progress!!!**\n", + "\n", + "Currently, the only way to share layouts, is to share the above code or to save the object as a pickle file (c.f. https://docs.python.org/3/library/pickle.html)." + ] } ], "metadata": { diff --git a/docs/layout/index.rst b/docs/layout/index.rst index c457fc46..3d359a0a 100644 --- a/docs/layout/index.rst +++ b/docs/layout/index.rst @@ -1,10 +1,20 @@ Layout ====== -With "layouts" we can describe the HDF5 file content, including the location of certain datasets or groups, but also the attributes and properties (`shape`, `dtype`, `compression`, ...). +With "layouts" we can describe the HDF5 file content, including the location of certain +datasets or groups, but also the attributes and properties (`shape`, `dtype`, `compression`, ...). + +The way, layouts are designed is based on the HDF5-as-a-database-approach. This means, that we +collect a list of query statements, which are called on HDF5 files. E.g. we could say, that we +require an HDF5 file to have a dataset with a specific name. We would write a query accordingly. +Later, during validation, the query is performed. If such a dataset is found, the layout successfully +validated the file content. +So, before working with layouts, you might first want to learn about `_. + + .. toctree:: :titlesonly: :glob: - layouts.ipynb \ No newline at end of file + getting_started.ipynb \ No newline at end of file diff --git a/docs/repository/index.rst b/docs/repository/index.rst index 6771cddd..0d1befed 100644 --- a/docs/repository/index.rst +++ b/docs/repository/index.rst @@ -1,7 +1,12 @@ Repository ========== -The repository module provides interfaces to data repositories like Zenodo. While there will be more repository interfaces in the future, Zenodo is the one implemented at the moment. It is a popular choice among scientists. +As part of the `h5rdmtoolbox` a repository is understood as an (online) storage place for data. +It assigns a unique identifier to the data deposit and provides a way to retrieve the data set by its identifier. +The `repository` module provides interfaces to different repositories. An abstract class `RepositoryInterface` is +implemented. 
+   ]
  }
 ],
 "metadata": {
diff --git a/docs/layout/index.rst b/docs/layout/index.rst
index c457fc46..3d359a0a 100644
--- a/docs/layout/index.rst
+++ b/docs/layout/index.rst
@@ -1,10 +1,20 @@
 Layout
 ======
 
-With "layouts" we can describe the HDF5 file content, including the location of certain datasets or groups, but also the attributes and properties (`shape`, `dtype`, `compression`, ...).
+With "layouts" we can describe the HDF5 file content, including the location of certain
+datasets or groups, but also the attributes and properties (`shape`, `dtype`, `compression`, ...).
+
+The way layouts are designed is based on the HDF5-as-a-database approach. This means that we
+collect a list of query statements, which are executed on HDF5 files. E.g. we could require
+an HDF5 file to have a dataset with a specific name and write a query accordingly.
+Later, during validation, the query is performed. If such a dataset is found, the layout
+successfully validated the file content.
+So, before working with layouts, you might first want to learn about the `database` sub-package.
+
+
 .. toctree::
    :titlesonly:
    :glob:
 
-   layouts.ipynb
\ No newline at end of file
+   getting_started.ipynb
\ No newline at end of file
diff --git a/docs/repository/index.rst b/docs/repository/index.rst
index 6771cddd..0d1befed 100644
--- a/docs/repository/index.rst
+++ b/docs/repository/index.rst
@@ -1,7 +1,12 @@
 Repository
 ==========
 
-The repository module provides interfaces to data repositories like Zenodo. While there will be more repository interfaces in the future, Zenodo is the one implemented at the moment. It is a popular choice among scientists.
+As part of the `h5rdmtoolbox`, a repository is understood as an (online) storage place for data.
+It assigns a unique identifier to the data deposit and provides a way to retrieve the data by its identifier.
+The `repository` module provides interfaces to different repositories. An abstract class `RepositoryInterface`
+defines the methods that a repository interface has to implement. Currently, only the Zenodo repository
+interface is implemented. It is a popular choice among scientists. Please feel free to contribute other repository
+interfaces.
 
 
 .. toctree::
diff --git a/h5rdmtoolbox/database/hdfdb/query.py b/h5rdmtoolbox/database/hdfdb/query.py
index 3928c29d..204c9e76 100644
--- a/h5rdmtoolbox/database/hdfdb/query.py
+++ b/h5rdmtoolbox/database/hdfdb/query.py
@@ -1,10 +1,6 @@
-import warnings
-
-import logging
 import numpy as np
 import re
-
-
+import warnings
 
 
 def _eq(a, b):
@@ -53,15 +49,14 @@ def _regex(value, pattern) -> bool:
     if value is None:
         return False
 
-    if value is None:
-        return False
-
     if isinstance(value, np.bytes_):
         try:
             value = value.decode()
         except UnicodeDecodeError:
             warnings.warn(f'could not decode {value}', UserWarning)
             return False
+    if isinstance(value, bytes):
+        value = value.decode()
 
     match = re.search(pattern, value)
     if match is None:
diff --git a/h5rdmtoolbox/layout/core.py b/h5rdmtoolbox/layout/core.py
index 26d67ebd..f3ae598e 100644
--- a/h5rdmtoolbox/layout/core.py
+++ b/h5rdmtoolbox/layout/core.py
@@ -11,6 +11,27 @@
 
 
 class LayoutSpecification:
+    """Specification for a layout
+
+    Parameters
+    ----------
+    func: Callable
+        Function to be called on the hdf5 file. The first argument of the function will be
+        an opened h5py.File or h5py.Group or h5py.Dataset object.
+    kwargs: Dict
+        Keyword arguments passed to `func`.
+    n: int
+        Number of expected matches. Only used if `func` returns an iterable object.
+    comment: Optional[str]
+        Optional comment explaining the specification
+    parent: Optional[LayoutSpecification]
+        Parent specification. If the specification is a conditional one, it has a parent.
+        If this is the case, the function `func` is called on the parent's results.
+        If the parent's specification fails, the specification is not applied.
+        If `parent` is None, the specification has no parent and is applied to the
+        hdf5 root group.
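+
+    Example
+    -------
+    A minimal sketch (the query function and filter are illustrative; specifications
+    are typically created via ``Layout.add()`` rather than directly):
+
+    >>> from h5rdmtoolbox.database import hdfdb
+    >>> spec = LayoutSpecification(func=hdfdb.FileDB.find,
+    ...                            kwargs={'flt': {'$name': '/u'}},
+    ...                            n=1)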
+ """ def __init__(self, func, kwargs, n=None, comment: str = None, parent=None): self.func = func @@ -24,24 +45,35 @@ def __init__(self, func, kwargs, n=None, comment: str = None, parent=None): self._n_calls = 0 self._n_fails = 0 - def __eq__(self, other): + def __eq__(self, other) -> bool: + """A specification is equal to another if the ID is identical or the + function, kwargs, comment and parent are identical.""" if not isinstance(other, LayoutSpecification): return False + if isinstance(other, Layout): + return False same_id = self.id == other.id - same_props = self.func == other.func and self.kwargs == other.kwargs - return same_id or same_props + if same_id: + return True + same_parent = self.parent == other.parent + same_comment = self.comment == other.comment + same_kwargs = self.kwargs == other.kwargs + same_func = self.func == other.func + same_n = self.n == other.n + + return all([same_parent, same_comment, same_kwargs, same_func, same_n]) @property - def n_calls(self): + def n_calls(self) -> int: """Return number of calls""" return self._n_calls @property - def n_fails(self): + def n_fails(self) -> int: """Return number of failed calls""" return self._n_fails - def reset(self): + def reset(self) -> None: """Reset the specification and all its children""" self.failed = None self._n_calls = 0 @@ -50,8 +82,9 @@ def reset(self): spec.reset() @property - def called(self): - """Return True if the specification has been called at least once""" + def called(self) -> bool: + """Return True if the specification has been called at least once. + This is determined by the number of calls.""" return self.n_calls > 0 @property @@ -190,18 +223,55 @@ def print_summary(self, indent=2): class LayoutResult: + """Container for the result of a layout validation. It only contains a list of failed specs.""" def __init__(self, list_of_failed_specs: List[LayoutSpecification]): self.list_of_failed_specs = list_of_failed_specs - def is_valid(self): + def is_valid(self) -> bool: + """Return True if the layout is valid, which is the case if no specs failed""" return len(self.list_of_failed_specs) == 0 class Layout(LayoutSpecification): + """A layout is a collection of specifications that can be applied to an HDF5 file or group. + + The class is inherited from LayoutSpecification. Some methods are overwritten. + + Examples + -------- + >>> from h5rdmtoolbox import layout + >>> lay = layout.Layout() + >>> spec_all_dataset = lay.add( + >>> hdfdb.FileDB.find, # query function + >>> flt={}, + >>> objfilter='dataset' + >>> ) + >>> + >>> # all datasets must be compressed with gzip (conditional spec. 
only called if parent spec is successful) + >>> spec_compression = spec_all_dataset.add( + >>> hdfdb.FileDB.find_one, # query function + >>> flt={'$compression': 'gzip'} # query parameter + >>> ) + >>> + >>> # the file must have the dataset "/u" + >>> spec_ds_u = lay.add( + >>> hdfdb.FileDB.find, # query function + >>> flt={'$name': '/u'}, + >>> objfilter='dataset' + >>> ) + >>> lay.validate('path/to/file.h5') + """ + + def __init__(self): self.specifications = [] + def __eq__(self, other): + if not isinstance(other, Layout): + return False + return self.specifications == other.specifications + def validate(self, filename_or_root_group: Union[str, pathlib.Path, h5py.Group]) -> LayoutResult: """Validate the layout by passing a filename or an opened root group""" if isinstance(filename_or_root_group, h5py.Group): diff --git a/h5rdmtoolbox/layout/test.hdf b/h5rdmtoolbox/layout/test.hdf deleted file mode 100644 index 01d6acc53d1575d0defc73727667a264ece7c3b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6144 zcmeHI&r3o<5T1Sh;9*7yf!ZaHoy@?_8EJWljOx(sNfaccg1_+A)&3Lzl>RH*-TB5) zh({d?Wah9lyE{8?_M30^y*KL@+nN0gISE5b(yljUSU{-2E6X_?+Rz4i20ahhFq9y- zhvPOn$DgHne+vKm+eU+|r^FYHZK@>MTFp*&Rc|^1&Bob{yW;~cDt!6?&*c7N7!zpW zWlbD@s12n(e|c~WJ@gSwz>yP(2YOnzvwY19{%EdrV0+6+iSi-k-`I^(6#D%j;D8}I zC1IokD)3~LrJ6TfUVHZJxYrpC?Ml%Y-&w9dII|FbUU6Q$g#FdH>=i%WUAO9n0{dl_ zl3e(kqb?sO(-dnR83AJ}@9V_Hvi5Izb&6WlsJcbf1?myctM*zcA?e;Q4m2g6Rk7e$h0TpxHEg1j-*m5>{)FV9<7o`Psp z&~f>Wb&uTZLbl^sq>Ycum9l$51jF@pxYK+`EY*sDBA^H;0*Zhlpa>`eihv@Z2q*%I KfFkf;2z&w#b5#Za diff --git a/h5rdmtoolbox/wrapper/core.py b/h5rdmtoolbox/wrapper/core.py index c8561d38..0ad0dd11 100644 --- a/h5rdmtoolbox/wrapper/core.py +++ b/h5rdmtoolbox/wrapper/core.py @@ -1923,9 +1923,6 @@ class File(h5py.File, Group, SpecialAttributeWriter, Core): An additional argument is added to the h5py. - .. seealso:: :meth:`check` - - .. note:: All features from h5py packages are preserved. 
diff --git a/tests/database/test_filequery.py b/tests/database/test_filequery.py deleted file mode 100644 index e20456de..00000000 --- a/tests/database/test_filequery.py +++ /dev/null @@ -1,516 +0,0 @@ -# import numpy as np -# import unittest -# -# import h5rdmtoolbox as h5tbx -# from h5rdmtoolbox.wrapper.core import File -# -# -# class TestFileQuery(unittest.TestCase): -# -# def setUp(self) -> None: -# h5tbx.use(None) -# -# def test_find_dict_entry(self): -# with h5tbx.File() as h5: -# h5.attrs['contact'] = {'name': 'John Doe', -# 'surname': 'Doe', -# 'firstName': 'John'} -# h5.create_dataset('grp/ds1', shape=(1, 2, 3), -# attrs=dict(units='', long_name='long name 1', -# contact={'name': 'Jane Doe', -# 'surname': 'Doe', -# 'firstName': 'Simon'})) -# h5.create_group('grp/sub_grp', attrs=dict(contact={'name': 'John Doe', -# 'surname': 'Doe', -# 'firstName': 'John'})) -# self.assertEqual(0, len(h5tbx.FileDB(h5.hdf_filename).find({'contact.name': 'Doe'}))) -# self.assertEqual(3, len(h5tbx.FileDB(h5.hdf_filename).find({'contact.surname': 'Doe'}))) -# self.assertEqual(2, len(h5tbx.FileDB(h5.hdf_filename).find({'contact.firstName': 'John'}))) -# self.assertEqual(0, len(h5tbx.FileDB(h5.hdf_filename).find({'contact.lastName': 'Doe'}))) -# self.assertEqual(0, len(h5tbx.FileDB(h5.hdf_filename).find({'noContact.lastName': 'Doe'}))) -# -# def test_regex(self): -# from h5rdmtoolbox.database.file import _regex -# self.assertFalse(_regex(None, '*')) -# self.assertFalse(_regex('hallo', r'\d4')) -# self.assertFalse(_regex('hallo', 'hello')) -# -# def test_FileDB(self): -# fname1 = h5tbx.utils.generate_temporary_filename('.hdf', touch=True) -# fname2 = h5tbx.utils.generate_temporary_filename('.hdf', touch=True) -# tmp_dir = h5tbx.utils.generate_temporary_directory() -# fname3 = tmp_dir / 'tmpX.hdf' -# with h5tbx.File(fname3, 'w') as h5: -# pass -# fd = h5tbx.FileDB([fname1, fname2]) -# self.assertEqual(fd.filenames, [fname1, fname2]) -# fd = h5tbx.FileDB([fname1, fname2, tmp_dir]) -# self.assertEqual(fd.filenames, [fname1, fname2, fname3]) -# fd = h5tbx.FileDB([tmp_dir, ]) -# self.assertEqual(fd.filenames, [fname3]) -# f = h5tbx.FileDB(fname1) -# self.assertIsInstance(f, h5tbx.database.File) -# -# fname4 = tmp_dir / 'sub_grp/tmpX.hdf' -# fname4.parent.mkdir() -# with h5tbx.File(fname4, 'w') as h5: -# pass -# fd = h5tbx.FileDB(tmp_dir, rec=True) -# self.assertEqual(fd.filenames, [fname3, fname4]) -# fd = h5tbx.FileDB([tmp_dir], rec=True) -# self.assertEqual(fd.filenames, [fname3, fname4]) -# -# def test_Folder(self): -# folder_dir = h5tbx.utils.generate_temporary_directory() -# sub_folder = folder_dir / 'sub_folder' -# sub_folder.mkdir() -# -# with h5tbx.File(folder_dir / 'f1.hdf', 'w') as h5: -# h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# h5.create_dataset('ds2', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 2')) -# h5.create_dataset('ds3', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 3')) -# -# with h5tbx.File(folder_dir / 'f2.hdf', 'w') as h5: -# h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# h5.create_dataset('ds2', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 2')) -# h5.create_dataset('ds3', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 3')) -# -# with h5tbx.File(folder_dir / sub_folder / 'f3.hdf', 'w') as h5: -# h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# h5.create_dataset('ds2', shape=(4, 2, 3), 
attrs=dict(units='', long_name='long name 2')) -# h5.create_dataset('ds3', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 3')) -# -# with self.assertRaises(ValueError): -# h5tbx.database.Folder('here') -# -# fd = h5tbx.database.Folder(folder_dir, rec=False) -# self.assertFalse(fd.rec) -# -# fdauto = h5tbx.FileDB(folder_dir, rec=False) -# self.assertIsInstance(fdauto, h5tbx.database.Folder) -# self.assertFalse(fdauto.rec) -# -# fdauto = h5tbx.FileDB(folder_dir, rec=True) -# self.assertIsInstance(fdauto, h5tbx.database.Folder) -# self.assertTrue(fdauto.rec) -# -# self.assertEqual(2, len(list(fd.filenames))) -# self.assertEqual(2, len(fd)) -# self.assertEqual(fd.filenames[0], fd[0].filename) -# self.assertEqual(fd.filenames[0], fd.find_one({'$basename': 'ds1'}).filename) -# self.assertEqual(fd.filenames[0], fd.find_one({'$basename': 'ds1'}).filename) -# -# self.assertEqual(2, len(fd.find({'long_name': 'long name 1'}))) -# -# fdr = h5tbx.database.Folder(folder_dir, rec=True) -# self.assertEqual(3, len(list(fdr.filenames))) -# -# res = fd.find_one_per_file({'$basename': {'$regex': 'ds[0-9]'}}) -# self.assertEqual(2, len(res)) -# -# def test_exists(self): -# with h5tbx.File() as h5: -# h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# h5.create_dataset('ds2', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# -# self.assertTrue('long_name' in h5.find_one({'long_name': {'$exists': True}}).attrs) -# self.assertIsInstance(h5.find_one({'long_name': {'$exists': True}}), h5tbx.wrapper.core.Dataset) -# -# mres = h5.find({'long_name': {'$exists': True}}) -# self.assertEqual(2, len(mres)) -# -# def test_chained_find(self): -# with h5tbx.File() as h5: -# g = h5.create_group('grp1') -# g.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# -# r = h5.find_one({'$basename': 'grp1'}).find_one({'$basename': 'ds1'}) -# self.assertEqual(r.name, '/grp1/ds1') -# -# r = h5.find({'$basename': 'grp1'}) -# -# r = r.find_one({'$basename': 'ds1'}) -# self.assertEqual(r.name, '/grp1/ds1') -# -# lazy_results = h5tbx.FileDB(h5.hdf_filename).find({'$basename': 'grp1'}) -# r = h5tbx.database.file.ResultList(lazy_results).find_one({'$basename': 'ds1'}) -# self.assertEqual(r.name, '/grp1/ds1') -# -# lazy_results = h5tbx.FileDB(h5.hdf_filename).find({'$basename': 'grp1'}) -# r = h5tbx.database.file.ResultList(lazy_results).find({'$basename': 'ds1'}) -# self.assertEqual(r[0].name, '/grp1/ds1') -# -# r = lazy_results.find_one({'$basename': 'non-existent'}) -# self.assertTrue(r is None) -# -# def test_math_operators(self): -# from h5rdmtoolbox.database.file import _pass, _mean -# self.assertEqual(None, _pass(np.array([1, 2, 3]), 1)) -# self.assertEqual(None, _mean(np.array(['hello', 'world'], dtype='S'), 1)) -# -# def test_chained_find2(self): -# with h5tbx.File() as h5: -# h5.write_iso_timestamp(name='timestamp', -# dt=None) # writes the current date time in iso format to the attribute -# h5.attrs['project'] = 'tutorial' -# h5.create_dataset('velocity', data=[1, 2, -1], attrs=dict(units='m/s', standard_name='x_velocity')) -# g = h5.create_group('group1') -# g.create_dataset('velocity', data=[4, 0, -3, 12, 3], attrs=dict(units='m/s', standard_name='x_velocity')) -# g = h5.create_group('group2') -# g.create_dataset('velocity', data=[12, 11.3, 4.6, 7.3, 8.1], -# attrs=dict(units='m/s', standard_name='x_velocity')) -# g.create_dataset('z', data=5.4, attrs=dict(units='m', standard_name='z_coordinate')) -# h5.dump() -# filename = 
h5.hdf_filename -# # find the dataset "z". It is 0D with data=5.4 -# results = h5tbx.FileDB(filename).find({'standard_name': 'z_coordinate'}).find({'$eq': 5.4}) -# self.assertEqual(1, len(results)) -# -# def test_Files(self): -# fnames = [] -# with File() as h51: -# h51.create_dataset('ds', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1')) -# fnames.append(h51.hdf_filename) -# -# with File() as h52: -# h52.create_dataset('ds', shape=(4, 2, 3), attrs=dict(units='', long_name='long name 2')) -# fnames.append(h52.hdf_filename) -# -# with h5tbx.FileDB(fnames) as h5s: -# self.assertIsInstance(h5s, h5tbx.database.Files) -# self.assertEqual(2, len(h5s['ds'])) -# self.assertIsInstance(h5s['ds'][0], h5tbx.Dataset) -# self.assertTrue(len(h5s.filenames) == 2) -# with self.assertRaises(TypeError): -# h5s.find(2) -# res = h5s.find({'$basename': 'ds'}) -# self.assertEqual(sorted([h51.ds, h52.ds]), sorted(res)) -# res = h5s.find({'$basename': 'none'}) -# self.assertEqual(res, []) -# res = h5s.find_one({'$basename': 'ds'}) -# self.assertEqual(h51.ds, res) -# -# def test_find_shortcuts(self): -# """find method shortcuts tests""" -# with h5tbx.File() as h5: -# h5.write_iso_timestamp(name='timestamp', -# dt=None) # writes the current date time in iso format to the attribute -# h5.attrs['project'] = 'tutorial' -# h5.create_dataset('velocity', data=[1, 2, -1], attrs=dict(units='m/s', standard_name='x_velocity')) -# g = h5.create_group('group1') -# g.create_dataset('velocity', data=[4, 0, -3, 12, 3], attrs=dict(units='m/s', standard_name='x_velocity')) -# g = h5.create_group('group2') -# g.create_dataset('velocity', data=[12, 11.3, 4.6, 7.3, 8.1], -# attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.dump() -# filename = h5.hdf_filename -# -# res_v1 = h5tbx.database.File(filename).find({'standard_name': {'$regex': '.*'}}, '$dataset') -# res_v2 = h5tbx.database.File(filename).find('standard_name', '$dataset') -# for r1, r2 in zip(sorted(res_v1), sorted(res_v2)): -# self.assertEqual(r1, r2) -# -# res_v1 = h5tbx.database.File(filename).find({'standard_name': {'$regex': '.*'}, -# 'units': {'$regex': '.*'}}, '$dataset') -# res_v2 = h5tbx.database.File(filename).find(['standard_name', 'units'], '$dataset') -# for r1, r2 in zip(sorted(res_v1), sorted(res_v2)): -# self.assertEqual(r1, r2) -# -# with self.assertRaises(TypeError): -# h5tbx.database.File(filename).find(2, '$dataset') -# -# with self.assertRaises(TypeError): -# h5tbx.database.File(filename).find([2, 2], '$dataset') -# -# def test_compare_to_dataset_values(self): -# with h5tbx.use('h5tbx'): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=4.5, attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('v', data=13.5, attrs=dict(units='m/s', standard_name='y_velocity')) -# g = h5.create_group('group1') -# g.create_dataset('u', data=4.5, attrs=dict(units='m/s', standard_name='x_velocity')) -# g.create_dataset('v', data=13.5, attrs=dict(units='m/s', standard_name='y_velocity')) -# -# res = h5.find({'$eq': 4.5}, '$dataset', rec=False) -# self.assertEqual(res, [h5['u']]) -# -# res = h5.find({'$eq': 4.5}, rec=False) -# self.assertEqual(res, [h5['u']]) -# -# res = h5.find({'$eq': 13.5}, '$dataset', rec=False) -# self.assertEqual(res, [h5['v']]) -# -# res = h5.find({'$gt': 12.5}, rec=False) -# self.assertEqual(res, [h5['v']]) -# -# res = h5.find({'$gt': 0.5}, rec=False) -# self.assertEqual(sorted(res), sorted([h5['v'], h5['u']])) -# -# res = h5.find({'$lt': 20.5}, rec=False) -# self.assertEqual(sorted(res), 
sorted([h5['v'], h5['u']])) -# -# res = h5.find({'$lte': 13.5}, rec=False) -# self.assertEqual(sorted(res), sorted([h5['v'], h5['u']])) -# -# res = h5.find({'$eq': 4.5}, rec=True) -# self.assertEqual(sorted(res), sorted([h5['u'], h5['/group1/u']])) -# -# res = h5.find_one({'$eq': 4.5}, rec=True) -# self.assertEqual(res.basename, h5['u'].basename) -# -# def test_compare_to_dataset_values_2(self): -# with h5tbx.use('h5tbx'): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=[1.2, 3.4, 4.5], attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('v', data=[4.0, 13.5, -3.4], attrs=dict(units='m/s', standard_name='y_velocity')) -# -# res = h5.find_one({'$eq': [1.2, 3.4, 4.5]}, rec=False) -# self.assertEqual(res.basename, h5['u'].basename) -# res = h5.find({'$eq': [1.2, 3.4, 4.5]}, rec=False) -# self.assertEqual(res[0].basename, h5['u'].basename) -# res = h5.find({'$eq': [1.2, 3.4, 4.0]}, rec=False) -# self.assertEqual(0, len(res)) -# -# def test_find_init_function(self): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=[1.2, 3.4, 4.5], attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('v', data=[4.0, 13.5, -3.4], attrs=dict(units='m/s', standard_name='y_velocity')) -# res = h5tbx.find_one(h5.hdf_filename, {'$eq': [1.2, 3.4, 4.5]}, rec=False) -# self.assertEqual(res.basename, 'u') -# res = sorted(h5tbx.find(h5.hdf_filename, {})) -# self.assertEqual(len(res), 3) -# self.assertEqual(res[0].basename, '') -# self.assertEqual(res[1].basename, 'u') -# self.assertEqual(res[2].basename, 'v') -# -# def test_compare_to_dataset_values_mean(self): -# with h5tbx.use('h5tbx'): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=[1.2, 3.4, 4.5], attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('v', data=[4.0, 13.5, -3.4], attrs=dict(units='m/s', standard_name='y_velocity')) -# res = h5.find({'$eq': {'$mean': np.mean([1.2, 3.4, 4.5])}}, rec=False) -# self.assertEqual(1, len(res)) -# self.assertEqual(res[0].basename, h5['u'].basename) -# -# def test_compare_to_dataset_values_mean_combined(self): -# with h5tbx.use('h5tbx'): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=[1.2, 3.4, 4.5], attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('z', data=[1.2, 3.4, 4.5], attrs=dict(units='m/s', standard_name='z_velocity')) -# h5.create_dataset('v', data=[4.0, 13.5, -3.4], attrs=dict(units='m/s', standard_name='y_velocity')) -# -# res = h5.find({'standard_name': 'x_velocity', -# '$eq': {'$mean': np.mean([1.2, 3.4, 4.5])}}, rec=False) -# -# self.assertEqual(1, len(res)) -# self.assertEqual(res[0].basename, h5['u'].basename) -# -# def test_compare_to_dataset_values_range(self): -# with h5tbx.use('h5tbx'): -# with h5tbx.File() as h5: -# h5.create_dataset('u', data=4.5, attrs=dict(units='m/s', standard_name='x_velocity')) -# h5.create_dataset('v', data=13.5, attrs=dict(units='m/s', standard_name='y_velocity')) -# -# res = h5.find({'$gt': 10.0, '$lt': 12.7}, rec=False) -# self.assertEqual(0, len(res)) -# -# res = h5.find({'$gt': 10.0, '$lt': 13.7}, rec=False) -# self.assertEqual(1, len(res)) -# self.assertEqual('v', res[0].basename) -# -# def test_numerical_attrs(self): -# with h5tbx.File() as h5: -# h5.create_dataset('a1', shape=(1, 2, 3), attrs=dict(a=1)) -# h5.create_dataset('a2', shape=(1, 2, 3), attrs=dict(a=2)) -# h5.create_dataset('a3', shape=(1, 2, 3), attrs=dict(a=3)) -# h5.create_dataset('a4', shape=(1, 2, 3), attrs=dict(a=4)) -# h5.create_dataset('b5', shape=(1, 2, 3), attrs=dict(b=5)) -# 
h5.create_dataset('b6', shape=(1, 2, 3), attrs=dict(b=6))
-#
-#             self.assertEqual(h5.find({'a': None}), [])
-#             self.assertEqual(h5.find({'a': ''}), [])
-#
-#             self.assertEqual(h5.find_one({'a': None}), None)
-#
-#             self.assertListEqual(h5.find({'a': 1}), [h5['a1']])  # __eq__
-#             self.assertEqual(h5.find_one({'a': 1}), h5['a1'])  # __eq__
-#
-#             self.assertEqual(h5.find({'a': {'$lt': 2}}), [h5['a1'], ])  # $lt
-#             self.assertEqual(h5.find_one({'a': {'$lt': 2}}), h5['a1'])  # $lt
-#
-#             self.assertEqual(sorted(h5.find({'a': {'$lt': 3}})), sorted([h5['a1'], h5['a2'], ]))  # $lt
-#             self.assertIn(h5.find_one({'a': {'$lt': 3}}), [h5['a1'], h5['a2']])  # $lt
-#
-#             self.assertEqual(h5.find({'a': {'$lte': 1}}), [h5['a1'], ])  # $lte
-#             self.assertEqual(h5.find_one({'a': {'$lte': 1}}), h5['a1'])  # $lte
-#
-#             self.assertEqual(sorted(h5.find({'a': {'$lte': 2}})), [h5['a1'], h5['a2'], ])  # $lte
-#             self.assertIn(h5.find_one({'a': {'$lte': 2}}), [h5['a1'], h5['a2'], ])  # $lte
-#
-#             self.assertEqual(h5.find({'a': {'$gt': 3}}), [h5['a4'], ])  # $gt
-#             self.assertEqual(h5.find_one({'a': {'$gt': 3}}), h5['a4'])  # $gt
-#
-#             self.assertEqual(h5.find({'a': {'$gte': 4}}), [h5['a4'], ])  # $gte
-#             self.assertEqual(h5.find_one({'a': {'$gte': 4}}), h5['a4'])  # $gte
-#
-#             self.assertEqual(sorted(h5.find({'a': {'$gte': 3}})), [h5['a3'], h5['a4'], ])  # $gte
-#             self.assertIn(h5.find_one({'a': {'$gte': 3}}), [h5['a3'], h5['a4'], ])  # $gte
-#
-#     def test_lazy(self):
-#         self.assertTrue(h5tbx.database.lazy.lazy(None) is None)
-#         with self.assertRaises(TypeError):
-#             h5tbx.database.lazy.lazy(3.4)
-#
-#         with h5tbx.File() as h5:
-#             h5.create_group('grp', attrs={'a': 1, 'b': 2})
-#             h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(a=99, b=100))
-#             self.assertIsInstance(h5tbx.database.lazy.lazy([h5.hdf_filename, 'ds1']),
-#                                   h5tbx.database.lazy.LDataset)
-#             self.assertIsInstance(h5tbx.database.lazy.lazy([h5.hdf_filename, 'grp']),
-#                                   h5tbx.database.lazy.LGroup)
-#
-#         with h5tbx.File() as h5:
-#             h5.create_group('g1', attrs={'a': 1, 'b': 2})
-#             h5.create_group('g2', attrs={'a': -12, 'b': 2})
-#             h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(a=99, b=100))
-#             h5.create_dataset('ds2', shape=(1, 2, 3), attrs=dict(a=2))
-#             h5.create_group('/a/b/c/d', attrs={'is_subgroup': True})
-#             r = h5tbx.database.File(h5.hdf_filename).find_one({'a': {'$gte': 80}})
-#             self.assertIsInstance(r, h5tbx.database.lazy.LDataset)
-#             self.assertIsInstance(r.attrs, dict)
-#             self.assertEqual(r.attrs['a'], 99)
-#             with h5tbx.set_config(add_provenance=False):
-#                 self.assertEqual(r.attrs.keys(), r[()].attrs.keys())
-#                 self.assertEqual(list(r.attrs.values()), list(r[()].attrs.values()))
-#             self.assertEqual(r.shape, (1, 2, 3))
-#             self.assertEqual(r.ndim, 3)
-#
-#             with r as h5:
-#                 self.assertIsInstance(h5, h5tbx.Dataset)
-#
-#             r = h5tbx.database.File(h5.hdf_filename).find_one({'a': {'$gte': 0}}, '$group')
-#
-#             self.assertIsInstance(r, h5tbx.database.lazy.LGroup)
-#             self.assertEqual('', r.parentname)
-#             self.assertEqual([], r.parentnames)
-#
-#             r_subgrp = h5tbx.database.File(h5.hdf_filename).find_one({'is_subgroup': True}, '$group')
-#             self.assertIsInstance(r_subgrp, h5tbx.database.lazy.LGroup)
-#             self.assertEqual('/a/b/c', r_subgrp.parentname)
-#             self.assertEqual(['a', 'b', 'c'], r_subgrp.parentnames)
-#
-#             self.assertIsInstance(r.attrs, dict)
-#             self.assertEqual(r.name, '/g1')
-#             self.assertEqual(r.basename, 'g1')
-#
-#     def test_regex2(self):
-#         with h5tbx.File() as h5:
-#             h5.create_dataset('ds1', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1'))
-#             h5.create_dataset('ds2', shape=(1, 2, 3), attrs=dict(units='', long_name='another long name 2'))
-#             h5.create_dataset('ds3', shape=(1, 2, 3), attrs=dict(units='', long_name='yet another long name 3'))
-#
-#             self.assertEqual(h5.find_one({'long_name': {'$regex': 'long name 1'}}), h5['ds1'])
-#             self.assertEqual(h5.find_one({'long_name': {'$regex': 'does not exist'}}), None)
-#             self.assertIn(h5.find_one({'long_name': {'$regex': 'long name'}}), [h5['ds1'], h5['ds2'], h5['ds3']])
-#             self.assertEqual(sorted(h5.find({'long_name': {'$regex': '(.*)long name(.*)'}})),
-#                              [h5['ds1'], h5['ds2'], h5['ds3']])
-#             self.assertEqual(h5.find({'long_name': {'$regex': '(.*)long_name(.*)'}}),
-#                              [])
-#
-#     def test_and_find(self):
-#         with h5tbx.File() as h5:
-#             h5.create_dataset('ds', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1'))
-#             h5.create_dataset('ds2', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1'))
-#             h5.create_dataset('ds3', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 2'))
-#             h5.create_group('grps', attrs=dict(long_name='long name 1'))
-#             res = h5.find({'$basename': 'ds', 'long_name': 'long name 1'})
-#             self.assertEqual(res[0], h5['ds'])
-#             res = sorted(h5.find({'$shape': (1, 2, 3), 'long_name': 'long name 1'}, '$dataset'))
-#             self.assertEqual(len(res), 2)
-#             self.assertEqual(res[0], h5['ds'])
-#             self.assertEqual(res[1], h5['ds2'])
-#             res = h5.find_one({'$shape': (1, 2, 3), 'long_name': 'long name 1'}, '$dataset')
-#             self.assertIn(res, [h5['ds'], h5['ds2']])
-#             res = h5tbx.database.File(h5.hdf_filename).find_one({'$basename': 'ds', 'long_name': 'long name 1'})
-#             self.assertEqual('ds', res.basename)
-#
-#     def test_recursive_find(self):
-#         with h5tbx.File() as h5:
-#             gd = h5.create_group('trn_datacubes')
-#             gd.create_dataset('u', data=np.random.random((3, 5, 10, 20)))
-#             g = h5.create_group('monitors')
-#             g.create_dataset('pressure1', data=[1, 2, 3], attrs={'long_name': 'Pressure'})
-#             g.create_dataset('pressure2', data=[1, 2, 3], attrs={'long_name': 'Pressure'})
-#
-#             self.assertEqual(gd.find({'long_name': 'Pressure'}, rec=True), [])
-#             self.assertEqual(gd.find({'long_name': 'Pressure'}, rec=False), [])
-#             self.assertEqual(gd.find({'$shape': (3, 5, 10, 20)}, rec=True, objfilter='$Dataset'), [gd.u])
-#             with self.assertRaises(AttributeError):
-#                 gd.find({'$fail': (3, 5, 10, 20)}, objfilter='$Dataset', rec=True)
-#
-#     def test_distinct(self):
-#         with h5tbx.File() as h5:
-#             gd = h5.create_group('trn_datacubes')
-#             gd.create_dataset('u', data=np.random.random((3, 5, 10, 20)))
-#             g = h5.create_group('monitors')
-#             g.create_dataset('pressure1', data=[1, 2, 3], attrs={'long_name': 'Pressure'})
-#             g.create_dataset('pressure2', data=[1, 2, 3], attrs={'long_name': 'Pressure'})
-#
-#             self.assertEqual(h5.distinct('long_name', '$Dataset'), ['Pressure', ])
-#             self.assertEqual([(3,), (3, 5, 10, 20)], h5.distinct('$shape', '$Dataset'))
-#             self.assertEqual(sorted(['/trn_datacubes', '/monitors', '/']),
-#                              sorted(h5.distinct('$name', '$Group')))
-#
-#     def test_getitem(self):
-#         fnames = []
-#         with File() as h51:
-#             h51.create_dataset('ds', data=(1, 2, 3), attrs=dict(units='', long_name='long name 1'))
-#             fnames.append(h51.filename)
-#
-#         with File() as h52:
-#             h52.create_dataset('ds', data=(4, 5, 6), attrs=dict(units='', long_name='long name 2'))
-#             fnames.append(h52.filename)
-#
-#     def test_isel(self):
-#         with h5tbx.File() as h5:
-#             h5.create_dataset('ds', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 1'))
-#             h5.create_dataset('ds2', shape=(1, 2, 3),
-#                               attrs=dict(units='', long_name='long name 1'))
-#             h5.create_dataset('ds3', shape=(1, 2, 3), attrs=dict(units='', long_name='long name 2'))
-#
-#             self.assertTrue(h5tbx.database.File(h5.hdf_filename).find_one({}, '$dataset') is None)
-#             res = h5tbx.database.File(h5.hdf_filename).find({}, '$dataset')
-#             self.assertEqual(len(res), 3)
-#
-#             self.assertTupleEqual((2, 3), res[0].isel(dim_0=0).shape)
-#             with self.assertRaises(KeyError):
-#                 res[0].isel(z=0.3)
-#             self.assertTupleEqual((1, 3), res[0].isel(dim_1=0).shape)
-#             self.assertTupleEqual((1, 2), res[0].isel(dim_2=0).shape)
-#             self.assertTupleEqual((1,), res[0].isel(dim_1=0, dim_2=1).shape)
-#             self.assertEqual(1, res[0].isel(dim_2=2, dim_1=1).ndim)
-#             self.assertEqual(0, res[0].isel(dim_2=2, dim_1=1, dim_0=0).ndim)
-#             with self.assertRaises(IndexError):
-#                 self.assertEqual(0, res[0].isel(dim_2=2, dim_1=1, dim_0=2).ndim)
-#             self.assertTupleEqual((1, 2, 2), res[0].isel(dim_2=slice(0, 2, 1)).shape)
-#             with self.assertRaises(ValueError):
-#                 res[0].isel(dim_5=2)
-#
-#     def test_sel(self):
-#         with h5tbx.File() as h5:
-#             h5.create_dataset('z', data=[2, 5, 10], make_scale=True)
-#             h5.create_dataset('ds1', data=[1, 2, 3], attrs=dict(units='', long_name='long name 1'),
-#                               attach_scales=('z',))
-#
-#             res = h5tbx.database.File(h5.hdf_filename).find({'$basename': {'$regex': '^ds[0-3]'}}, '$dataset')
-#             self.assertEqual(len(res), 1)
-#
-#             with self.assertRaises(ValueError):
-#                 res[0].sel(z=0.2).shape
-#             self.assertTupleEqual((), res[0].sel(z=0.2, method='nearest').shape)
-#             self.assertTrue(res[0].sel(z=0.2, method='nearest')[()] == 1)
-#             self.assertTrue(res[0].sel(z=4.8, method='nearest')[()] == 2)
-#             self.assertTrue(res[0].sel(z=5.2, method='nearest')[()] == 2)
-#             self.assertTrue(res[0].sel(z=8.9, method='nearest')[()] == 3)
-#
-#             with self.assertRaises(NotImplementedError):
-#                 res[0].sel(z=9, method='closest')
diff --git a/tests/database/test_hdfDB.py b/tests/database/test_hdfDB.py
index 2f13b4e5..46f83bb0 100644
--- a/tests/database/test_hdfDB.py
+++ b/tests/database/test_hdfDB.py
@@ -101,6 +101,72 @@ def test_find_one(self):
             single_res = gdb_root.find_one({'a': {'$gte': 0}}, recursive=True)
             self.assertTrue(single_res.attrs['a'] >= 0)
 
+    def test_regex(self):
+        from h5rdmtoolbox.database.hdfdb.query import _regex
+        self.assertFalse(_regex(None, 'b'))
+        self.assertTrue(_regex('a', 'a'))
+        self.assertTrue(_regex(b'a', 'a'))
+        self.assertTrue(_regex(np.bytes_('a'), 'a'))
+
+    def test_eq(self):
+        from h5rdmtoolbox.database.hdfdb.query import _eq
+        self.assertFalse(_eq(None, 'b'))
+        self.assertTrue(_eq('a', 'a'))
+        self.assertTrue(_eq(1, 1))
+        self.assertFalse(_eq(1, 2))
+
+    def test_lte(self):
+        from h5rdmtoolbox.database.hdfdb.query import _lte
+        self.assertFalse(_lte(None, 'b'))
+        self.assertFalse(_lte('a', None))
+        self.assertTrue(_lte(1, 2))
+        self.assertTrue(_lte(1, 1))
+        self.assertFalse(_lte(2, 1))
+
+    def test_gte(self):
+        from h5rdmtoolbox.database.hdfdb.query import _gte
+        self.assertFalse(_gte(None, 'b'))
+        self.assertFalse(_gte('a', None))
+        self.assertTrue(_gte(2, 1))
+        self.assertTrue(_gte(1, 1))
+        self.assertFalse(_gte(1, 2))
+
+    def test_lt(self):
+        from h5rdmtoolbox.database.hdfdb.query import _lt
+        self.assertFalse(_lt(None, 'b'))
+        self.assertFalse(_lt('a', None))
+        self.assertTrue(_lt(1, 2))
+        self.assertFalse(_lt(1, 1))
+        self.assertFalse(_lt(2, 1))
+
+    def test_gt(self):
+        from h5rdmtoolbox.database.hdfdb.query import _gt
+        self.assertFalse(_gt(None, 'b'))
+        self.assertFalse(_gt('a', None))
+        self.assertTrue(_gt(2, 1))
+        self.assertFalse(_gt(1, 1))
+        self.assertFalse(_gt(1, 2))
+
+    def test_basename(self):
+        from h5rdmtoolbox.database.hdfdb.query import _basename
+        self.assertFalse(_basename(None, 'b'))
+        self.assertFalse(_basename('a', None))
+        self.assertTrue(_basename('/a', 'a'))
+        self.assertTrue(_basename('/a/b', 'b'))
+        self.assertTrue(_basename('/a/b/c', 'c'))
+        self.assertFalse(_basename('/a/b/c', 'c/d'))
+        self.assertFalse(_basename('/a/b/c', 'b'))
+        self.assertFalse(_basename('/a/b/c', 'a'))
+        self.assertFalse(_basename('/a/b/c', '/a/b/c'))
+
+    def test_get_ndim(self):
+        from h5rdmtoolbox.database.hdfdb.query import get_ndim
+        self.assertEqual(0, get_ndim(5))
+        self.assertEqual(0, get_ndim(np.array(5.4)))
+        self.assertEqual(1, get_ndim(np.array([1, 2, 3])))
+        self.assertEqual(2, get_ndim(np.array([[1, 2, 3]])))
+        self.assertEqual(3, get_ndim(np.array([[[1, 2, 3]]])))
+
     def test_find(self):
         with h5py.File(h5tbx.utils.generate_temporary_filename(suffix='.hdf'), 'w') as h5:
diff --git a/tests/layouts/test_core.py b/tests/layouts/test_core.py
index 73494a35..c94c340e 100644
--- a/tests/layouts/test_core.py
+++ b/tests/layouts/test_core.py
@@ -10,6 +10,26 @@
 
 class TestCore(unittest.TestCase):
 
+    def test_eq(self):
+        lay = layout.Layout()
+        spec1 = lay.add(hdfdb.FileDB.find, flt={'$name': '/u'}, n=1)
+        spec2 = lay.add(hdfdb.FileDB.find, flt={'$name': '/u'}, n=1)  # identical call, so the existing spec1 object is returned
+        self.assertEqual(spec1, spec2)
+        self.assertEqual(spec1, spec1)  # same id
+        self.assertNotEqual(spec1, None)
+        self.assertNotEqual(spec1, 1)
+        self.assertNotEqual(spec1, 'a')
+        self.assertNotEqual(spec1, [])
+        self.assertNotEqual(spec1, {})
+        self.assertNotEqual(spec1, set())
+        self.assertNotEqual(spec1, lay)
+
+        spec3 = lay.add(hdfdb.FileDB.find, flt={'$name': '/v'}, n=2)
+        self.assertNotEqual(spec1, spec3)
+
+        spec_sub_1 = spec1.add(hdfdb.FileDB.find, flt={'$name': '/u'}, n=1)
+        self.assertNotEqual(spec1, spec_sub_1)
+
     def test_number_of_datasets(self):
         filename = h5tbx.utils.generate_temporary_filename(suffix='.hdf')
         with h5py.File(filename, 'w') as h5:
diff --git a/tests/repository/test.ini b/tests/repository/test.ini
new file mode 100644
index 00000000..092f03dc
--- /dev/null
+++ b/tests/repository/test.ini
@@ -0,0 +1,4 @@
+[zenodo:sandbox]
+access_token = 123
+[zenodo]
+access_token = 456
\ No newline at end of file
diff --git a/tests/repository/test_zenodo.py b/tests/repository/test_zenodo.py
index 9f6d6d5e..79ee86b7 100644
--- a/tests/repository/test_zenodo.py
+++ b/tests/repository/test_zenodo.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 import pathlib
 import unittest
 from datetime import datetime
@@ -8,7 +9,7 @@
 from h5rdmtoolbox.repository import zenodo, upload_file
 from h5rdmtoolbox.repository.h5metamapper import hdf2json
 from h5rdmtoolbox.repository.zenodo.metadata import Metadata, Creator, Contributor
-from h5rdmtoolbox.repository.zenodo.tokens import get_api_token
+from h5rdmtoolbox.repository.zenodo.tokens import get_api_token, set_api_token
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,83 @@ class TestConfig(unittest.TestCase):
 
     def test_get_api(self):
         self.assertIsInstance(get_api_token(sandbox=True), str)
+        from h5rdmtoolbox.repository.zenodo.tokens import _parse_ini_file
+        import appdirs
+        fname = pathlib.Path(appdirs.user_data_dir('h5rdmtoolbox')) / 'zenodo.ini'
+        bak_fname = None  # move a real user ini file out of the way and remember it
+        if fname.exists():
+            bak_fname = fname.rename(fname.with_suffix('.bak'))
+
+        with self.assertRaises(FileNotFoundError):
+            _parse_ini_file(None)
+
+        with self.assertRaises(FileNotFoundError):
+            _parse_ini_file('invalid.ini')
+
+        # restore the user's ini file if one was moved aside above
+        if bak_fname is not None:
+            bak_fname.rename(fname)
+
+        tmp_ini_file = h5tbx.utils.generate_temporary_filename(suffix='.ini', touch=True)
+        ini_filename = _parse_ini_file(tmp_ini_file)
+        self.assertEqual(ini_filename, tmp_ini_file)
+        self.assertTrue(ini_filename.exists())
+        ini_filename.unlink()
+
+    def test_get_api_token(self):
+        env_token_sb = os.environ.pop('ZENODO_SANDBOX_API_TOKEN', None)
+        env_token = os.environ.pop('ZENODO_API_TOKEN', None)
+        test_ini_filename = pathlib.Path(__file__).parent / 'test.ini'
+        self.assertEqual(get_api_token(sandbox=True, zenodo_ini_filename=test_ini_filename), '123')
+        self.assertEqual(get_api_token(sandbox=False, zenodo_ini_filename=test_ini_filename), '456')
+
+        # environment variables take precedence over the ini file
+        os.environ['ZENODO_SANDBOX_API_TOKEN'] = 'abc'
+        self.assertEqual(get_api_token(sandbox=True, zenodo_ini_filename=test_ini_filename), 'abc')
+
+        os.environ['ZENODO_API_TOKEN'] = 'def'
+        self.assertEqual('def', os.environ.get('ZENODO_API_TOKEN', None))
+        self.assertEqual(get_api_token(sandbox=False, zenodo_ini_filename=test_ini_filename), 'def')
+        os.environ.pop('ZENODO_API_TOKEN', None)
+
+        # restore the previously popped environment variables
+        if env_token_sb is not None:
+            os.environ['ZENODO_SANDBOX_API_TOKEN'] = env_token_sb
+            self.assertEqual(env_token_sb, os.environ.get('ZENODO_SANDBOX_API_TOKEN', None))
+        if env_token is not None:
+            os.environ['ZENODO_API_TOKEN'] = env_token
+            self.assertEqual(env_token, os.environ.get('ZENODO_API_TOKEN', None))
+
+    def test_set_api_token(self):
+        env_token_sb = os.environ.pop('ZENODO_SANDBOX_API_TOKEN', None)
+        env_token = os.environ.pop('ZENODO_API_TOKEN', None)
+
+        # writing to a non-existing ini file must fail
+        ini_filename = h5tbx.utils.generate_temporary_filename(suffix='.ini', touch=False)
+        with self.assertRaises(FileNotFoundError):
+            set_api_token(sandbox=True,
+                          access_token='321',
+                          zenodo_ini_filename=ini_filename)
+
+        # an existing (empty) ini file works
+        ini_filename = h5tbx.utils.generate_temporary_filename(suffix='.ini', touch=False)
+        with open(ini_filename, 'w'):
+            pass
+        set_api_token(sandbox=True,
+                      access_token='321',
+                      zenodo_ini_filename=ini_filename)
+        t = get_api_token(sandbox=True, zenodo_ini_filename=ini_filename)
+        self.assertEqual(t, '321')
+
+        set_api_token(sandbox=False,
+                      access_token='321123',
+                      zenodo_ini_filename=ini_filename)
+        t = get_api_token(sandbox=False, zenodo_ini_filename=ini_filename)
+        self.assertEqual(t, '321123')
+
+        if env_token_sb is not None:
+            os.environ['ZENODO_SANDBOX_API_TOKEN'] = env_token_sb
+            self.assertEqual(env_token_sb, os.environ.get('ZENODO_SANDBOX_API_TOKEN', None))
+        if env_token is not None:
+            os.environ['ZENODO_API_TOKEN'] = env_token
+
     def test_upload_hdf(self):
         z = zenodo.ZenodoSandboxDeposit(None)
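
For reference, the query behaviour exercised by the database tests in this patch
boils down to the following usage sketch. It is not part of the patch itself:
`h5tbx.database.File`, the `$regex` operator, the lazy result objects and the
`sel()` accessor are taken directly from the test code above, while the file
content and the printed values are illustrative only:

    import h5rdmtoolbox as h5tbx

    # create a small file: a dimension scale 'z' and a dataset with attributes
    with h5tbx.File() as h5:
        h5.create_dataset('z', data=[2, 5, 10], make_scale=True)
        h5.create_dataset('ds1', data=[1, 2, 3],
                          attrs=dict(units='', long_name='long name 1'),
                          attach_scales=('z',))
        filename = h5.hdf_filename

    # query the closed file with a MongoDB-style filter; the result is a lazy
    # object that reopens the file only when data is actually accessed
    res = h5tbx.database.File(filename).find_one({'long_name': {'$regex': 'long name'}})
    print(res.shape)                             # (3,)
    print(res.sel(z=4.8, method='nearest')[()])  # 2, the value at the scale point nearest to z=4.8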