update and unify extension implementation
1 parent fd9256b · commit c44b398
Showing 13 changed files with 1,004 additions and 719 deletions.
@@ -1,77 +1,67 @@
 """Extension to compute magnitude of xarray datasets"""
+import h5py
 import numpy as np
 import xarray as xr
-from typing import Union, Dict
+from typing import Dict, Optional

+from h5rdmtoolbox.protocols import H5TbxDataset
+from h5rdmtoolbox.wrapper.accessory import Accessory, register_special_dataset

-@xr.register_dataset_accessor("magnitude")
-class MagnitudeAccessor:
-    """Accessor to convert units of data array. It is
-    also possible to convert its coordinates"""

-    def __init__(self, xarray_obj):
-        """Initialize the accessor"""
-        self._obj = xarray_obj
+class MagnitudeInterface:
+    def __init__(self,
+                 datasets: Dict[str, H5TbxDataset],
+                 name: Optional[str] = None,
+                 keep_attrs: bool = False):
+        self.datasets = datasets
+        self.name = name
+        self.keep_attrs = keep_attrs

+    def _compute_magnitude(self, datasets):
+        assert len(datasets) > 1, 'At least two datasets are required to compute magnitude'
+        keys = list(datasets.keys())
+        mag2 = datasets[keys[0]].pint.quantify() ** 2
+        with xr.set_options(keep_attrs=self.keep_attrs):
+            for key in keys[1:]:
+                mag2 += datasets[key].pint.quantify() ** 2

-    def compute_from(self,
-                     *data_vars,
-                     name: Union[str, None] = None,
-                     inplace: bool = True,
-                     attrs: Union[Dict, None] = None,
-                     overwrite: bool = False):
-        """compute magnitude from data variable names
-        Parameters
-        ----------
-        data_vars: str
-            Names of data variables to compute magnitude from.
-        name: str
-            Name of the magnitude variable to be used in the dataset.
-            If None, the name is automatically generated.
-            Example: if data_vars = ['u', 'v', 'w'], then name is 'magnitude_of_u_v_w'
-        inplace: bool
-            If True, the magnitude variable is added to the dataset.
-            Otherwise, a new dataset is returned.
-        attrs: dict
-            Attributes to be added to the magnitude variable
-        overwrite: bool
-            If True, the magnitude variable is overwritten if it already exists in the dataset.
-        """
-        mag2 = self._obj[data_vars[0]].pint.quantify() ** 2
-        from .. import consts
-        # anc_ds = []
-        # anc_ds.append(self._obj[data_vars[0]].attrs.get(consts.ANCILLARY_DATASET, ()))
-        for data_var in data_vars[1:]:
-            mag2 += self._obj[data_var].pint.quantify() ** 2
-            # anc_ds.append(self._obj[data_var].attrs.get(consts.ANCILLARY_DATASET, ()))
-        # with xr.set_options(keep_attrs=True):
         mag = np.sqrt(mag2).pint.dequantify()
+        if self.name is None:
+            mag.name = 'magnitude_of_' + '_and_'.join(k.replace(' ', '_') for k in keys)
+        else:
+            mag.name = self.name
+        return mag

-        # drop ancillary dataset information:
-        mag.attrs.pop(consts.ANCILLARY_DATASET, None)
+    def __getitem__(self, *args, **kwargs):
+        return self._compute_magnitude(
+            {k: v.__getitem__(*args, **kwargs) for k, v in self.datasets.items()}
+        )

-        # gather ancillary dataset information from vector components:
-        _anc = [self._obj[da].attrs.get(consts.ANCILLARY_DATASET, None) for da in data_vars]
+    def isel(self, **indexers):
+        return self._compute_magnitude(
+            {k: v.isel(**indexers) for k, v in self.datasets.items()}
+        )

-        _anc = [a for a in _anc if a is not None]
-        if _anc:
-            mag.attrs[consts.ANCILLARY_DATASET] = list(set([item for sublist in _anc for item in sublist]))
+    def sel(self, method=None, **coords):
+        return self._compute_magnitude(
+            {k: v.sel(method=method, **coords) for k, v in self.datasets.items()}
+        )

-        joined_names = '_'.join(data_vars)
-        if name is None:
-            name = f'magnitude_of_{joined_names}'
-        if name in self._obj:
-            if not overwrite:
-                raise KeyError(f'The name of variable "{name}" is already exists in the dataset.')
-            del self._obj[name]
-        mag.name = name
-        processing_comment = 'processing_comment'
-        while processing_comment in mag.attrs:
-            processing_comment = f'_{processing_comment}'
-        mag.attrs['processing_comment'] = f'computed from: {joined_names.replace("_", ", ")}'
-        if attrs:
-            mag.attrs.update(attrs)

-        if inplace:
-            self._obj[name] = mag
-            return self._obj
-        return mag
+@register_special_dataset("Magnitude", "Group")
+@register_special_dataset("Magnitude", "File")
+class Magnitude(Accessory):
+    def __call__(self, *datasets, name: Optional[str] = None, keep_attrs: bool = False) -> MagnitudeInterface:
+        if len(datasets) < 2:
+            raise ValueError('Please provide at least two datasets to compute magnitude')
+        hdf_datasets = {}
+        for dataset in datasets:
+            if isinstance(dataset, str):
+                ds = self._obj[dataset]
+            elif isinstance(dataset, h5py.Dataset):
+                ds = dataset
+            else:
+                raise TypeError(f'Invalid type: {type(dataset)}')
+            hdf_datasets[ds.name.strip('/')] = ds

+        return MagnitudeInterface(hdf_datasets, name=name, keep_attrs=keep_attrs)
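
For orientation, a minimal usage sketch of the unified interface introduced above. It is not part of the commit: the file name, the component dataset names 'u' and 'v', and the 'time' dimension are illustrative assumptions, and it assumes the registered accessory is reachable as a Magnitude attribute on File and Group objects, as the register_special_dataset("Magnitude", ...) calls suggest.

    import h5rdmtoolbox as h5tbx

    # Hypothetical file layout: two velocity components "u" and "v" stored as
    # datasets with a "time" dimension (names are assumptions, not from the diff).
    with h5tbx.File('velocity.hdf', 'r') as h5:
        # The accessory returns a MagnitudeInterface without computing anything yet;
        # datasets can be passed as names or as h5py.Dataset objects.
        mag = h5.Magnitude('u', 'v', name='speed', keep_attrs=True)

        # Each selection is forwarded to every component first; the interface then
        # pint-quantifies the slices, sums their squares, takes the square root,
        # dequantifies the result, and names it 'speed'.
        speed_t0 = mag.isel(time=0)   # integer-location selection per component
        speed_sub = mag[0:10, ...]    # plain __getitem__ slicing per component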