From 0233d91a87fd8bb20bf680f48674a6bbacf454fd Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 10:48:34 +0200 Subject: [PATCH 01/32] adding links to first references of the vocabulary items --- Makefile | 1 + dev_tools/docs/nxdl.py | 47 ++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 3 +++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ae556d733..44c076c34 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ PYTHON = python3 SPHINX = sphinx-build BUILD_DIR = "build" +export NEXUS_DEF_PATH = $(shell pwd) .PHONY: help install style autoformat test clean prepare html pdf impatient-guide all local diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 8da3ebbc0..0e8d646f2 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -7,6 +7,7 @@ from typing import Optional import lxml +from pynxtools.nexus import nexus as pynxtools_nxlib from ..globals.directories import get_nxdl_root from ..globals.errors import NXDLParseError @@ -506,7 +507,7 @@ def _print_attribute(self, ns, kind, node, optional, indent, parent_path): ) self._print(f"{indent}.. index:: {index_name} ({kind} attribute)\n") self._print( - f"{indent}**@{name}**: {optional}{self._format_type(node)}{self._format_units(node)}\n" + f"{indent}**@{name}**: {optional}{self._format_type(node)}{self._format_units(node)} {self.get_first_parent_ref(f'{parent_path}/{name}', 'attribute')}\n" ) self._print_doc(indent + self._INDENTATION_UNIT, ns, node) node_list = node.xpath("nx:enumeration", namespaces=ns) @@ -549,6 +550,7 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): f"{self._format_type(node)}" f"{dims}" f"{self._format_units(node)}" + f" {self.get_first_parent_ref(f'{parent_path}/{name}', 'field')}" "\n" ) @@ -585,7 +587,9 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): # target = hTarget.replace(".. 
_", "").replace(":\n", "") # TODO: https://github.com/nexusformat/definitions/issues/1057 self._print(f"{indent}{hTarget}") - self._print(f"{indent}**{name}**: {optional_text}{typ}\n") + self._print( + f"{indent}**{name}**: {optional_text}{typ} {self.get_first_parent_ref(f'{parent_path}/{name}', 'group')}\n" + ) self._print_if_deprecated(ns, node, indent + self._INDENTATION_UNIT) self._print_doc(indent + self._INDENTATION_UNIT, ns, node) @@ -624,3 +628,42 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): def _print(self, *args, end="\n"): # TODO: change instances of \t to proper indentation self._rst_lines.append(" ".join(args) + end) + + def get_first_parent_ref(self, path, tag): + nx_name = path[1 : path.find("/", 1)] + path = path[path.find("/", 1) :] + + try: + parents = pynxtools_nxlib.get_inherited_nodes(path, nx_name)[2] + except: + return "" + if len(parents) > 1: + parent = parents[1] + parent_path = parent_display_name = parent.attrib["nxdlpath"] + parent_path_segments = parent_path[1:].split("/") + parent_def_name = parent.attrib["nxdlbase"][ + parent.attrib["nxdlbase"] + .rfind("/") : parent.attrib["nxdlbase"] + .rfind(".nxdl") + ] + + # Case where the first parent is a base_class + if parent_path_segments[0] == "": + return f":ref:`<{parent_def_name[1:]}> <{parent_def_name[1:]}>`" + + parent_display_name = ( + f"{parent_def_name[1:]}/.../{parent_path_segments[-1]}" + if len(parent_path_segments) > 1 + else f"{parent_def_name[1:]}/{parent_path_segments[-1]}" + ) + if tag == "attribute": + pos_of_right_slash = parent_path.rfind("/") + parent_path = ( + parent_path[:pos_of_right_slash] + + "@" + + parent_path[pos_of_right_slash + 1 :] + ) + return ( + f":ref:`<{parent_display_name}> <{parent_def_name}{parent_path}-{tag}>`" + ) + return "" diff --git a/requirements.txt b/requirements.txt index 6d024bda3..8b22819ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ pyyaml # Documentation building sphinx>=5 sphinx-tabs 
+sphinx-comments # Testing pytest @@ -13,3 +14,5 @@ pytest black>=22.3 flake8>=4 isort>=5.10 + +pynxtools>=0.0.3 \ No newline at end of file From a671d15a7da7dba7842de3aa853ea34fd22129d9 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 13:34:56 +0200 Subject: [PATCH 02/32] do not display first reference redundantly if it is the only reference --- dev_tools/docs/nxdl.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 0e8d646f2..fdff6a825 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -635,7 +635,7 @@ def get_first_parent_ref(self, path, tag): try: parents = pynxtools_nxlib.get_inherited_nodes(path, nx_name)[2] - except: + except FileNotFoundError: return "" if len(parents) > 1: parent = parents[1] @@ -649,7 +649,11 @@ def get_first_parent_ref(self, path, tag): # Case where the first parent is a base_class if parent_path_segments[0] == "": - return f":ref:`<{parent_def_name[1:]}> <{parent_def_name[1:]}>`" + return "" + + #special treatment for NXnote@type + if tag == "attribute" and parent_def_name == "/NXnote" and parent_path == "/type": + return "" parent_display_name = ( f"{parent_def_name[1:]}/.../{parent_path_segments[-1]}" From 124b41eedeb44d530629bc95cfea46f89dd874f5 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 13:41:29 +0200 Subject: [PATCH 03/32] reformatting --- dev_tools/docs/nxdl.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index fdff6a825..fedfe0278 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -650,9 +650,13 @@ def get_first_parent_ref(self, path, tag): # Case where the first parent is a base_class if parent_path_segments[0] == "": return "" - - #special treatment for NXnote@type - if tag == "attribute" and parent_def_name == "/NXnote" and parent_path == "/type": + + # special treatment for 
NXnote@type + if ( + tag == "attribute" + and parent_def_name == "/NXnote" + and parent_path == "/type" + ): return "" parent_display_name = ( From 8351366eacbb24aa33665ab23604371112aaaa52 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 22:07:01 +0200 Subject: [PATCH 04/32] changing to shorter link with tooltip --- dev_tools/docs/nxdl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index fedfe0278..6c7b09233 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -672,6 +672,7 @@ def get_first_parent_ref(self, path, tag): + parent_path[pos_of_right_slash + 1 :] ) return ( - f":ref:`<{parent_display_name}> <{parent_def_name}{parent_path}-{tag}>`" + f"<:abbr:`parent (parent definition: {parent_def_name[1:]}" + + f"/{parent_path_segments[-1]})`:ref:`🔗 <{parent_def_name}{parent_path}-{tag}>`>" ) return "" From 1af3a42452c2506f7e303f8a4b73022d7dcccae7 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 22:12:47 +0200 Subject: [PATCH 05/32] linting --- dev_tools/docs/nxdl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 6c7b09233..d76abfbd2 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -672,7 +672,7 @@ def get_first_parent_ref(self, path, tag): + parent_path[pos_of_right_slash + 1 :] ) return ( - f"<:abbr:`parent (parent definition: {parent_def_name[1:]}" + - f"/{parent_path_segments[-1]})`:ref:`🔗 <{parent_def_name}{parent_path}-{tag}>`>" + f"<:abbr:`parent (parent definition: {parent_def_name[1:]}" + + f"/{parent_path_segments[-1]})`:ref:`🔗 <{parent_def_name}{parent_path}-{tag}>`>" ) return "" From 8c0881d3311c75a17828d2ba86eed2cea6b399aa Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 22:33:18 +0200 Subject: [PATCH 06/32] linting --- dev_tools/docs/nxdl.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) 
diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index d76abfbd2..086e3aa76 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -659,11 +659,6 @@ def get_first_parent_ref(self, path, tag): ): return "" - parent_display_name = ( - f"{parent_def_name[1:]}/.../{parent_path_segments[-1]}" - if len(parent_path_segments) > 1 - else f"{parent_def_name[1:]}/{parent_path_segments[-1]}" - ) if tag == "attribute": pos_of_right_slash = parent_path.rfind("/") parent_path = ( @@ -671,8 +666,9 @@ def get_first_parent_ref(self, path, tag): + "@" + parent_path[pos_of_right_slash + 1 :] ) + parent_display_name = f"{parent_def_name[1:]}{parent_path}" return ( - f"<:abbr:`parent (parent definition: {parent_def_name[1:]}" - + f"/{parent_path_segments[-1]})`:ref:`🔗 <{parent_def_name}{parent_path}-{tag}>`>" + f"<:abbr:`parent (parent definition: {parent_display_name})" + + f"`:ref:`🔗 `>" ) return "" From d5faa6b0c1bbd289c1b6ee654e613535ca909187 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 22:54:42 +0200 Subject: [PATCH 07/32] supporting unicode char for latex --- manual/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/manual/source/conf.py b/manual/source/conf.py index 51b35e4bb..19a52d707 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -95,4 +95,5 @@ latex_elements = { 'maxlistdepth':7, # some application definitions are deeply nested 'preamble': '\\usepackage{amsbsy}\n' + \DeclareUnicodeCharacter{1F517}{X} } From 1138ebfe0088cf56e8505244e5419646db9ee76e Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Thu, 15 Jun 2023 23:56:12 +0200 Subject: [PATCH 08/32] short tooltip and link --- build/manual/source/conf.py | 100 ++++++++++++++++++++++++++++++++++++ dev_tools/docs/nxdl.py | 4 +- 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 build/manual/source/conf.py diff --git a/build/manual/source/conf.py b/build/manual/source/conf.py new file mode 100644 index 
000000000..f6d05e297 --- /dev/null +++ b/build/manual/source/conf.py @@ -0,0 +1,100 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +import sys, os, datetime + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'nexus' +author = 'NIAC, https://www.nexusformat.org' +copyright = u'1996-{}, {}'.format(datetime.datetime.now().year, author) +description = u'NeXus: A Common Data Format for Neutron, X-ray, and Muon Science' + +# The full version, including alpha/beta/rc tags +version = u'unknown NXDL version' +release = u'unknown NXDL release' +nxdl_version = open('../../NXDL_VERSION').read().strip() +if nxdl_version is not None: + version = nxdl_version.split('.')[0] + release = nxdl_version + + +# -- General configuration --------------------------------------------------- + +# https://github.com/nexusformat/definitions/issues/659#issuecomment-577438319 +needs_sphinx = '2.3' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx.ext.githubpages', + 'sphinx.ext.todo', + 'sphinx_tabs.tabs' +] + +# Show `.. todo` directives in the output +# todo_include_todos = True + +# Add any paths that contain templates here, relative to this directory. 
+templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaster' +html_theme = 'sphinxdoc' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add extra files +html_extra_path = ['CNAME'] + +html_sidebars = { + '**': [ + 'localtoc.html', + 'relations.html', + 'sourcelink.html', + 'searchbox.html', + 'google_search.html' + ], +} + +# Output file base name for HTML help builder. +htmlhelp_basename = 'NeXusManualdoc' + +# -- Options for Latex output ------------------------------------------------- +latex_elements = { + 'maxlistdepth':25, # some application definitions are deeply nested + 'preamble': r''' + \usepackage{amsbsy} + \usepackage[utf8]{inputenc}''' +} diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 086e3aa76..d0b4cb9c1 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -668,7 +668,7 @@ def get_first_parent_ref(self, path, tag): ) parent_display_name = f"{parent_def_name[1:]}{parent_path}" return ( - f"<:abbr:`parent (parent definition: {parent_display_name})" - + f"`:ref:`🔗 `>" + f":abbr:`... 
(override: {parent_display_name})" + + f"`:ref:`🔗 `" ) return "" From cb92872532a11368c962e0bcfcf9c2ad6b8478b8 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 00:21:04 +0200 Subject: [PATCH 09/32] adjusted conf.py --- build/manual/source/conf.py | 100 ------------------------------------ manual/source/conf.py | 5 +- 2 files changed, 3 insertions(+), 102 deletions(-) delete mode 100644 build/manual/source/conf.py diff --git a/build/manual/source/conf.py b/build/manual/source/conf.py deleted file mode 100644 index f6d05e297..000000000 --- a/build/manual/source/conf.py +++ /dev/null @@ -1,100 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -import sys, os, datetime - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -# -- Project information ----------------------------------------------------- - -project = 'nexus' -author = 'NIAC, https://www.nexusformat.org' -copyright = u'1996-{}, {}'.format(datetime.datetime.now().year, author) -description = u'NeXus: A Common Data Format for Neutron, X-ray, and Muon Science' - -# The full version, including alpha/beta/rc tags -version = u'unknown NXDL version' -release = u'unknown NXDL release' -nxdl_version = open('../../NXDL_VERSION').read().strip() -if nxdl_version is not None: - version = nxdl_version.split('.')[0] - release = nxdl_version - - -# -- General configuration --------------------------------------------------- - -# https://github.com/nexusformat/definitions/issues/659#issuecomment-577438319 -needs_sphinx = '2.3' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.todo', - 'sphinx_tabs.tabs' -] - -# Show `.. todo` directives in the output -# todo_include_todos = True - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -# html_theme = 'alabaster' -html_theme = 'sphinxdoc' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. 
They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add extra files -html_extra_path = ['CNAME'] - -html_sidebars = { - '**': [ - 'localtoc.html', - 'relations.html', - 'sourcelink.html', - 'searchbox.html', - 'google_search.html' - ], -} - -# Output file base name for HTML help builder. -htmlhelp_basename = 'NeXusManualdoc' - -# -- Options for Latex output ------------------------------------------------- -latex_elements = { - 'maxlistdepth':25, # some application definitions are deeply nested - 'preamble': r''' - \usepackage{amsbsy} - \usepackage[utf8]{inputenc}''' -} diff --git a/manual/source/conf.py b/manual/source/conf.py index 19a52d707..346e664d9 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -94,6 +94,7 @@ # -- Options for Latex output ------------------------------------------------- latex_elements = { 'maxlistdepth':7, # some application definitions are deeply nested - 'preamble': '\\usepackage{amsbsy}\n' - \DeclareUnicodeCharacter{1F517}{X} + 'preamble': r''' + \usepackage{amsbsy} + \DeclareUnicodeCharacter{1F517}{X}''' } From c117842cfc088b9e3babb8f368c9d113e5890642 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 00:42:05 +0200 Subject: [PATCH 10/32] removing pynxtools as dependecy --- dev_tools/docs/nxdl.py | 2 +- dev_tools/utils/nexus.py | 1394 ++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 - 3 files changed, 1395 insertions(+), 4 deletions(-) create mode 100644 dev_tools/utils/nexus.py diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index d0b4cb9c1..1316e230a 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -7,7 +7,7 @@ from typing import Optional import lxml -from pynxtools.nexus import nexus as pynxtools_nxlib +from ..utils import nexus as pynxtools_nxlib from ..globals.directories import get_nxdl_root from ..globals.errors import NXDLParseError diff 
--git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py new file mode 100644 index 000000000..ac1d8b36c --- /dev/null +++ b/dev_tools/utils/nexus.py @@ -0,0 +1,1394 @@ +# pylint: disable=too-many-lines +"""Read files from different format and print it in a standard NeXus format +""" + +import os +import xml.etree.ElementTree as ET +from functools import lru_cache +from glob import glob +import sys +import logging +import textwrap +import h5py +import click + + +class NxdlAttributeError(Exception): + """An exception for throwing an error when an Nxdl attribute is not found.""" + + +def get_app_defs_names(): + """Returns all the AppDef names without their extension: .nxdl.xml""" + app_def_path_glob = f"{get_nexus_definitions_path()}{os.sep}applications{os.sep}*.nxdl*" + contrib_def_path_glob = (f"{get_nexus_definitions_path()}{os.sep}" + f"contributed_definitions{os.sep}*.nxdl*") + files = sorted(glob(app_def_path_glob)) + sorted(glob(contrib_def_path_glob)) + return [os.path.basename(file).split(".")[0] for file in files] + ["NXroot"] + + +@lru_cache(maxsize=None) +def get_xml_root(file_path): + """Reducing I/O time by caching technique""" + + return ET.parse(file_path).getroot() + + +def get_nexus_definitions_path(): + """Check NEXUS_DEF_PATH variable. 
+If it is empty, this function is filling it""" + try: # either given by sys env + return os.environ['NEXUS_DEF_PATH'] + except KeyError: # or it should be available locally under the dir 'definitions' + local_dir = os.path.abspath(os.path.dirname(__file__)) + return os.path.join(local_dir, f"..{os.sep}definitions") + + +def get_hdf_root(hdf_node): + """Get the root HDF5 node""" + node = hdf_node + while node.name != '/': + node = node.parent + return node + + +def get_hdf_parent(hdf_info): + """Get the parent of an hdf_node in an hdf_info""" + if 'hdf_path' not in hdf_info: + return hdf_info['hdf_node'].parent + node = get_hdf_root(hdf_info['hdf_node']) if 'hdf_root' not in hdf_info \ + else hdf_info['hdf_root'] + for child_name in hdf_info['hdf_path'].split('/'): + node = node[child_name] + return node + + +def get_parent_path(hdf_name): + """Get parent path""" + return '/'.join(hdf_name.split('/')[:-1]) + + +def get_hdf_info_parent(hdf_info): + """Get the hdf_info for the parent of an hdf_node in an hdf_info""" + if 'hdf_path' not in hdf_info: + return {'hdf_node': hdf_info['hdf_node'].parent} + node = get_hdf_root(hdf_info['hdf_node']) if 'hdf_root' not in hdf_info \ + else hdf_info['hdf_root'] + for child_name in hdf_info['hdf_path'].split('/')[1:-1]: + node = node[child_name] + return {'hdf_node': node, 'hdf_path': get_parent_path(hdf_info['hdf_path'])} + + +def get_nx_class_path(hdf_info): + """Get the full path of an HDF5 node using nexus classes +in case of a field, end with the field name""" + hdf_node = hdf_info['hdf_node'] + if hdf_node.name == '/': + return '' + if isinstance(hdf_node, h5py.Group): + return get_nx_class_path(get_hdf_info_parent(hdf_info)) + '/' + \ + (hdf_node.attrs['NX_class'] if 'NX_class' in hdf_node.attrs.keys() else + hdf_node.name.split('/')[-1]) + if isinstance(hdf_node, h5py.Dataset): + return get_nx_class_path( + get_hdf_info_parent(hdf_info)) + '/' + hdf_node.name.split('/')[-1] + return '' + + +def get_nxdl_entry(hdf_info): 
+ """Get the nxdl application definition for an HDF5 node""" + entry = hdf_info + while isinstance(entry['hdf_node'], h5py.Dataset) or \ + 'NX_class' not in entry['hdf_node'].attrs.keys() or \ + entry['hdf_node'].attrs['NX_class'] != 'NXentry': + entry = get_hdf_info_parent(entry) + if entry['hdf_node'].name == '/': + return 'NO NXentry found' + try: + nxdef = entry['hdf_node']['definition'][()] + return nxdef.decode() + except KeyError: # 'NO Definition referenced' + return "NXentry" + + +def get_nx_class(nxdl_elem): + """Get the nexus class for a NXDL node""" + if 'category' in nxdl_elem.attrib.keys(): + return None + try: + return nxdl_elem.attrib['type'] + except KeyError: + return 'NX_CHAR' + + +def get_nx_namefit(hdf_name, name, name_any=False): + """Checks if an HDF5 node name corresponds to a child of the NXDL element +uppercase letters in front can be replaced by arbitraty name, but +uppercase to lowercase match is preferred, +so such match is counted as a measure of the fit""" + if name == hdf_name: + return len(name) * 2 + # count leading capitals + counting = 0 + while counting < len(name) and name[counting].upper() == name[counting]: + counting += 1 + if name_any or counting == len(name) or \ + (counting > 0 and hdf_name.endswith(name[counting:])): # if potential fit + # count the matching chars + fit = 0 + for i in range(min(counting, len(hdf_name))): + if hdf_name[i].upper() == name[i]: + fit += 1 + else: + break + if fit == min(counting, len(hdf_name)): # accept only full fits as better fits + return fit + return 0 + return -1 # no fit + + +def get_nx_classes(): + """Read base classes from the NeXus definition folder. +Check each file in base_classes, applications, contributed_definitions. +If its category attribute is 'base', then it is added to the list. 
""" + base_classes = sorted(glob(os.path.join(get_nexus_definitions_path(), + 'base_classes', '*.nxdl.xml'))) + applications = sorted(glob(os.path.join(get_nexus_definitions_path(), + 'applications', '*.nxdl.xml'))) + contributed = sorted(glob(os.path.join(get_nexus_definitions_path(), + 'contributed_definitions', '*.nxdl.xml'))) + nx_clss = [] + for nexus_file in base_classes + applications + contributed: + root = get_xml_root(nexus_file) + if root.attrib['category'] == 'base': + nx_clss.append(str(nexus_file[nexus_file.rindex(os.sep) + 1:])[:-9]) + nx_clss = sorted(nx_clss) + return nx_clss + + +def get_nx_units(): + """Read unit kinds from the NeXus definition/nxdlTypes.xsd file""" + filepath = f"{get_nexus_definitions_path()}{os.sep}nxdlTypes.xsd" + root = get_xml_root(filepath) + units_and_type_list = [] + for child in root: + for i in child.attrib.values(): + units_and_type_list.append(i) + flag = False + for line in units_and_type_list: + if line == 'anyUnitsAttr': + flag = True + nx_units = [] + elif 'NX' in line and flag is True: + nx_units.append(line) + elif line == 'primitiveType': + flag = False + else: + pass + return nx_units + + +def get_nx_attribute_type(): + """Read attribute types from the NeXus definition/nxdlTypes.xsd file""" + filepath = get_nexus_definitions_path() + '/nxdlTypes.xsd' + root = get_xml_root(filepath) + units_and_type_list = [] + for child in root: + for i in child.attrib.values(): + units_and_type_list.append(i) + flag = False + for line in units_and_type_list: + if line == 'primitiveType': + flag = True + nx_types = [] + elif 'NX' in line and flag is True: + nx_types.append(line) + elif line == 'anyUnitsAttr': + flag = False + else: + pass + return nx_types + + +def get_node_name(node): + '''Node - xml node. Returns html documentation name. + Either as specified by the 'name' or taken from the type (nx_class). 
+ Note that if only class name is available, the NX prefix is removed and + the string is converted to UPPER case.''' + if 'name' in node.attrib.keys(): + name = node.attrib['name'] + else: + name = node.attrib['type'] + if name.startswith('NX'): + name = name[2:].upper() + return name + + +def belongs_to(nxdl_elem, child, name, class_type=None, hdf_name=None): + """Checks if an HDF5 node name corresponds to a child of the NXDL element +uppercase letters in front can be replaced by arbitraty name, but +uppercase to lowercase match is preferred""" + if class_type and get_nx_class(child) != class_type: + return False + act_htmlname = get_node_name(child) + chk_name = hdf_name or name + if act_htmlname == chk_name: + return True + if not hdf_name: # search for name fits is only allowed for hdf_nodes + return False + try: # check if nameType allows different name + name_any = bool(child.attrib['nameType'] == "any") + except KeyError: + name_any = False + params = [act_htmlname, chk_name, name_any, nxdl_elem, child, name] + return belongs_to_capital(params) + + +def belongs_to_capital(params): + """Checking continues for Upper case""" + (act_htmlname, chk_name, name_any, nxdl_elem, child, name) = params + # or starts with capital and no reserved words used + if (name_any or 'A' <= act_htmlname[0] <= 'Z') and \ + name != 'doc' and name != 'enumeration': + fit = get_nx_namefit(chk_name, act_htmlname, name_any) # check if name fits + if fit < 0: + return False + for child2 in nxdl_elem: + if get_local_name_from_xml(child) != \ + get_local_name_from_xml(child2) or get_node_name(child2) == act_htmlname: + continue + # check if the name of another sibling fits better + name_any2 = "nameType" in child2.attrib.keys() and child2.attrib["nameType"] == "any" + fit2 = get_nx_namefit(chk_name, get_node_name(child2), name_any2) + if fit2 > fit: + return False + # accept this fit + return True + return False + + +def get_local_name_from_xml(element): + """Helper function to extract 
the element tag without the namespace.""" + return element.tag[element.tag.rindex("}") + 1:] + + +def get_own_nxdl_child_reserved_elements(child, name, nxdl_elem): + """checking reserved elements, like doc, enumeration""" + if get_local_name_from_xml(child) == 'doc' and name == 'doc': + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/doc') + return child + if get_local_name_from_xml(child) == 'enumeration' and name == 'enumeration': + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/enumeration') + return child + return False + + +def get_own_nxdl_child_base_types(child, class_type, nxdl_elem, name, hdf_name): + """checking base types of group, field,m attribute""" + if get_local_name_from_xml(child) == 'group': + if (class_type is None or (class_type and get_nx_class(child) == class_type)) and \ + belongs_to(nxdl_elem, child, name, class_type, hdf_name): + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + return child + if get_local_name_from_xml(child) == 'field' and \ + belongs_to(nxdl_elem, child, name, None, hdf_name): + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + return child + if get_local_name_from_xml(child) == 'attribute' and \ + belongs_to(nxdl_elem, child, name, None, hdf_name): + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', 
nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + return child + return False + + +def get_own_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None): + """Checks if an NXDL child node fits to the specific name (either nxdl or hdf) + name - nxdl name + class_type - nxdl type or hdf classname (for groups, it is obligatory) + hdf_name - hdf name""" + for child in nxdl_elem: + if 'name' in child.attrib and child.attrib['name'] == name: + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + return child + for child in nxdl_elem: + if "name" in child.attrib and child.attrib["name"] == name: + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + return child + + for child in nxdl_elem: + result = get_own_nxdl_child_reserved_elements(child, name, nxdl_elem) + if result is not False: + return result + if nexus_type and get_local_name_from_xml(child) != nexus_type: + continue + result = get_own_nxdl_child_base_types(child, class_type, nxdl_elem, name, hdf_name) + if result is not False: + return result + return None + + +def find_definition_file(bc_name): + """find the nxdl file corresponding to the name. 
+ Note that it first checks in contributed and goes beyond only if no contributed found""" + bc_filename = None + for nxdl_folder in ['contributed_definitions', 'base_classes', 'applications']: + if os.path.exists(f"{get_nexus_definitions_path()}{os.sep}" + f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"): + bc_filename = f"{get_nexus_definitions_path()}{os.sep}" \ + f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml" + break + return bc_filename + + +def get_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None, go_base=True): # pylint: disable=too-many-arguments + """Get the NXDL child node corresponding to a specific name +(e.g. of an HDF5 node,or of a documentation) note that if child is not found in application +definition, it also checks for the base classes""" + # search for possible fits for hdf_nodes : skipped + # only exact hits are returned when searching an nxdl child + own_child = get_own_nxdl_child(nxdl_elem, name, class_type, hdf_name, nexus_type) + if own_child is not None: + return own_child + if not go_base: + return None + bc_name = get_nx_class(nxdl_elem) # check in the base class, app def or contributed + if bc_name[2] == '_': # filter primitive types + return None + if bc_name == "group": # Check if it is the root element. 
Then send to NXroot.nxdl.xml + bc_name = "NXroot" + bc_filename = find_definition_file(bc_name) + if not bc_filename: + raise ValueError('nxdl file not found in definitions folder!') + bc_obj = ET.parse(bc_filename).getroot() + bc_obj.set('nxdlbase', bc_filename) + if 'category' in bc_obj.attrib: + bc_obj.set('nxdlbase_class', bc_obj.attrib['category']) + bc_obj.set('nxdlpath', '') + return get_own_nxdl_child(bc_obj, name, class_type, hdf_name, nexus_type) + + +def get_required_string(nxdl_elem): + """Check for being REQUIRED, RECOMMENDED, OPTIONAL, NOT IN SCHEMA""" + if nxdl_elem is None: + return "<>" + is_optional = 'optional' in nxdl_elem.attrib.keys() \ + and nxdl_elem.attrib['optional'] == "true" + is_minoccurs = 'minOccurs' in nxdl_elem.attrib.keys() \ + and nxdl_elem.attrib['minOccurs'] == "0" + is_recommended = 'recommended' in nxdl_elem.attrib.keys() \ + and nxdl_elem.attrib['recommended'] == "true" + + if is_recommended: + return "<>" + if is_optional or is_minoccurs: + return "<>" + # default optionality: in BASE CLASSES is true; in APPLICATIONS is false + try: + if nxdl_elem.get('nxdlbase_class') == 'base': + return "<>" + except TypeError: + return "<>" + return "<>" + + +def chk_nxdataaxis_v2(hdf_node, name, logger): + """Check if dataset is an axis""" + own_signal = hdf_node.attrs.get('signal') # check for being a Signal + if own_signal is str and own_signal == "1": + logger.debug("Dataset referenced (v2) as NXdata SIGNAL") + own_axes = hdf_node.attrs.get('axes') # check for being an axis + if own_axes is str: + axes = own_axes.split(':') + for i in len(axes): + if axes[i] and name == axes[i]: + logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", i) + return None + ownpaxis = hdf_node.attrs.get('primary') + own_axis = hdf_node.attrs.get('axis') + if own_axis is int: + # also convention v1 + if ownpaxis is int and ownpaxis == 1: + logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", own_axis - 1) + else: + logger.debug( + "Dataset 
referenced (v2) as NXdata (primary/alternative) AXIS #%d", own_axis - 1) + return None + + +def chk_nxdataaxis(hdf_node, name, logger): + """NEXUS Data Plotting Standard v3: new version from 2014""" + if not isinstance(hdf_node, h5py.Dataset): # check if it is a field in an NXdata node + return None + parent = hdf_node.parent + if not parent or (parent and not parent.attrs.get('NX_class') == "NXdata"): + return None + signal = parent.attrs.get('signal') # chk for Signal + if signal and name == signal: + logger.debug("Dataset referenced as NXdata SIGNAL") + return None + axes = parent.attrs.get('axes') # check for default Axes + if axes is str: + if name == axes: + logger.debug("Dataset referenced as NXdata AXIS") + return None + elif axes is not None: + for i, j in enumerate(axes): + if name == j: + indices = parent.attrs.get(j + '_indices') + if indices is int: + logger.debug(f"Dataset referenced as NXdata AXIS #{indices}") + else: + logger.debug(f"Dataset referenced as NXdata AXIS #{i}") + return None + indices = parent.attrs.get(name + '_indices') # check for alternative Axes + if indices is int: + logger.debug(f"Dataset referenced as NXdata alternative AXIS #{indices}") + return chk_nxdataaxis_v2(hdf_node, name, logger) # check for older conventions + + +# below there are some functions used in get_nxdl_doc function: +def write_doc_string(logger, doc, attr): + """Simple function that prints a line in the logger if doc exists""" + if doc: + logger.debug("@" + attr + ' [NX_CHAR]') + return logger, doc, attr + + +def try_find_units(logger, elem, nxdl_path, doc, attr): + """Try to find if units is defined inside the field in the NXDL element, + otherwise try to find if units is defined as a child of the NXDL element.""" + try: # try to find if units is defined inside the field in the NXDL element + unit = elem.attrib[attr] + if doc: + logger.debug(get_node_concept_path(elem) + "@" + attr + ' [' + unit + ']') + elem = None + nxdl_path.append(attr) + except KeyError: 
# otherwise try to find if units is defined as a child of the NXDL element + orig_elem = elem + elem = get_nxdl_child(elem, attr, nexus_type='attribute') + if elem is not None: + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + ' - [' + get_nx_class(elem) + ']') + nxdl_path.append(elem) + else: # if no units category were defined in NXDL: + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + " - REQUIRED, but undefined unit category") + nxdl_path.append(attr) + return logger, elem, nxdl_path, doc, attr + + +def check_attr_name_nxdl(param): + """Check for ATTRIBUTENAME_units in NXDL (normal). +If not defined, check for ATTRIBUTENAME to see if the ATTRIBUTE +is in the SCHEMA, but no units category were defined. """ + (logger, elem, nxdl_path, doc, attr, req_str) = param + orig_elem = elem + elem2 = get_nxdl_child(elem, attr, nexus_type='attribute') + if elem2 is not None: # check for ATTRIBUTENAME_units in NXDL (normal) + elem = elem2 + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + ' - [' + get_nx_class(elem) + ']') + nxdl_path.append(elem) + else: + # if not defined, check for ATTRIBUTENAME to see if the ATTRIBUTE + # is in the SCHEMA, but no units category were defined + elem2 = get_nxdl_child(elem, attr[:-6], nexus_type='attribute') + if elem2 is not None: + req_str = '<>' + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + " - RECOMMENDED, but undefined unit category") + nxdl_path.append(attr) + else: # otherwise: NOT IN SCHEMA + elem = elem2 + if doc: + logger.debug(get_node_concept_path(orig_elem) + "@" + attr + " - IS NOT IN SCHEMA") + return logger, elem, nxdl_path, doc, attr, req_str + + +def try_find_default(logger, orig_elem, elem, nxdl_path, doc, attr): # pylint: disable=too-many-arguments + """Try to find if default is defined as a child of the NXDL element """ + if elem is not None: + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + ' - 
[' + get_nx_class(elem) + ']') + nxdl_path.append(elem) + else: # if no default category were defined in NXDL: + if doc: + logger.debug(get_node_concept_path(orig_elem) + "@" + attr + " - [NX_CHAR]") + nxdl_path.append(attr) + return logger, elem, nxdl_path, doc, attr + + +def other_attrs(logger, orig_elem, elem, nxdl_path, doc, attr): # pylint: disable=too-many-arguments + """Handle remaining attributes """ + if elem is not None: + if doc: + logger.debug(get_node_concept_path(orig_elem) + + "@" + attr + ' - [' + get_nx_class(elem) + ']') + nxdl_path.append(elem) + else: + if doc: + logger.debug(get_node_concept_path(orig_elem) + "@" + attr + " - IS NOT IN SCHEMA") + return logger, elem, nxdl_path, doc, attr + + +def check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node): + """Check for several attributes. - deprecation - enums - nxdataaxis """ + logger, elem, path = variables + dep_str = elem.attrib.get('deprecated') # check for deprecation + if dep_str: + if doc: + logger.debug("DEPRECATED - " + dep_str) + for base_elem in elist if not attr else [elem]: # check for enums + sdoc = get_nxdl_child(base_elem, 'enumeration', go_base=False) + if sdoc is not None: + if doc: + logger.debug("enumeration (" + get_node_concept_path(base_elem) + "):") + for item in sdoc: + if get_local_name_from_xml(item) == 'item': + if doc: + logger.debug("-> " + item.attrib['value']) + chk_nxdataaxis(hdf_node, path.split('/')[-1], logger) # look for NXdata reference (axes/signal) + for base_elem in elist if not attr else [elem]: # check for doc + sdoc = get_nxdl_child(base_elem, 'doc', go_base=False) + if doc: + logger.debug("documentation (" + get_node_concept_path(base_elem) + "):") + logger.debug(sdoc.text if sdoc is not None else "") + return logger, elem, path, doc, elist, attr, hdf_node + + +def get_node_concept_path(elem): + """get the short version of nxdlbase:nxdlpath""" + return str(elem.get('nxdlbase').split('/')[-1] + ":" + elem.get('nxdlpath')) + + +def 
get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals + elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info): + """Get nxdl documentation for an attribute""" + new_elem = [] + old_elem = elem + for elem_index, act_elem1 in enumerate(elist): + act_elem = act_elem1 + # NX_class is a compulsory attribute for groups in a nexus file + # which should match the type of the corresponding NXDL element + if attr == 'NX_class' and not isinstance(hdf_node, h5py.Dataset) and elem_index == 0: + elem = None + logger, doc, attr = write_doc_string(logger, doc, attr) + new_elem = elem + break + # units category is a compulsory attribute for any fields + if attr == 'units' and isinstance(hdf_node, h5py.Dataset): + req_str = "<>" + logger, act_elem, nxdl_path, doc, attr = try_find_units(logger, + act_elem, + nxdl_path, + doc, + attr) + # units for attributes can be given as ATTRIBUTENAME_units + elif attr.endswith('_units'): + logger, act_elem, nxdl_path, doc, attr, req_str = check_attr_name_nxdl((logger, + act_elem, + nxdl_path, + doc, + attr, + req_str)) + # default is allowed for groups + elif attr == 'default' and not isinstance(hdf_node, h5py.Dataset): + req_str = "<>" + # try to find if default is defined as a child of the NXDL element + act_elem = get_nxdl_child(act_elem, attr, nexus_type='attribute', go_base=False) + logger, act_elem, nxdl_path, doc, attr = try_find_default(logger, + act_elem1, + act_elem, + nxdl_path, + doc, + attr) + else: # other attributes + act_elem = get_nxdl_child(act_elem, attr, nexus_type='attribute', go_base=False) + if act_elem is not None: + logger, act_elem, nxdl_path, doc, attr = \ + other_attrs(logger, act_elem1, act_elem, nxdl_path, doc, attr) + if act_elem is not None: + new_elem.append(act_elem) + if req_str is None: + req_str = get_required_string(act_elem) # check for being required + if doc: + logger.debug(req_str) + variables = [logger, act_elem, path] + logger, elem, path, doc, elist, attr, 
hdf_node = check_deprecation_enum_axis(variables, + doc, + elist, + attr, + hdf_node) + elem = old_elem + if req_str is None and doc: + if attr != 'NX_class': + logger.debug("@" + attr + " - IS NOT IN SCHEMA") + logger.debug("") + return (req_str, get_nxdl_entry(hdf_info), nxdl_path) + + +def get_nxdl_doc(hdf_info, logger, doc, attr=False): + """Get nxdl documentation for an HDF5 node (or its attribute)""" + hdf_node = hdf_info['hdf_node'] + # new way: retrieve multiple inherited base classes + (class_path, nxdl_path, elist) = \ + get_inherited_nodes(None, nx_name=get_nxdl_entry(hdf_info), hdf_node=hdf_node, + hdf_path=hdf_info['hdf_path'] if 'hdf_path' in hdf_info else None, + hdf_root=hdf_info['hdf_root'] if 'hdf_root' in hdf_info else None) + elem = elist[0] if class_path and elist else None + if doc: + logger.debug("classpath: " + str(class_path)) + logger.debug("NOT IN SCHEMA" if elem is None else + "classes:\n" + "\n".join + (get_node_concept_path(e) for e in elist)) + # old solution with a single elem instead of using elist + path = get_nx_class_path(hdf_info) + req_str = None + if elem is None: + if doc: + logger.debug("") + return ('None', None, None) + if attr: + return get_nxdl_attr_doc(elem, elist, attr, hdf_node, logger, doc, nxdl_path, + req_str, path, hdf_info) + req_str = get_required_string(elem) # check for being required + if doc: + logger.debug(req_str) + variables = [logger, elem, path] + logger, elem, path, doc, elist, attr, hdf_node = check_deprecation_enum_axis(variables, + doc, + elist, + attr, + hdf_node) + return (req_str, get_nxdl_entry(hdf_info), nxdl_path) + + +def get_doc(node, ntype, nxhtml, nxpath): + """Get documentation""" + # URL for html documentation + anchor = '' + for n_item in nxpath: + anchor += n_item.lower() + "-" + anchor = ('https://manual.nexusformat.org/classes/', + nxhtml + "#" + anchor.replace('_', '-') + ntype) + if not ntype: + anchor = anchor[:-1] + doc = "" # RST documentation from the field 'doc' + doc_field = 
node.find("doc") + if doc_field is not None: + doc = doc_field.text + (index, enums) = get_enums(node) # enums + if index: + enum_str = "\n " + ("Possible values:" + if len(enums.split(',')) > 1 + else "Obligatory value:") + "\n " + enums + "\n" + else: + enum_str = "" + return anchor, doc + enum_str + + +def print_doc(node, ntype, level, nxhtml, nxpath): + """Print documentation""" + anchor, doc = get_doc(node, ntype, nxhtml, nxpath) + print(" " * (level + 1) + anchor) + preferred_width = 80 + level * 2 + wrapper = textwrap.TextWrapper(initial_indent=' ' * (level + 1), width=preferred_width, + subsequent_indent=' ' * (level + 1), expand_tabs=False, + tabsize=0) + if doc is not None: + for par in doc.split('\n'): + print(wrapper.fill(par)) + + +def get_namespace(element): + """Extracts the namespace for elements in the NXDL""" + return element.tag[element.tag.index("{"):element.tag.rindex("}") + 1] + + +def get_enums(node): + """Makes list of enumerations, if node contains any. + Returns comma separated STRING of enumeration values, if there are enum tag, + otherwise empty string.""" + # collect item values from enumeration tag, if any + namespace = get_namespace(node) + enums = [] + for enumeration in node.findall(f"{namespace}enumeration"): + for item in enumeration.findall(f"{namespace}item"): + enums.append(item.attrib["value"]) + enums = ','.join(enums) + if enums != "": + return (True, '[' + enums + ']') + return (False, "") # if there is no enumeration tag, returns empty string + + +def add_base_classes(elist, nx_name=None, elem: ET.Element = None): + """Add the base classes corresponding to the last eleme in elist to the list. 
Note that if +elist is empty, a nxdl file with the name of nx_name or a rather room elem is used if provided""" + if elist and nx_name is None: + nx_name = get_nx_class(elist[-1]) + # to support recursive defintions, like NXsample in NXsample, the following test is removed + # if elist and nx_name and f"{nx_name}.nxdl.xml" in (e.get('nxdlbase') for e in elist): + # return + if elem is None: + if not nx_name: + return + nxdl_file_path = find_definition_file(nx_name) + if nxdl_file_path is None: + nxdl_file_path = f"{nx_name}.nxdl.xml" + elem = ET.parse(nxdl_file_path).getroot() + elem.set('nxdlbase', nxdl_file_path) + else: + elem.set('nxdlbase', '') + if 'category' in elem.attrib: + elem.set('nxdlbase_class', elem.attrib['category']) + elem.set('nxdlpath', '') + elist.append(elem) + # add inherited base class + if 'extends' in elem.attrib and elem.attrib['extends'] != 'NXobject': + add_base_classes(elist, elem.attrib['extends']) + else: + add_base_classes(elist) + + +def set_nxdlpath(child, nxdl_elem): + """ + Setting up child nxdlbase, nxdlpath and nxdlbase_class from nxdl_element. 
+ """ + if nxdl_elem.get('nxdlbase'): + child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) + child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + return child + + +def get_direct_child(nxdl_elem, html_name): + """ returns the child of nxdl_elem which has a name + corresponding to the the html documentation name html_name""" + for child in nxdl_elem: + if get_local_name_from_xml(child) in ('group', 'field', 'attribute') and \ + html_name == get_node_name(child): + decorated_child = set_nxdlpath(child, nxdl_elem) + return decorated_child + return None + + +def get_field_child(nxdl_elem, html_name): + """ returns the child of nxdl_elem which has a name + corresponding to the html documentation name html_name""" + data_child = None + for child in nxdl_elem: + if get_local_name_from_xml(child) != 'field': + continue + if get_node_name(child) == html_name: + data_child = set_nxdlpath(child, nxdl_elem) + break + return data_child + + +def get_best_nxdata_child(nxdl_elem, hdf_node, hdf_name): + """ returns the child of an NXdata nxdl_elem which has a name + corresponding to the hdf_name""" + nxdata = hdf_node.parent + signals = [] + if 'signal' in nxdata.attrs.keys(): + signals.append(nxdata.attrs.get("signal")) + if "auxiliary_signals" in nxdata.attrs.keys(): + for aux_signal in nxdata.attrs.get("auxiliary_signals"): + signals.append(aux_signal) + data_child = get_field_child(nxdl_elem, 'DATA') + data_error_child = get_field_child(nxdl_elem, 'FIELDNAME_errors') + for signal in signals: + if signal == hdf_name: + return (data_child, 100) + if hdf_name.endswith('_errors') and signal == hdf_name[:-7]: + return (data_error_child, 100) + axes = [] + if "axes" in nxdata.attrs.keys(): + for axis in nxdata.attrs.get("axes"): + axes.append(axis) + axis_child = get_field_child(nxdl_elem, 'AXISNAME') + for axis in axes: + if axis == hdf_name: + return (axis_child, 100) + return (None, 0) + + +def 
get_best_child(nxdl_elem, hdf_node, hdf_name, hdf_class_name, nexus_type): + """ returns the child of nxdl_elem which has a name + corresponding to the the html documentation name html_name""" + bestfit = -1 + bestchild = None + if 'name' in nxdl_elem.attrib.keys() and nxdl_elem.attrib['name'] == 'NXdata' and \ + hdf_node is not None and hdf_node.parent is not None and \ + hdf_node.parent.attrs.get('NX_class') == 'NXdata': + (fnd_child, fit) = get_best_nxdata_child(nxdl_elem, hdf_node, hdf_name) + if fnd_child is not None: + return (fnd_child, fit) + for child in nxdl_elem: + fit = -2 + if get_local_name_from_xml(child) == nexus_type and \ + (nexus_type != 'group' or get_nx_class(child) == hdf_class_name): + name_any = "nameType" in nxdl_elem.attrib.keys() and \ + nxdl_elem.attrib["nameType"] == "any" + fit = get_nx_namefit(hdf_name, get_node_name(child), name_any) + if fit > bestfit: + bestfit = fit + bestchild = set_nxdlpath(child, nxdl_elem) + return (bestchild, bestfit) + + +def walk_elist(elist, html_name): + """Handle elist from low priority inheritance classes to higher""" + for ind in range(len(elist) - 1, -1, -1): + child = get_direct_child(elist[ind], html_name) + if child is None: + # check for names fitting to a superclas definition + main_child = None + for potential_direct_parent in elist: + main_child = get_direct_child(potential_direct_parent, html_name) + if main_child is not None: + (fitting_child, _) = get_best_child(elist[ind], None, html_name, + get_nx_class(main_child), + get_local_name_from_xml(main_child)) + if fitting_child is not None: + child = fitting_child + break + elist[ind] = child + if elist[ind] is None: + del elist[ind] + continue + # override: remove low priority inheritance classes if class_type is overriden + if len(elist) > ind + 1 and get_nx_class(elist[ind]) != get_nx_class(elist[ind + 1]): + del elist[ind + 1:] + # add new base class(es) if new element brings such (and not a primitive type) + if len(elist) == ind + 1 and 
get_nx_class(elist[ind])[0:3] != 'NX_': + add_base_classes(elist) + return elist, html_name + + +def helper_get_inherited_nodes(hdf_info2, elist, pind, attr): + """find the best fitting name in all children""" + hdf_path, hdf_node, hdf_class_path = hdf_info2 + hdf_name = hdf_path[pind] + hdf_class_name = hdf_class_path[pind] + if pind < len(hdf_path) - (2 if attr else 1): + act_nexus_type = 'group' + elif pind == len(hdf_path) - 1 and attr: + act_nexus_type = 'attribute' + else: + act_nexus_type = 'field' if isinstance(hdf_node, h5py.Dataset) else 'group' + # find the best fitting name in all children + bestfit = -1 + html_name = None + for ind in range(len(elist) - 1, -1, -1): + newelem, fit = get_best_child(elist[ind], + hdf_node, + hdf_name, + hdf_class_name, + act_nexus_type) + if fit >= bestfit and newelem is not None: + html_name = get_node_name(newelem) + return hdf_path, hdf_node, hdf_class_path, elist, pind, attr, html_name + + +def get_hdf_path(hdf_info): + """Get the hdf_path from an hdf_info""" + if 'hdf_path' in hdf_info: + return hdf_info['hdf_path'].split('/')[1:] + return hdf_info['hdf_node'].name.split('/')[1:] + + +@lru_cache(maxsize=None) +def get_inherited_nodes(nxdl_path: str = None, # pylint: disable=too-many-arguments,too-many-locals + nx_name: str = None, elem: ET.Element = None, + hdf_node=None, hdf_path=None, hdf_root=None, attr=False): + """Returns a list of ET.Element for the given path.""" + # let us start with the given definition file + elist = [] # type: ignore[var-annotated] + add_base_classes(elist, nx_name, elem) + nxdl_elem_path = [elist[0]] + + class_path = [] # type: ignore[var-annotated] + if hdf_node is not None: + hdf_info = {'hdf_node': hdf_node} + if hdf_path: + hdf_info['hdf_path'] = hdf_path + if hdf_root: + hdf_root['hdf_root'] = hdf_root + hdf_node = hdf_info['hdf_node'] + hdf_path = get_hdf_path(hdf_info) + hdf_class_path = get_nx_class_path(hdf_info).split('/')[1:] + if attr: + hdf_path.append(attr) + 
hdf_class_path.append(attr) + path = hdf_path + else: + html_path = nxdl_path.split('/')[1:] + path = html_path + for pind in range(len(path)): + if hdf_node is not None: + hdf_info2 = [hdf_path, hdf_node, hdf_class_path] + [hdf_path, hdf_node, hdf_class_path, elist, + pind, attr, html_name] = helper_get_inherited_nodes(hdf_info2, elist, + pind, attr) + if html_name is None: # return if NOT IN SCHEMA + return (class_path, nxdl_elem_path, None) + else: + html_name = html_path[pind] + elist, html_name = walk_elist(elist, html_name) + if elist: + class_path.append(get_nx_class(elist[0])) + nxdl_elem_path.append(elist[0]) + return (class_path, nxdl_elem_path, elist) + + +def get_node_at_nxdl_path(nxdl_path: str = None, + nx_name: str = None, elem: ET.Element = None, + exc: bool = True): + """Returns an ET.Element for the given path. + This function either takes the name for the NeXus Application Definition + we are looking for or the root elem from a previously loaded NXDL file + and finds the corresponding XML element with the needed attributes.""" + try: + (class_path, nxdlpath, elist) = get_inherited_nodes(nxdl_path, nx_name, elem) + except ValueError as value_error: + if exc: + raise NxdlAttributeError(f"Attributes were not found for {nxdl_path}. " + "Please check this entry in the template dictionary.") \ + from value_error + return None + if class_path and nxdlpath and elist: + elem = elist[0] + else: + elem = None + if exc: + raise NxdlAttributeError(f"Attributes were not found for {nxdl_path}. " + "Please check this entry in the template dictionary.") + return elem + + +def process_node(hdf_node, hdf_path, parser, logger, doc=True): + """Processes an hdf5 node. 
+- it logs the node found and also checks for its attributes +- retrieves the corresponding nxdl documentation +TODO: +- follow variants +- NOMAD parser: store in NOMAD """ + hdf_info = {'hdf_path': hdf_path, 'hdf_node': hdf_node} + if isinstance(hdf_node, h5py.Dataset): + logger.debug(f'===== FIELD (/{hdf_path}): {hdf_node}') + val = str(hdf_node[()]).split('\n') if len(hdf_node.shape) <= 1 else str( + hdf_node[0]).split('\n') + logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') + else: + logger.debug( + f"===== GROUP (/{hdf_path} " + f"[{get_nxdl_entry(hdf_info)}" + f"::{get_nx_class_path(hdf_info)}]): {hdf_node}" + ) + (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc) + if parser is not None and isinstance(hdf_node, h5py.Dataset): + parser({"hdf_info": hdf_info, + "nxdef": nxdef, + "nxdl_path": nxdl_path, + "val": val, + "logger": logger}) + for key, value in hdf_node.attrs.items(): + logger.debug(f'===== ATTRS (/{hdf_path}@{key})') + val = str(value).split('\n') + logger.debug(f'value: {val[0]} {"..." 
if len(val) > 1 else ""}') + (req_str, nxdef, nxdl_path) = \ + get_nxdl_doc(hdf_info, logger, doc, attr=key) + if ( + parser is not None + and req_str is not None + and 'NOT IN SCHEMA' not in req_str + and 'None' not in req_str + ): + parser({"hdf_info": hdf_info, + "nxdef": nxdef, + "nxdl_path": nxdl_path, + "val": val, + "logger": logger}, attr=key) + + +def logger_auxiliary_signal(logger, nxdata): + """Handle the presence of auxiliary signal""" + aux = nxdata.attrs.get('auxiliary_signals') + if aux is not None: + if isinstance(aux, str): + aux = [aux] + for asig in aux: + logger.debug(f'Further auxiliary signal has been identified: {asig}') + return logger + + +def print_default_plotable_header(logger): + """Print a three-lines header""" + logger.debug('========================') + logger.debug('=== Default Plotable ===') + logger.debug('========================') + + +def get_default_plotable(root, logger): + """Get default plotable""" + print_default_plotable_header(logger) + # v3 from 2014 + # nxentry + nxentry = None + default_nxentry_group_name = root.attrs.get("default") + if default_nxentry_group_name: + try: + nxentry = root[default_nxentry_group_name] + except KeyError: + nxentry = None + if not nxentry: + nxentry = entry_helper(root) + if not nxentry: + logger.debug('No NXentry has been found') + return + logger.debug('') + logger.debug('NXentry has been identified: ' + nxentry.name) + # nxdata + nxdata = None + nxgroup = nxentry + default_group_name = nxgroup.attrs.get("default") + while default_group_name: + try: + nxgroup = nxgroup[default_group_name] + default_group_name = nxgroup.attrs.get("default") + except KeyError: + pass + if nxgroup == nxentry: + nxdata = nxdata_helper(nxentry) + else: + nxdata = nxgroup + if not nxdata: + logger.debug('No NXdata group has been found') + return + logger.debug('') + logger.debug('NXdata group has been identified: ' + nxdata.name) + process_node(nxdata, nxdata.name, None, logger, False) + # signal + signal = 
None + signal_dataset_name = nxdata.attrs.get("signal") + try: + signal = nxdata[signal_dataset_name] + except (TypeError, KeyError): + signal = None + if not signal: + signal = signal_helper(nxdata) + if not signal: + logger.debug('No Signal has been found') + return + logger.debug('') + logger.debug('Signal has been identified: ' + signal.name) + process_node(signal, signal.name, None, logger, False) + logger = logger_auxiliary_signal(logger, nxdata) # check auxiliary_signals + dim = len(signal.shape) + axes = [] # axes + axis_helper(dim, nxdata, signal, axes, logger) + + +def entry_helper(root): + """Check entry related data""" + nxentries = [] + for key in root.keys(): + if isinstance(root[key], h5py.Group) and root[key].attrs.get('NX_class') and \ + root[key].attrs['NX_class'] == "NXentry": + nxentries.append(root[key]) + if len(nxentries) >= 1: + return nxentries[0] + return None + + +def nxdata_helper(nxentry): + """Check if nxentry hdf5 object has a NX_class and, if it contains NXdata, +return its value""" + lnxdata = [] + for key in nxentry.keys(): + if isinstance(nxentry[key], h5py.Group) and nxentry[key].attrs.get('NX_class') and \ + nxentry[key].attrs['NX_class'] == "NXdata": + lnxdata.append(nxentry[key]) + if len(lnxdata) >= 1: + return lnxdata[0] + return None + + +def signal_helper(nxdata): + """Check signal related data""" + signals = [] + for key in nxdata.keys(): + if isinstance(nxdata[key], h5py.Dataset): + signals.append(nxdata[key]) + if len(signals) == 1: # v3: as there was no selection given, only 1 data field shall exists + return signals[0] + if len(signals) > 1: # v2: select the one with an attribute signal="1" attribute + for sig in signals: + if sig.attrs.get("signal") and sig.attrs.get("signal") is str and \ + sig.attrs.get("signal") == "1": + return sig + return None + + +def find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list): + """Finds axis that have defined dimensions""" + # find those with attribute axis= actual dimension 
number + lax = [] + for key in nxdata.keys(): + if isinstance(nxdata[key], h5py.Dataset): + try: + if nxdata[key].attrs['axis'] == a_item + 1: + lax.append(nxdata[key]) + except KeyError: + pass + if len(lax) == 1: + ax_list.append(lax[0]) + # if there are more alternatives, prioritise the one with an attribute primary="1" + elif len(lax) > 1: + for sax in lax: + if sax.attrs.get('primary') and sax.attrs.get('primary') == 1: + ax_list.insert(0, sax) + else: + ax_list.append(sax) + + +def get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list): + """Gets either single or multiple axes from the NXDL""" + try: + if isinstance(ax_datasets, str): # single axis is defined + # explicite definition of dimension number + ind = nxdata.attrs.get(ax_datasets + '_indices') + if ind and ind is int: + if ind == a_item: + ax_list.append(nxdata[ax_datasets]) + elif a_item == 0: # positional determination of the dimension number + ax_list.append(nxdata[ax_datasets]) + else: # multiple axes are listed + # explicite definition of dimension number + for aax in ax_datasets: + ind = nxdata.attrs.get(aax + '_indices') + if ind and isinstance(ind, int): + if ind == a_item: + ax_list.append(nxdata[aax]) + if not ax_list: # positional determination of the dimension number + ax_list.append(nxdata[ax_datasets[a_item]]) + except KeyError: + pass + return ax_list + + +def axis_helper(dim, nxdata, signal, axes, logger): + """Check axis related data""" + for a_item in range(dim): + ax_list = [] + ax_datasets = nxdata.attrs.get("axes") # primary axes listed in attribute axes + ax_list = get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list) + for attr in nxdata.attrs.keys(): # check for corresponding AXISNAME_indices + if attr.endswith('_indices') and nxdata.attrs[attr] == a_item and \ + nxdata[attr.split('_indices')[0]] not in ax_list: + ax_list.append(nxdata[attr.split('_indices')[0]]) + # v2 # check for ':' separated axes defined in Signal + if not ax_list: + try: + 
ax_datasets = signal.attrs.get("axes").split(':') + ax_list.append(nxdata[ax_datasets[a_item]]) + except (KeyError, AttributeError): + pass + if not ax_list: # check for axis/primary specifications + find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list) + axes.append(ax_list) + logger.debug('') + logger.debug( + f'For Axis #{a_item}, {len(ax_list)} axes have been identified: {str(ax_list)}' + ) + + +def get_all_is_a_rel_from_hdf_node(hdf_node, hdf_path): + """Return list of nxdl concept paths for a nxdl element which corresponds to + hdf node. + """ + hdf_info = {'hdf_path': hdf_path, 'hdf_node': hdf_node} + (_, _, elist) = \ + get_inherited_nodes(None, nx_name=get_nxdl_entry(hdf_info), hdf_node=hdf_node, + hdf_path=hdf_info['hdf_path'] if 'hdf_path' in hdf_info else None, + hdf_root=hdf_info['hdf_root'] if 'hdf_root' in hdf_info else None) + return elist + + +def hdf_node_to_self_concept_path(hdf_info, logger): + """ Get concept or nxdl path from given hdf_node. + """ + # The bellow logger is for deactivatine unnecessary debug message above + if logger is None: + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + (_, _, nxdl_path) = get_nxdl_doc(hdf_info, logger, None) + con_path = '' + if nxdl_path: + for nd_ in nxdl_path: + con_path = con_path + '/' + get_node_name(nd_) + return con_path + + +class HandleNexus: + """documentation""" + def __init__(self, logger, nexus_file, + d_inq_nd=None, c_inq_nd=None): + self.logger = logger + local_dir = os.path.abspath(os.path.dirname(__file__)) + + self.input_file_name = nexus_file if nexus_file is not None else \ + os.path.join(local_dir, '../../tests/data/nexus/201805_WSe2_arpes.nxs') + self.parser = None + self.in_file = None + self.d_inq_nd = d_inq_nd + self.c_inq_nd = c_inq_nd + # Aggregating hdf path corresponds to concept query node + self.hdf_path_list_for_c_inq_nd = [] + + def visit_node(self, hdf_name, hdf_node): + """Function called by h5py that iterates on each node of hdf5file. 
+ It allows h5py visititems function to visit nodes.""" + if self.d_inq_nd is None and self.c_inq_nd is None: + process_node(hdf_node, '/' + hdf_name, self.parser, self.logger) + elif (self.d_inq_nd is not None + and hdf_name in (self.d_inq_nd, self.d_inq_nd[1:])): + process_node(hdf_node, '/' + hdf_name, self.parser, self.logger) + elif self.c_inq_nd is not None: + attributed_concept = self.c_inq_nd.split('@') + attr = attributed_concept[1] if len(attributed_concept) > 1 else None + elist = get_all_is_a_rel_from_hdf_node(hdf_node, '/' + hdf_name) + if elist is None: + return + fnd_superclass = False + fnd_superclass_attr = False + for elem in reversed(elist): + tmp_path = elem.get('nxdlbase').split('.nxdl')[0] + con_path = '/NX' + tmp_path.split('NX')[-1] + elem.get('nxdlpath') + if fnd_superclass or con_path == attributed_concept[0]: + fnd_superclass = True + if attr is None: + self.hdf_path_list_for_c_inq_nd.append(hdf_name) + break + for attribute in hdf_node.attrs.keys(): + attr_concept = get_nxdl_child(elem, attribute, nexus_type='attribute', + go_base=False) + if attr_concept is not None and \ + attr_concept.get('nxdlpath').endswith(attr): + fnd_superclass_attr = True + con_path = '/NX' + tmp_path.split('NX')[-1] \ + + attr_concept.get('nxdlpath') + self.hdf_path_list_for_c_inq_nd.append(hdf_name + "@" + attribute) + break + if fnd_superclass_attr: + break + + def not_yet_visited(self, root, name): + """checking if a new node has already been visited in its path""" + path = name.split('/') + for i in range(1, len(path)): + act_path = '/'.join(path[:i]) + # print(act_path+' - '+name) + if root['/' + act_path] == root['/' + name]: + return False + return True + + def full_visit(self, root, hdf_node, name, func): + """visiting recursivly all children, but avoiding endless cycles""" + # print(name) + if len(name) > 0: + func(name, hdf_node) + if isinstance(hdf_node, h5py.Group): + for ch_name, child in hdf_node.items(): + full_name = ch_name if len(name) == 0 
else name + '/' + ch_name + if self.not_yet_visited(root, full_name): + self.full_visit(root, child, full_name, func) + + def process_nexus_master_file(self, parser): + """Process a nexus master file by processing all its nodes and their attributes""" + self.parser = parser + self.in_file = h5py.File( + self.input_file_name[0] + if isinstance(self.input_file_name, list) + else self.input_file_name, 'r' + ) + self.full_visit(self.in_file, self.in_file, '', self.visit_node) + if self.d_inq_nd is None and self.c_inq_nd is None: + get_default_plotable(self.in_file, self.logger) + # To log the provided concept and concepts founded + if self.c_inq_nd is not None: + for hdf_path in self.hdf_path_list_for_c_inq_nd: + self.logger.info(hdf_path) + self.in_file.close() + + +@click.command() +@click.option( + '-f', + '--nexus-file', + required=False, + default=None, + help=('NeXus file with extension .nxs to learn NeXus different concept' + ' documentation and concept.') +) +@click.option( + '-d', + '--documentation', + required=False, + default=None, + help=("Definition path in nexus output (.nxs) file. Returns debug" + "log relavent with that definition path. Example: /entry/data/delays") +) +@click.option( + '-c', + '--concept', + required=False, + default=None, + help=("Concept path from application definition file (.nxdl,xml). Finds out" + "all the available concept definition (IS-A realation) for rendered" + "concept path. 
Example: /NXarpes/ENTRY/INSTRUMENT/analyser") +) +def main(nexus_file, documentation, concept): + """The main function to call when used as a script.""" + logging_format = "%(levelname)s: %(message)s" + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setLevel(logging.DEBUG) + logging.basicConfig(level=logging.INFO, format=logging_format, handlers=[stdout_handler]) + logger = logging.getLogger(__name__) + logger.addHandler(stdout_handler) + logger.setLevel(logging.DEBUG) + logger.propagate = False + if documentation and concept: + raise ValueError("Only one option either documentation (-d) or is_a relation " + "with a concept (-c) can be requested.") + nexus_helper = HandleNexus(logger, nexus_file, + d_inq_nd=documentation, + c_inq_nd=concept) + nexus_helper.process_nexus_master_file(None) + + +if __name__ == '__main__': + main() # pylint: disable=no-value-for-parameter diff --git a/requirements.txt b/requirements.txt index 8b22819ff..6d024bda3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ pyyaml # Documentation building sphinx>=5 sphinx-tabs -sphinx-comments # Testing pytest @@ -14,5 +13,3 @@ pytest black>=22.3 flake8>=4 isort>=5.10 - -pynxtools>=0.0.3 \ No newline at end of file From 185f6497ab02c7fc1a819568fa0cea00de1334c6 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 00:45:17 +0200 Subject: [PATCH 11/32] linting --- dev_tools/utils/nexus.py | 1069 +++++++++++++++++++++++--------------- 1 file changed, 645 insertions(+), 424 deletions(-) diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py index ac1d8b36c..7b09e30f3 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nexus.py @@ -19,9 +19,13 @@ class NxdlAttributeError(Exception): def get_app_defs_names(): """Returns all the AppDef names without their extension: .nxdl.xml""" - app_def_path_glob = f"{get_nexus_definitions_path()}{os.sep}applications{os.sep}*.nxdl*" - contrib_def_path_glob = 
(f"{get_nexus_definitions_path()}{os.sep}" - f"contributed_definitions{os.sep}*.nxdl*") + app_def_path_glob = ( + f"{get_nexus_definitions_path()}{os.sep}applications{os.sep}*.nxdl*" + ) + contrib_def_path_glob = ( + f"{get_nexus_definitions_path()}{os.sep}" + f"contributed_definitions{os.sep}*.nxdl*" + ) files = sorted(glob(app_def_path_glob)) + sorted(glob(contrib_def_path_glob)) return [os.path.basename(file).split(".")[0] for file in files] + ["NXroot"] @@ -35,9 +39,9 @@ def get_xml_root(file_path): def get_nexus_definitions_path(): """Check NEXUS_DEF_PATH variable. -If it is empty, this function is filling it""" + If it is empty, this function is filling it""" try: # either given by sys env - return os.environ['NEXUS_DEF_PATH'] + return os.environ["NEXUS_DEF_PATH"] except KeyError: # or it should be available locally under the dir 'definitions' local_dir = os.path.abspath(os.path.dirname(__file__)) return os.path.join(local_dir, f"..{os.sep}definitions") @@ -46,65 +50,82 @@ def get_nexus_definitions_path(): def get_hdf_root(hdf_node): """Get the root HDF5 node""" node = hdf_node - while node.name != '/': + while node.name != "/": node = node.parent return node def get_hdf_parent(hdf_info): """Get the parent of an hdf_node in an hdf_info""" - if 'hdf_path' not in hdf_info: - return hdf_info['hdf_node'].parent - node = get_hdf_root(hdf_info['hdf_node']) if 'hdf_root' not in hdf_info \ - else hdf_info['hdf_root'] - for child_name in hdf_info['hdf_path'].split('/'): + if "hdf_path" not in hdf_info: + return hdf_info["hdf_node"].parent + node = ( + get_hdf_root(hdf_info["hdf_node"]) + if "hdf_root" not in hdf_info + else hdf_info["hdf_root"] + ) + for child_name in hdf_info["hdf_path"].split("/"): node = node[child_name] return node def get_parent_path(hdf_name): """Get parent path""" - return '/'.join(hdf_name.split('/')[:-1]) + return "/".join(hdf_name.split("/")[:-1]) def get_hdf_info_parent(hdf_info): """Get the hdf_info for the parent of an hdf_node in an 
hdf_info""" - if 'hdf_path' not in hdf_info: - return {'hdf_node': hdf_info['hdf_node'].parent} - node = get_hdf_root(hdf_info['hdf_node']) if 'hdf_root' not in hdf_info \ - else hdf_info['hdf_root'] - for child_name in hdf_info['hdf_path'].split('/')[1:-1]: + if "hdf_path" not in hdf_info: + return {"hdf_node": hdf_info["hdf_node"].parent} + node = ( + get_hdf_root(hdf_info["hdf_node"]) + if "hdf_root" not in hdf_info + else hdf_info["hdf_root"] + ) + for child_name in hdf_info["hdf_path"].split("/")[1:-1]: node = node[child_name] - return {'hdf_node': node, 'hdf_path': get_parent_path(hdf_info['hdf_path'])} + return {"hdf_node": node, "hdf_path": get_parent_path(hdf_info["hdf_path"])} def get_nx_class_path(hdf_info): """Get the full path of an HDF5 node using nexus classes -in case of a field, end with the field name""" - hdf_node = hdf_info['hdf_node'] - if hdf_node.name == '/': - return '' + in case of a field, end with the field name""" + hdf_node = hdf_info["hdf_node"] + if hdf_node.name == "/": + return "" if isinstance(hdf_node, h5py.Group): - return get_nx_class_path(get_hdf_info_parent(hdf_info)) + '/' + \ - (hdf_node.attrs['NX_class'] if 'NX_class' in hdf_node.attrs.keys() else - hdf_node.name.split('/')[-1]) + return ( + get_nx_class_path(get_hdf_info_parent(hdf_info)) + + "/" + + ( + hdf_node.attrs["NX_class"] + if "NX_class" in hdf_node.attrs.keys() + else hdf_node.name.split("/")[-1] + ) + ) if isinstance(hdf_node, h5py.Dataset): - return get_nx_class_path( - get_hdf_info_parent(hdf_info)) + '/' + hdf_node.name.split('/')[-1] - return '' + return ( + get_nx_class_path(get_hdf_info_parent(hdf_info)) + + "/" + + hdf_node.name.split("/")[-1] + ) + return "" def get_nxdl_entry(hdf_info): """Get the nxdl application definition for an HDF5 node""" entry = hdf_info - while isinstance(entry['hdf_node'], h5py.Dataset) or \ - 'NX_class' not in entry['hdf_node'].attrs.keys() or \ - entry['hdf_node'].attrs['NX_class'] != 'NXentry': + while ( + 
isinstance(entry["hdf_node"], h5py.Dataset) + or "NX_class" not in entry["hdf_node"].attrs.keys() + or entry["hdf_node"].attrs["NX_class"] != "NXentry" + ): entry = get_hdf_info_parent(entry) - if entry['hdf_node'].name == '/': - return 'NO NXentry found' + if entry["hdf_node"].name == "/": + return "NO NXentry found" try: - nxdef = entry['hdf_node']['definition'][()] + nxdef = entry["hdf_node"]["definition"][()] return nxdef.decode() except KeyError: # 'NO Definition referenced' return "NXentry" @@ -112,27 +133,30 @@ def get_nxdl_entry(hdf_info): def get_nx_class(nxdl_elem): """Get the nexus class for a NXDL node""" - if 'category' in nxdl_elem.attrib.keys(): + if "category" in nxdl_elem.attrib.keys(): return None try: - return nxdl_elem.attrib['type'] + return nxdl_elem.attrib["type"] except KeyError: - return 'NX_CHAR' + return "NX_CHAR" def get_nx_namefit(hdf_name, name, name_any=False): """Checks if an HDF5 node name corresponds to a child of the NXDL element -uppercase letters in front can be replaced by arbitraty name, but -uppercase to lowercase match is preferred, -so such match is counted as a measure of the fit""" + uppercase letters in front can be replaced by arbitraty name, but + uppercase to lowercase match is preferred, + so such match is counted as a measure of the fit""" if name == hdf_name: return len(name) * 2 # count leading capitals counting = 0 while counting < len(name) and name[counting].upper() == name[counting]: counting += 1 - if name_any or counting == len(name) or \ - (counting > 0 and hdf_name.endswith(name[counting:])): # if potential fit + if ( + name_any + or counting == len(name) + or (counting > 0 and hdf_name.endswith(name[counting:])) + ): # if potential fit # count the matching chars fit = 0 for i in range(min(counting, len(hdf_name))): @@ -143,24 +167,31 @@ def get_nx_namefit(hdf_name, name, name_any=False): if fit == min(counting, len(hdf_name)): # accept only full fits as better fits return fit return 0 - return -1 # no fit 
+ return -1 # no fit def get_nx_classes(): """Read base classes from the NeXus definition folder. -Check each file in base_classes, applications, contributed_definitions. -If its category attribute is 'base', then it is added to the list. """ - base_classes = sorted(glob(os.path.join(get_nexus_definitions_path(), - 'base_classes', '*.nxdl.xml'))) - applications = sorted(glob(os.path.join(get_nexus_definitions_path(), - 'applications', '*.nxdl.xml'))) - contributed = sorted(glob(os.path.join(get_nexus_definitions_path(), - 'contributed_definitions', '*.nxdl.xml'))) + Check each file in base_classes, applications, contributed_definitions. + If its category attribute is 'base', then it is added to the list.""" + base_classes = sorted( + glob(os.path.join(get_nexus_definitions_path(), "base_classes", "*.nxdl.xml")) + ) + applications = sorted( + glob(os.path.join(get_nexus_definitions_path(), "applications", "*.nxdl.xml")) + ) + contributed = sorted( + glob( + os.path.join( + get_nexus_definitions_path(), "contributed_definitions", "*.nxdl.xml" + ) + ) + ) nx_clss = [] for nexus_file in base_classes + applications + contributed: root = get_xml_root(nexus_file) - if root.attrib['category'] == 'base': - nx_clss.append(str(nexus_file[nexus_file.rindex(os.sep) + 1:])[:-9]) + if root.attrib["category"] == "base": + nx_clss.append(str(nexus_file[nexus_file.rindex(os.sep) + 1 :])[:-9]) nx_clss = sorted(nx_clss) return nx_clss @@ -175,12 +206,12 @@ def get_nx_units(): units_and_type_list.append(i) flag = False for line in units_and_type_list: - if line == 'anyUnitsAttr': + if line == "anyUnitsAttr": flag = True nx_units = [] - elif 'NX' in line and flag is True: + elif "NX" in line and flag is True: nx_units.append(line) - elif line == 'primitiveType': + elif line == "primitiveType": flag = False else: pass @@ -189,7 +220,7 @@ def get_nx_units(): def get_nx_attribute_type(): """Read attribute types from the NeXus definition/nxdlTypes.xsd file""" - filepath = 
get_nexus_definitions_path() + '/nxdlTypes.xsd' + filepath = get_nexus_definitions_path() + "/nxdlTypes.xsd" root = get_xml_root(filepath) units_and_type_list = [] for child in root: @@ -197,12 +228,12 @@ def get_nx_attribute_type(): units_and_type_list.append(i) flag = False for line in units_and_type_list: - if line == 'primitiveType': + if line == "primitiveType": flag = True nx_types = [] - elif 'NX' in line and flag is True: + elif "NX" in line and flag is True: nx_types.append(line) - elif line == 'anyUnitsAttr': + elif line == "anyUnitsAttr": flag = False else: pass @@ -210,23 +241,23 @@ def get_nx_attribute_type(): def get_node_name(node): - '''Node - xml node. Returns html documentation name. + """Node - xml node. Returns html documentation name. Either as specified by the 'name' or taken from the type (nx_class). Note that if only class name is available, the NX prefix is removed and - the string is converted to UPPER case.''' - if 'name' in node.attrib.keys(): - name = node.attrib['name'] + the string is converted to UPPER case.""" + if "name" in node.attrib.keys(): + name = node.attrib["name"] else: - name = node.attrib['type'] - if name.startswith('NX'): + name = node.attrib["type"] + if name.startswith("NX"): name = name[2:].upper() return name def belongs_to(nxdl_elem, child, name, class_type=None, hdf_name=None): """Checks if an HDF5 node name corresponds to a child of the NXDL element -uppercase letters in front can be replaced by arbitraty name, but -uppercase to lowercase match is preferred""" + uppercase letters in front can be replaced by arbitraty name, but + uppercase to lowercase match is preferred""" if class_type and get_nx_class(child) != class_type: return False act_htmlname = get_node_name(child) @@ -236,7 +267,7 @@ def belongs_to(nxdl_elem, child, name, class_type=None, hdf_name=None): if not hdf_name: # search for name fits is only allowed for hdf_nodes return False try: # check if nameType allows different name - name_any = 
bool(child.attrib['nameType'] == "any") + name_any = bool(child.attrib["nameType"] == "any") except KeyError: name_any = False params = [act_htmlname, chk_name, name_any, nxdl_elem, child, name] @@ -247,17 +278,25 @@ def belongs_to_capital(params): """Checking continues for Upper case""" (act_htmlname, chk_name, name_any, nxdl_elem, child, name) = params # or starts with capital and no reserved words used - if (name_any or 'A' <= act_htmlname[0] <= 'Z') and \ - name != 'doc' and name != 'enumeration': + if ( + (name_any or "A" <= act_htmlname[0] <= "Z") + and name != "doc" + and name != "enumeration" + ): fit = get_nx_namefit(chk_name, act_htmlname, name_any) # check if name fits if fit < 0: return False for child2 in nxdl_elem: - if get_local_name_from_xml(child) != \ - get_local_name_from_xml(child2) or get_node_name(child2) == act_htmlname: + if ( + get_local_name_from_xml(child) != get_local_name_from_xml(child2) + or get_node_name(child2) == act_htmlname + ): continue # check if the name of another sibling fits better - name_any2 = "nameType" in child2.attrib.keys() and child2.attrib["nameType"] == "any" + name_any2 = ( + "nameType" in child2.attrib.keys() + and child2.attrib["nameType"] == "any" + ) fit2 = get_nx_namefit(chk_name, get_node_name(child2), name_any2) if fit2 > fit: return False @@ -268,68 +307,81 @@ def belongs_to_capital(params): def get_local_name_from_xml(element): """Helper function to extract the element tag without the namespace.""" - return element.tag[element.tag.rindex("}") + 1:] + return element.tag[element.tag.rindex("}") + 1 :] def get_own_nxdl_child_reserved_elements(child, name, nxdl_elem): """checking reserved elements, like doc, enumeration""" - if get_local_name_from_xml(child) == 'doc' and name == 'doc': - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/doc') + if 
get_local_name_from_xml(child) == "doc" and name == "doc": + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set("nxdlpath", nxdl_elem.get("nxdlpath") + "/doc") return child - if get_local_name_from_xml(child) == 'enumeration' and name == 'enumeration': - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/enumeration') + if get_local_name_from_xml(child) == "enumeration" and name == "enumeration": + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set("nxdlpath", nxdl_elem.get("nxdlpath") + "/enumeration") return child return False def get_own_nxdl_child_base_types(child, class_type, nxdl_elem, name, hdf_name): """checking base types of group, field,m attribute""" - if get_local_name_from_xml(child) == 'group': - if (class_type is None or (class_type and get_nx_class(child) == class_type)) and \ - belongs_to(nxdl_elem, child, name, class_type, hdf_name): - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + if get_local_name_from_xml(child) == "group": + if ( + class_type is None or (class_type and get_nx_class(child) == class_type) + ) and belongs_to(nxdl_elem, child, name, class_type, hdf_name): + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set( + "nxdlpath", nxdl_elem.get("nxdlpath") + "/" + get_node_name(child) + ) return child - if get_local_name_from_xml(child) == 'field' and \ - belongs_to(nxdl_elem, child, name, None, hdf_name): - if 
nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + if get_local_name_from_xml(child) == "field" and belongs_to( + nxdl_elem, child, name, None, hdf_name + ): + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set( + "nxdlpath", nxdl_elem.get("nxdlpath") + "/" + get_node_name(child) + ) return child - if get_local_name_from_xml(child) == 'attribute' and \ - belongs_to(nxdl_elem, child, name, None, hdf_name): - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + if get_local_name_from_xml(child) == "attribute" and belongs_to( + nxdl_elem, child, name, None, hdf_name + ): + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set( + "nxdlpath", nxdl_elem.get("nxdlpath") + "/" + get_node_name(child) + ) return child return False -def get_own_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None): +def get_own_nxdl_child( + nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None +): """Checks if an NXDL child node fits to the specific name (either nxdl or hdf) - name - nxdl name - class_type - nxdl type or hdf classname (for groups, it is obligatory) - hdf_name - hdf name""" + name - nxdl name + class_type - nxdl type or hdf classname (for groups, it is obligatory) + hdf_name - hdf name""" for child in nxdl_elem: - if 'name' in child.attrib and child.attrib['name'] == name: - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', 
nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + if "name" in child.attrib and child.attrib["name"] == name: + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set( + "nxdlpath", nxdl_elem.get("nxdlpath") + "/" + get_node_name(child) + ) return child for child in nxdl_elem: if "name" in child.attrib and child.attrib["name"] == name: - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) return child for child in nxdl_elem: @@ -338,7 +390,9 @@ def get_own_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_ty return result if nexus_type and get_local_name_from_xml(child) != nexus_type: continue - result = get_own_nxdl_child_base_types(child, class_type, nxdl_elem, name, hdf_name) + result = get_own_nxdl_child_base_types( + child, class_type, nxdl_elem, name, hdf_name + ) if result is not False: return result return None @@ -346,21 +400,28 @@ def get_own_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_ty def find_definition_file(bc_name): """find the nxdl file corresponding to the name. 
- Note that it first checks in contributed and goes beyond only if no contributed found""" + Note that it first checks in contributed and goes beyond only if no contributed found + """ bc_filename = None - for nxdl_folder in ['contributed_definitions', 'base_classes', 'applications']: - if os.path.exists(f"{get_nexus_definitions_path()}{os.sep}" - f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"): - bc_filename = f"{get_nexus_definitions_path()}{os.sep}" \ - f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml" + for nxdl_folder in ["contributed_definitions", "base_classes", "applications"]: + if os.path.exists( + f"{get_nexus_definitions_path()}{os.sep}" + f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml" + ): + bc_filename = ( + f"{get_nexus_definitions_path()}{os.sep}" + f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml" + ) break return bc_filename -def get_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None, go_base=True): # pylint: disable=too-many-arguments +def get_nxdl_child( + nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None, go_base=True +): # pylint: disable=too-many-arguments """Get the NXDL child node corresponding to a specific name -(e.g. of an HDF5 node,or of a documentation) note that if child is not found in application -definition, it also checks for the base classes""" + (e.g. 
of an HDF5 node,or of a documentation) note that if child is not found in application + definition, it also checks for the base classes""" # search for possible fits for hdf_nodes : skipped # only exact hits are returned when searching an nxdl child own_child = get_own_nxdl_child(nxdl_elem, name, class_type, hdf_name, nexus_type) @@ -369,18 +430,20 @@ def get_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=N if not go_base: return None bc_name = get_nx_class(nxdl_elem) # check in the base class, app def or contributed - if bc_name[2] == '_': # filter primitive types + if bc_name[2] == "_": # filter primitive types return None - if bc_name == "group": # Check if it is the root element. Then send to NXroot.nxdl.xml + if ( + bc_name == "group" + ): # Check if it is the root element. Then send to NXroot.nxdl.xml bc_name = "NXroot" bc_filename = find_definition_file(bc_name) if not bc_filename: - raise ValueError('nxdl file not found in definitions folder!') + raise ValueError("nxdl file not found in definitions folder!") bc_obj = ET.parse(bc_filename).getroot() - bc_obj.set('nxdlbase', bc_filename) - if 'category' in bc_obj.attrib: - bc_obj.set('nxdlbase_class', bc_obj.attrib['category']) - bc_obj.set('nxdlpath', '') + bc_obj.set("nxdlbase", bc_filename) + if "category" in bc_obj.attrib: + bc_obj.set("nxdlbase_class", bc_obj.attrib["category"]) + bc_obj.set("nxdlpath", "") return get_own_nxdl_child(bc_obj, name, class_type, hdf_name, nexus_type) @@ -388,12 +451,16 @@ def get_required_string(nxdl_elem): """Check for being REQUIRED, RECOMMENDED, OPTIONAL, NOT IN SCHEMA""" if nxdl_elem is None: return "<>" - is_optional = 'optional' in nxdl_elem.attrib.keys() \ - and nxdl_elem.attrib['optional'] == "true" - is_minoccurs = 'minOccurs' in nxdl_elem.attrib.keys() \ - and nxdl_elem.attrib['minOccurs'] == "0" - is_recommended = 'recommended' in nxdl_elem.attrib.keys() \ - and nxdl_elem.attrib['recommended'] == "true" + is_optional = ( + "optional" in 
nxdl_elem.attrib.keys() and nxdl_elem.attrib["optional"] == "true" + ) + is_minoccurs = ( + "minOccurs" in nxdl_elem.attrib.keys() and nxdl_elem.attrib["minOccurs"] == "0" + ) + is_recommended = ( + "recommended" in nxdl_elem.attrib.keys() + and nxdl_elem.attrib["recommended"] == "true" + ) if is_recommended: return "<>" @@ -401,7 +468,7 @@ def get_required_string(nxdl_elem): return "<>" # default optionality: in BASE CLASSES is true; in APPLICATIONS is false try: - if nxdl_elem.get('nxdlbase_class') == 'base': + if nxdl_elem.get("nxdlbase_class") == "base": return "<>" except TypeError: return "<>" @@ -410,40 +477,44 @@ def get_required_string(nxdl_elem): def chk_nxdataaxis_v2(hdf_node, name, logger): """Check if dataset is an axis""" - own_signal = hdf_node.attrs.get('signal') # check for being a Signal + own_signal = hdf_node.attrs.get("signal") # check for being a Signal if own_signal is str and own_signal == "1": logger.debug("Dataset referenced (v2) as NXdata SIGNAL") - own_axes = hdf_node.attrs.get('axes') # check for being an axis + own_axes = hdf_node.attrs.get("axes") # check for being an axis if own_axes is str: - axes = own_axes.split(':') + axes = own_axes.split(":") for i in len(axes): if axes[i] and name == axes[i]: logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", i) return None - ownpaxis = hdf_node.attrs.get('primary') - own_axis = hdf_node.attrs.get('axis') + ownpaxis = hdf_node.attrs.get("primary") + own_axis = hdf_node.attrs.get("axis") if own_axis is int: # also convention v1 if ownpaxis is int and ownpaxis == 1: logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", own_axis - 1) else: logger.debug( - "Dataset referenced (v2) as NXdata (primary/alternative) AXIS #%d", own_axis - 1) + "Dataset referenced (v2) as NXdata (primary/alternative) AXIS #%d", + own_axis - 1, + ) return None def chk_nxdataaxis(hdf_node, name, logger): """NEXUS Data Plotting Standard v3: new version from 2014""" - if not isinstance(hdf_node, h5py.Dataset): 
# check if it is a field in an NXdata node + if not isinstance( + hdf_node, h5py.Dataset + ): # check if it is a field in an NXdata node return None parent = hdf_node.parent - if not parent or (parent and not parent.attrs.get('NX_class') == "NXdata"): + if not parent or (parent and not parent.attrs.get("NX_class") == "NXdata"): return None - signal = parent.attrs.get('signal') # chk for Signal + signal = parent.attrs.get("signal") # chk for Signal if signal and name == signal: logger.debug("Dataset referenced as NXdata SIGNAL") return None - axes = parent.attrs.get('axes') # check for default Axes + axes = parent.attrs.get("axes") # check for default Axes if axes is str: if name == axes: logger.debug("Dataset referenced as NXdata AXIS") @@ -451,13 +522,13 @@ def chk_nxdataaxis(hdf_node, name, logger): elif axes is not None: for i, j in enumerate(axes): if name == j: - indices = parent.attrs.get(j + '_indices') + indices = parent.attrs.get(j + "_indices") if indices is int: logger.debug(f"Dataset referenced as NXdata AXIS #{indices}") else: logger.debug(f"Dataset referenced as NXdata AXIS #{i}") return None - indices = parent.attrs.get(name + '_indices') # check for alternative Axes + indices = parent.attrs.get(name + "_indices") # check for alternative Axes if indices is int: logger.debug(f"Dataset referenced as NXdata alternative AXIS #{indices}") return chk_nxdataaxis_v2(hdf_node, name, logger) # check for older conventions @@ -467,7 +538,7 @@ def chk_nxdataaxis(hdf_node, name, logger): def write_doc_string(logger, doc, attr): """Simple function that prints a line in the logger if doc exists""" if doc: - logger.debug("@" + attr + ' [NX_CHAR]') + logger.debug("@" + attr + " [NX_CHAR]") return logger, doc, attr @@ -477,61 +548,96 @@ def try_find_units(logger, elem, nxdl_path, doc, attr): try: # try to find if units is defined inside the field in the NXDL element unit = elem.attrib[attr] if doc: - logger.debug(get_node_concept_path(elem) + "@" + attr + ' [' + unit + 
']') + logger.debug(get_node_concept_path(elem) + "@" + attr + " [" + unit + "]") elem = None nxdl_path.append(attr) - except KeyError: # otherwise try to find if units is defined as a child of the NXDL element + except ( + KeyError + ): # otherwise try to find if units is defined as a child of the NXDL element orig_elem = elem - elem = get_nxdl_child(elem, attr, nexus_type='attribute') + elem = get_nxdl_child(elem, attr, nexus_type="attribute") if elem is not None: if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + ' - [' + get_nx_class(elem) + ']') + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - [" + + get_nx_class(elem) + + "]" + ) nxdl_path.append(elem) else: # if no units category were defined in NXDL: if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + " - REQUIRED, but undefined unit category") + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - REQUIRED, but undefined unit category" + ) nxdl_path.append(attr) return logger, elem, nxdl_path, doc, attr def check_attr_name_nxdl(param): """Check for ATTRIBUTENAME_units in NXDL (normal). -If not defined, check for ATTRIBUTENAME to see if the ATTRIBUTE -is in the SCHEMA, but no units category were defined. 
""" + If not defined, check for ATTRIBUTENAME to see if the ATTRIBUTE + is in the SCHEMA, but no units category were defined.""" (logger, elem, nxdl_path, doc, attr, req_str) = param orig_elem = elem - elem2 = get_nxdl_child(elem, attr, nexus_type='attribute') + elem2 = get_nxdl_child(elem, attr, nexus_type="attribute") if elem2 is not None: # check for ATTRIBUTENAME_units in NXDL (normal) elem = elem2 if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + ' - [' + get_nx_class(elem) + ']') + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - [" + + get_nx_class(elem) + + "]" + ) nxdl_path.append(elem) else: # if not defined, check for ATTRIBUTENAME to see if the ATTRIBUTE # is in the SCHEMA, but no units category were defined - elem2 = get_nxdl_child(elem, attr[:-6], nexus_type='attribute') + elem2 = get_nxdl_child(elem, attr[:-6], nexus_type="attribute") if elem2 is not None: - req_str = '<>' + req_str = "<>" if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + " - RECOMMENDED, but undefined unit category") + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - RECOMMENDED, but undefined unit category" + ) nxdl_path.append(attr) else: # otherwise: NOT IN SCHEMA elem = elem2 if doc: - logger.debug(get_node_concept_path(orig_elem) + "@" + attr + " - IS NOT IN SCHEMA") + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - IS NOT IN SCHEMA" + ) return logger, elem, nxdl_path, doc, attr, req_str -def try_find_default(logger, orig_elem, elem, nxdl_path, doc, attr): # pylint: disable=too-many-arguments - """Try to find if default is defined as a child of the NXDL element """ +def try_find_default( + logger, orig_elem, elem, nxdl_path, doc, attr +): # pylint: disable=too-many-arguments + """Try to find if default is defined as a child of the NXDL element""" if elem is not None: if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + ' - [' + 
get_nx_class(elem) + ']') + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - [" + + get_nx_class(elem) + + "]" + ) nxdl_path.append(elem) else: # if no default category were defined in NXDL: if doc: @@ -540,38 +646,50 @@ def try_find_default(logger, orig_elem, elem, nxdl_path, doc, attr): # pylint: return logger, elem, nxdl_path, doc, attr -def other_attrs(logger, orig_elem, elem, nxdl_path, doc, attr): # pylint: disable=too-many-arguments - """Handle remaining attributes """ +def other_attrs( + logger, orig_elem, elem, nxdl_path, doc, attr +): # pylint: disable=too-many-arguments + """Handle remaining attributes""" if elem is not None: if doc: - logger.debug(get_node_concept_path(orig_elem) - + "@" + attr + ' - [' + get_nx_class(elem) + ']') + logger.debug( + get_node_concept_path(orig_elem) + + "@" + + attr + + " - [" + + get_nx_class(elem) + + "]" + ) nxdl_path.append(elem) else: if doc: - logger.debug(get_node_concept_path(orig_elem) + "@" + attr + " - IS NOT IN SCHEMA") + logger.debug( + get_node_concept_path(orig_elem) + "@" + attr + " - IS NOT IN SCHEMA" + ) return logger, elem, nxdl_path, doc, attr def check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node): - """Check for several attributes. - deprecation - enums - nxdataaxis """ + """Check for several attributes. 
- deprecation - enums - nxdataaxis""" logger, elem, path = variables - dep_str = elem.attrib.get('deprecated') # check for deprecation + dep_str = elem.attrib.get("deprecated") # check for deprecation if dep_str: if doc: logger.debug("DEPRECATED - " + dep_str) for base_elem in elist if not attr else [elem]: # check for enums - sdoc = get_nxdl_child(base_elem, 'enumeration', go_base=False) + sdoc = get_nxdl_child(base_elem, "enumeration", go_base=False) if sdoc is not None: if doc: logger.debug("enumeration (" + get_node_concept_path(base_elem) + "):") for item in sdoc: - if get_local_name_from_xml(item) == 'item': + if get_local_name_from_xml(item) == "item": if doc: - logger.debug("-> " + item.attrib['value']) - chk_nxdataaxis(hdf_node, path.split('/')[-1], logger) # look for NXdata reference (axes/signal) + logger.debug("-> " + item.attrib["value"]) + chk_nxdataaxis( + hdf_node, path.split("/")[-1], logger + ) # look for NXdata reference (axes/signal) for base_elem in elist if not attr else [elem]: # check for doc - sdoc = get_nxdl_child(base_elem, 'doc', go_base=False) + sdoc = get_nxdl_child(base_elem, "doc", go_base=False) if doc: logger.debug("documentation (" + get_node_concept_path(base_elem) + "):") logger.debug(sdoc.text if sdoc is not None else "") @@ -580,11 +698,12 @@ def check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node): def get_node_concept_path(elem): """get the short version of nxdlbase:nxdlpath""" - return str(elem.get('nxdlbase').split('/')[-1] + ":" + elem.get('nxdlpath')) + return str(elem.get("nxdlbase").split("/")[-1] + ":" + elem.get("nxdlpath")) def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals - elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info): + elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info +): """Get nxdl documentation for an attribute""" new_elem = [] old_elem = elem @@ -592,43 +711,44 @@ def get_nxdl_attr_doc( # pylint: 
disable=too-many-arguments,too-many-locals act_elem = act_elem1 # NX_class is a compulsory attribute for groups in a nexus file # which should match the type of the corresponding NXDL element - if attr == 'NX_class' and not isinstance(hdf_node, h5py.Dataset) and elem_index == 0: + if ( + attr == "NX_class" + and not isinstance(hdf_node, h5py.Dataset) + and elem_index == 0 + ): elem = None logger, doc, attr = write_doc_string(logger, doc, attr) new_elem = elem break # units category is a compulsory attribute for any fields - if attr == 'units' and isinstance(hdf_node, h5py.Dataset): + if attr == "units" and isinstance(hdf_node, h5py.Dataset): req_str = "<>" - logger, act_elem, nxdl_path, doc, attr = try_find_units(logger, - act_elem, - nxdl_path, - doc, - attr) + logger, act_elem, nxdl_path, doc, attr = try_find_units( + logger, act_elem, nxdl_path, doc, attr + ) # units for attributes can be given as ATTRIBUTENAME_units - elif attr.endswith('_units'): - logger, act_elem, nxdl_path, doc, attr, req_str = check_attr_name_nxdl((logger, - act_elem, - nxdl_path, - doc, - attr, - req_str)) + elif attr.endswith("_units"): + logger, act_elem, nxdl_path, doc, attr, req_str = check_attr_name_nxdl( + (logger, act_elem, nxdl_path, doc, attr, req_str) + ) # default is allowed for groups - elif attr == 'default' and not isinstance(hdf_node, h5py.Dataset): + elif attr == "default" and not isinstance(hdf_node, h5py.Dataset): req_str = "<>" # try to find if default is defined as a child of the NXDL element - act_elem = get_nxdl_child(act_elem, attr, nexus_type='attribute', go_base=False) - logger, act_elem, nxdl_path, doc, attr = try_find_default(logger, - act_elem1, - act_elem, - nxdl_path, - doc, - attr) + act_elem = get_nxdl_child( + act_elem, attr, nexus_type="attribute", go_base=False + ) + logger, act_elem, nxdl_path, doc, attr = try_find_default( + logger, act_elem1, act_elem, nxdl_path, doc, attr + ) else: # other attributes - act_elem = get_nxdl_child(act_elem, attr, 
nexus_type='attribute', go_base=False) + act_elem = get_nxdl_child( + act_elem, attr, nexus_type="attribute", go_base=False + ) if act_elem is not None: - logger, act_elem, nxdl_path, doc, attr = \ - other_attrs(logger, act_elem1, act_elem, nxdl_path, doc, attr) + logger, act_elem, nxdl_path, doc, attr = other_attrs( + logger, act_elem1, act_elem, nxdl_path, doc, attr + ) if act_elem is not None: new_elem.append(act_elem) if req_str is None: @@ -636,14 +756,18 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals if doc: logger.debug(req_str) variables = [logger, act_elem, path] - logger, elem, path, doc, elist, attr, hdf_node = check_deprecation_enum_axis(variables, - doc, - elist, - attr, - hdf_node) + ( + logger, + elem, + path, + doc, + elist, + attr, + hdf_node, + ) = check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node) elem = old_elem if req_str is None and doc: - if attr != 'NX_class': + if attr != "NX_class": logger.debug("@" + attr + " - IS NOT IN SCHEMA") logger.debug("") return (req_str, get_nxdl_entry(hdf_info), nxdl_path) @@ -651,48 +775,54 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals def get_nxdl_doc(hdf_info, logger, doc, attr=False): """Get nxdl documentation for an HDF5 node (or its attribute)""" - hdf_node = hdf_info['hdf_node'] + hdf_node = hdf_info["hdf_node"] # new way: retrieve multiple inherited base classes - (class_path, nxdl_path, elist) = \ - get_inherited_nodes(None, nx_name=get_nxdl_entry(hdf_info), hdf_node=hdf_node, - hdf_path=hdf_info['hdf_path'] if 'hdf_path' in hdf_info else None, - hdf_root=hdf_info['hdf_root'] if 'hdf_root' in hdf_info else None) + (class_path, nxdl_path, elist) = get_inherited_nodes( + None, + nx_name=get_nxdl_entry(hdf_info), + hdf_node=hdf_node, + hdf_path=hdf_info["hdf_path"] if "hdf_path" in hdf_info else None, + hdf_root=hdf_info["hdf_root"] if "hdf_root" in hdf_info else None, + ) elem = elist[0] if class_path and elist else None if 
doc: logger.debug("classpath: " + str(class_path)) - logger.debug("NOT IN SCHEMA" if elem is None else - "classes:\n" + "\n".join - (get_node_concept_path(e) for e in elist)) + logger.debug( + "NOT IN SCHEMA" + if elem is None + else "classes:\n" + "\n".join(get_node_concept_path(e) for e in elist) + ) # old solution with a single elem instead of using elist path = get_nx_class_path(hdf_info) req_str = None if elem is None: if doc: logger.debug("") - return ('None', None, None) + return ("None", None, None) if attr: - return get_nxdl_attr_doc(elem, elist, attr, hdf_node, logger, doc, nxdl_path, - req_str, path, hdf_info) + return get_nxdl_attr_doc( + elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info + ) req_str = get_required_string(elem) # check for being required if doc: logger.debug(req_str) variables = [logger, elem, path] - logger, elem, path, doc, elist, attr, hdf_node = check_deprecation_enum_axis(variables, - doc, - elist, - attr, - hdf_node) + logger, elem, path, doc, elist, attr, hdf_node = check_deprecation_enum_axis( + variables, doc, elist, attr, hdf_node + ) return (req_str, get_nxdl_entry(hdf_info), nxdl_path) def get_doc(node, ntype, nxhtml, nxpath): """Get documentation""" # URL for html documentation - anchor = '' + anchor = "" for n_item in nxpath: anchor += n_item.lower() + "-" - anchor = ('https://manual.nexusformat.org/classes/', - nxhtml + "#" + anchor.replace('_', '-') + ntype) + anchor = ( + "https://manual.nexusformat.org/classes/", + nxhtml + "#" + anchor.replace("_", "-") + ntype, + ) if not ntype: anchor = anchor[:-1] doc = "" # RST documentation from the field 'doc' @@ -701,9 +831,13 @@ def get_doc(node, ntype, nxhtml, nxpath): doc = doc_field.text (index, enums) = get_enums(node) # enums if index: - enum_str = "\n " + ("Possible values:" - if len(enums.split(',')) > 1 - else "Obligatory value:") + "\n " + enums + "\n" + enum_str = ( + "\n " + + ("Possible values:" if len(enums.split(",")) > 1 else 
"Obligatory value:") + + "\n " + + enums + + "\n" + ) else: enum_str = "" return anchor, doc + enum_str @@ -714,17 +848,21 @@ def print_doc(node, ntype, level, nxhtml, nxpath): anchor, doc = get_doc(node, ntype, nxhtml, nxpath) print(" " * (level + 1) + anchor) preferred_width = 80 + level * 2 - wrapper = textwrap.TextWrapper(initial_indent=' ' * (level + 1), width=preferred_width, - subsequent_indent=' ' * (level + 1), expand_tabs=False, - tabsize=0) + wrapper = textwrap.TextWrapper( + initial_indent=" " * (level + 1), + width=preferred_width, + subsequent_indent=" " * (level + 1), + expand_tabs=False, + tabsize=0, + ) if doc is not None: - for par in doc.split('\n'): + for par in doc.split("\n"): print(wrapper.fill(par)) def get_namespace(element): """Extracts the namespace for elements in the NXDL""" - return element.tag[element.tag.index("{"):element.tag.rindex("}") + 1] + return element.tag[element.tag.index("{") : element.tag.rindex("}") + 1] def get_enums(node): @@ -737,15 +875,16 @@ def get_enums(node): for enumeration in node.findall(f"{namespace}enumeration"): for item in enumeration.findall(f"{namespace}item"): enums.append(item.attrib["value"]) - enums = ','.join(enums) + enums = ",".join(enums) if enums != "": - return (True, '[' + enums + ']') + return (True, "[" + enums + "]") return (False, "") # if there is no enumeration tag, returns empty string def add_base_classes(elist, nx_name=None, elem: ET.Element = None): """Add the base classes corresponding to the last eleme in elist to the list. 
Note that if -elist is empty, a nxdl file with the name of nx_name or a rather room elem is used if provided""" + elist is empty, a nxdl file with the name of nx_name or a rather room elem is used if provided + """ if elist and nx_name is None: nx_name = get_nx_class(elist[-1]) # to support recursive defintions, like NXsample in NXsample, the following test is removed @@ -758,48 +897,51 @@ def add_base_classes(elist, nx_name=None, elem: ET.Element = None): if nxdl_file_path is None: nxdl_file_path = f"{nx_name}.nxdl.xml" elem = ET.parse(nxdl_file_path).getroot() - elem.set('nxdlbase', nxdl_file_path) + elem.set("nxdlbase", nxdl_file_path) else: - elem.set('nxdlbase', '') - if 'category' in elem.attrib: - elem.set('nxdlbase_class', elem.attrib['category']) - elem.set('nxdlpath', '') + elem.set("nxdlbase", "") + if "category" in elem.attrib: + elem.set("nxdlbase_class", elem.attrib["category"]) + elem.set("nxdlpath", "") elist.append(elem) # add inherited base class - if 'extends' in elem.attrib and elem.attrib['extends'] != 'NXobject': - add_base_classes(elist, elem.attrib['extends']) + if "extends" in elem.attrib and elem.attrib["extends"] != "NXobject": + add_base_classes(elist, elem.attrib["extends"]) else: add_base_classes(elist) def set_nxdlpath(child, nxdl_elem): """ - Setting up child nxdlbase, nxdlpath and nxdlbase_class from nxdl_element. + Setting up child nxdlbase, nxdlpath and nxdlbase_class from nxdl_element. 
""" - if nxdl_elem.get('nxdlbase'): - child.set('nxdlbase', nxdl_elem.get('nxdlbase')) - child.set('nxdlbase_class', nxdl_elem.get('nxdlbase_class')) - child.set('nxdlpath', nxdl_elem.get('nxdlpath') + '/' + get_node_name(child)) + if nxdl_elem.get("nxdlbase"): + child.set("nxdlbase", nxdl_elem.get("nxdlbase")) + child.set("nxdlbase_class", nxdl_elem.get("nxdlbase_class")) + child.set("nxdlpath", nxdl_elem.get("nxdlpath") + "/" + get_node_name(child)) return child def get_direct_child(nxdl_elem, html_name): - """ returns the child of nxdl_elem which has a name - corresponding to the the html documentation name html_name""" + """returns the child of nxdl_elem which has a name + corresponding to the the html documentation name html_name""" for child in nxdl_elem: - if get_local_name_from_xml(child) in ('group', 'field', 'attribute') and \ - html_name == get_node_name(child): + if get_local_name_from_xml(child) in ( + "group", + "field", + "attribute", + ) and html_name == get_node_name(child): decorated_child = set_nxdlpath(child, nxdl_elem) return decorated_child return None def get_field_child(nxdl_elem, html_name): - """ returns the child of nxdl_elem which has a name - corresponding to the html documentation name html_name""" + """returns the child of nxdl_elem which has a name + corresponding to the html documentation name html_name""" data_child = None for child in nxdl_elem: - if get_local_name_from_xml(child) != 'field': + if get_local_name_from_xml(child) != "field": continue if get_node_name(child) == html_name: data_child = set_nxdlpath(child, nxdl_elem) @@ -808,27 +950,27 @@ def get_field_child(nxdl_elem, html_name): def get_best_nxdata_child(nxdl_elem, hdf_node, hdf_name): - """ returns the child of an NXdata nxdl_elem which has a name - corresponding to the hdf_name""" + """returns the child of an NXdata nxdl_elem which has a name + corresponding to the hdf_name""" nxdata = hdf_node.parent signals = [] - if 'signal' in nxdata.attrs.keys(): + if "signal" 
in nxdata.attrs.keys(): signals.append(nxdata.attrs.get("signal")) if "auxiliary_signals" in nxdata.attrs.keys(): for aux_signal in nxdata.attrs.get("auxiliary_signals"): signals.append(aux_signal) - data_child = get_field_child(nxdl_elem, 'DATA') - data_error_child = get_field_child(nxdl_elem, 'FIELDNAME_errors') + data_child = get_field_child(nxdl_elem, "DATA") + data_error_child = get_field_child(nxdl_elem, "FIELDNAME_errors") for signal in signals: if signal == hdf_name: return (data_child, 100) - if hdf_name.endswith('_errors') and signal == hdf_name[:-7]: + if hdf_name.endswith("_errors") and signal == hdf_name[:-7]: return (data_error_child, 100) axes = [] if "axes" in nxdata.attrs.keys(): for axis in nxdata.attrs.get("axes"): axes.append(axis) - axis_child = get_field_child(nxdl_elem, 'AXISNAME') + axis_child = get_field_child(nxdl_elem, "AXISNAME") for axis in axes: if axis == hdf_name: return (axis_child, 100) @@ -836,22 +978,29 @@ def get_best_nxdata_child(nxdl_elem, hdf_node, hdf_name): def get_best_child(nxdl_elem, hdf_node, hdf_name, hdf_class_name, nexus_type): - """ returns the child of nxdl_elem which has a name - corresponding to the the html documentation name html_name""" + """returns the child of nxdl_elem which has a name + corresponding to the the html documentation name html_name""" bestfit = -1 bestchild = None - if 'name' in nxdl_elem.attrib.keys() and nxdl_elem.attrib['name'] == 'NXdata' and \ - hdf_node is not None and hdf_node.parent is not None and \ - hdf_node.parent.attrs.get('NX_class') == 'NXdata': + if ( + "name" in nxdl_elem.attrib.keys() + and nxdl_elem.attrib["name"] == "NXdata" + and hdf_node is not None + and hdf_node.parent is not None + and hdf_node.parent.attrs.get("NX_class") == "NXdata" + ): (fnd_child, fit) = get_best_nxdata_child(nxdl_elem, hdf_node, hdf_name) if fnd_child is not None: return (fnd_child, fit) for child in nxdl_elem: fit = -2 - if get_local_name_from_xml(child) == nexus_type and \ - (nexus_type != 
'group' or get_nx_class(child) == hdf_class_name): - name_any = "nameType" in nxdl_elem.attrib.keys() and \ - nxdl_elem.attrib["nameType"] == "any" + if get_local_name_from_xml(child) == nexus_type and ( + nexus_type != "group" or get_nx_class(child) == hdf_class_name + ): + name_any = ( + "nameType" in nxdl_elem.attrib.keys() + and nxdl_elem.attrib["nameType"] == "any" + ) fit = get_nx_namefit(hdf_name, get_node_name(child), name_any) if fit > bestfit: bestfit = fit @@ -869,9 +1018,13 @@ def walk_elist(elist, html_name): for potential_direct_parent in elist: main_child = get_direct_child(potential_direct_parent, html_name) if main_child is not None: - (fitting_child, _) = get_best_child(elist[ind], None, html_name, - get_nx_class(main_child), - get_local_name_from_xml(main_child)) + (fitting_child, _) = get_best_child( + elist[ind], + None, + html_name, + get_nx_class(main_child), + get_local_name_from_xml(main_child), + ) if fitting_child is not None: child = fitting_child break @@ -880,10 +1033,12 @@ def walk_elist(elist, html_name): del elist[ind] continue # override: remove low priority inheritance classes if class_type is overriden - if len(elist) > ind + 1 and get_nx_class(elist[ind]) != get_nx_class(elist[ind + 1]): - del elist[ind + 1:] + if len(elist) > ind + 1 and get_nx_class(elist[ind]) != get_nx_class( + elist[ind + 1] + ): + del elist[ind + 1 :] # add new base class(es) if new element brings such (and not a primitive type) - if len(elist) == ind + 1 and get_nx_class(elist[ind])[0:3] != 'NX_': + if len(elist) == ind + 1 and get_nx_class(elist[ind])[0:3] != "NX_": add_base_classes(elist) return elist, html_name @@ -894,20 +1049,18 @@ def helper_get_inherited_nodes(hdf_info2, elist, pind, attr): hdf_name = hdf_path[pind] hdf_class_name = hdf_class_path[pind] if pind < len(hdf_path) - (2 if attr else 1): - act_nexus_type = 'group' + act_nexus_type = "group" elif pind == len(hdf_path) - 1 and attr: - act_nexus_type = 'attribute' + act_nexus_type = 
"attribute" else: - act_nexus_type = 'field' if isinstance(hdf_node, h5py.Dataset) else 'group' + act_nexus_type = "field" if isinstance(hdf_node, h5py.Dataset) else "group" # find the best fitting name in all children bestfit = -1 html_name = None for ind in range(len(elist) - 1, -1, -1): - newelem, fit = get_best_child(elist[ind], - hdf_node, - hdf_name, - hdf_class_name, - act_nexus_type) + newelem, fit = get_best_child( + elist[ind], hdf_node, hdf_name, hdf_class_name, act_nexus_type + ) if fit >= bestfit and newelem is not None: html_name = get_node_name(newelem) return hdf_path, hdf_node, hdf_class_path, elist, pind, attr, html_name @@ -915,15 +1068,21 @@ def helper_get_inherited_nodes(hdf_info2, elist, pind, attr): def get_hdf_path(hdf_info): """Get the hdf_path from an hdf_info""" - if 'hdf_path' in hdf_info: - return hdf_info['hdf_path'].split('/')[1:] - return hdf_info['hdf_node'].name.split('/')[1:] + if "hdf_path" in hdf_info: + return hdf_info["hdf_path"].split("/")[1:] + return hdf_info["hdf_node"].name.split("/")[1:] @lru_cache(maxsize=None) -def get_inherited_nodes(nxdl_path: str = None, # pylint: disable=too-many-arguments,too-many-locals - nx_name: str = None, elem: ET.Element = None, - hdf_node=None, hdf_path=None, hdf_root=None, attr=False): +def get_inherited_nodes( + nxdl_path: str = None, # pylint: disable=too-many-arguments,too-many-locals + nx_name: str = None, + elem: ET.Element = None, + hdf_node=None, + hdf_path=None, + hdf_root=None, + attr=False, +): """Returns a list of ET.Element for the given path.""" # let us start with the given definition file elist = [] # type: ignore[var-annotated] @@ -932,27 +1091,33 @@ def get_inherited_nodes(nxdl_path: str = None, # pylint: disable=too-many-argum class_path = [] # type: ignore[var-annotated] if hdf_node is not None: - hdf_info = {'hdf_node': hdf_node} + hdf_info = {"hdf_node": hdf_node} if hdf_path: - hdf_info['hdf_path'] = hdf_path + hdf_info["hdf_path"] = hdf_path if hdf_root: - 
hdf_root['hdf_root'] = hdf_root - hdf_node = hdf_info['hdf_node'] + hdf_root["hdf_root"] = hdf_root + hdf_node = hdf_info["hdf_node"] hdf_path = get_hdf_path(hdf_info) - hdf_class_path = get_nx_class_path(hdf_info).split('/')[1:] + hdf_class_path = get_nx_class_path(hdf_info).split("/")[1:] if attr: hdf_path.append(attr) hdf_class_path.append(attr) path = hdf_path else: - html_path = nxdl_path.split('/')[1:] + html_path = nxdl_path.split("/")[1:] path = html_path for pind in range(len(path)): if hdf_node is not None: hdf_info2 = [hdf_path, hdf_node, hdf_class_path] - [hdf_path, hdf_node, hdf_class_path, elist, - pind, attr, html_name] = helper_get_inherited_nodes(hdf_info2, elist, - pind, attr) + [ + hdf_path, + hdf_node, + hdf_class_path, + elist, + pind, + attr, + html_name, + ] = helper_get_inherited_nodes(hdf_info2, elist, pind, attr) if html_name is None: # return if NOT IN SCHEMA return (class_path, nxdl_elem_path, None) else: @@ -964,9 +1129,12 @@ def get_inherited_nodes(nxdl_path: str = None, # pylint: disable=too-many-argum return (class_path, nxdl_elem_path, elist) -def get_node_at_nxdl_path(nxdl_path: str = None, - nx_name: str = None, elem: ET.Element = None, - exc: bool = True): +def get_node_at_nxdl_path( + nxdl_path: str = None, + nx_name: str = None, + elem: ET.Element = None, + exc: bool = True, +): """Returns an ET.Element for the given path. This function either takes the name for the NeXus Application Definition we are looking for or the root elem from a previously loaded NXDL file @@ -975,32 +1143,38 @@ def get_node_at_nxdl_path(nxdl_path: str = None, (class_path, nxdlpath, elist) = get_inherited_nodes(nxdl_path, nx_name, elem) except ValueError as value_error: if exc: - raise NxdlAttributeError(f"Attributes were not found for {nxdl_path}. " - "Please check this entry in the template dictionary.") \ - from value_error + raise NxdlAttributeError( + f"Attributes were not found for {nxdl_path}. 
" + "Please check this entry in the template dictionary." + ) from value_error return None if class_path and nxdlpath and elist: elem = elist[0] else: elem = None if exc: - raise NxdlAttributeError(f"Attributes were not found for {nxdl_path}. " - "Please check this entry in the template dictionary.") + raise NxdlAttributeError( + f"Attributes were not found for {nxdl_path}. " + "Please check this entry in the template dictionary." + ) return elem def process_node(hdf_node, hdf_path, parser, logger, doc=True): """Processes an hdf5 node. -- it logs the node found and also checks for its attributes -- retrieves the corresponding nxdl documentation -TODO: -- follow variants -- NOMAD parser: store in NOMAD """ - hdf_info = {'hdf_path': hdf_path, 'hdf_node': hdf_node} + - it logs the node found and also checks for its attributes + - retrieves the corresponding nxdl documentation + TODO: + - follow variants + - NOMAD parser: store in NOMAD""" + hdf_info = {"hdf_path": hdf_path, "hdf_node": hdf_node} if isinstance(hdf_node, h5py.Dataset): - logger.debug(f'===== FIELD (/{hdf_path}): {hdf_node}') - val = str(hdf_node[()]).split('\n') if len(hdf_node.shape) <= 1 else str( - hdf_node[0]).split('\n') + logger.debug(f"===== FIELD (/{hdf_path}): {hdf_node}") + val = ( + str(hdf_node[()]).split("\n") + if len(hdf_node.shape) <= 1 + else str(hdf_node[0]).split("\n") + ) logger.debug(f'value: {val[0]} {"..." 
if len(val) > 1 else ""}') else: logger.debug( @@ -1010,46 +1184,54 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True): ) (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc) if parser is not None and isinstance(hdf_node, h5py.Dataset): - parser({"hdf_info": hdf_info, + parser( + { + "hdf_info": hdf_info, "nxdef": nxdef, "nxdl_path": nxdl_path, "val": val, - "logger": logger}) + "logger": logger, + } + ) for key, value in hdf_node.attrs.items(): - logger.debug(f'===== ATTRS (/{hdf_path}@{key})') - val = str(value).split('\n') + logger.debug(f"===== ATTRS (/{hdf_path}@{key})") + val = str(value).split("\n") logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') - (req_str, nxdef, nxdl_path) = \ - get_nxdl_doc(hdf_info, logger, doc, attr=key) + (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc, attr=key) if ( parser is not None and req_str is not None - and 'NOT IN SCHEMA' not in req_str - and 'None' not in req_str + and "NOT IN SCHEMA" not in req_str + and "None" not in req_str ): - parser({"hdf_info": hdf_info, + parser( + { + "hdf_info": hdf_info, "nxdef": nxdef, "nxdl_path": nxdl_path, "val": val, - "logger": logger}, attr=key) + "logger": logger, + }, + attr=key, + ) def logger_auxiliary_signal(logger, nxdata): """Handle the presence of auxiliary signal""" - aux = nxdata.attrs.get('auxiliary_signals') + aux = nxdata.attrs.get("auxiliary_signals") if aux is not None: if isinstance(aux, str): aux = [aux] for asig in aux: - logger.debug(f'Further auxiliary signal has been identified: {asig}') + logger.debug(f"Further auxiliary signal has been identified: {asig}") return logger def print_default_plotable_header(logger): """Print a three-lines header""" - logger.debug('========================') - logger.debug('=== Default Plotable ===') - logger.debug('========================') + logger.debug("========================") + logger.debug("=== Default Plotable ===") + logger.debug("========================") def 
get_default_plotable(root, logger): @@ -1067,10 +1249,10 @@ def get_default_plotable(root, logger): if not nxentry: nxentry = entry_helper(root) if not nxentry: - logger.debug('No NXentry has been found') + logger.debug("No NXentry has been found") return - logger.debug('') - logger.debug('NXentry has been identified: ' + nxentry.name) + logger.debug("") + logger.debug("NXentry has been identified: " + nxentry.name) # nxdata nxdata = None nxgroup = nxentry @@ -1086,10 +1268,10 @@ def get_default_plotable(root, logger): else: nxdata = nxgroup if not nxdata: - logger.debug('No NXdata group has been found') + logger.debug("No NXdata group has been found") return - logger.debug('') - logger.debug('NXdata group has been identified: ' + nxdata.name) + logger.debug("") + logger.debug("NXdata group has been identified: " + nxdata.name) process_node(nxdata, nxdata.name, None, logger, False) # signal signal = None @@ -1101,10 +1283,10 @@ def get_default_plotable(root, logger): if not signal: signal = signal_helper(nxdata) if not signal: - logger.debug('No Signal has been found') + logger.debug("No Signal has been found") return - logger.debug('') - logger.debug('Signal has been identified: ' + signal.name) + logger.debug("") + logger.debug("Signal has been identified: " + signal.name) process_node(signal, signal.name, None, logger, False) logger = logger_auxiliary_signal(logger, nxdata) # check auxiliary_signals dim = len(signal.shape) @@ -1116,8 +1298,11 @@ def entry_helper(root): """Check entry related data""" nxentries = [] for key in root.keys(): - if isinstance(root[key], h5py.Group) and root[key].attrs.get('NX_class') and \ - root[key].attrs['NX_class'] == "NXentry": + if ( + isinstance(root[key], h5py.Group) + and root[key].attrs.get("NX_class") + and root[key].attrs["NX_class"] == "NXentry" + ): nxentries.append(root[key]) if len(nxentries) >= 1: return nxentries[0] @@ -1126,11 +1311,14 @@ def entry_helper(root): def nxdata_helper(nxentry): """Check if nxentry hdf5 
object has a NX_class and, if it contains NXdata, -return its value""" + return its value""" lnxdata = [] for key in nxentry.keys(): - if isinstance(nxentry[key], h5py.Group) and nxentry[key].attrs.get('NX_class') and \ - nxentry[key].attrs['NX_class'] == "NXdata": + if ( + isinstance(nxentry[key], h5py.Group) + and nxentry[key].attrs.get("NX_class") + and nxentry[key].attrs["NX_class"] == "NXdata" + ): lnxdata.append(nxentry[key]) if len(lnxdata) >= 1: return lnxdata[0] @@ -1143,12 +1331,17 @@ def signal_helper(nxdata): for key in nxdata.keys(): if isinstance(nxdata[key], h5py.Dataset): signals.append(nxdata[key]) - if len(signals) == 1: # v3: as there was no selection given, only 1 data field shall exists + if ( + len(signals) == 1 + ): # v3: as there was no selection given, only 1 data field shall exists return signals[0] if len(signals) > 1: # v2: select the one with an attribute signal="1" attribute for sig in signals: - if sig.attrs.get("signal") and sig.attrs.get("signal") is str and \ - sig.attrs.get("signal") == "1": + if ( + sig.attrs.get("signal") + and sig.attrs.get("signal") is str + and sig.attrs.get("signal") == "1" + ): return sig return None @@ -1160,7 +1353,7 @@ def find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list): for key in nxdata.keys(): if isinstance(nxdata[key], h5py.Dataset): try: - if nxdata[key].attrs['axis'] == a_item + 1: + if nxdata[key].attrs["axis"] == a_item + 1: lax.append(nxdata[key]) except KeyError: pass @@ -1169,7 +1362,7 @@ def find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list): # if there are more alternatives, prioritise the one with an attribute primary="1" elif len(lax) > 1: for sax in lax: - if sax.attrs.get('primary') and sax.attrs.get('primary') == 1: + if sax.attrs.get("primary") and sax.attrs.get("primary") == 1: ax_list.insert(0, sax) else: ax_list.append(sax) @@ -1180,7 +1373,7 @@ def get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list): try: if isinstance(ax_datasets, str): # single axis 
is defined # explicite definition of dimension number - ind = nxdata.attrs.get(ax_datasets + '_indices') + ind = nxdata.attrs.get(ax_datasets + "_indices") if ind and ind is int: if ind == a_item: ax_list.append(nxdata[ax_datasets]) @@ -1189,7 +1382,7 @@ def get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list): else: # multiple axes are listed # explicite definition of dimension number for aax in ax_datasets: - ind = nxdata.attrs.get(aax + '_indices') + ind = nxdata.attrs.get(aax + "_indices") if ind and isinstance(ind, int): if ind == a_item: ax_list.append(nxdata[aax]) @@ -1207,22 +1400,25 @@ def axis_helper(dim, nxdata, signal, axes, logger): ax_datasets = nxdata.attrs.get("axes") # primary axes listed in attribute axes ax_list = get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list) for attr in nxdata.attrs.keys(): # check for corresponding AXISNAME_indices - if attr.endswith('_indices') and nxdata.attrs[attr] == a_item and \ - nxdata[attr.split('_indices')[0]] not in ax_list: - ax_list.append(nxdata[attr.split('_indices')[0]]) + if ( + attr.endswith("_indices") + and nxdata.attrs[attr] == a_item + and nxdata[attr.split("_indices")[0]] not in ax_list + ): + ax_list.append(nxdata[attr.split("_indices")[0]]) # v2 # check for ':' separated axes defined in Signal if not ax_list: try: - ax_datasets = signal.attrs.get("axes").split(':') + ax_datasets = signal.attrs.get("axes").split(":") ax_list.append(nxdata[ax_datasets[a_item]]) except (KeyError, AttributeError): pass if not ax_list: # check for axis/primary specifications find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list) axes.append(ax_list) - logger.debug('') + logger.debug("") logger.debug( - f'For Axis #{a_item}, {len(ax_list)} axes have been identified: {str(ax_list)}' + f"For Axis #{a_item}, {len(ax_list)} axes have been identified: {str(ax_list)}" ) @@ -1230,38 +1426,43 @@ def get_all_is_a_rel_from_hdf_node(hdf_node, hdf_path): """Return list of nxdl concept paths for a nxdl 
element which corresponds to hdf node. """ - hdf_info = {'hdf_path': hdf_path, 'hdf_node': hdf_node} - (_, _, elist) = \ - get_inherited_nodes(None, nx_name=get_nxdl_entry(hdf_info), hdf_node=hdf_node, - hdf_path=hdf_info['hdf_path'] if 'hdf_path' in hdf_info else None, - hdf_root=hdf_info['hdf_root'] if 'hdf_root' in hdf_info else None) + hdf_info = {"hdf_path": hdf_path, "hdf_node": hdf_node} + (_, _, elist) = get_inherited_nodes( + None, + nx_name=get_nxdl_entry(hdf_info), + hdf_node=hdf_node, + hdf_path=hdf_info["hdf_path"] if "hdf_path" in hdf_info else None, + hdf_root=hdf_info["hdf_root"] if "hdf_root" in hdf_info else None, + ) return elist def hdf_node_to_self_concept_path(hdf_info, logger): - """ Get concept or nxdl path from given hdf_node. - """ + """Get concept or nxdl path from given hdf_node.""" # The bellow logger is for deactivatine unnecessary debug message above if logger is None: logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) (_, _, nxdl_path) = get_nxdl_doc(hdf_info, logger, None) - con_path = '' + con_path = "" if nxdl_path: for nd_ in nxdl_path: - con_path = con_path + '/' + get_node_name(nd_) + con_path = con_path + "/" + get_node_name(nd_) return con_path class HandleNexus: """documentation""" - def __init__(self, logger, nexus_file, - d_inq_nd=None, c_inq_nd=None): + + def __init__(self, logger, nexus_file, d_inq_nd=None, c_inq_nd=None): self.logger = logger local_dir = os.path.abspath(os.path.dirname(__file__)) - self.input_file_name = nexus_file if nexus_file is not None else \ - os.path.join(local_dir, '../../tests/data/nexus/201805_WSe2_arpes.nxs') + self.input_file_name = ( + nexus_file + if nexus_file is not None + else os.path.join(local_dir, "../../tests/data/nexus/201805_WSe2_arpes.nxs") + ) self.parser = None self.in_file = None self.d_inq_nd = d_inq_nd @@ -1273,46 +1474,55 @@ def visit_node(self, hdf_name, hdf_node): """Function called by h5py that iterates on each node of hdf5file. 
It allows h5py visititems function to visit nodes.""" if self.d_inq_nd is None and self.c_inq_nd is None: - process_node(hdf_node, '/' + hdf_name, self.parser, self.logger) - elif (self.d_inq_nd is not None - and hdf_name in (self.d_inq_nd, self.d_inq_nd[1:])): - process_node(hdf_node, '/' + hdf_name, self.parser, self.logger) + process_node(hdf_node, "/" + hdf_name, self.parser, self.logger) + elif self.d_inq_nd is not None and hdf_name in ( + self.d_inq_nd, + self.d_inq_nd[1:], + ): + process_node(hdf_node, "/" + hdf_name, self.parser, self.logger) elif self.c_inq_nd is not None: - attributed_concept = self.c_inq_nd.split('@') + attributed_concept = self.c_inq_nd.split("@") attr = attributed_concept[1] if len(attributed_concept) > 1 else None - elist = get_all_is_a_rel_from_hdf_node(hdf_node, '/' + hdf_name) + elist = get_all_is_a_rel_from_hdf_node(hdf_node, "/" + hdf_name) if elist is None: return fnd_superclass = False fnd_superclass_attr = False for elem in reversed(elist): - tmp_path = elem.get('nxdlbase').split('.nxdl')[0] - con_path = '/NX' + tmp_path.split('NX')[-1] + elem.get('nxdlpath') + tmp_path = elem.get("nxdlbase").split(".nxdl")[0] + con_path = "/NX" + tmp_path.split("NX")[-1] + elem.get("nxdlpath") if fnd_superclass or con_path == attributed_concept[0]: fnd_superclass = True if attr is None: self.hdf_path_list_for_c_inq_nd.append(hdf_name) break for attribute in hdf_node.attrs.keys(): - attr_concept = get_nxdl_child(elem, attribute, nexus_type='attribute', - go_base=False) - if attr_concept is not None and \ - attr_concept.get('nxdlpath').endswith(attr): + attr_concept = get_nxdl_child( + elem, attribute, nexus_type="attribute", go_base=False + ) + if attr_concept is not None and attr_concept.get( + "nxdlpath" + ).endswith(attr): fnd_superclass_attr = True - con_path = '/NX' + tmp_path.split('NX')[-1] \ - + attr_concept.get('nxdlpath') - self.hdf_path_list_for_c_inq_nd.append(hdf_name + "@" + attribute) + con_path = ( + "/NX" + + 
tmp_path.split("NX")[-1] + + attr_concept.get("nxdlpath") + ) + self.hdf_path_list_for_c_inq_nd.append( + hdf_name + "@" + attribute + ) break if fnd_superclass_attr: break def not_yet_visited(self, root, name): """checking if a new node has already been visited in its path""" - path = name.split('/') + path = name.split("/") for i in range(1, len(path)): - act_path = '/'.join(path[:i]) + act_path = "/".join(path[:i]) # print(act_path+' - '+name) - if root['/' + act_path] == root['/' + name]: + if root["/" + act_path] == root["/" + name]: return False return True @@ -1323,7 +1533,7 @@ def full_visit(self, root, hdf_node, name, func): func(name, hdf_node) if isinstance(hdf_node, h5py.Group): for ch_name, child in hdf_node.items(): - full_name = ch_name if len(name) == 0 else name + '/' + ch_name + full_name = ch_name if len(name) == 0 else name + "/" + ch_name if self.not_yet_visited(root, full_name): self.full_visit(root, child, full_name, func) @@ -1333,9 +1543,10 @@ def process_nexus_master_file(self, parser): self.in_file = h5py.File( self.input_file_name[0] if isinstance(self.input_file_name, list) - else self.input_file_name, 'r' + else self.input_file_name, + "r", ) - self.full_visit(self.in_file, self.in_file, '', self.visit_node) + self.full_visit(self.in_file, self.in_file, "", self.visit_node) if self.d_inq_nd is None and self.c_inq_nd is None: get_default_plotable(self.in_file, self.logger) # To log the provided concept and concepts founded @@ -1347,48 +1558,58 @@ def process_nexus_master_file(self, parser): @click.command() @click.option( - '-f', - '--nexus-file', + "-f", + "--nexus-file", required=False, default=None, - help=('NeXus file with extension .nxs to learn NeXus different concept' - ' documentation and concept.') + help=( + "NeXus file with extension .nxs to learn NeXus different concept" + " documentation and concept." 
+ ), ) @click.option( - '-d', - '--documentation', + "-d", + "--documentation", required=False, default=None, - help=("Definition path in nexus output (.nxs) file. Returns debug" - "log relavent with that definition path. Example: /entry/data/delays") + help=( + "Definition path in nexus output (.nxs) file. Returns debug" + "log relavent with that definition path. Example: /entry/data/delays" + ), ) @click.option( - '-c', - '--concept', + "-c", + "--concept", required=False, default=None, - help=("Concept path from application definition file (.nxdl,xml). Finds out" - "all the available concept definition (IS-A realation) for rendered" - "concept path. Example: /NXarpes/ENTRY/INSTRUMENT/analyser") + help=( + "Concept path from application definition file (.nxdl,xml). Finds out" + "all the available concept definition (IS-A realation) for rendered" + "concept path. Example: /NXarpes/ENTRY/INSTRUMENT/analyser" + ), ) def main(nexus_file, documentation, concept): """The main function to call when used as a script.""" logging_format = "%(levelname)s: %(message)s" stdout_handler = logging.StreamHandler(sys.stdout) stdout_handler.setLevel(logging.DEBUG) - logging.basicConfig(level=logging.INFO, format=logging_format, handlers=[stdout_handler]) + logging.basicConfig( + level=logging.INFO, format=logging_format, handlers=[stdout_handler] + ) logger = logging.getLogger(__name__) logger.addHandler(stdout_handler) logger.setLevel(logging.DEBUG) logger.propagate = False if documentation and concept: - raise ValueError("Only one option either documentation (-d) or is_a relation " - "with a concept (-c) can be requested.") - nexus_helper = HandleNexus(logger, nexus_file, - d_inq_nd=documentation, - c_inq_nd=concept) + raise ValueError( + "Only one option either documentation (-d) or is_a relation " + "with a concept (-c) can be requested." 
+ ) + nexus_helper = HandleNexus( + logger, nexus_file, d_inq_nd=documentation, c_inq_nd=concept + ) nexus_helper.process_nexus_master_file(None) -if __name__ == '__main__': +if __name__ == "__main__": main() # pylint: disable=no-value-for-parameter From 5a1de22aba651428ed2b6623e401fea9186c0e93 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 00:48:53 +0200 Subject: [PATCH 12/32] linting --- dev_tools/docs/nxdl.py | 2 +- dev_tools/utils/nexus.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 1316e230a..03c72be21 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -7,12 +7,12 @@ from typing import Optional import lxml -from ..utils import nexus as pynxtools_nxlib from ..globals.directories import get_nxdl_root from ..globals.errors import NXDLParseError from ..globals.nxdl import NXDL_NAMESPACE from ..globals.urls import REPO_URL +from ..utils import nexus as pynxtools_nxlib from ..utils.types import PathLike from .anchor_list import AnchorRegistry diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py index 7b09e30f3..def84b160 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nexus.py @@ -2,15 +2,16 @@ """Read files from different format and print it in a standard NeXus format """ +import logging import os +import sys +import textwrap import xml.etree.ElementTree as ET from functools import lru_cache from glob import glob -import sys -import logging -import textwrap -import h5py + import click +import h5py class NxdlAttributeError(Exception): From 286c0c2e6262686ec87311cf299b8b49ae07b035 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 00:54:10 +0200 Subject: [PATCH 13/32] imports --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 6d024bda3..ac0c65737 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ # Prepare for 
Documentation lxml pyyaml +click>=7.1.2 +h5py>=3.6.0 # Documentation building sphinx>=5 From 6837fb6b1847a90e99979d0190dacb58078585d7 Mon Sep 17 00:00:00 2001 From: domna Date: Fri, 16 Jun 2023 10:01:05 +0200 Subject: [PATCH 14/32] Adds pyproject --- .gitignore | 20 +++++++++++++++++++ MANIFEST.in | 4 ++++ dev_tools/utils/nexus.py | 21 ++++++-------------- pyproject.toml | 43 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 15 deletions(-) create mode 100644 MANIFEST.in create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index ff21c1627..50408db4b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,23 @@ makelog.txt # Unknown /python/ __github_creds__.txt + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..20485f628 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +recursive-include applications/ *.nxdl.xml +recursive-include contributed_definitions/ *.nxdl.xml +recursive-include base_classes/ *.nxdl.xml +include ./ *.xsd \ No newline at end of file diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py index def84b160..8e8ef771f 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nexus.py @@ -1457,13 +1457,11 @@ class HandleNexus: def __init__(self, logger, nexus_file, d_inq_nd=None, c_inq_nd=None): self.logger = logger - local_dir = os.path.abspath(os.path.dirname(__file__)) - self.input_file_name = ( - nexus_file - if nexus_file is not None - else os.path.join(local_dir, "../../tests/data/nexus/201805_WSe2_arpes.nxs") - ) + if nexus_file is None: + raise ValueError("Nexus file not specified. 
Cannot proceed.") + + self.input_file_name = nexus_file self.parser = None self.in_file = None self.d_inq_nd = d_inq_nd @@ -1558,15 +1556,8 @@ def process_nexus_master_file(self, parser): @click.command() -@click.option( - "-f", - "--nexus-file", - required=False, - default=None, - help=( - "NeXus file with extension .nxs to learn NeXus different concept" - " documentation and concept." - ), +@click.argument( + 'nexus_file', ) @click.option( "-d", diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..6a90ec573 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools>=64.0.1", "setuptools-scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "nexusdefinitions" +dynamic = ["version"] +authors = [ + { name = "NIAC" } +] +description = "Nexus definitions" +readme = "README.md" +license = { file = "LGPL.txt" } +requires-python = "" +classifiers = [ + "Operating System :: OS Independent" +] +dependencies = [ + "lxml", + "pyyaml", + "click>=7.1.2", + "h5py>=3.6.0", + "sphinx>=5", + "sphinx-tabs", + "pytest", + "black>=22.3", + "flake8>=4", + "isort>=5.10", + "click>=7.1.2", +] + +[project.urls] +"Homepage" = "https://nexusformat.org" + +[project.scripts] +read_nexus = "dev_tools.utils.nexus:main" + +[tools.setuptools_scm] +version_scheme = "guess-next-dev" +local_scheme = "node-and-date" + +[tool.setuptools] +packages = ["dev_tools"] From 52a21eea39df34f6ba1d3cefd8f1181c48a3fa7e Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 10:16:51 +0200 Subject: [PATCH 15/32] linting --- dev_tools/utils/nexus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py index 8e8ef771f..e797092d1 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nexus.py @@ -1557,7 +1557,7 @@ def process_nexus_master_file(self, parser): @click.command() @click.argument( - 'nexus_file', + "nexus_file", ) @click.option( 
"-d", From d3d101f862572fa619ae11f90a703a49e885842d Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 10:58:21 +0200 Subject: [PATCH 16/32] adjusted default location of definitions inside the module --- Makefile | 1 - dev_tools/utils/nexus.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 44c076c34..ae556d733 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,6 @@ PYTHON = python3 SPHINX = sphinx-build BUILD_DIR = "build" -export NEXUS_DEF_PATH = $(shell pwd) .PHONY: help install style autoformat test clean prepare html pdf impatient-guide all local diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nexus.py index e797092d1..4bcb1c9e9 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nexus.py @@ -45,7 +45,7 @@ def get_nexus_definitions_path(): return os.environ["NEXUS_DEF_PATH"] except KeyError: # or it should be available locally under the dir 'definitions' local_dir = os.path.abspath(os.path.dirname(__file__)) - return os.path.join(local_dir, f"..{os.sep}definitions") + return os.path.join(local_dir, f"..{os.sep}..") def get_hdf_root(hdf_node): From 025c0786858e80d416baf90c36b0d8dd100fb8d8 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 11:59:32 +0200 Subject: [PATCH 17/32] new characters as Code Camp suggested --- dev_tools/docs/nxdl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 03c72be21..c8c693641 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -668,7 +668,7 @@ def get_first_parent_ref(self, path, tag): ) parent_display_name = f"{parent_def_name[1:]}{parent_path}" return ( - f":abbr:`... (override: {parent_display_name})" - + f"`:ref:`🔗 `" + f":abbr:`⤆ (override: {parent_display_name})" + + f"`:ref:`... 
`" ) return "" From adf098ed87b9e03585a30c7f3094fed989d35fa3 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 12:08:40 +0200 Subject: [PATCH 18/32] make new char available for latex --- manual/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/manual/source/conf.py b/manual/source/conf.py index 346e664d9..638a66559 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -97,4 +97,5 @@ 'preamble': r''' \usepackage{amsbsy} \DeclareUnicodeCharacter{1F517}{X}''' + \DeclareUnicodeCharacter{2906}{<-}''' } From 222a3c0a09fbabcd722061c763838f7464a14221 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 12:17:08 +0200 Subject: [PATCH 19/32] make new char available for latex --- manual/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manual/source/conf.py b/manual/source/conf.py index 638a66559..6ee889c54 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -96,6 +96,6 @@ 'maxlistdepth':7, # some application definitions are deeply nested 'preamble': r''' \usepackage{amsbsy} - \DeclareUnicodeCharacter{1F517}{X}''' + \DeclareUnicodeCharacter{1F517}{X} \DeclareUnicodeCharacter{2906}{<-}''' } From 7252a4983fdd935c1ef4df4c3c3e038cb71ad4b1 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 12:18:08 +0200 Subject: [PATCH 20/32] make new char available for latex --- manual/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manual/source/conf.py b/manual/source/conf.py index 6ee889c54..dcd444c8e 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -97,5 +97,5 @@ 'preamble': r''' \usepackage{amsbsy} \DeclareUnicodeCharacter{1F517}{X} - \DeclareUnicodeCharacter{2906}{<-}''' + \DeclareUnicodeCharacter{2906}{<=}''' } From fd4b4a6f07b5dae23469dcd0bc68b3dccfc1ea52 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 17:27:11 +0200 Subject: [PATCH 21/32] collapsing doc_enum-s --- 
dev_tools/docs/nxdl.py | 55 +++++++++++++++++++++++++++--------------- manual/source/conf.py | 1 + 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index c8c693641..815e8b777 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -110,7 +110,7 @@ def _parse_nxdl_file(self, nxdl_file: Path): # print official description of this class self._print("") self._print("**Description**:\n") - self._print_doc(self._INDENTATION_UNIT, ns, root, required=True) + self._print_doc_enum("", ns, root, required=True) # print symbol list node_list = root.xpath("nx:symbols", namespaces=ns) @@ -120,7 +120,7 @@ def _parse_nxdl_file(self, nxdl_file: Path): elif len(node_list) > 1: raise Exception(f"Invalid symbol table in {nxclass_name}") else: - self._print_doc(self._INDENTATION_UNIT, ns, node_list[0]) + self._print_doc_enum("", ns, node_list[0]) for node in node_list[0].xpath("nx:symbol", namespaces=ns): doc = self._get_doc_line(ns, node) self._print(f" **{node.get('name')}**", end="") @@ -499,6 +499,35 @@ def _print_doc(self, indent, ns, node, required=False): self._print(f"{indent}{line}") self._print() + def long_doc(self, ns, node): + length = 0 + line = "documentation" + fnd = False + blocks = self._get_doc_blocks(ns, node) + for block in blocks: + lines = block.splitlines() + length += len(lines) + for single_line in lines: + if len(single_line) > 2 and single_line[0] != "." and not fnd: + fnd = True + line = single_line + return (length, line, blocks) + + def _print_doc_enum(self, indent, ns, node, required=False): + collapse_indent = indent + node_list = node.xpath("nx:enumeration", namespaces=ns) + (doclen, line, blocks) = self.long_doc(ns, node) + if len(node_list) + doclen > 1: + collapse_indent = f"{indent} " + self._print(f"{indent}{self._INDENTATION_UNIT}.. 
collapse:: {line} ...\n") + self._print_doc( + collapse_indent + self._INDENTATION_UNIT, ns, node, required=required + ) + if len(node_list) == 1: + self._print_enumeration( + collapse_indent + self._INDENTATION_UNIT, ns, node_list[0] + ) + def _print_attribute(self, ns, kind, node, optional, indent, parent_path): name = node.get("name") index_name = name @@ -509,10 +538,7 @@ def _print_attribute(self, ns, kind, node, optional, indent, parent_path): self._print( f"{indent}**@{name}**: {optional}{self._format_type(node)}{self._format_units(node)} {self.get_first_parent_ref(f'{parent_path}/{name}', 'attribute')}\n" ) - self._print_doc(indent + self._INDENTATION_UNIT, ns, node) - node_list = node.xpath("nx:enumeration", namespaces=ns) - if len(node_list) == 1: - self._print_enumeration(indent + self._INDENTATION_UNIT, ns, node_list[0]) + self._print_doc_enum(indent, ns, node) def _print_if_deprecated(self, ns, node, indent): deprecated = node.get("deprecated", None) @@ -555,13 +581,7 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): ) self._print_if_deprecated(ns, node, indent + self._INDENTATION_UNIT) - self._print_doc(indent + self._INDENTATION_UNIT, ns, node) - - node_list = node.xpath("nx:enumeration", namespaces=ns) - if len(node_list) == 1: - self._print_enumeration( - indent + self._INDENTATION_UNIT, ns, node_list[0] - ) + self._print_doc_enum(indent, ns, node) for subnode in node.xpath("nx:attribute", namespaces=ns): optional = self._get_required_or_optional_text(subnode) @@ -592,7 +612,7 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): ) self._print_if_deprecated(ns, node, indent + self._INDENTATION_UNIT) - self._print_doc(indent + self._INDENTATION_UNIT, ns, node) + self._print_doc_enum(indent, ns, node) for subnode in node.xpath("nx:attribute", namespaces=ns): optional = self._get_required_or_optional_text(subnode) @@ -623,7 +643,7 @@ def _print_full_tree(self, ns, parent, name, indent, parent_path): f"(suggested 
target: ``{node.get('target')}``)" "\n" ) - self._print_doc(indent + self._INDENTATION_UNIT, ns, node) + self._print_doc_enum(indent, ns, node) def _print(self, *args, end="\n"): # TODO: change instances of \t to proper indentation @@ -667,8 +687,5 @@ def get_first_parent_ref(self, path, tag): + parent_path[pos_of_right_slash + 1 :] ) parent_display_name = f"{parent_def_name[1:]}{parent_path}" - return ( - f":abbr:`⤆ (override: {parent_display_name})" - + f"`:ref:`... `" - ) + return f":ref:`⤆ `" return "" diff --git a/manual/source/conf.py b/manual/source/conf.py index dcd444c8e..a1f854be4 100644 --- a/manual/source/conf.py +++ b/manual/source/conf.py @@ -42,6 +42,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + 'sphinx_toolbox.collapse', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', From 90ff26b8aa6e37552b298b8f3a70e887d3f7afa6 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Fri, 16 Jun 2023 17:32:11 +0200 Subject: [PATCH 22/32] missing sphinx dependency --- pyproject.toml | 1 - requirements.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6a90ec573..d4cf990c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "black>=22.3", "flake8>=4", "isort>=5.10", - "click>=7.1.2", ] [project.urls] diff --git a/requirements.txt b/requirements.txt index ac0c65737..bbfd892f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ h5py>=3.6.0 # Documentation building sphinx>=5 sphinx-tabs +sphinx-toolbox # Testing pytest From dc325851e5f3ca4ab020fc22d76148de9acc32e3 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 14:53:03 +0200 Subject: [PATCH 23/32] nyaml2nxdl --- Makefile | 13 + dev_tools/nyaml2nxdl/README.md | 72 + dev_tools/nyaml2nxdl/__init__.py | 22 + dev_tools/nyaml2nxdl/comment_collector.py | 508 ++++++++ dev_tools/nyaml2nxdl/nyaml2nxdl.py | 227 ++++ 
.../nyaml2nxdl/nyaml2nxdl_backward_tools.py | 947 ++++++++++++++ .../nyaml2nxdl/nyaml2nxdl_forward_tools.py | 1161 +++++++++++++++++ dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py | 230 ++++ pyproject.toml | 43 + 9 files changed, 3223 insertions(+) create mode 100644 dev_tools/nyaml2nxdl/README.md create mode 100644 dev_tools/nyaml2nxdl/__init__.py create mode 100644 dev_tools/nyaml2nxdl/comment_collector.py create mode 100755 dev_tools/nyaml2nxdl/nyaml2nxdl.py create mode 100755 dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py create mode 100644 dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py create mode 100644 dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py create mode 100644 pyproject.toml diff --git a/Makefile b/Makefile index ae556d733..113e29db8 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ PYTHON = python3 SPHINX = sphinx-build BUILD_DIR = "build" +NXDL_DIRS := contributed_definitions applications base_classes .PHONY: help install style autoformat test clean prepare html pdf impatient-guide all local @@ -49,6 +50,9 @@ test :: clean :: $(RM) -rf $(BUILD_DIR) + for dir in $(NXDL_DIRS); do\ + $(RM) -rf $${dir}/nyaml;\ + done prepare :: $(PYTHON) -m dev_tools manual --prepare --build-root $(BUILD_DIR) @@ -83,6 +87,15 @@ all :: @echo "HTML built: `ls -lAFgh $(BUILD_DIR)/manual/build/html/index.html`" @echo "PDF built: `ls -lAFgh $(BUILD_DIR)/manual/build/latex/nexus.pdf`" +NXDLS := $(foreach dir,$(NXDL_DIRS),$(wildcard $(dir)/*.nxdl.xml)) +nyaml : $(DIRS) $(NXDLS) + for file in $^; do\ + mkdir -p "$${file%/*}/nyaml";\ + nyaml2nxdl --input-file $${file};\ + FNAME=$${file##*/};\ + mv -- "$${file%.nxdl.xml}_parsed.yaml" "$${file%/*}/nyaml/$${FNAME%.nxdl.xml}.yaml";\ + done + # NeXus - Neutron and X-ray Common Data Format # diff --git a/dev_tools/nyaml2nxdl/README.md b/dev_tools/nyaml2nxdl/README.md new file mode 100644 index 000000000..ff083e189 --- /dev/null +++ b/dev_tools/nyaml2nxdl/README.md @@ -0,0 +1,72 @@ +# YAML to NXDL converter and NXDL to YAML converter + 
+**NOTE: Please use python3.8 or above to run this converter** + +**Tools purpose**: Offer a simple YAML-based schema and a XML-based schema to describe NeXus instances. These can be NeXus application definitions or classes +such as base or contributed classes. Users either create NeXus instances by writing a YAML file or a XML file which details a hierarchy of data/metadata elements. +The forward (YAML -> NXDL.XML) and backward (NXDL.XML -> YAML) conversions are implemented. + +**How the tool works**: +- yaml2nxdl.py +1. Reads the user-specified NeXus instance, either in YML or XML format. +2. If input is in YAML, creates an instantiated NXDL schema XML tree by walking the dictionary nest. + If input is in XML, creates a YML file walking the dictionary nest. +3. Write the tree into a YAML file or a properly formatted NXDL XML schema file to disk. +4. Optionally, if --append argument is given, + the XML or YAML input file is interpreted as an extension of a base class and the entries contained in it + are appended below a standard NeXus base class. + You need to specify both your input file (with YAML or XML extension) and NeXus class (with no extension). + Both .yml and .nxdl.xml file of the extended class are printed. + +```console +user@box:~$ python yaml2nxdl.py + +Usage: python yaml2nxdl.py [OPTIONS] + +Options: + --input-file TEXT The path to the input data file to read. + --append TEXT Parse xml NeXus file and append to specified base class, + write the base class name with no extension. + --check-consistency Check consistency by generating another version of the input file. + E.g. for input file: NXexample.nxdl.xml the output file + NXexample_consistency.nxdl.xml. + --verbose Addictional std output info is printed to help debugging. + --help Show this message and exit. + +``` + +## Documentation + +**Rule set**: From transcoding YAML files we need to follow several rules. 
+* Named NeXus groups, which are instances of NeXus classes especially base or contributed classes. Creating (NXbeam) is a simple example of a request to define a group named according to NeXus default rules. mybeam1(NXbeam) or mybeam2(NXbeam) are examples how to create multiple named instances at the same hierarchy level. +* Members of groups so-called fields or attributes. A simple example of a member is voltage. Here the datatype is implied automatically as the default NeXus NX_CHAR type. By contrast, voltage(NX_FLOAT) can be used to instantiate a member of class which should be of NeXus type NX_FLOAT. +* And attributes of either groups or fields. Names of attributes have to be preceeded by \@ to mark them as attributes. +* Optionality: For all fields, groups and attributes in `application definitions` are `required` by default, except anything (`recommended` or `optional`) mentioned. + +**Special keywords**: Several keywords can be used as childs of groups, fields, and attributes to specify the members of these. Groups, fields and attributes are nodes of the XML tree. +* **doc**: A human-readable description/docstring +* **exists** Options are recommended, required, [min, 1, max, infty] numbers like here 1 can be replaced by any uint, or infty to indicate no restriction on how frequently the entry can occur inside the NXDL schema at the same hierarchy level. +* **link** Define links between nodes. +* **units** A statement introducing NeXus-compliant NXDL units arguments, like NX_VOLTAGE +* **dimensions** Details which dimensional arrays to expect +* **enumeration** Python list of strings which are considered as recommended entries to choose from. +* **dim_parameters** `dim` which is a child of `dimension` and the `dim` might have several attributes `ref`, +`incr` including `index` and `value`. So while writting `yaml` file schema definition please following structure: +``` +dimensions: + rank: integer value + dim: [[ind_1, val_1], [ind_2, val_2], ...] 
+ dim_parameters: + ref: [ref_value_1, ref_value_2, ...] + incr: [incr_value_1, incr_value_2, ...] +``` +Keep in mind that length of all the lists must be same. + +## Next steps + +The NOMAD team is currently working on the establishing of a one-to-one mapping between +NeXus definitions and the NOMAD MetaInfo. As soon as this is in place the YAML files will +be annotated with further metadata so that they can serve two purposes. +On the one hand they can serve as an instance for a schema to create a GUI representation +of a NOMAD Oasis ELN schema. On the other hand the YAML to NXDL converter will skip all +those pieces of information which are irrelevant from a NeXus perspective. diff --git a/dev_tools/nyaml2nxdl/__init__.py b/dev_tools/nyaml2nxdl/__init__.py new file mode 100644 index 000000000..22eb35f68 --- /dev/null +++ b/dev_tools/nyaml2nxdl/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +""" +# Load paths +""" +# -*- coding: utf-8 -*- +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/dev_tools/nyaml2nxdl/comment_collector.py b/dev_tools/nyaml2nxdl/comment_collector.py new file mode 100644 index 000000000..5f0c5e3bc --- /dev/null +++ b/dev_tools/nyaml2nxdl/comment_collector.py @@ -0,0 +1,508 @@ +#!usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. 
See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Collect comments in a list by CommentCollector class. Comment is a instance of Comment, +where each comment includes comment text and line info or neighbour info where the +comment must be assinged. + +The class Comment is an abstract class for general functions or method to be implemented +XMLComment and YAMLComment class. + +NOTE: Here comment block mainly stands for (comment text + line or element for what comment is +intended.) +""" + + +from typing import List, Type, Any, Tuple, Union, Dict +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader + +__all__ = ['Comment', 'CommentCollector', 'XMLComment', 'YAMLComment'] + + +# pylint: disable=inconsistent-return-statements +class CommentCollector: + """CommentCollector will store a full comment ('Comment') object in + _comment_chain. 
+ """ + + def __init__(self, input_file: str = None, + loaded_obj: Union[object, Dict] = None): + """ + Initialise CommentCollector + parameters: + input_file: raw input file (xml, yml) + loaded_obj: file loaded by third party library + """ + self._comment_chain: List = [] + self.file = input_file + self._comment_tracker = 0 + self._comment_hash: Dict[Tuple, Type[Comment]] = {} + self.comment: Type[Comment] + if self.file and not loaded_obj: + if self.file.split('.')[-1] == 'xml': + self.comment = XMLComment + if self.file.split('.')[-1] == 'yaml': + self.comment = YAMLComment + with open(self.file, "r", encoding="utf-8") as plain_text_yaml: + loader = LineLoader(plain_text_yaml) + self.comment.__yaml_dict__ = loader.get_single_data() + elif self.file and loaded_obj: + if self.file.split('.')[-1] == 'yaml' and isinstance(loaded_obj, dict): + self.comment = YAMLComment + self.comment.__yaml_dict__ = loaded_obj + else: + raise ValueError("Incorrect inputs for CommentCollector e.g. Wrong file extension.") + + else: + raise ValueError("Incorrect inputs for CommentCollector") + + def extract_all_comment_blocks(self): + """ + Collect all comments. Note that here comment means (comment text + element or line info + intended for comment. 
+ """ + id_ = 0 + single_comment = self.comment(comment_id=id_) + with open(self.file, mode='r', encoding='UTF-8') as enc_f: + lines = enc_f.readlines() + # Make an empty line for last comment if no empty lines in original file + if lines[-1] != '': + lines.append('') + for line_num, line in enumerate(lines): + if single_comment.is_storing_single_comment(): + # If the last comment comes without post nxdl fields, groups and attributes + if '++ SHA HASH ++' in line: + # Handle with stored nxdl.xml file that is not part of yaml + line = '' + single_comment.process_each_line(line + 'post_comment', (line_num + 1)) + self._comment_chain.append(single_comment) + break + if line_num < (len(lines) - 1): + # Processing file from Line number 1 + single_comment.process_each_line(line, (line_num + 1)) + else: + # For processing last line of file + single_comment.process_each_line(line + 'post_comment', (line_num + 1)) + self._comment_chain.append(single_comment) + else: + self._comment_chain.append(single_comment) + single_comment = self.comment(last_comment=single_comment) + single_comment.process_each_line(line, (line_num + 1)) + + def get_comment(self): + """ + Return comment from comment_chain that must come earlier in order. + """ + return self._comment_chain[self._comment_tracker] + + def get_coment_by_line_info(self, comment_locs: Tuple[str, Union[int, str]]): + """ + Get comment using line information. + """ + if comment_locs in self._comment_hash: + return self._comment_hash[comment_locs] + + line_annot, line_loc = comment_locs + for cmnt in self._comment_chain: + if line_annot in cmnt: + line_loc_ = cmnt.get_line_number(line_annot) + if line_loc == line_loc_: + self._comment_hash[comment_locs] = cmnt + return cmnt + + def remove_comment(self, ind): + """Remove a comment from comment list. + """ + if ind < len(self._comment_chain): + del self._comment_chain[ind] + else: + raise ValueError("Oops! 
Index is out of range.") + + def reload_comment(self): + """ + Update self._comment_tracker after done with last comment. + """ + self._comment_tracker += 1 + + def __contains__(self, comment_locs: tuple): + """ + Confirm wether the comment corresponds to key_line and line_loc + is exist or not. + comment_locs is equvalant to (line_annotation, line_loc) e.g. + (__line__doc and 35) + """ + if not isinstance(comment_locs, tuple): + raise TypeError("Comment_locs should be 'tuple' containing line annotation " + "(e.g.__line__doc) and line_loc (e.g. 35).") + line_annot, line_loc = comment_locs + for cmnt in self._comment_chain: + if line_annot in cmnt: + line_loc_ = cmnt.get_line_number(line_annot) + if line_loc == line_loc_: + self._comment_hash[comment_locs] = cmnt + return True + return False + + def __getitem__(self, ind): + """Get comment from self.obj._comment_chain by index. + """ + if isinstance(ind, int): + if ind >= len(self._comment_chain): + raise IndexError(f'Oops! Comment index {ind} in {__class__} is out of range!') + return self._comment_chain[ind] + + if isinstance(ind, slice): + start_n = ind.start or 0 + end_n = ind.stop or len(self._comment_chain) + return self._comment_chain[start_n:end_n] + + def __iter__(self): + """get comment ieratively + """ + return iter(self._comment_chain) + + +# pylint: disable=too-many-instance-attributes +class Comment: + """ + This class is building yaml comment and the intended line for what comment is written. + """ + + def __init__(self, + comment_id: int = -1, + last_comment: 'Comment' = None) -> None: + """Comment object can be considered as a block element that includes + document element (an entity for what the comment is written). 
+ """ + self._elemt: Any = None + self._elemt_text: str = None + self._is_elemt_found: bool = None + self._is_elemt_stored: bool = None + + self._comnt: str = '' + # If Multiple comments for one element or entity + self._comnt_list: List[str] = [] + self.last_comment: 'Comment' = last_comment if last_comment else None + if comment_id >= 0 and last_comment: + self.cid = comment_id + self.last_comment = last_comment + elif comment_id == 0 and not last_comment: + self.cid = comment_id + self.last_comment = None + elif last_comment: + self.cid = self.last_comment.cid + 1 + self.last_comment = last_comment + else: + raise ValueError("Neither last comment nor comment id dound") + self._comnt_start_found: bool = False + self._comnt_end_found: bool = False + self.is_storing_single_comment = lambda: not (self._comnt_end_found + and self._is_elemt_stored) + + def get_comment_text(self) -> Union[List, str]: + """ + Extract comment text from entrire comment (comment text + elment or + line for what comment is intended) + """ + + def append_comment(self, text: str) -> None: + """ + Append lines of the same comment. + """ + + def store_element(self, args) -> None: + """ + Strore comment text and line or element that is intended for comment. + """ + + +class XMLComment(Comment): + """ + XMLComment to store xml comment element. + """ + + def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: + super().__init__(comment_id, last_comment) + + def process_each_line(self, text, line_num): + """Take care of each line of text. Through which function the text + must be passed should be decide here. 
+ """ + text = text.strip() + if text and line_num: + self.append_comment(text) + if self._comnt_end_found and not self._is_elemt_found: + # for multiple comment if exist + if self._comnt: + self._comnt_list.append(self._comnt) + self._comnt = '' + + if self._comnt_end_found: + self.store_element(text) + + def append_comment(self, text: str) -> None: + # Comment in single line + if '' == text[-4:]: + self._comnt_end_found = True + self._comnt_start_found = False + self._comnt = self._comnt.replace('-->', '') + + elif '-->' == text[0:4] and self._comnt_start_found: + self._comnt_end_found = True + self._comnt_start_found = False + self._comnt = self._comnt + '\n' + text.replace('-->', '') + elif self._comnt_start_found: + self._comnt = self._comnt + '\n' + text + + # pylint: disable=arguments-differ, arguments-renamed + def store_element(self, text) -> None: + def collect_xml_attributes(text_part): + for part in text_part: + part = part.strip() + if part and '">' == ''.join(part[-2:]): + self._is_elemt_stored = True + self._is_elemt_found = False + part = ''.join(part[0:-2]) + elif part and '"/>' == ''.join(part[-3:]): + self._is_elemt_stored = True + self._is_elemt_found = False + part = ''.join(part[0:-3]) + elif part and '/>' == ''.join(part[-2:]): + self._is_elemt_stored = True + self._is_elemt_found = False + part = ''.join(part[0:-2]) + elif part and '>' == part[-1]: + self._is_elemt_stored = True + self._is_elemt_found = False + part = ''.join(part[0:-1]) + elif part and '"' == part[-1]: + part = ''.join(part[0:-1]) + + if '="' in part: + lf_prt, rt_prt = part.split('="') + else: + continue + if ':' in lf_prt: + continue + self._elemt[lf_prt] = str(rt_prt) + if not self._elemt: + self._elemt = {} + # First check for comment part has been collected prefectly + if ' Union[List, str]: + """ + This method returns list of commnent text. As some xml element might have + multiple separated comment intended for a single element. 
+ """ + return self._comnt_list + + +class YAMLComment(Comment): + """ + This class for stroing comment text as well as location of the comment e.g. line + number of other in the file. + NOTE: + 1. Do not delete any element form yaml dictionary (for loaded_obj. check: Comment_collector + class. because this loaded file has been exploited in nyaml2nxdl forward tools.) + """ + # Class level variable. The main reason behind that to follow structure of + # abstract class 'Comment' + __yaml_dict__: dict = {} + __yaml_line_info: dict = {} + __comment_escape_char = {'--': '-\\-'} + + def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: + """Initialization of YAMLComment follow Comment class. + """ + super().__init__(comment_id, last_comment) + self.collect_yaml_line_info(YAMLComment.__yaml_dict__, YAMLComment.__yaml_line_info) + + def process_each_line(self, text, line_num): + """Take care of each line of text. Through which function the text + must be passed should be decide here. + """ + text = text.strip() + self.append_comment(text) + if self._comnt_end_found and not self._is_elemt_found: + if self._comnt: + self._comnt_list.append(self._comnt) + self._comnt = '' + + if self._comnt_end_found: + line_key = '' + if ':' in text: + ind = text.index(':') + line_key = '__line__' + ''.join(text[0:ind]) + + for l_num, l_key in self.__yaml_line_info.items(): + if line_num == int(l_num) and line_key == l_key: + self.store_element(line_key, line_num) + break + # Comment comes very end of the file + if text == 'post_comment' and line_key == '': + line_key = '__line__post_comment' + self.store_element(line_key, line_num) + + def has_post_comment(self): + """ + Ensure is this a post coment or not. + Post comment means the comment that come at the very end without having any + nxdl element(class, group, filed and attribute.) 
+ """ + for key, _ in self._elemt.items(): + if '__line__post_comment' == key: + return True + return False + + def append_comment(self, text: str) -> None: + """ + Collects all the line of the same comment and + append them with that single comment. + """ + # check for escape char + text = self.replace_scape_char(text) + # Empty line after last line of comment + if not text and self._comnt_start_found: + self._comnt_end_found = True + self._comnt_start_found = False + # For empty line inside doc or yaml file. + elif not text: + return + elif '# ' == ''.join(text[0:2]): + self._comnt_start_found = True + self._comnt_end_found = False + self._comnt = '' if not self._comnt else self._comnt + '\n' + self._comnt = self._comnt + ''.join(text[2:]) + elif '#' == text[0]: + self._comnt_start_found = True + self._comnt_end_found = False + self._comnt = '' if not self._comnt else self._comnt + '\n' + self._comnt = self._comnt + ''.join(text[1:]) + elif 'post_comment' == text: + self._comnt_end_found = True + self._comnt_start_found = False + # for any line after 'comment block' found + elif self._comnt_start_found: + self._comnt_start_found = False + self._comnt_end_found = True + + # pylint: disable=arguments-differ + def store_element(self, line_key, line_number): + """ + Store comment content and information of commen location (for what comment is + created.). + """ + self._elemt = {} + self._elemt[line_key] = int(line_number) + self._is_elemt_found = False + self._is_elemt_stored = True + + def get_comment_text(self): + """ + Return list of comments if there are multiple comment for same yaml line. + """ + return self._comnt_list + + def get_line_number(self, line_key): + """ + Retrun line number for what line the comment is created + """ + return self._elemt[line_key] + + def get_line_info(self): + """ + Return line annotation and line number from a comment. 
+ """ + for line_anno, line_loc in self._elemt.items(): + return line_anno, line_loc + + def replace_scape_char(self, text): + """Replace escape char according to __comment_escape_char dict + """ + for ecp_char, ecp_alt in YAMLComment.__comment_escape_char.items(): + if ecp_char in text: + text = text.replace(ecp_char, ecp_alt) + return text + + def get_element_location(self): + """ + Retrun yaml line '__line__KEY' info and and line numner + """ + if len(self._elemt) > 1: + raise ValueError(f"Comment element should be one but got " + f"{self._elemt}") + + for key, val in self._elemt.items(): + yield key, val + + def collect_yaml_line_info(self, yaml_dict, line_info_dict): + """Collect __line__key and corresponding value from + a yaml file dictonary in another dictionary. + """ + for line_key, line_n in yaml_dict.items(): + if '__line__' in line_key: + line_info_dict[line_n] = line_key + + for _, val in yaml_dict.items(): + if isinstance(val, dict): + self.collect_yaml_line_info(val, line_info_dict) + + def __contains__(self, line_key): + """For Checking whether __line__NAME is in _elemt dict or not.""" + return line_key in self._elemt + + def __eq__(self, comment_obj): + """Check the self has same value as right comment. + """ + if len(self._comnt_list) != len(comment_obj._comnt_list): + return False + for left_cmnt, right_cmnt in zip(self._comnt_list, comment_obj._comnt_list): + left_cmnt = left_cmnt.split('\n') + right_cmnt = right_cmnt.split('\n') + for left_line, right_line in zip(left_cmnt, right_cmnt): + if left_line.strip() != right_line.strip(): + return False + return True diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl.py b/dev_tools/nyaml2nxdl/nyaml2nxdl.py new file mode 100755 index 000000000..160b3f830 --- /dev/null +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +"""Main file of yaml2nxdl tool. 
+Users create NeXus instances by writing a YAML file +which details a hierarchy of data/metadata elements + +""" +# -*- coding: utf-8 -*- +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import xml.etree.ElementTree as ET + +import click +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_sha256_hash, + extend_yamlfile_with_comment, + separate_hash_yaml_and_nxdl) +from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import nyaml2nxdl, pretty_print_xml +from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import (Nxdl2yaml, + compare_niac_and_my) + + +DEPTH_SIZE = 4 * " " + +# NOTE: Some handful links for nyaml2nxdl converter: +# https://manual.nexusformat.org/nxdl_desc.html?highlight=optional + + +def generate_nxdl_or_retrieve_nxdl(yaml_file, out_xml_file, verbose): + """ + Generate yaml, nxdl and hash. + if the extracted hash is exactly the same as producd from generated yaml then + retrieve the nxdl part from provided yaml. 
+ Else, generate nxdl from separated yaml with the help of nyaml2nxdl function + """ + pa_path, rel_file = os.path.split(yaml_file) + sep_yaml = os.path.join(pa_path, f'temp_{rel_file}') + hash_found = separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, out_xml_file) + + if hash_found: + gen_hash = get_sha256_hash(sep_yaml) + if hash_found == gen_hash: + os.remove(sep_yaml) + return + + nyaml2nxdl(sep_yaml, out_xml_file, verbose) + os.remove(sep_yaml) + + +# pylint: disable=too-many-locals +def append_yml(input_file, append, verbose): + """Append to an existing NeXus base class new elements provided in YML input file \ +and print both an XML and YML file of the extended base class. + +""" + nexus_def_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../definitions') + assert [s for s in os.listdir(os.path.join(nexus_def_path, 'base_classes') + ) if append.strip() == s.replace('.nxdl.xml', '')], \ + 'Your base class extension does not match any existing NeXus base classes' + tree = ET.parse(os.path.join(nexus_def_path + '/base_classes', append + '.nxdl.xml')) + root = tree.getroot() + # warning: tmp files are printed on disk and removed at the ends!! 
+ pretty_print_xml(root, 'tmp.nxdl.xml') + input_tmp_xml = 'tmp.nxdl.xml' + out_tmp_yml = 'tmp_parsed.yaml' + converter = Nxdl2yaml([], []) + converter.print_yml(input_tmp_xml, out_tmp_yml, verbose) + nyaml2nxdl(input_file=out_tmp_yml, + out_file='tmp_parsed.nxdl.xml', + verbose=verbose) + tree = ET.parse('tmp_parsed.nxdl.xml') + tree2 = ET.parse(input_file) + root_no_duplicates = ET.Element( + 'definition', {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1', + 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation': 'http://www.w3.org/2001/XMLSchema-instance' + } + ) + for attribute_keys in root.attrib.keys(): + if attribute_keys != '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': + attribute_value = root.attrib[attribute_keys] + root_no_duplicates.set(attribute_keys, attribute_value) + for elems in root.iter(): + if 'doc' in elems.tag: + root_doc = ET.SubElement(root_no_duplicates, 'doc') + root_doc.text = elems.text + break + group = '{http://definition.nexusformat.org/nxdl/3.1}group' + root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, + group, + root_no_duplicates) + field = '{http://definition.nexusformat.org/nxdl/3.1}field' + root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, + field, + root_no_duplicates) + attribute = '{http://definition.nexusformat.org/nxdl/3.1}attribute' + root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, + attribute, + root_no_duplicates) + pretty_print_xml(root_no_duplicates, f"{input_file.replace('.nxdl.xml', '')}" + f"_appended.nxdl.xml") + + input_file_xml = input_file.replace('.nxdl.xml', "_appended.nxdl.xml") + out_file_yml = input_file.replace('.nxdl.xml', "_appended_parsed.yaml") + converter = Nxdl2yaml([], []) + converter.print_yml(input_file_xml, out_file_yml, verbose) + nyaml2nxdl(input_file=out_file_yml, + out_file=out_file_yml.replace('.yaml', '.nxdl.xml'), + verbose=verbose) + os.rename(f"{input_file.replace('.nxdl.xml', 
'_appended_parsed.yaml')}", + f"{input_file.replace('.nxdl.xml', '_appended.yaml')}") + os.rename(f"{input_file.replace('.nxdl.xml', '_appended_parsed.nxdl.xml')}", + f"{input_file.replace('.nxdl.xml', '_appended.nxdl.xml')}") + os.remove('tmp.nxdl.xml') + os.remove('tmp_parsed.yaml') + os.remove('tmp_parsed.nxdl.xml') + + +def split_name_and_extension(file_name): + """ + Split file name into extension and rest of the file name. + return file raw nam and extension + """ + parts = file_name.rsplit('.', 3) + if len(parts) == 2: + raw = parts[0] + ext = parts[1] + if len(parts) == 3: + raw = parts[0] + ext = '.'.join(parts[1:]) + + return raw, ext + + +@click.command() +@click.option( + '--input-file', + required=True, + prompt=True, + help='The path to the XML or YAML input data file to read and create \ +a YAML or XML file from, respectively.' +) +@click.option( + '--append', + help='Parse xml file and append to base class, given that the xml file has same name \ +of an existing base class' +) +@click.option( + '--check-consistency', + is_flag=True, + default=False, + help=('Check wether yaml or nxdl has followed general rules of scema or not' + 'check whether your comment in the right place or not. The option render an ' + 'output file of the same extension(*_consistency.yaml or *_consistency.nxdl.xml)') +) +@click.option( + '--verbose', + is_flag=True, + default=False, + help='Print in standard output keywords and value types to help \ +possible issues in yaml files' +) +def launch_tool(input_file, verbose, append, check_consistency): + """ + Main function that distiguishes the input file format and launches the tools. 
+ """ + if os.path.isfile(input_file): + raw_name, ext = split_name_and_extension(input_file) + else: + raise ValueError("Need a valid input file.") + + if ext == 'yaml': + xml_out_file = raw_name + '.nxdl.xml' + generate_nxdl_or_retrieve_nxdl(input_file, xml_out_file, verbose) + if append: + append_yml(raw_name + '.nxdl.xml', + append, + verbose + ) + # For consistency running + if check_consistency: + yaml_out_file = raw_name + '_consistency.' + ext + converter = Nxdl2yaml([], []) + converter.print_yml(xml_out_file, yaml_out_file, verbose) + os.remove(xml_out_file) + elif ext == 'nxdl.xml': + if not append: + yaml_out_file = raw_name + '_parsed' + '.yaml' + converter = Nxdl2yaml([], []) + converter.print_yml(input_file, yaml_out_file, verbose) + # Append nxdl.xml file with yaml output file + yaml_hash = get_sha256_hash(yaml_out_file) + # Lines as divider between yaml and nxdl + top_lines = [('\n# ++++++++++++++++++++++++++++++++++ SHA HASH' + ' ++++++++++++++++++++++++++++++++++\n'), + f'# {yaml_hash}\n'] + + extend_yamlfile_with_comment(yaml_file=yaml_out_file, + file_to_be_appended=input_file, + top_lines_list=top_lines) + else: + append_yml(input_file, append, verbose) + # Taking care of consistency running + if check_consistency: + xml_out_file = raw_name + '_consistency.' + ext + generate_nxdl_or_retrieve_nxdl(yaml_out_file, xml_out_file, verbose) + os.remove(yaml_out_file) + else: + raise ValueError("Provide correct file with extension '.yaml or '.nxdl.xml") + + +if __name__ == '__main__': + launch_tool().parse() # pylint: disable=no-value-for-parameter diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py new file mode 100755 index 000000000..72f5a6c42 --- /dev/null +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py @@ -0,0 +1,947 @@ +#!/usr/bin/env python3 +"""This file collects the function used in the reverse tool nxdl2yaml. 
+ +""" +# -*- coding: utf-8 -*- +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import sys +from typing import List, Dict +import xml.etree.ElementTree as ET +import os + +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_node_parent_info, + get_yaml_escape_char_dict, + cleaning_empty_lines) +from pynxtools.dataconverter.helpers import remove_namespace_from_tag + + +DEPTH_SIZE = " " +CMNT_TAG = '!--' + + +def separate_pi_comments(input_file): + """ + Separate PI comments from ProcessesInstruction (pi) + """ + comments_list = [] + comment = [] + xml_lines = [] + + with open(input_file, "r", encoding='utf-8') as file: + lines = file.readlines() + has_pi = True + for line in lines: + c_start = '' + def_tag = ' 0 and has_pi: + comment.append(line.replace(cmnt_end, '')) + comments_list.append(''.join(comment)) + comment = [] + elif def_tag in line or not has_pi: + has_pi = False + xml_lines.append(line) + elif len(comment) > 0 and has_pi: + comment.append(line) + else: + xml_lines.append(line) + return comments_list, ''.join(xml_lines) + + +# Collected: https://dustinoprea.com/2019/01/22/python-parsing-xml-and-retaining-the-comments/ +class _CommentedTreeBuilder(ET.TreeBuilder): + + def comment(self, text): + """ + defining comment builder in TreeBuilder + """ + self.start('!--', {}) + self.data(text) + self.end('--') + + +def parse(filepath): + """ + 
Construct parse function for modified tree builder for including modified TreeBuilder + and rebuilding XMLParser. + """ + comments, xml_str = separate_pi_comments(filepath) + ctb = _CommentedTreeBuilder() + xp_parser = ET.XMLParser(target=ctb) + root = ET.fromstring(xml_str, parser=xp_parser) + return comments, root + + +def handle_mapping_char(text, depth=-1, skip_n_line_on_top=False): + """Check for ":" char and replace it by "':'". """ + + escape_char = get_yaml_escape_char_dict() + for esc_key, val in escape_char.items(): + if esc_key in text: + text = text.replace(esc_key, val) + if not skip_n_line_on_top: + if depth > 0: + text = add_new_line_with_pipe_on_top(text, depth) + else: + raise ValueError("Need depth size to co-ordinate text line in yaml file.") + return text + + +def add_new_line_with_pipe_on_top(text, depth): + """ + Return modified text for what we get error in converter, such as ':'. After adding a + new line at the start of text the error is solved. + """ + char_list_to_add_new_line_on_top_of_text = [":"] + for char in char_list_to_add_new_line_on_top_of_text: + if char in text: + return '|' + '\n' + depth * DEPTH_SIZE + text + return text + + +# pylint: disable=too-many-instance-attributes +class Nxdl2yaml(): + """ + Parse XML file and print a YML file + """ + + def __init__( + self, + symbol_list: List[str], + root_level_definition: List[str], + root_level_doc='', + root_level_symbols=''): + + # updated part of yaml_dict + self.found_definition = False + self.root_level_doc = root_level_doc + self.root_level_symbols = root_level_symbols + self.root_level_definition = root_level_definition + self.symbol_list = symbol_list + self.is_last_element_comment = False + self.include_comment = True + self.pi_comments = None + # NOTE: Here is how root_level_comments organised for storing comments + # root_level_comment= {'root_doc': comment, + # 'symbols': comment, + # The 'symbol_doc_comments' list is for comments from all 'symbol doc' + # 
'symbol_doc_comments' : [comments] + # 'symbol_list': [symbols], + # The 'symbol_comments' contains comments for 'symbols doc' and all 'symbol' + # 'symbol_comments': [comments]} + self.root_level_comment: Dict[str, str] = {} + + def print_yml(self, input_file, output_yml, verbose): + """ + Parse an XML file provided as input and print a YML file + """ + if os.path.isfile(output_yml): + os.remove(output_yml) + + depth = 0 + + self.pi_comments, root = parse(input_file) + xml_tree = {'tree': root, 'node': root} + self.xmlparse(output_yml, xml_tree, depth, verbose) + + def handle_symbols(self, depth, node): + """Handle symbols field and its childs symbol""" + + # pylint: disable=consider-using-f-string + self.root_level_symbols = ( + f"{remove_namespace_from_tag(node.tag)}: " + f"{node.text.strip() if node.text else ''}" + ) + depth += 1 + last_comment = '' + sbl_doc_cmnt_list = [] + # Comments that come above symbol tag + symbol_cmnt_list = [] + for child in list(node): + tag = remove_namespace_from_tag(child.tag) + if tag == CMNT_TAG and self.include_comment: + last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) + if tag == 'doc': + symbol_cmnt_list.append(last_comment) + # The bellow line is for handling lenth of 'symbol_comments' and + # 'symbol_doc_comments'. 
Otherwise print_root_level_info() gets inconsistency + # over for the loop while writting comment on file + sbl_doc_cmnt_list.append('') + last_comment = '' + self.symbol_list.append(self.handle_not_root_level_doc(depth, + text=child.text)) + elif tag == 'symbol': + # place holder is symbol name + symbol_cmnt_list.append(last_comment) + last_comment = '' + if 'doc' in child.attrib: + self.symbol_list.append( + self.handle_not_root_level_doc(depth, + tag=child.attrib['name'], + text=child.attrib['doc'])) + else: + for symbol_doc in list(child): + tag = remove_namespace_from_tag(symbol_doc.tag) + if tag == CMNT_TAG and self.include_comment: + last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, + symbol_doc.text) + if tag == 'doc': + sbl_doc_cmnt_list.append(last_comment) + last_comment = '' + self.symbol_list.append( + self.handle_not_root_level_doc(depth, + tag=child.attrib['name'], + text=symbol_doc.text)) + self.store_root_level_comments('symbol_doc_comments', sbl_doc_cmnt_list) + self.store_root_level_comments('symbol_comments', symbol_cmnt_list) + + def store_root_level_comments(self, holder, comment): + """Store yaml text or section line and the comments inteded for that lines or section""" + + self.root_level_comment[holder] = comment + + def handle_definition(self, node): + """ + Handle definition group and its attributes + NOTE: Here we tried to store the order of the xml element attributes. So that we get + exactly the same file in nxdl from yaml. 
+ """ + # pylint: disable=consider-using-f-string + # self.root_level_definition[0] = '' + keyword = '' + # tmp_word for reseving the location + tmp_word = "#xx#" + attribs = node.attrib + # for tracking the order of name and type + keyword_order = -1 + for item in attribs: + if "name" in item: + keyword = keyword + attribs[item] + if keyword_order == -1: + self.root_level_definition.append(tmp_word) + keyword_order = self.root_level_definition.index(tmp_word) + elif "extends" in item: + keyword = f"{keyword}({attribs[item]})" + if keyword_order == -1: + self.root_level_definition.append(tmp_word) + keyword_order = self.root_level_definition.index(tmp_word) + elif 'schemaLocation' not in item \ + and 'extends' != item: + text = f"{item}: {attribs[item]}" + self.root_level_definition.append(text) + self.root_level_definition[keyword_order] = f"{keyword}:" + + def handle_root_level_doc(self, node): + """ + Handle the documentation field found at root level. + """ + # tag = remove_namespace_from_tag(node.tag) + text = node.text + text = self.handle_not_root_level_doc(depth=0, text=text) + self.root_level_doc = text + + # pylint: disable=too-many-branches + def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): + """ + Handle docs field along the yaml file. In this function we also tried to keep + the track of intended indentation. E.g. the bollow doc block. + * Topic name + Description of topic + """ + + # Handling empty doc + if not text: + text = "" + else: + text = handle_mapping_char(text, -1, True) + if "\n" in text: + # To remove '\n' character as it will be added before text. 
+ text = cleaning_empty_lines(text.split('\n')) + text_tmp = [] + yaml_indent_n = len((depth + 1) * DEPTH_SIZE) + # Find indentaion in the first text line with alphabet + tmp_i = 0 + while tmp_i != -1: + first_line_indent_n = 0 + # Taking care of empty text whitout any character + if len(text) == 1 and text[0] == '': + break + for ch_ in text[tmp_i]: + if ch_ == ' ': + first_line_indent_n = first_line_indent_n + 1 + elif ch_ != '': + tmp_i = -2 + break + tmp_i = tmp_i + 1 + # Taking care of doc like bellow: + # Text liness + # text continues + # So no indentaion at the staring or doc. So doc group will come along general + # alignment + if first_line_indent_n == 0: + first_line_indent_n = yaml_indent_n + + # for indent_diff -ve all lines will move left by the same ammout + # for indect_diff +ve all lines will move right the same amount + indent_diff = yaml_indent_n - first_line_indent_n + # CHeck for first line empty if not keep first line empty + + for _, line in enumerate(text): + line_indent_n = 0 + # Collect first empty space without alphabate + for ch_ in line: + if ch_ == ' ': + line_indent_n = line_indent_n + 1 + else: + break + line_indent_n = line_indent_n + indent_diff + if line_indent_n < yaml_indent_n: + # if line still under yaml identation + text_tmp.append(yaml_indent_n * ' ' + line.strip()) + else: + text_tmp.append(line_indent_n * ' ' + line.strip()) + + text = '\n' + '\n'.join(text_tmp) + if "}" in tag: + tag = remove_namespace_from_tag(tag) + indent = depth * DEPTH_SIZE + elif text: + text = '\n' + (depth + 1) * DEPTH_SIZE + text.strip() + if "}" in tag: + tag = remove_namespace_from_tag(tag) + indent = depth * DEPTH_SIZE + else: + text = "" + if "}" in tag: + tag = remove_namespace_from_tag(tag) + indent = depth * DEPTH_SIZE + + doc_str = f"{indent}{tag}: |{text}\n" + if file_out: + file_out.write(doc_str) + return None + return doc_str + + def write_out(self, indent, text, file_out): + """ + Write text line in output file. 
+ """ + line_string = f"{indent}{text.rstrip()}\n" + file_out.write(line_string) + + def print_root_level_doc(self, file_out): + """ + Print at the root level of YML file \ + the general documentation field found in XML file + """ + indent = 0 * DEPTH_SIZE + + if ('root_doc' in self.root_level_comment + and self.root_level_comment['root_doc'] != ''): + text = self.root_level_comment['root_doc'] + self.write_out(indent, text, file_out) + + text = self.root_level_doc + self.write_out(indent, text, file_out) + self.root_level_doc = '' + + def comvert_to_ymal_comment(self, indent, text): + """ + Convert into yaml comment by adding exta '#' char in front of comment lines + """ + lines = text.split('\n') + mod_lines = [] + for line in lines: + line = line.strip() + if line and line[0] != '#': + line = indent + '# ' + line + mod_lines.append(line) + elif line: + line = indent + line + mod_lines.append(line) + # The starting '\n' to keep multiple comments separate + return '\n' + '\n'.join(mod_lines) + + def print_root_level_info(self, depth, file_out): + """ + Print at the root level of YML file \ + the information stored as definition attributes in the XML file + """ + # pylint: disable=consider-using-f-string + if depth < 0: + raise ValueError("Somthing wrong with indentaion in root level.") + + has_categoty = False + for def_line in self.root_level_definition: + if def_line in ("category: application", "category: base"): + self.write_out(indent=0 * DEPTH_SIZE, text=def_line, file_out=file_out) + # file_out.write(f"{def_line}\n") + has_categoty = True + + if not has_categoty: + raise ValueError("Definition dose not get any category from 'base or application'.") + self.print_root_level_doc(file_out) + if 'symbols' in self.root_level_comment and self.root_level_comment['symbols'] != '': + indent = depth * DEPTH_SIZE + text = self.root_level_comment['symbols'] + self.write_out(indent, text, file_out) + if self.root_level_symbols: + self.write_out(indent=0 * DEPTH_SIZE, 
text=self.root_level_symbols, file_out=file_out) + # symbol_list include 'symbols doc', and all 'symbol' + for ind, symbol in enumerate(self.symbol_list): + # Taking care of comments that come on to of 'symbols doc' and 'symbol' + if 'symbol_comments' in self.root_level_comment and \ + self.root_level_comment['symbol_comments'][ind] != '': + indent = depth * DEPTH_SIZE + self.write_out(indent, + self.root_level_comment['symbol_comments'][ind], file_out) + if 'symbol_doc_comments' in self.root_level_comment and \ + self.root_level_comment['symbol_doc_comments'][ind] != '': + + indent = depth * DEPTH_SIZE + self.write_out(indent, + self.root_level_comment['symbol_doc_comments'][ind], file_out) + + self.write_out(indent=(0 * DEPTH_SIZE), text=symbol, file_out=file_out) + if len(self.pi_comments) > 1: + indent = DEPTH_SIZE * depth + # The first comment is top level copy-right doc string + for comment in self.pi_comments[1:]: + self.write_out(indent, self.comvert_to_ymal_comment(indent, comment), file_out) + if self.root_level_definition: + # Soring NXname for writting end of the definition attributes + nx_name = '' + for defs in self.root_level_definition: + if 'NX' in defs and defs[-1] == ':': + nx_name = defs + continue + if defs in ("category: application", "category: base"): + continue + self.write_out(indent=0 * DEPTH_SIZE, text=defs, file_out=file_out) + self.write_out(indent=0 * DEPTH_SIZE, text=nx_name, file_out=file_out) + self.found_definition = False + + def handle_exists(self, exists_dict, key, val): + """ + Create exist component as folows: + + {'min' : value for min, + 'max' : value for max, + 'optional' : value for optional} + + This is created separately so that the keys stays in order. 
+ """ + if not val: + val = '' + else: + val = str(val) + if 'minOccurs' == key: + exists_dict['minOccurs'] = ['min', val] + if 'maxOccurs' == key: + exists_dict['maxOccurs'] = ['max', val] + if 'optional' == key: + exists_dict['optional'] = ['optional', val] + if 'recommended' == key: + exists_dict['recommended'] = ['recommended', val] + if 'required' == key: + exists_dict['required'] = ['required', val] + + # pylint: disable=too-many-branches, consider-using-f-string + def handle_group_or_field(self, depth, node, file_out): + """Handle all the possible attributes that come along a field or group""" + + allowed_attr = ['optional', 'recommended', 'name', 'type', 'axes', 'axis', 'data_offset', + 'interpretation', 'long_name', 'maxOccurs', 'minOccurs', 'nameType', + 'optional', 'primary', 'signal', 'stride', 'units', 'required', + 'deprecated', 'exists'] + + name_type = "" + node_attr = node.attrib + rm_key_list = [] + # Maintain order: name and type in form name(type) or (type)name that come first + for key, val in node_attr.items(): + if key == 'name': + name_type = name_type + val + rm_key_list.append(key) + if key == 'type': + name_type = name_type + "(%s)" % val + rm_key_list.append(key) + if not name_type: + raise ValueError(f"No 'name' or 'type' hase been found. 
But, 'group' or 'field' " + f"must have at list a nme.We got attributes: {node_attr}") + file_out.write('{indent}{name_type}:\n'.format( + indent=depth * DEPTH_SIZE, + name_type=name_type)) + + for key in rm_key_list: + del node_attr[key] + + # tmp_dict intended to persevere order of attribnutes + tmp_dict = {} + exists_dict = {} + for key, val in node_attr.items(): + # As both 'minOccurs', 'maxOccurs' and optionality move to the 'exists' + if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']: + if 'exists' not in tmp_dict: + tmp_dict['exists'] = [] + self.handle_exists(exists_dict, key, val) + elif key == 'units': + tmp_dict['unit'] = str(val) + else: + tmp_dict[key] = str(val) + if key not in allowed_attr: + raise ValueError(f"An attribute ({key}) in 'field' or 'group' has been found " + f"that is not allowed. The allowed attr is {allowed_attr}.") + + if exists_dict: + for key, val in exists_dict.items(): + if key in ['minOccurs', 'maxOccurs']: + tmp_dict['exists'] = tmp_dict['exists'] + val + elif key in ['optional', 'recommended', 'required']: + tmp_dict['exists'] = key + + depth_ = depth + 1 + for key, val in tmp_dict.items(): + # Increase depth size inside handle_map...() for writting text with one + # more indentation. + file_out.write(f'{depth_ * DEPTH_SIZE}{key}: ' + f'{handle_mapping_char(val, depth_ + 1, False)}\n') + + # pylint: disable=too-many-branches, too-many-locals + def handle_dimension(self, depth, node, file_out): + """ + Handle the dimension field. + NOTE: Usually we take care of any xml element in xmlparse(...) and + recursion_in_xml_tree(...) functions. But Here it is a bit different. The doc dimension + and attributes of dim has been handled inside this function here. 
+ """ + # pylint: disable=consider-using-f-string + possible_dim_attrs = ['ref', 'required', + 'incr', 'refindex'] + possible_dimemsion_attrs = ['rank'] + + # taking care of Dimension tag + file_out.write( + '{indent}{tag}:\n'.format( + indent=depth * DEPTH_SIZE, + tag=node.tag.split("}", 1)[1])) + # Taking care of dimension attributes + for attr, value in node.attrib.items(): + if attr in possible_dimemsion_attrs and not isinstance(value, dict): + indent = (depth + 1) * DEPTH_SIZE + file_out.write(f'{indent}{attr}: {value}\n') + else: + raise ValueError(f"Dimension has got an attribute {attr} that is not valid." + f"Current the allowd atributes are {possible_dimemsion_attrs}." + f" Please have a look") + # taking carew of dimension doc + for child in list(node): + tag = remove_namespace_from_tag(child.tag) + if tag == 'doc': + text = self.handle_not_root_level_doc(depth + 1, child.text) + file_out.write(text) + node.remove(child) + + dim_index_value = '' + dim_other_parts = {} + dim_cmnt_node = [] + # taking care of dim and doc childs of dimension + for child in list(node): + tag = remove_namespace_from_tag(child.tag) + child_attrs = child.attrib + # taking care of index and value attributes + if tag == ('dim'): + # taking care of index and value in format [[index, value]] + dim_index_value = dim_index_value + '[{index}, {value}], '.format( + index=child_attrs['index'] if "index" in child_attrs else '', + value=child_attrs['value'] if "value" in child_attrs else '') + if "index" in child_attrs: + del child_attrs["index"] + if "value" in child_attrs: + del child_attrs["value"] + + # Taking care of doc comes as child of dim + for cchild in list(child): + ttag = cchild.tag.split("}", 1)[1] + if ttag == ('doc'): + if ttag not in dim_other_parts: + dim_other_parts[ttag] = [] + text = cchild.text + dim_other_parts[ttag].append(text.strip()) + child.remove(cchild) + continue + # taking care of other attributes except index and value + for attr, value in 
child_attrs.items(): + if attr in possible_dim_attrs: + if attr not in dim_other_parts: + dim_other_parts[attr] = [] + dim_other_parts[attr].append(value) + if tag == CMNT_TAG and self.include_comment: + # Store and remove node so that comment nodes from dim node so + # that it does not call in xmlparser function + dim_cmnt_node.append(child) + node.remove(child) + + # All 'dim' element comments on top of 'dim' yaml key + if dim_cmnt_node: + for ch_nd in dim_cmnt_node: + self.handel_comment(depth + 1, ch_nd, file_out) + # index and value attributes of dim elements + file_out.write( + '{indent}dim: [{value}]\n'.format( + indent=(depth + 1) * DEPTH_SIZE, + value=dim_index_value[:-2] or '')) + # Write the attributes, except index and value, and doc of dim as child of dim_parameter. + # But tthe doc or attributes for each dim come inside list according to the order of dim. + if dim_other_parts: + file_out.write( + '{indent}dim_parameters:\n'.format( + indent=(depth + 1) * DEPTH_SIZE)) + # depth = depth + 2 dim_paramerter has child such as doc of dim + indent = (depth + 2) * DEPTH_SIZE + for key, value in dim_other_parts.items(): + if key == 'doc': + value = self.handle_not_root_level_doc(depth + 2, str(value), key, file_out) + else: + # Increase depth size inside handle_map...() for writting text with one + # more indentation. + file_out.write(f"{indent}{key}: " + f"{handle_mapping_char(value, depth + 3, False)}\n") + + def handle_enumeration(self, depth, node, file_out): + """ + Handle the enumeration field parsed from the xml file. + + If the enumeration items contain a doc field, the yaml file will contain items as child + fields of the enumeration field. + + If no doc are inherited in the enumeration items, a list of the items is given for the + enumeration list. 
def handle_attributes(self, depth, node, file_out):
    """Handle the attributes parsed from the xml file.

    Writes the ``\\@name:`` key and the remaining attributes, folding
    min/maxOccurs and optionality into 'exists'. Raises ValueError on a
    disallowed attribute, a missing name, or when both occurrence bounds
    and an optionality keyword are given together.
    """
    allowed_attr = ['name', 'type', 'units', 'nameType', 'recommended', 'optional',
                    'minOccurs', 'maxOccurs', 'deprecated']

    name = ""
    node_attr = node.attrib
    if 'name' not in node_attr:
        raise ValueError("Attribute must have a name key.")

    rm_key_list = []
    # Maintain order: name comes first.
    for key, val in node_attr.items():
        if key == 'name':
            name = val
            rm_key_list.append(key)

    for key in rm_key_list:
        del node_attr[key]

    file_out.write('{indent}{escapesymbol}{name}:\n'.format(
        indent=depth * DEPTH_SIZE,
        escapesymbol=r'\@',
        name=name))

    tmp_dict = {}
    exists_dict = {}
    for key, val in node_attr.items():
        # 'minOccurs', 'maxOccurs' and optionality all move into 'exists'.
        if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']:
            if 'exists' not in tmp_dict:
                tmp_dict['exists'] = []
            self.handle_exists(exists_dict, key, val)
        elif key == 'units':
            tmp_dict['unit'] = val
        else:
            tmp_dict[key] = val
        if key not in allowed_attr:
            raise ValueError(f"An attribute ({key}) has been found that is not allowed. "
                             f"The allowed attr is {allowed_attr}.")

    has_min_max = False
    has_opt_reco_requ = False
    if exists_dict:
        for key, val in exists_dict.items():
            if key in ['minOccurs', 'maxOccurs']:
                tmp_dict['exists'] = tmp_dict['exists'] + val
                has_min_max = True
            elif key in ['optional', 'recommended', 'required']:
                tmp_dict['exists'] = key
                has_opt_reco_requ = True
    if has_min_max and has_opt_reco_requ:
        raise ValueError("Optionality 'exists' can take only either from ['minOccurs',"
                         " 'maxOccurs'] or from ['optional', 'recommended', 'required']"
                         ". But not from both of the groups together. Please check in"
                         " attributes")

    depth_ = depth + 1
    for key, val in tmp_dict.items():
        # Depth is increased inside handle_mapping_char(...) for text that
        # needs one more level of indentation.
        file_out.write(f'{depth_ * DEPTH_SIZE}{key}: '
                       f'{handle_mapping_char(val, depth_ + 1, False)}\n')

def handel_link(self, depth, node, file_out):
    """Handle link elements of nxdl: write ``name(link):`` and its allowed
    attributes ('target', 'napimount'); raise ValueError on anything else.
    """
    possible_link_attrs = ['name', 'target', 'napimount']
    node_attr = node.attrib
    # Special case: the name carries the '(link)' suffix.
    if 'name' in node_attr:
        file_out.write('{indent}{name}(link):\n'.format(
            indent=depth * DEPTH_SIZE,
            name=node_attr['name'] or ''))
        del node_attr['name']

    depth_ = depth + 1
    # General cases.
    for attr_key, val in node_attr.items():
        if attr_key in possible_link_attrs:
            file_out.write('{indent}{attr}: {value}\n'.format(
                indent=depth_ * DEPTH_SIZE,
                attr=attr_key,
                value=val))
        else:
            raise ValueError(f"An unexpected attribute '{attr_key}' of link has been found. "
                             f"At this moment the allowed keys are {possible_link_attrs}")

def handel_choice(self, depth, node, file_out):
    """Handle a choice element, which is a parent node of group: write
    ``name(choice):`` and any (currently none) allowed attributes.
    """
    possible_attr = []

    node_attr = node.attrib
    # Special case: the name carries the '(choice)' suffix.
    if 'name' in node_attr:
        file_out.write('{indent}{attr}(choice): \n'.format(
            indent=depth * DEPTH_SIZE,
            attr=node_attr['name']))
        del node_attr['name']

    depth_ = depth + 1
    # General attributes. None are allowed yet, but this is kept for the
    # future. (Fixed: the original iterated .items() tuples as if they
    # were keys, which could never match and indexed with a tuple.)
    for attr, val in node_attr.items():
        if attr in possible_attr:
            file_out.write('{indent}{attr}: {value}\n'.format(
                indent=depth_ * DEPTH_SIZE,
                attr=attr,
                value=val))
        else:
            raise ValueError(f"An unexpected attribute '{attr}' of 'choice' has been found. "
                             f"At this moment attributes for choice {possible_attr}")
+ f"At this moment attributes for choice {possible_attr}") + + def handel_comment(self, depth, node, file_out): + """ + Collect comment element and pass to write_out function + """ + indent = depth * DEPTH_SIZE + if self.is_last_element_comment: + text = self.comvert_to_ymal_comment(indent, node.text) + self.write_out(indent, text, file_out) + else: + text = self.comvert_to_ymal_comment(indent, node.text) + self.write_out(indent, text, file_out) + self.is_last_element_comment = True + + def recursion_in_xml_tree(self, depth, xml_tree, output_yml, verbose): + """ + Descend lower level in xml tree. If we are in the symbols branch, the recursive + behaviour is not triggered as we already handled the symbols' childs. + """ + + tree = xml_tree['tree'] + node = xml_tree['node'] + for child in list(node): + xml_tree_children = {'tree': tree, 'node': child} + self.xmlparse(output_yml, xml_tree_children, depth, verbose) + + # pylint: disable=too-many-branches, too-many-statements + def xmlparse(self, output_yml, xml_tree, depth, verbose): + """ + Main of the nxdl2yaml converter. 
+ It parses XML tree, then prints recursively each level of the tree + """ + tree = xml_tree['tree'] + node = xml_tree['node'] + if verbose: + sys.stdout.write(f'Node tag: {remove_namespace_from_tag(node.tag)}\n') + sys.stdout.write(f'Attributes: {node.attrib}\n') + with open(output_yml, "a", encoding="utf-8") as file_out: + tag = remove_namespace_from_tag(node.tag) + if tag == 'definition': + self.found_definition = True + self.handle_definition(node) + # Taking care of root level doc and symbols + remove_cmnt_n = None + last_comment = '' + for child in list(node): + tag_tmp = remove_namespace_from_tag(child.tag) + if tag_tmp == CMNT_TAG and self.include_comment: + last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) + remove_cmnt_n = child + if tag_tmp == 'doc': + self.store_root_level_comments('root_doc', last_comment) + last_comment = '' + self.handle_root_level_doc(child) + node.remove(child) + if remove_cmnt_n is not None: + node.remove(remove_cmnt_n) + remove_cmnt_n = None + if tag_tmp == 'symbols': + self.store_root_level_comments('symbols', last_comment) + last_comment = '' + self.handle_symbols(depth, child) + node.remove(child) + if remove_cmnt_n is not None: + node.remove(remove_cmnt_n) + remove_cmnt_n = None + + if tag == ('doc') and depth != 1: + parent = get_node_parent_info(tree, node)[0] + doc_parent = remove_namespace_from_tag(parent.tag) + if doc_parent != 'item': + self.handle_not_root_level_doc(depth, text=node.text, + tag=node.tag, + file_out=file_out) + + if self.found_definition is True and self.root_level_doc: + self.print_root_level_info(depth, file_out) + # End of print root-level definitions in file + if tag in ('field', 'group') and depth != 0: + self.handle_group_or_field(depth, node, file_out) + if tag == ('enumeration'): + self.handle_enumeration(depth, node, file_out) + if tag == ('attribute'): + self.handle_attributes(depth, node, file_out) + if tag == ('dimensions'): + self.handle_dimension(depth, node, 
def compare_niac_and_my(tree, tree2, verbose, node, root_no_duplicates):
    """Merge two trees built from the Niac XML file and "My" XML file.

    Every matching element of the Niac tree is inserted into
    ``root_no_duplicates``; elements of "My" tree are inserted only when
    their attribute set was not already present in the Niac tree. Returns
    ``root_no_duplicates`` (the union of the two inputs).
    """
    niac_root = tree.getroot()
    my_root = tree2.getroot()

    # Attribute sets already present in the Niac tree.
    attrs_list_niac = [niac_elem.attrib for niac_elem in niac_root.iter(node)]
    if verbose:
        sys.stdout.write('Attributes found in Niac file: \n')
        sys.stdout.write(str(attrs_list_niac) + '\n')
        sys.stdout.write(' \n')
        sys.stdout.write('Started merging of Niac and My file... \n')

    # All Niac elements are inserted unconditionally.
    for niac_elem in niac_root.iter(node):
        if verbose:
            sys.stdout.write('- Niac element inserted: \n')
            sys.stdout.write(str(niac_elem.attrib) + '\n')
        insert_at = get_node_parent_info(tree, niac_elem)[1]
        root_no_duplicates.insert(insert_at, niac_elem)

    # "My" elements are inserted only when not already present.
    for my_elem in my_root.iter(node):
        insert_at = get_node_parent_info(tree2, my_elem)[1]
        if my_elem.attrib not in attrs_list_niac:
            if verbose:
                sys.stdout.write('- My element inserted: \n')
                sys.stdout.write(str(my_elem.attrib) + '\n')
            root_no_duplicates.insert(insert_at, my_elem)

    if verbose:
        sys.stdout.write(' \n')
    return root_no_duplicates
See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import xml.etree.ElementTree as ET +from xml.dom import minidom +import os +import textwrap + +import yaml + +from pynxtools.nexus import nexus +from pynxtools.nyaml2nxdl.comment_collector import CommentCollector +from pynxtools.dataconverter.helpers import remove_namespace_from_tag +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_yaml_escape_char_reverter_dict, + nx_name_type_resolving, + cleaning_empty_lines, LineLoader) + + +# pylint: disable=too-many-lines, global-statement, invalid-name +DOM_COMMENT = ("\n" + "# NeXus - Neutron and X-ray Common Data Format\n" + "# \n" + "# Copyright (C) 2014-2022 NeXus International Advisory Committee (NIAC)\n" + "# \n" + "# This library is free software; you can redistribute it and/or\n" + "# modify it under the terms of the GNU Lesser General Public\n" + "# License as published by the Free Software Foundation; either\n" + "# version 3 of the License, or (at your option) any later version.\n" + "#\n" + "# This library is distributed in the hope that it will be useful,\n" + "# but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n" + "# Lesser General Public License for more details.\n" + "#\n" + "# You should have received a copy of the GNU Lesser General Public\n" + "# License along with this library; if not, write to the Free Software\n" + "# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n" + "#\n" + "# For further information, see http://www.nexusformat.org\n") +NX_CLSS = nexus.get_nx_classes() +NX_NEW_DEFINED_CLASSES = ['NX_COMPLEX'] +NX_TYPE_KEYS = nexus.get_nx_attribute_type() +NX_ATTR_IDNT = '\\@' +NX_UNIT_IDNT = 'unit' +DEPTH_SIZE = " " +NX_UNIT_TYPES = nexus.get_nx_units() +COMMENT_BLOCKS: CommentCollector +CATEGORY = '' # Definition would be either 'base' or 'application' + + +def check_for_dom_comment_in_yaml(): + """Check the yaml file has dom comment or dom comment needed to be hard coded. + """ + dignature_keyword_list = ['NeXus', + 'GNU Lesser General Public', + 'Free Software Foundation', + 'Copyright (C)', + 'WITHOUT ANY WARRANTY'] + + # Check for dom comments in first three comments + dom_comment = '' + dom_comment_ind = 1 + for ind, comnt in enumerate(COMMENT_BLOCKS[0:5]): + cmnt_list = comnt.get_comment_text() + if len(cmnt_list) == 1: + text = cmnt_list[0] + else: + continue + dom_comment = text + dom_comment_ind = ind + for keyword in dignature_keyword_list: + if keyword not in text: + dom_comment = '' + break + if dom_comment: + break + + # deactivate the root dom_comment, So that the corresponding comment would not be + # considered as comment for definition xml element. + if dom_comment: + COMMENT_BLOCKS.remove_comment(dom_comment_ind) + + return dom_comment + + +def yml_reader(inputfile): + """ + This function launches the LineLoader class. + It parses the yaml in a dict and extends it with line tag keys for each key of the dict. 
+ """ + global COMMENT_BLOCKS + with open(inputfile, "r", encoding="utf-8") as plain_text_yaml: + loader = LineLoader(plain_text_yaml) + loaded_yaml = loader.get_single_data() + COMMENT_BLOCKS = CommentCollector(inputfile, loaded_yaml) + COMMENT_BLOCKS.extract_all_comment_blocks() + dom_cmnt_frm_yaml = check_for_dom_comment_in_yaml() + global DOM_COMMENT + if dom_cmnt_frm_yaml: + DOM_COMMENT = dom_cmnt_frm_yaml + + if 'category' not in loaded_yaml.keys(): + raise ValueError("All definitions should be either 'base' or 'application' category. " + "No category has been found.") + global CATEGORY + CATEGORY = loaded_yaml['category'] + return loaded_yaml + + +def check_for_default_attribute_and_value(xml_element): + """NeXus Groups, fields and attributes might have xml default attributes and valuesthat must + come. For example: 'optional' which is 'true' by default for base class and false otherwise. + """ + + # base:Default attributes and value for all elements of base class except dimension element + base_attr_to_val = {'optional': 'true'} + + # application: Default attributes and value for all elements of application class except + # dimension element + application_attr_to_val = {'optional': 'false'} + + # Default attributes and value for dimension element + base_dim_attr_to_val = {'required': 'false'} + application_dim_attr_to_val = {'required': 'true'} + + # Eligible tag for default attr and value + elegible_tag = ['group', 'field', 'attribute'] + + def set_default_attribute(xml_elem, default_attr_to_val): + for deflt_attr, deflt_val in default_attr_to_val.items(): + if deflt_attr not in xml_elem.attrib \ + and 'maxOccurs' not in xml_elem.attrib \ + and 'minOccurs' not in xml_elem.attrib \ + and 'recommended' not in xml_elem.attrib: + xml_elem.set(deflt_attr, deflt_val) + + for child in list(xml_element): + # skiping comment 'function' that mainly collect comment from yaml file. 
+ if not isinstance(child.tag, str): + continue + tag = remove_namespace_from_tag(child.tag) + + if tag == 'dim' and CATEGORY == 'base': + set_default_attribute(child, base_dim_attr_to_val) + if tag == 'dim' and CATEGORY == 'application': + set_default_attribute(child, application_dim_attr_to_val) + if tag in elegible_tag and CATEGORY == 'base': + set_default_attribute(child, base_attr_to_val) + if tag in elegible_tag and CATEGORY == 'application': + + set_default_attribute(child, application_attr_to_val) + check_for_default_attribute_and_value(child) + + +def yml_reader_nolinetag(inputfile): + """ + pyyaml based parsing of yaml file in python dict + """ + with open(inputfile, 'r', encoding="utf-8") as stream: + parsed_yaml = yaml.safe_load(stream) + return parsed_yaml + + +def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=False): + """ + Check for any attributes have been skipped or not. + NOTE: We should keep in mind about 'doc' + """ + block_tag = ['enumeration'] + if value: + for attr, val in value.items(): + if attr in ['doc']: + continue + if '__line__' in attr or attr in block_tag: + continue + line_number = f'__line__{attr}' + if verbose: + print(f"__line__ : {value[line_number]}") + if not isinstance(val, dict) \ + and '\\@' not in attr\ + and attr not in allowed_attr\ + and 'NX' not in attr and val: + + raise ValueError(f"An attribute '{attr}' in part '{component}' has been found" + f". Please check arround line '{value[line_number]}. At this " + f"moment. 
def format_nxdl_doc(string):
    """Apply the NeXus format to a doc string: wrap single-line text at 80
    columns, clean empty lines in multi-line text, and guarantee a leading
    and trailing newline."""
    string = check_for_mapping_char_other(string)
    formatted_doc = ''
    if "\n" not in string:
        if len(string) > 80:
            wrapped = textwrap.TextWrapper(width=80,
                                           break_long_words=False,
                                           replace_whitespace=False)
            string = '\n'.join(wrapped.wrap(string))
        formatted_doc = '\n' + f"{string}"
    else:
        text_lines = string.split('\n')
        text_lines = cleaning_empty_lines(text_lines)
        formatted_doc += "\n" + "\n".join(text_lines)
    if not formatted_doc.endswith("\n"):
        formatted_doc += "\n"
    return formatted_doc


def check_for_mapping_char_other(text):
    """Normalize text for nxdl: stringify, map 'True'/'False' to their
    lowercase yaml forms and revert yaml escape characters (e.g. the
    mapping char ':' that cannot pass through the yaml library)."""
    if not text:
        text = ''
    text = str(text)
    if text == 'True':
        text = 'true'
    if text == 'False':
        text = 'false'
    # Some escape chars are not valid in the yaml library and are written
    # escaped into the yaml file; revert them when writing the nxdl.
    escape_reverter = get_yaml_escape_char_reverter_dict()
    for key, val in escape_reverter.items():
        if key in text:
            text = text.replace(key, val)
    return str(text).strip()


def xml_handle_doc(obj, value: str,
                   line_number=None, line_loc=None):
    """Create a 'doc' element instance and append it to ``obj``, attaching
    any comment registered for the given line when both line_number and
    line_loc are supplied."""
    doc_elemt = ET.SubElement(obj, 'doc')
    text = format_nxdl_doc(check_for_mapping_char_other(value)).strip()
    # Newlines keep the text in the middle of the doc tag.
    doc_elemt.text = f"\n{text}\n"
    if line_loc is not None and line_number is not None:
        xml_handle_comment(obj, line_number,
                           line_loc, doc_elemt)


def xml_handle_units(obj, value):
    """Set the 'units' attribute on ``obj`` from ``value``."""
    obj.set('units', str(value))


# pylint: disable=too-many-branches
def xml_handle_exists(dct, obj, keyword, value):
    """Translate the yaml 'exists' value onto ``obj`` as minOccurs /
    maxOccurs / optional / recommended attributes.

    Accepts 'optional' | 'recommended' | 'required', a two-entry list
    [min|max, n], or a four-entry list [min, n, max, m] (either order),
    with 'infty' meaning an unbounded maxOccurs. Raises ValueError on any
    other list shape.
    """
    line_number = f'__line__{keyword}'
    assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !'
    if isinstance(value, list):
        if len(value) == 4 and value[0] == 'min' and value[2] == 'max':
            obj.set('minOccurs', str(value[1]))
            if str(value[3]) != 'infty':
                obj.set('maxOccurs', str(value[3]))
            else:
                obj.set('maxOccurs', 'unbounded')
        elif len(value) == 2 and value[0] == 'min':
            obj.set('minOccurs', str(value[1]))
        elif len(value) == 2 and value[0] == 'max':
            obj.set('maxOccurs', str(value[1]))
        elif len(value) == 4 and value[0] == 'max' and value[2] == 'min':
            obj.set('minOccurs', str(value[3]))
            # FIX: the max bound is value[1] here, not value[3].
            if str(value[1]) != 'infty':
                obj.set('maxOccurs', str(value[1]))
            else:
                obj.set('maxOccurs', 'unbounded')
        elif len(value) == 4 and (value[0] != 'min' or value[2] != 'max'):
            raise ValueError(f'Line {dct[line_number]}: exists keyword'
                             f'needs to go either with an optional [recommended] list with two '
                             f'entries either [min, ] or [max, ], or a list of four '
                             f'entries [min, , max, ] !')
        else:
            raise ValueError(f'Line {dct[line_number]}: exists keyword '
                             f'needs to go either with optional, recommended, a list with two '
                             f'entries either [min, ] or [max, ], or a list of four '
                             f'entries [min, , max, ] !')
    else:
        # Optionality keywords; everything else means "not required"
        # (minOccurs 0). 'required' is only valid for the dimension element.
        if value == 'optional':
            obj.set('optional', 'true')
        elif value == 'recommended':
            obj.set('recommended', 'true')
        elif value == 'required':
            obj.set('optional', 'false')
        else:
            obj.set('minOccurs', '0')
# pylint: disable=too-many-branches, too-many-locals, too-many-statements
def xml_handle_group(dct, obj, keyword, value, verbose=False):
    """Deal with a group instance: create the 'group' element under ``obj``
    with name/type resolved from ``keyword`` (forms 'name(type)' or
    '(type)name'), apply doc / exists / unit / plain attributes, and
    recurse into the remaining children.
    """
    line_number = f'__line__{keyword}'
    line_loc = dct[line_number]
    xml_handle_comment(obj, line_number, line_loc)
    list_of_attr = ['name', 'type', 'nameType', 'deprecated', 'optional', 'recommended',
                    'exists', 'unit']
    l_bracket = -1
    r_bracket = -1
    if keyword.count('(') == 1:
        l_bracket = keyword.index('(')
    if keyword.count(')') == 1:
        r_bracket = keyword.index(')')

    keyword_name, keyword_type = nx_name_type_resolving(keyword)
    if not keyword_name and not keyword_type:
        raise ValueError("A group must have both value and name. Check for group.")
    grp = ET.SubElement(obj, 'group')

    # '(type)name' puts type first; 'name(type)' puts name first.
    if l_bracket == 0 and r_bracket > 0:
        grp.set('type', keyword_type)
        if keyword_name:
            grp.set('name', keyword_name)
    elif l_bracket > 0:
        grp.set('name', keyword_name)
        if keyword_type:
            grp.set('type', keyword_type)
    else:
        grp.set('name', keyword_name)

    if value:
        rm_key_list = []
        for attr, vval in value.items():
            if '__line__' in attr:
                continue
            line_number = f"__line__{attr}"
            line_loc = value[line_number]
            if attr == 'doc':
                xml_handle_doc(grp, vval, line_number, line_loc)
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
            elif attr == 'exists' and vval:
                xml_handle_exists(value, grp, attr, vval)
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
                xml_handle_comment(obj,
                                   line_number, line_loc, grp)
            elif attr == 'unit':
                xml_handle_units(grp, vval)
                xml_handle_comment(obj, line_number, line_loc, grp)
            elif attr in list_of_attr and not isinstance(vval, dict) and vval:
                validate_field_attribute_and_value(attr, vval, list_of_attr, value)
                grp.set(attr, check_for_mapping_char_other(vval))
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
                xml_handle_comment(obj, line_number, line_loc, grp)

        for key in rm_key_list:
            del value[key]
        # Check for attributes that would otherwise be silently skipped.
        check_for_skiped_attributes('group', value, list_of_attr, verbose)
    if isinstance(value, dict) and value != {}:
        recursive_build(grp, value, verbose)


def xml_handle_dimensions(dct, obj, keyword, value: dict):
    """Create a 'dimensions' element instance and append it to ``obj``.

    NOTE: a separate xml_handle_dim() could exist, but 'dim' in the yaml
    file is 'dim = [[index, value]]' while dim also has other attributes
    ('ref', ...) and possibly a doc child; so 'dim' arrives as a dict and
    the attributes of 'dimensions' and the child 'doc' are handled here.
    Attributes of 'dim' other than 'index' and 'value' come under the
    nested dict 'dim_parameters: incr: [...]'.
    """
    possible_dimension_attrs = ['rank']  # nxdl attributes
    line_number = f'__line__{keyword}'
    line_loc = dct[line_number]
    assert 'dim' in value.keys(), (f"Line {line_loc}: No dim as child of dimension has "
                                   f"been found.")
    xml_handle_comment(obj, line_number, line_loc)
    dims = ET.SubElement(obj, 'dimensions')
    # All children under dimensions are dim elements plus its attributes.
    rm_key_list = []
    rank = ''
    for key, val in value.items():
        if '__line__' in key:
            continue
        line_number = f"__line__{key}"
        line_loc = value[line_number]
        if key == 'rank':
            rank = val or ''
            if isinstance(rank, int) and rank < 0:
                # FIX: the line tag lives in `value`, not in `dct`.
                raise ValueError(f"Dimension must have some info about rank which is not "
                                 f"available. Please check around line: {line_loc}")
            dims.set(key, str(val))
            rm_key_list.append(key)
            rm_key_list.append(line_number)
            xml_handle_comment(obj, line_number, line_loc, dims)
        # Check for a dimension doc and handle it.
        elif key == 'doc' and isinstance(val, str):
            xml_handle_doc(dims, val, line_number, line_loc)
            rm_key_list.append(key)
            rm_key_list.append(line_number)
        elif key in possible_dimension_attrs and not isinstance(val, dict):
            dims.set(key, str(val))
            rm_key_list.append(key)
            rm_key_list.append(line_number)
            xml_handle_comment(obj, line_number, line_loc, dims)

    for key in rm_key_list:
        del value[key]

    xml_handle_dim_from_dimension_dict(dct, dims, keyword, value, rank=False)

    if isinstance(value, dict) and value != {}:
        recursive_build(dims, value, verbose=None)
Please check arround Line: {dct[line_number]}") + dims.set(key, str(val)) + rm_key_list.append(key) + rm_key_list.append(line_number) + xml_handle_comment(obj, line_number, line_loc, dims) + # Check dimension doc and handle it + elif key == 'doc' and isinstance(val, str): + xml_handle_doc(dims, val, line_number, line_loc) + rm_key_list.append(key) + rm_key_list.append(line_number) + elif key in possible_dimension_attrs and not isinstance(val, dict): + dims.set(key, str(val)) + rm_key_list.append(key) + rm_key_list.append(line_number) + xml_handle_comment(obj, line_number, line_loc, dims) + + for key in rm_key_list: + del value[key] + + xml_handle_dim_from_dimension_dict(dct, dims, keyword, value, rank=False) + + if isinstance(value, dict) and value != {}: + recursive_build(dims, value, verbose=None) + + +# pylint: disable=too-many-locals, too-many-arguments +def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verbose=False): + """ + Handling dim element. + NOTE: The inputs 'keyword' and 'value' are as input for xml_handle_dimensions + function. please also read note in xml_handle_dimensions. + """ + + possible_dim_attrs = ['ref', 'incr', 'refindex', 'required'] + + # Some attributes might have equivalent name e.g. 'required' is correct one and + # 'optional' could be another name. Then change attribute to the correct one. 
+ wrong_to_correct_attr = [('optional', 'required')] + header_line_number = f"__line__{keyword}" + dim_list = [] + rm_key_list = [] + # NOTE: dim doc and other attributes except 'index' and 'value' will come as list of value + # under dim_parameters + if not value: + return + rank = '' + # pylint: disable=too-many-nested-blocks + for attr, vvalue in value.items(): + if '__line__' in attr: + continue + line_number = f"__line__{attr}" + line_loc = value[line_number] + # dim comes in precedence + if attr == 'dim': + # dim consists of list of [index, value] + llist_ind_value = vvalue + assert isinstance(llist_ind_value, list), (f'Line {value[line_number]}: dim' + f'argument not a list !') + xml_handle_comment(dims_obj, line_number, line_loc) + if isinstance(rank, int) and rank > 0: + assert rank == len(llist_ind_value), ( + f"Wrong dimension rank check around Line {dct[header_line_number]}.\n" + f"Line {[dct[header_line_number]]} rank value {rank} " + f"is not the same as dim array = " + f"{len(llist_ind_value)}.") + # Taking care of ind and value that comes as list of list + for dim_ind_val in llist_ind_value: + dim = ET.SubElement(dims_obj, 'dim') + + # Taking care of multidimensions or rank + if len(dim_ind_val) >= 1 and dim_ind_val[0]: + dim.set('index', str(dim_ind_val[0])) + if len(dim_ind_val) == 2 and dim_ind_val[1]: + dim.set('value', str(dim_ind_val[1])) + dim_list.append(dim) + rm_key_list.append(attr) + rm_key_list.append(line_number) + elif attr == 'dim_parameters' and isinstance(vvalue, dict): + xml_handle_comment(dims_obj, line_number, line_loc) + for kkkey, vvval in vvalue.items(): + if '__line__' in kkkey: + continue + cmnt_number = f'__line__{kkkey}' + cmnt_loc = vvalue[cmnt_number] + # Check whether any optional attributes added + for tuple_wng_crt in wrong_to_correct_attr: + if kkkey == tuple_wng_crt[0]: + raise ValueError(f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use " + f"'{tuple_wng_crt[1]}") + if kkkey == 'doc' and dim_list: + # doc comes 
as list of doc + for i, dim in enumerate(dim_list): + if isinstance(vvval, list) and i < len(vvval): + tmp_val = vvval[i] + xml_handle_doc(dim, vvval[i], cmnt_number, cmnt_loc) + # Check all the dim have doc if not skip + elif isinstance(vvval, list) and i >= len(vvval): + pass + else: + for i, dim in enumerate(dim_list): + # all atribute of dims comes as list + if isinstance(vvval, list) and i < len(vvval): + tmp_val = vvval[i] + dim.set(kkkey, str(tmp_val)) + + # Check all the dim have doc if not skip + elif isinstance(vvval, list) and i >= len(vvval): + pass + # All dim might have the same value for the same attribute + elif not isinstance(vvval, list): + tmp_val = value + dim.set(kkkey, str(tmp_val)) + rm_key_list.append(attr) + rm_key_list.append(line_number) + else: + raise ValueError(f"Got unexpected block except 'dim' and 'dim_parameters'." + f"Please check arround line {line_number}") + + for key in rm_key_list: + del value[key] + + check_for_skiped_attributes('dim', value, possible_dim_attrs, verbose) + + +def xml_handle_enumeration(dct, obj, keyword, value, verbose): + """This function creates an 'enumeration' element instance. + + Two cases are handled: + 1) the items are in a list + 2) the items are dictionaries and may contain a nested doc + """ + line_number = f'__line__{keyword}' + line_loc = dct[line_number] + xml_handle_comment(obj, line_number, line_loc) + enum = ET.SubElement(obj, 'enumeration') + + assert value is not None, f'Line {line_loc}: enumeration must \ +bear at least an argument !' + assert len( + value) >= 1, f'Line {dct[line_number]}: enumeration must not be an empty list!' 
# pylint: disable=unused-argument
def xml_handle_link(dct, obj, keyword, value, verbose):
    """
    Build a <link> element; the link name is ``keyword`` with the trailing
    '(link)' marker stripped off.
    """

    line_key = f"__line__{keyword}"
    line_no = dct[line_key]
    xml_handle_comment(obj, line_key, line_no)
    possible_attrs = ['name', 'target', 'napimount']
    link_obj = ET.SubElement(obj, 'link')
    link_obj.set('name', str(keyword[:-6]))

    if value:
        consumed_keys = []
        for attr_key, attr_val in value.items():
            if '__line__' in attr_key:
                continue
            attr_line_key = f"__line__{attr_key}"
            attr_line_no = value[attr_line_key]
            if attr_key == 'doc':
                xml_handle_doc(link_obj, attr_val, attr_line_key, attr_line_no)
                consumed_keys += [attr_key, attr_line_key]
            elif attr_key in possible_attrs and not isinstance(attr_val, dict):
                # Empty values are consumed but produce no xml attribute.
                if attr_val:
                    link_obj.set(attr_key, str(attr_val))
                consumed_keys += [attr_key, attr_line_key]
                xml_handle_comment(obj, attr_line_key, attr_line_no, link_obj)

        for used in consumed_keys:
            del value[used]
        # Warn about anything that was not consumed above.
        check_for_skiped_attributes('link', value, possible_attrs, verbose)

    if isinstance(value, dict) and value != {}:
        recursive_build(link_obj, value, verbose=None)


def xml_handle_choice(dct, obj, keyword, value, verbose=False):
    """
    Build a <choice> element (a container of alternative groups); the choice
    name is ``keyword`` with the trailing '(choice)' marker stripped off.
    """
    line_key = f'__line__{keyword}'
    line_no = dct[line_key]
    xml_handle_comment(obj, line_key, line_no)
    # Extend this list if new attributes are added to the NeXus definition.
    possible_attr = []
    choice_obj = ET.SubElement(obj, 'choice')
    choice_obj.set('name', keyword[:-8])

    if value:
        consumed_keys = []
        for attr_key, attr_val in value.items():
            if '__line__' in attr_key:
                continue
            attr_line_key = f"__line__{attr_key}"
            attr_line_no = value[attr_line_key]
            if attr_key == 'doc':
                xml_handle_doc(choice_obj, attr_val, attr_line_key, attr_line_no)
                consumed_keys += [attr_key, attr_line_key]
            elif attr_key in possible_attr and not isinstance(attr_val, dict):
                if attr_val:
                    choice_obj.set(attr_key, str(attr_val))
                consumed_keys += [attr_key, attr_line_key]
                xml_handle_comment(obj, attr_line_key, attr_line_no, choice_obj)

        for used in consumed_keys:
            del value[used]
        # Warn about anything that was not consumed above.
        check_for_skiped_attributes('choice', value, possible_attr, verbose)

    if isinstance(value, dict) and value != {}:
        recursive_build(choice_obj, value, verbose=None)
def check_keyword_variable(verbose, dct, keyword, value):
    """
    Check whether both keyword_name and keyword_type are empty,
    and complain (raise ValueError with the yaml line) if so.
    """
    keyword_name, keyword_type = nx_name_type_resolving(keyword)
    if verbose:
        sys.stdout.write(
            f'{keyword_name}({keyword_type}): value type is {type(value)}\n')
    if keyword_name == '' and keyword_type == '':
        line_number = f'__line__{keyword}'
        raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !')


def helper_keyword_type(kkeyword_type):
    """
    Return ``kkeyword_type`` if it belongs to NX_TYPE_KEYS, else None.
    """
    if kkeyword_type in NX_TYPE_KEYS:
        return kkeyword_type
    return None


def verbose_flag(verbose, keyword, value):
    """
    Verbose stdout printing for nested levels of yaml file, if verbose flag is active.
    """
    if verbose:
        sys.stdout.write(f' key:{keyword}; value type is {type(value)}\n')


def xml_handle_attributes(dct, obj, keyword, value, verbose):
    """Handle an attribute (a yaml key starting with the attribute marker).

    Creates an <attribute> element under ``obj`` and transfers the supported
    attribute-of-attribute keys from ``value`` onto it, deleting consumed keys
    from ``value`` in place.
    """

    line_number = f"__line__{keyword}"
    line_loc = dct[line_number]
    xml_handle_comment(obj, line_number, line_loc)
    # list of possible attributes of an xml attribute element
    attr_attr_list = ['name', 'type', 'unit', 'nameType',
                      'optional', 'recommended', 'minOccurs',
                      'maxOccurs', 'deprecated', 'exists']
    # as an attribute identifier
    keyword_name, keyword_typ = nx_name_type_resolving(keyword)
    if verbose:
        print(f"__line__ : {dct[line_number]}")
    if keyword_name == '' and keyword_typ == '':
        raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !')
    elemt_obj = ET.SubElement(obj, 'attribute')
    # strip the two-character attribute marker prefix from the name
    elemt_obj.set('name', keyword_name[2:])
    if keyword_typ:
        elemt_obj.set('type', keyword_typ)

    rm_key_list = []
    # BUG FIX: was ``if value and value:`` — a duplicated truthiness test.
    if value:
        # taking care of attributes of attributes
        for attr, attr_val in value.items():
            if '__line__' in attr:
                continue
            line_number = f"__line__{attr}"
            line_loc = value[line_number]
            if attr in ['doc', *attr_attr_list] and not isinstance(attr_val, dict):
                if attr == 'unit':
                    # yaml 'unit' maps to the xml 'units' attribute
                    elemt_obj.set(f"{attr}s", str(attr_val))
                    rm_key_list.append(attr)
                    rm_key_list.append(line_number)
                    xml_handle_comment(obj, line_number, line_loc, elemt_obj)
                elif attr == 'exists' and attr_val:
                    xml_handle_exists(value, elemt_obj, attr, attr_val)
                    rm_key_list.append(attr)
                    rm_key_list.append(line_number)
                    xml_handle_comment(obj, line_number, line_loc, elemt_obj)
                elif attr == 'doc':
                    xml_handle_doc(elemt_obj, format_nxdl_doc(attr_val),
                                   line_number, line_loc)
                    rm_key_list.append(attr)
                    rm_key_list.append(line_number)
                else:
                    elemt_obj.set(attr, check_for_mapping_char_other(attr_val))
                    rm_key_list.append(attr)
                    rm_key_list.append(line_number)
                    xml_handle_comment(obj, line_number, line_loc, elemt_obj)

    for key in rm_key_list:
        del value[key]
    # Check for skipped attributes
    check_for_skiped_attributes('Attribute', value, attr_attr_list, verbose)
    if value:
        recursive_build(elemt_obj, value, verbose)
def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False):
    """
    Handle a field in the yaml file.

    A keyword is a field when it is NOT a symbol, an NX base-class member,
    an attribute (\\@), doc, enumeration, dimension, or exists — i.e. any
    remaining non-empty keyword_name defines a new <field> node.

    Parameters:
        obj:        XML element the new field is appended to.
        keyword:    yaml key, optionally carrying a '(type)' part.
        value:      yaml dict of the field's children; consumed keys are
                    deleted in place.
        line_annot: '__line__<keyword>' annotation key of the field.
        line_loc:   yaml line number of the field.
        verbose:    forwarded to check_for_skiped_attributes().
    """
    # List of possible attributes of xml field elements
    allowed_attr = ['name', 'type', 'nameType', 'unit', 'minOccurs', 'long_name',
                    'axis', 'signal', 'deprecated', 'axes', 'exists',
                    'data_offset', 'interpretation', 'maxOccurs',
                    'primary', 'recommended', 'optional', 'stride']

    xml_handle_comment(obj, line_annot, line_loc)
    l_bracket = -1
    r_bracket = -1
    if keyword.count('(') == 1:
        l_bracket = keyword.index('(')
    if keyword.count(')') == 1:
        r_bracket = keyword.index(')')

    keyword_name, keyword_type = nx_name_type_resolving(keyword)
    if not keyword_type and not keyword_name:
        raise ValueError("Check for name or type in field.")
    elemt_obj = ET.SubElement(obj, 'field')

    # '(type)name' — type comes first
    if l_bracket == 0 and r_bracket > 0:
        elemt_obj.set('type', keyword_type)
        if keyword_name:
            elemt_obj.set('name', keyword_name)
    # 'name(type)'
    elif l_bracket > 0:
        elemt_obj.set('name', keyword_name)
        if keyword_type:
            elemt_obj.set('type', keyword_type)
    # plain 'name'
    else:
        elemt_obj.set('name', keyword_name)

    if value:
        rm_key_list = []
        # In each if clause apply xml_handle_comment() to collect
        # the comments on that yaml line.
        for attr, vval in value.items():
            if '__line__' in attr:
                continue
            line_number = f"__line__{attr}"
            line_loc = value[line_number]
            if attr == 'doc':
                xml_handle_doc(elemt_obj, vval, line_number, line_loc)
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
            elif attr == 'exists' and vval:
                xml_handle_exists(value, elemt_obj, attr, vval)
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
                xml_handle_comment(obj, line_number, line_loc, elemt_obj)
            elif attr == 'unit':
                xml_handle_units(elemt_obj, vval)
                # BUG FIX (probable): every sibling branch consumes its key, but
                # 'unit' was left in ``value`` and would be re-dispatched by the
                # recursive_build() call below — consume it here as well.
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
                xml_handle_comment(obj, line_number, line_loc, elemt_obj)
            elif attr in allowed_attr and not isinstance(vval, dict) and vval:
                validate_field_attribute_and_value(attr, vval, allowed_attr, value)
                elemt_obj.set(attr, check_for_mapping_char_other(vval))
                rm_key_list.append(attr)
                rm_key_list.append(line_number)
                xml_handle_comment(obj, line_number, line_loc, elemt_obj)

        for key in rm_key_list:
            del value[key]
        # Check for skipped attributes
        check_for_skiped_attributes('field', value, allowed_attr, verbose)

    if isinstance(value, dict) and value != {}:
        recursive_build(elemt_obj, value, verbose)
+ """ + + line_info = (line_annotation, int(line_loc_no)) + if line_info in COMMENT_BLOCKS: + cmnt = COMMENT_BLOCKS.get_coment_by_line_info(line_info) + cmnt_text = cmnt.get_comment_text() + + if is_def_cmnt: + return cmnt_text + if xml_ele is not None: + obj.remove(xml_ele) + for string in cmnt_text: + si_comnt = ET.Comment(string) + obj.append(si_comnt) + obj.append(xml_ele) + elif not is_def_cmnt and xml_ele is None: + for string in cmnt_text: + si_comnt = ET.Comment(string) + obj.append(si_comnt) + else: + raise ValueError("Provied correct parameter values.") + return '' + + +def recursive_build(obj, dct, verbose): + """obj is the current node of the XML tree where we want to append to, + dct is a dictionary object which represents the content of a child to obj + dct may contain further dictionary nests, representing NXDL groups, + which trigger recursive processing + NXDL fields may contain attributes but trigger no recursion so attributes are leafs. + + """ + for keyword, value in iter(dct.items()): + if '__line__' in keyword: + continue + line_number = f"__line__{keyword}" + line_loc = dct[line_number] + keyword_name, keyword_type = nx_name_type_resolving(keyword) + check_keyword_variable(verbose, dct, keyword, value) + if verbose: + sys.stdout.write( + f'keyword_name:{keyword_name} keyword_type {keyword_type}\n') + + if keyword[-6:] == '(link)': + xml_handle_link(dct, obj, keyword, value, verbose) + elif keyword[-8:] == '(choice)': + xml_handle_choice(dct, obj, keyword, value) + # The bellow xml_symbol clause is for the symbols that come ubde filed or attributes + # Root level symbols has been inside nyaml2nxdl() + elif keyword_type == '' and keyword_name == 'symbols': + xml_handle_symbols(dct, obj, keyword, value) + + elif ((keyword_type in NX_CLSS) or (keyword_type not in + [*NX_TYPE_KEYS, '', *NX_NEW_DEFINED_CLASSES])): + # we can be sure we need to instantiate a new group + xml_handle_group(dct, obj, keyword, value, verbose) + + elif keyword_name[0:2] 
== NX_ATTR_IDNT: # check if obj qualifies + xml_handle_attributes(dct, obj, keyword, value, verbose) + elif keyword == 'doc': + xml_handle_doc(obj, value, line_number, line_loc) + elif keyword == NX_UNIT_IDNT: + xml_handle_units(obj, value) + elif keyword == 'enumeration': + xml_handle_enumeration(dct, obj, keyword, value, verbose) + + elif keyword == 'dimensions': + xml_handle_dimensions(dct, obj, keyword, value) + + elif keyword == 'exists': + xml_handle_exists(dct, obj, keyword, value) + # Handles fileds e.g. AXISNAME + elif keyword_name != '' and '__line__' not in keyword_name: + xml_handle_fields(obj, keyword, + value, line_number, + line_loc, verbose) + else: + raise ValueError(f"An unfamiliar type of element {keyword} has been found which is " + f"not be able to be resolved. Chekc arround line {dct[line_number]}") + + +def pretty_print_xml(xml_root, output_xml, def_comments=None): + """ + Print better human-readable indented and formatted xml file using + built-in libraries and preceding XML processing instruction + """ + dom = minidom.parseString(ET.tostring( + xml_root, encoding='utf-8', method='xml')) + proc_instractionn = dom.createProcessingInstruction( + 'xml-stylesheet', 'type="text/xsl" href="nxdlformat.xsl"') + dom_comment = dom.createComment(DOM_COMMENT) + root = dom.firstChild + dom.insertBefore(proc_instractionn, root) + dom.insertBefore(dom_comment, root) + + if def_comments: + for string in def_comments: + def_comt_ele = dom.createComment(string) + dom.insertBefore(def_comt_ele, root) + + xml_string = dom.toprettyxml(indent=1 * DEPTH_SIZE, newl='\n', encoding='UTF-8') + with open('tmp.xml', "wb") as file_tmp: + file_tmp.write(xml_string) + flag = False + with open('tmp.xml', "r", encoding="utf-8") as file_out: + with open(output_xml, "w", encoding="utf-8") as file_out_mod: + for i in file_out.readlines(): + if '' not in i and '' not in i and flag is False: + file_out_mod.write(i) + elif '' in i and '' in i: + file_out_mod.write(i) + elif '' in 
# pylint: disable=too-many-statements
def nyaml2nxdl(input_file: str, out_file, verbose: bool):
    """
    Main of the nyaml2nxdl converter, creates XML tree, namespace and
    schema, definitions then evaluates a dictionary nest of groups recursively and
    fields or (their) attributes as childs of the groups

    input_file: path of the yaml application/base-class definition to convert.
    out_file:   path the pretty-printed NXDL output is written to.
    verbose:    when True, progress information is written to stdout.
    """

    # Root-level yaml keys that map directly onto <definition> attributes.
    def_attributes = ['deprecated', 'ignoreExtraGroups', 'category', 'type',
                      'ignoreExtraFields', 'ignoreExtraAttributes', 'restricts']
    yml_appdef = yml_reader(input_file)
    # Comments that belong to the <definition> element itself (emitted by
    # pretty_print_xml before the root element).
    def_cmnt_text = []
    if verbose:
        sys.stdout.write(f'input-file: {input_file}\n')
        sys.stdout.write('application/base contains the following root-level entries:\n')
        sys.stdout.write(str(yml_appdef.keys()))
    xml_root = ET.Element('definition', {})
    # Mandatory root-level keys: 'category' (application|base) and 'doc'.
    assert 'category' in yml_appdef.keys(
    ), 'Required root-level keyword category is missing!'
    assert yml_appdef['category'] in ['application', 'base'], 'Only \
application and base are valid categories!'
    assert 'doc' in yml_appdef.keys(), 'Required root-level keyword doc is missing!'

    name_extends = ''
    # Iterate over a copy because consumed keys are deleted from yml_appdef.
    yml_appdef_copy = yml_appdef.copy()
    for kkey, vvalue in yml_appdef_copy.items():
        if '__line__' in kkey:
            continue
        line_number = f"__line__{kkey}"
        line_loc_no = yml_appdef[line_number]
        if not isinstance(vvalue, dict) and kkey in def_attributes:
            xml_root.set(kkey, str(vvalue) or '')
            cmnt_text = xml_handle_comment(xml_root,
                                           line_number, line_loc_no,
                                           is_def_cmnt=True)
            def_cmnt_text += cmnt_text if cmnt_text else []

            del yml_appdef[line_number]
            del yml_appdef[kkey]
        # Taking care of name and extends
        elif 'NX' in kkey:
            # Tracking the attribute order; the correct value will be stored
            # later.  Check for name first or type first: if '(NXobject)NXname'
            # then type comes first.
            l_bracket_ind = kkey.rfind('(')
            r_bracket_ind = kkey.rfind(')')
            if l_bracket_ind == 0:
                # '(NXparent)NXname' — extends first, then name
                extend = kkey[1:r_bracket_ind]
                name = kkey[r_bracket_ind + 1:]
                xml_root.set('extends', extend)
                xml_root.set('name', name)
            elif l_bracket_ind > 0:
                # 'NXname(NXparent)' — name first, then extends
                name = kkey[0:l_bracket_ind]
                extend = kkey[l_bracket_ind + 1: r_bracket_ind]
                xml_root.set('name', name)
                xml_root.set('extends', extend)
            else:
                # plain 'NXname' — extends defaults to NXobject
                name = kkey
                xml_root.set('name', name)
                xml_root.set('extends', 'NXobject')
            cmnt_text = xml_handle_comment(xml_root,
                                           line_number, line_loc_no,
                                           is_def_cmnt=True)
            def_cmnt_text += cmnt_text if cmnt_text else []

            name_extends = kkey

    if 'type' not in xml_root.attrib:
        xml_root.set('type', "group")
    # Taking care of namespaces
    namespaces = {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1',
                  'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
                  'xsi:schemaLocation': 'http://definition.nexusformat.org/nxdl/3.1 ../nxdl.xsd'}
    for key, ns_ in namespaces.items():
        xml_root.attrib[key] = ns_
    # Taking care of root-level symbols elements
    if 'symbols' in yml_appdef.keys():
        xml_handle_symbols(yml_appdef,
                           xml_root,
                           'symbols',
                           yml_appdef['symbols'])

        del yml_appdef['symbols']
        del yml_appdef["__line__symbols"]

    assert isinstance(yml_appdef['doc'], str) and yml_appdef['doc'] != '', 'Doc \
has to be a non-empty string!'

    line_number = '__line__doc'
    line_loc_no = yml_appdef[line_number]
    xml_handle_doc(xml_root, yml_appdef['doc'], line_number, line_loc_no)

    del yml_appdef['doc']

    # After consuming category/doc/symbols/attributes, exactly one root key —
    # the NX... definition itself — must remain.
    root_keys = 0
    for key in yml_appdef.keys():
        if '__line__' not in key:
            root_keys += 1
            extra_key = key

    # NOTE(review): if root_keys == 0, ``extra_key`` is unbound and this assert
    # raises NameError instead of AssertionError — confirm intended behaviour.
    assert root_keys == 1, (f"Accepting at most keywords: category, doc, symbols, and NX... "
                            f"at root-level! check key at root level {extra_key}")

    assert ('NX' in name_extends and len(name_extends) > 2), 'NX \
keyword has an invalid pattern, or is too short!'
    # Taking care if definition has empty content
    if yml_appdef[name_extends]:
        recursive_build(xml_root, yml_appdef[name_extends], verbose)
    # Taking care of comments that come at the end of the file and might not be
    # intended for any nxdl element.
    if COMMENT_BLOCKS[-1].has_post_comment:
        post_comment = COMMENT_BLOCKS[-1]
        (lin_annot, line_loc) = post_comment.get_line_info()
        xml_handle_comment(xml_root, lin_annot, line_loc)

    # Note: Just to keep the functionality if we need this functionality later.
    default_attr = False
    if default_attr:
        check_for_default_attribute_and_value(xml_root)
    pretty_print_xml(xml_root, out_file, def_cmnt_text)
    if verbose:
        sys.stdout.write('Parsed YAML to NXDL successfully\n')
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# Yaml library does not except the keys (escapechar "\t" and yaml separator ":") +# So the corresponding value is to skip them and +# and also carefull about this order +import hashlib +from yaml.composer import Composer +from yaml.constructor import Constructor + +from yaml.nodes import ScalarNode +from yaml.resolver import BaseResolver +from yaml.loader import Loader + +# NOTE: If any one change one of the bellow dict please change it for both +ESCAPE_CHAR_DICT_IN_YAML = {"\t": " ", + "\':\'": ":"} + +ESCAPE_CHAR_DICT_IN_XML = {" ": "\t", + "\':\'": ":"} + + +def remove_namespace_from_tag(tag): + """Helper function to remove the namespace from an XML tag.""" + + return tag.split("}")[-1] + + +class LineLoader(Loader): # pylint: disable=too-many-ancestors + """ + LineLoader parses a yaml into a python dictionary extended with extra items. 
def get_yaml_escape_char_dict():
    """Get escape chars and the way to skip them in yaml."""
    return ESCAPE_CHAR_DICT_IN_YAML


def get_yaml_escape_char_reverter_dict():
    """To revert yaml escape chars in the xml constructor built from yaml."""
    return ESCAPE_CHAR_DICT_IN_XML


def type_check(nx_type):
    """
    Render a NeXus type for yaml output: '' for NX_CHAR or empty,
    otherwise the type wrapped in parentheses, e.g. '(NX_FLOAT)'.
    """
    if nx_type in ['NX_CHAR', '']:
        return ''
    return f"({nx_type})"


def get_node_parent_info(tree, node):
    """
    Return tuple of (parent, index) where:
        parent = node of parent within tree
        index  = index of node under parent
    """
    # Build a child -> parent map over the whole tree, then look the node up.
    parent_map = {c: p for p in tree.iter() for c in p}
    parent = parent_map[node]
    return parent, list(parent).index(node)


def cleaning_empty_lines(line_list):
    """
    Strip leading and trailing blank lines from a list of lines (or from a
    newline-joined string, which is split first).  Always returns a non-empty
    list; an all-blank input collapses to [''].
    """
    if not isinstance(line_list, list):
        # BUG FIX: a single-line string used to collapse to [''] and lose its
        # content; keep the text as a one-element list instead.
        line_list = line_list.split('\n') if '\n' in line_list else [line_list]

    # Cleaning up top empty lines
    while True:
        if line_list[0].strip():
            break
        line_list = line_list[1:]
        if len(line_list) == 0:
            line_list.append('')
            return line_list

    # Cleaning up bottom empty lines
    while True:
        if line_list[-1].strip():
            break
        line_list = line_list[0:-1]
        if len(line_list) == 0:
            line_list.append('')
            return line_list

    return line_list


def nx_name_type_resolving(tmp):
    """
    Extract the eventually custom name {optional_string} and type {nexus_type}
    from a YML section string with syntax: optional_string(nexus_type).
    Returns (name, type); a plain name yields an empty type.
    """
    if tmp.count('(') == 1 and tmp.count(')') == 1:
        # We can safely assume that every valid YML key resolving to an nx_
        # (type, base, candidate) class contains exactly one '(' and ')'.
        index_start = tmp.index('(')
        index_end = tmp.index(')', index_start + 1)
        typ = tmp[index_start + 1:index_end]
        nam = tmp.replace('(' + typ + ')', '')
        return nam, typ

    # or a name for a member
    return tmp, ''


def get_sha256_hash(file_name):
    """Generate a sha256 hex digest for the contents of ``file_name``."""
    sha_hash = hashlib.sha256()

    with open(file=file_name, mode='rb',) as file_obj:
        # Update hash for each 4k block of bytes
        for b_line in iter(lambda: file_obj.read(4096), b""):
            sha_hash.update(b_line)
    return sha_hash.hexdigest()
+ """ + + with open(yaml_file, mode='a+', encoding='utf-8') as f1_obj: + if top_lines_list: + for line in top_lines_list: + f1_obj.write(line) + + with open(file_to_be_appended, mode='r', encoding='utf-8') as f2_obj: + lines = f2_obj.readlines() + for line in lines: + f1_obj.write(f"# {line}") + + +def separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, sep_xml): + """Separate the provided yaml file into yaml, nxdl and hash if yaml was extended with + nxdl at the end of yaml by + '\n# ++++++++++++++++++++++++++++++++++ SHA HASH \ + ++++++++++++++++++++++++++++++++++\n' + # ' + """ + sha_hash = '' + with open(yaml_file, 'r', encoding='utf-8') as inp_file: + lines = inp_file.readlines() + # file to write yaml part + with open(sep_yaml, 'w', encoding='utf-8') as yml_f_ob, \ + open(sep_xml, 'w', encoding='utf-8') as xml_f_ob: + + last_line = '' + write_on_yaml = True + for ind, line in enumerate(lines): + if ind == 0: + last_line = line + # Write in file when ensured that the nest line is not with '++ SHA HASH ++' + elif '++ SHA HASH ++' not in line and write_on_yaml: + yml_f_ob.write(last_line) + last_line = line + elif '++ SHA HASH ++' in line: + write_on_yaml = False + last_line = '' + elif not write_on_yaml and not last_line: + # The first line of xml file has been found. Onward write lines directly + # into xml file. + if not sha_hash: + sha_hash = line.split('# ', 1)[-1].strip() + else: + xml_f_ob.write(line[2:]) + # If the yaml fiile does not contain any hash for nxdl then we may have last line. 
+ if last_line: + yml_f_ob.write(last_line) + + return sha_hash diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..baa6afee7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools>=64.0.1", "setuptools-scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "nexusdefinitions" +dynamic = ["version"] +authors = [ + { name = "NIAC" } +] +description = "Nexus definitions" +readme = "README.md" +license = { file = "LGPL.txt" } +requires-python = "" +classifiers = [ + "Operating System :: OS Independent" +] +dependencies = [ + "lxml", + "pyyaml", + "click>=7.1.2", + "h5py>=3.6.0", + "sphinx>=5", + "sphinx-tabs", + "pytest", + "black>=22.3", + "flake8>=4", + "isort>=5.10", +] + +[project.urls] +"Homepage" = "https://nexusformat.org" + +[project.scripts] +read_nexus = "dev_tools.utils.nexus:main" +nyaml2nxdl = "dev_tools.nyaml2nxdl.nyaml2nxdl:launch_tool" + +[tools.setuptools_scm] +version_scheme = "guess-next-dev" +local_scheme = "node-and-date" + +[tool.setuptools] +packages = ["dev_tools"] From 5d0820802d5872b5e24373e53b5001e30e97b59b Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 15:01:41 +0200 Subject: [PATCH 24/32] linting --- dev_tools/nyaml2nxdl/comment_collector.py | 199 ++-- dev_tools/nyaml2nxdl/nyaml2nxdl.py | 206 +++-- .../nyaml2nxdl/nyaml2nxdl_backward_tools.py | 645 +++++++------ .../nyaml2nxdl/nyaml2nxdl_forward_tools.py | 854 ++++++++++-------- dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py | 85 +- 5 files changed, 1100 insertions(+), 889 deletions(-) diff --git a/dev_tools/nyaml2nxdl/comment_collector.py b/dev_tools/nyaml2nxdl/comment_collector.py index 5f0c5e3bc..dcb21021b 100644 --- a/dev_tools/nyaml2nxdl/comment_collector.py +++ b/dev_tools/nyaml2nxdl/comment_collector.py @@ -31,10 +31,16 @@ """ -from typing import List, Type, Any, Tuple, Union, Dict +from typing import Any +from typing import Dict +from typing import List +from typing 
import Tuple +from typing import Type +from typing import Union + from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader -__all__ = ['Comment', 'CommentCollector', 'XMLComment', 'YAMLComment'] +__all__ = ["Comment", "CommentCollector", "XMLComment", "YAMLComment"] # pylint: disable=inconsistent-return-statements @@ -43,8 +49,7 @@ class CommentCollector: _comment_chain. """ - def __init__(self, input_file: str = None, - loaded_obj: Union[object, Dict] = None): + def __init__(self, input_file: str = None, loaded_obj: Union[object, Dict] = None): """ Initialise CommentCollector parameters: @@ -57,19 +62,21 @@ def __init__(self, input_file: str = None, self._comment_hash: Dict[Tuple, Type[Comment]] = {} self.comment: Type[Comment] if self.file and not loaded_obj: - if self.file.split('.')[-1] == 'xml': + if self.file.split(".")[-1] == "xml": self.comment = XMLComment - if self.file.split('.')[-1] == 'yaml': + if self.file.split(".")[-1] == "yaml": self.comment = YAMLComment with open(self.file, "r", encoding="utf-8") as plain_text_yaml: loader = LineLoader(plain_text_yaml) self.comment.__yaml_dict__ = loader.get_single_data() elif self.file and loaded_obj: - if self.file.split('.')[-1] == 'yaml' and isinstance(loaded_obj, dict): + if self.file.split(".")[-1] == "yaml" and isinstance(loaded_obj, dict): self.comment = YAMLComment self.comment.__yaml_dict__ = loaded_obj else: - raise ValueError("Incorrect inputs for CommentCollector e.g. Wrong file extension.") + raise ValueError( + "Incorrect inputs for CommentCollector e.g. Wrong file extension." 
+ ) else: raise ValueError("Incorrect inputs for CommentCollector") @@ -81,18 +88,20 @@ def extract_all_comment_blocks(self): """ id_ = 0 single_comment = self.comment(comment_id=id_) - with open(self.file, mode='r', encoding='UTF-8') as enc_f: + with open(self.file, mode="r", encoding="UTF-8") as enc_f: lines = enc_f.readlines() # Make an empty line for last comment if no empty lines in original file - if lines[-1] != '': - lines.append('') + if lines[-1] != "": + lines.append("") for line_num, line in enumerate(lines): if single_comment.is_storing_single_comment(): # If the last comment comes without post nxdl fields, groups and attributes - if '++ SHA HASH ++' in line: + if "++ SHA HASH ++" in line: # Handle with stored nxdl.xml file that is not part of yaml - line = '' - single_comment.process_each_line(line + 'post_comment', (line_num + 1)) + line = "" + single_comment.process_each_line( + line + "post_comment", (line_num + 1) + ) self._comment_chain.append(single_comment) break if line_num < (len(lines) - 1): @@ -100,7 +109,9 @@ def extract_all_comment_blocks(self): single_comment.process_each_line(line, (line_num + 1)) else: # For processing last line of file - single_comment.process_each_line(line + 'post_comment', (line_num + 1)) + single_comment.process_each_line( + line + "post_comment", (line_num + 1) + ) self._comment_chain.append(single_comment) else: self._comment_chain.append(single_comment) @@ -109,13 +120,13 @@ def extract_all_comment_blocks(self): def get_comment(self): """ - Return comment from comment_chain that must come earlier in order. + Return comment from comment_chain that must come earlier in order. """ return self._comment_chain[self._comment_tracker] def get_coment_by_line_info(self, comment_locs: Tuple[str, Union[int, str]]): """ - Get comment using line information. + Get comment using line information. 
""" if comment_locs in self._comment_hash: return self._comment_hash[comment_locs] @@ -129,8 +140,7 @@ def get_coment_by_line_info(self, comment_locs: Tuple[str, Union[int, str]]): return cmnt def remove_comment(self, ind): - """Remove a comment from comment list. - """ + """Remove a comment from comment list.""" if ind < len(self._comment_chain): del self._comment_chain[ind] else: @@ -150,8 +160,10 @@ def __contains__(self, comment_locs: tuple): (__line__doc and 35) """ if not isinstance(comment_locs, tuple): - raise TypeError("Comment_locs should be 'tuple' containing line annotation " - "(e.g.__line__doc) and line_loc (e.g. 35).") + raise TypeError( + "Comment_locs should be 'tuple' containing line annotation " + "(e.g.__line__doc) and line_loc (e.g. 35)." + ) line_annot, line_loc = comment_locs for cmnt in self._comment_chain: if line_annot in cmnt: @@ -162,11 +174,12 @@ def __contains__(self, comment_locs: tuple): return False def __getitem__(self, ind): - """Get comment from self.obj._comment_chain by index. - """ + """Get comment from self.obj._comment_chain by index.""" if isinstance(ind, int): if ind >= len(self._comment_chain): - raise IndexError(f'Oops! Comment index {ind} in {__class__} is out of range!') + raise IndexError( + f"Oops! Comment index {ind} in {__class__} is out of range!" + ) return self._comment_chain[ind] if isinstance(ind, slice): @@ -175,8 +188,7 @@ def __getitem__(self, ind): return self._comment_chain[start_n:end_n] def __iter__(self): - """get comment ieratively - """ + """get comment ieratively""" return iter(self._comment_chain) @@ -186,21 +198,19 @@ class Comment: This class is building yaml comment and the intended line for what comment is written. 
""" - def __init__(self, - comment_id: int = -1, - last_comment: 'Comment' = None) -> None: + def __init__(self, comment_id: int = -1, last_comment: "Comment" = None) -> None: """Comment object can be considered as a block element that includes - document element (an entity for what the comment is written). + document element (an entity for what the comment is written). """ self._elemt: Any = None self._elemt_text: str = None self._is_elemt_found: bool = None self._is_elemt_stored: bool = None - self._comnt: str = '' + self._comnt: str = "" # If Multiple comments for one element or entity self._comnt_list: List[str] = [] - self.last_comment: 'Comment' = last_comment if last_comment else None + self.last_comment: "Comment" = last_comment if last_comment else None if comment_id >= 0 and last_comment: self.cid = comment_id self.last_comment = last_comment @@ -214,8 +224,9 @@ def __init__(self, raise ValueError("Neither last comment nor comment id dound") self._comnt_start_found: bool = False self._comnt_end_found: bool = False - self.is_storing_single_comment = lambda: not (self._comnt_end_found - and self._is_elemt_stored) + self.is_storing_single_comment = lambda: not ( + self._comnt_end_found and self._is_elemt_stored + ) def get_comment_text(self) -> Union[List, str]: """ @@ -239,7 +250,7 @@ class XMLComment(Comment): XMLComment to store xml comment element. 
""" - def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: + def __init__(self, comment_id: int = -1, last_comment: "Comment" = None) -> None: super().__init__(comment_id, last_comment) def process_each_line(self, text, line_num): @@ -253,76 +264,77 @@ def process_each_line(self, text, line_num): # for multiple comment if exist if self._comnt: self._comnt_list.append(self._comnt) - self._comnt = '' + self._comnt = "" if self._comnt_end_found: self.store_element(text) def append_comment(self, text: str) -> None: # Comment in single line - if '' == text[-4:]: + self._comnt = self._comnt + text.replace("" == text[-4:]: self._comnt_end_found = True self._comnt_start_found = False - self._comnt = self._comnt.replace('-->', '') + self._comnt = self._comnt.replace("-->", "") - elif '-->' == text[0:4] and self._comnt_start_found: + elif "-->" == text[0:4] and self._comnt_start_found: self._comnt_end_found = True self._comnt_start_found = False - self._comnt = self._comnt + '\n' + text.replace('-->', '') + self._comnt = self._comnt + "\n" + text.replace("-->", "") elif self._comnt_start_found: - self._comnt = self._comnt + '\n' + text + self._comnt = self._comnt + "\n" + text # pylint: disable=arguments-differ, arguments-renamed def store_element(self, text) -> None: def collect_xml_attributes(text_part): for part in text_part: part = part.strip() - if part and '">' == ''.join(part[-2:]): + if part and '">' == "".join(part[-2:]): self._is_elemt_stored = True self._is_elemt_found = False - part = ''.join(part[0:-2]) - elif part and '"/>' == ''.join(part[-3:]): + part = "".join(part[0:-2]) + elif part and '"/>' == "".join(part[-3:]): self._is_elemt_stored = True self._is_elemt_found = False - part = ''.join(part[0:-3]) - elif part and '/>' == ''.join(part[-2:]): + part = "".join(part[0:-3]) + elif part and "/>" == "".join(part[-2:]): self._is_elemt_stored = True self._is_elemt_found = False - part = ''.join(part[0:-2]) - elif part and '>' == 
part[-1]: + part = "".join(part[0:-2]) + elif part and ">" == part[-1]: self._is_elemt_stored = True self._is_elemt_found = False - part = ''.join(part[0:-1]) + part = "".join(part[0:-1]) elif part and '"' == part[-1]: - part = ''.join(part[0:-1]) + part = "".join(part[0:-1]) if '="' in part: lf_prt, rt_prt = part.split('="') else: continue - if ':' in lf_prt: + if ":" in lf_prt: continue self._elemt[lf_prt] = str(rt_prt) + if not self._elemt: self._elemt = {} # First check for comment part has been collected prefectly - if ' Union[List, str]: """ - This method returns list of commnent text. As some xml element might have - multiple separated comment intended for a single element. + This method returns list of commnent text. As some xml element might have + multiple separated comment intended for a single element. """ return self._comnt_list @@ -348,17 +360,19 @@ class YAMLComment(Comment): 1. Do not delete any element form yaml dictionary (for loaded_obj. check: Comment_collector class. because this loaded file has been exploited in nyaml2nxdl forward tools.) """ + # Class level variable. The main reason behind that to follow structure of # abstract class 'Comment' __yaml_dict__: dict = {} __yaml_line_info: dict = {} - __comment_escape_char = {'--': '-\\-'} + __comment_escape_char = {"--": "-\\-"} - def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: - """Initialization of YAMLComment follow Comment class. - """ + def __init__(self, comment_id: int = -1, last_comment: "Comment" = None) -> None: + """Initialization of YAMLComment follow Comment class.""" super().__init__(comment_id, last_comment) - self.collect_yaml_line_info(YAMLComment.__yaml_dict__, YAMLComment.__yaml_line_info) + self.collect_yaml_line_info( + YAMLComment.__yaml_dict__, YAMLComment.__yaml_line_info + ) def process_each_line(self, text, line_num): """Take care of each line of text. 
Through which function the text @@ -369,21 +383,21 @@ def process_each_line(self, text, line_num): if self._comnt_end_found and not self._is_elemt_found: if self._comnt: self._comnt_list.append(self._comnt) - self._comnt = '' + self._comnt = "" if self._comnt_end_found: - line_key = '' - if ':' in text: - ind = text.index(':') - line_key = '__line__' + ''.join(text[0:ind]) + line_key = "" + if ":" in text: + ind = text.index(":") + line_key = "__line__" + "".join(text[0:ind]) for l_num, l_key in self.__yaml_line_info.items(): if line_num == int(l_num) and line_key == l_key: self.store_element(line_key, line_num) break # Comment comes very end of the file - if text == 'post_comment' and line_key == '': - line_key = '__line__post_comment' + if text == "post_comment" and line_key == "": + line_key = "__line__post_comment" self.store_element(line_key, line_num) def has_post_comment(self): @@ -393,7 +407,7 @@ def has_post_comment(self): nxdl element(class, group, filed and attribute.) """ for key, _ in self._elemt.items(): - if '__line__post_comment' == key: + if "__line__post_comment" == key: return True return False @@ -411,17 +425,17 @@ def append_comment(self, text: str) -> None: # For empty line inside doc or yaml file. 
elif not text: return - elif '# ' == ''.join(text[0:2]): + elif "# " == "".join(text[0:2]): self._comnt_start_found = True self._comnt_end_found = False - self._comnt = '' if not self._comnt else self._comnt + '\n' - self._comnt = self._comnt + ''.join(text[2:]) - elif '#' == text[0]: + self._comnt = "" if not self._comnt else self._comnt + "\n" + self._comnt = self._comnt + "".join(text[2:]) + elif "#" == text[0]: self._comnt_start_found = True self._comnt_end_found = False - self._comnt = '' if not self._comnt else self._comnt + '\n' - self._comnt = self._comnt + ''.join(text[1:]) - elif 'post_comment' == text: + self._comnt = "" if not self._comnt else self._comnt + "\n" + self._comnt = self._comnt + "".join(text[1:]) + elif "post_comment" == text: self._comnt_end_found = True self._comnt_start_found = False # for any line after 'comment block' found @@ -432,8 +446,8 @@ def append_comment(self, text: str) -> None: # pylint: disable=arguments-differ def store_element(self, line_key, line_number): """ - Store comment content and information of commen location (for what comment is - created.). + Store comment content and information of commen location (for what comment is + created.). """ self._elemt = {} self._elemt[line_key] = int(line_number) @@ -454,14 +468,13 @@ def get_line_number(self, line_key): def get_line_info(self): """ - Return line annotation and line number from a comment. + Return line annotation and line number from a comment. 
""" for line_anno, line_loc in self._elemt.items(): return line_anno, line_loc def replace_scape_char(self, text): - """Replace escape char according to __comment_escape_char dict - """ + """Replace escape char according to __comment_escape_char dict""" for ecp_char, ecp_alt in YAMLComment.__comment_escape_char.items(): if ecp_char in text: text = text.replace(ecp_char, ecp_alt) @@ -472,8 +485,7 @@ def get_element_location(self): Retrun yaml line '__line__KEY' info and and line numner """ if len(self._elemt) > 1: - raise ValueError(f"Comment element should be one but got " - f"{self._elemt}") + raise ValueError(f"Comment element should be one but got " f"{self._elemt}") for key, val in self._elemt.items(): yield key, val @@ -483,7 +495,7 @@ def collect_yaml_line_info(self, yaml_dict, line_info_dict): a yaml file dictonary in another dictionary. """ for line_key, line_n in yaml_dict.items(): - if '__line__' in line_key: + if "__line__" in line_key: line_info_dict[line_n] = line_key for _, val in yaml_dict.items(): @@ -495,13 +507,12 @@ def __contains__(self, line_key): return line_key in self._elemt def __eq__(self, comment_obj): - """Check the self has same value as right comment. 
- """ + """Check the self has same value as right comment.""" if len(self._comnt_list) != len(comment_obj._comnt_list): return False for left_cmnt, right_cmnt in zip(self._comnt_list, comment_obj._comnt_list): - left_cmnt = left_cmnt.split('\n') - right_cmnt = right_cmnt.split('\n') + left_cmnt = left_cmnt.split("\n") + right_cmnt = right_cmnt.split("\n") for left_line, right_line in zip(left_cmnt, right_cmnt): if left_line.strip() != right_line.strip(): return False diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl.py b/dev_tools/nyaml2nxdl/nyaml2nxdl.py index 160b3f830..815b015e6 100755 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl.py @@ -26,13 +26,13 @@ import xml.etree.ElementTree as ET import click -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_sha256_hash, - extend_yamlfile_with_comment, - separate_hash_yaml_and_nxdl) -from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import nyaml2nxdl, pretty_print_xml -from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import (Nxdl2yaml, - compare_niac_and_my) - +from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import Nxdl2yaml +from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import compare_niac_and_my +from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import nyaml2nxdl +from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import pretty_print_xml +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import extend_yamlfile_with_comment +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_sha256_hash +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import separate_hash_yaml_and_nxdl DEPTH_SIZE = 4 * " " @@ -42,13 +42,13 @@ def generate_nxdl_or_retrieve_nxdl(yaml_file, out_xml_file, verbose): """ - Generate yaml, nxdl and hash. - if the extracted hash is exactly the same as producd from generated yaml then - retrieve the nxdl part from provided yaml. - Else, generate nxdl from separated yaml with the help of nyaml2nxdl function + Generate yaml, nxdl and hash. 
+ if the extracted hash is exactly the same as producd from generated yaml then + retrieve the nxdl part from provided yaml. + Else, generate nxdl from separated yaml with the help of nyaml2nxdl function """ pa_path, rel_file = os.path.split(yaml_file) - sep_yaml = os.path.join(pa_path, f'temp_{rel_file}') + sep_yaml = os.path.join(pa_path, f"temp_{rel_file}") hash_found = separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, out_xml_file) if hash_found: @@ -67,67 +67,84 @@ def append_yml(input_file, append, verbose): and print both an XML and YML file of the extended base class. """ - nexus_def_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../definitions') - assert [s for s in os.listdir(os.path.join(nexus_def_path, 'base_classes') - ) if append.strip() == s.replace('.nxdl.xml', '')], \ - 'Your base class extension does not match any existing NeXus base classes' - tree = ET.parse(os.path.join(nexus_def_path + '/base_classes', append + '.nxdl.xml')) + nexus_def_path = os.path.join( + os.path.abspath(os.path.dirname(__file__)), "../../definitions" + ) + assert [ + s + for s in os.listdir(os.path.join(nexus_def_path, "base_classes")) + if append.strip() == s.replace(".nxdl.xml", "") + ], "Your base class extension does not match any existing NeXus base classes" + tree = ET.parse( + os.path.join(nexus_def_path + "/base_classes", append + ".nxdl.xml") + ) root = tree.getroot() # warning: tmp files are printed on disk and removed at the ends!! 
- pretty_print_xml(root, 'tmp.nxdl.xml') - input_tmp_xml = 'tmp.nxdl.xml' - out_tmp_yml = 'tmp_parsed.yaml' + pretty_print_xml(root, "tmp.nxdl.xml") + input_tmp_xml = "tmp.nxdl.xml" + out_tmp_yml = "tmp_parsed.yaml" converter = Nxdl2yaml([], []) converter.print_yml(input_tmp_xml, out_tmp_yml, verbose) - nyaml2nxdl(input_file=out_tmp_yml, - out_file='tmp_parsed.nxdl.xml', - verbose=verbose) - tree = ET.parse('tmp_parsed.nxdl.xml') + nyaml2nxdl(input_file=out_tmp_yml, out_file="tmp_parsed.nxdl.xml", verbose=verbose) + tree = ET.parse("tmp_parsed.nxdl.xml") tree2 = ET.parse(input_file) root_no_duplicates = ET.Element( - 'definition', {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1', - 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation': 'http://www.w3.org/2001/XMLSchema-instance' - } + "definition", + { + "xmlns": "http://definition.nexusformat.org/nxdl/3.1", + "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "xsi:schemaLocation": "http://www.w3.org/2001/XMLSchema-instance", + }, ) for attribute_keys in root.attrib.keys(): - if attribute_keys != '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': + if ( + attribute_keys + != "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation" + ): attribute_value = root.attrib[attribute_keys] root_no_duplicates.set(attribute_keys, attribute_value) for elems in root.iter(): - if 'doc' in elems.tag: - root_doc = ET.SubElement(root_no_duplicates, 'doc') + if "doc" in elems.tag: + root_doc = ET.SubElement(root_no_duplicates, "doc") root_doc.text = elems.text break - group = '{http://definition.nexusformat.org/nxdl/3.1}group' - root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, - group, - root_no_duplicates) - field = '{http://definition.nexusformat.org/nxdl/3.1}field' - root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, - field, - root_no_duplicates) - attribute = '{http://definition.nexusformat.org/nxdl/3.1}attribute' - root_no_duplicates = 
compare_niac_and_my(tree, tree2, verbose, - attribute, - root_no_duplicates) - pretty_print_xml(root_no_duplicates, f"{input_file.replace('.nxdl.xml', '')}" - f"_appended.nxdl.xml") - - input_file_xml = input_file.replace('.nxdl.xml', "_appended.nxdl.xml") - out_file_yml = input_file.replace('.nxdl.xml', "_appended_parsed.yaml") + group = "{http://definition.nexusformat.org/nxdl/3.1}group" + root_no_duplicates = compare_niac_and_my( + tree, tree2, verbose, group, root_no_duplicates + ) + field = "{http://definition.nexusformat.org/nxdl/3.1}field" + root_no_duplicates = compare_niac_and_my( + tree, tree2, verbose, field, root_no_duplicates + ) + attribute = "{http://definition.nexusformat.org/nxdl/3.1}attribute" + root_no_duplicates = compare_niac_and_my( + tree, tree2, verbose, attribute, root_no_duplicates + ) + pretty_print_xml( + root_no_duplicates, + f"{input_file.replace('.nxdl.xml', '')}" f"_appended.nxdl.xml", + ) + + input_file_xml = input_file.replace(".nxdl.xml", "_appended.nxdl.xml") + out_file_yml = input_file.replace(".nxdl.xml", "_appended_parsed.yaml") converter = Nxdl2yaml([], []) converter.print_yml(input_file_xml, out_file_yml, verbose) - nyaml2nxdl(input_file=out_file_yml, - out_file=out_file_yml.replace('.yaml', '.nxdl.xml'), - verbose=verbose) - os.rename(f"{input_file.replace('.nxdl.xml', '_appended_parsed.yaml')}", - f"{input_file.replace('.nxdl.xml', '_appended.yaml')}") - os.rename(f"{input_file.replace('.nxdl.xml', '_appended_parsed.nxdl.xml')}", - f"{input_file.replace('.nxdl.xml', '_appended.nxdl.xml')}") - os.remove('tmp.nxdl.xml') - os.remove('tmp_parsed.yaml') - os.remove('tmp_parsed.nxdl.xml') + nyaml2nxdl( + input_file=out_file_yml, + out_file=out_file_yml.replace(".yaml", ".nxdl.xml"), + verbose=verbose, + ) + os.rename( + f"{input_file.replace('.nxdl.xml', '_appended_parsed.yaml')}", + f"{input_file.replace('.nxdl.xml', '_appended.yaml')}", + ) + os.rename( + f"{input_file.replace('.nxdl.xml', '_appended_parsed.nxdl.xml')}", + 
f"{input_file.replace('.nxdl.xml', '_appended.nxdl.xml')}", + ) + os.remove("tmp.nxdl.xml") + os.remove("tmp_parsed.yaml") + os.remove("tmp_parsed.nxdl.xml") def split_name_and_extension(file_name): @@ -135,93 +152,98 @@ def split_name_and_extension(file_name): Split file name into extension and rest of the file name. return file raw nam and extension """ - parts = file_name.rsplit('.', 3) + parts = file_name.rsplit(".", 3) if len(parts) == 2: raw = parts[0] ext = parts[1] if len(parts) == 3: raw = parts[0] - ext = '.'.join(parts[1:]) + ext = ".".join(parts[1:]) return raw, ext @click.command() @click.option( - '--input-file', + "--input-file", required=True, prompt=True, - help='The path to the XML or YAML input data file to read and create \ -a YAML or XML file from, respectively.' + help="The path to the XML or YAML input data file to read and create \ +a YAML or XML file from, respectively.", ) @click.option( - '--append', - help='Parse xml file and append to base class, given that the xml file has same name \ -of an existing base class' + "--append", + help="Parse xml file and append to base class, given that the xml file has same name \ +of an existing base class", ) @click.option( - '--check-consistency', + "--check-consistency", is_flag=True, default=False, - help=('Check wether yaml or nxdl has followed general rules of scema or not' - 'check whether your comment in the right place or not. The option render an ' - 'output file of the same extension(*_consistency.yaml or *_consistency.nxdl.xml)') + help=( + "Check wether yaml or nxdl has followed general rules of scema or not" + "check whether your comment in the right place or not. 
The option render an " + "output file of the same extension(*_consistency.yaml or *_consistency.nxdl.xml)" + ), ) @click.option( - '--verbose', + "--verbose", is_flag=True, default=False, - help='Print in standard output keywords and value types to help \ -possible issues in yaml files' + help="Print in standard output keywords and value types to help \ +possible issues in yaml files", ) def launch_tool(input_file, verbose, append, check_consistency): """ - Main function that distiguishes the input file format and launches the tools. + Main function that distiguishes the input file format and launches the tools. """ if os.path.isfile(input_file): raw_name, ext = split_name_and_extension(input_file) else: raise ValueError("Need a valid input file.") - if ext == 'yaml': - xml_out_file = raw_name + '.nxdl.xml' + if ext == "yaml": + xml_out_file = raw_name + ".nxdl.xml" generate_nxdl_or_retrieve_nxdl(input_file, xml_out_file, verbose) if append: - append_yml(raw_name + '.nxdl.xml', - append, - verbose - ) + append_yml(raw_name + ".nxdl.xml", append, verbose) # For consistency running if check_consistency: - yaml_out_file = raw_name + '_consistency.' + ext + yaml_out_file = raw_name + "_consistency." 
+ ext converter = Nxdl2yaml([], []) converter.print_yml(xml_out_file, yaml_out_file, verbose) os.remove(xml_out_file) - elif ext == 'nxdl.xml': + elif ext == "nxdl.xml": if not append: - yaml_out_file = raw_name + '_parsed' + '.yaml' + yaml_out_file = raw_name + "_parsed" + ".yaml" converter = Nxdl2yaml([], []) converter.print_yml(input_file, yaml_out_file, verbose) # Append nxdl.xml file with yaml output file yaml_hash = get_sha256_hash(yaml_out_file) # Lines as divider between yaml and nxdl - top_lines = [('\n# ++++++++++++++++++++++++++++++++++ SHA HASH' - ' ++++++++++++++++++++++++++++++++++\n'), - f'# {yaml_hash}\n'] - - extend_yamlfile_with_comment(yaml_file=yaml_out_file, - file_to_be_appended=input_file, - top_lines_list=top_lines) + top_lines = [ + ( + "\n# ++++++++++++++++++++++++++++++++++ SHA HASH" + " ++++++++++++++++++++++++++++++++++\n" + ), + f"# {yaml_hash}\n", + ] + + extend_yamlfile_with_comment( + yaml_file=yaml_out_file, + file_to_be_appended=input_file, + top_lines_list=top_lines, + ) else: append_yml(input_file, append, verbose) # Taking care of consistency running if check_consistency: - xml_out_file = raw_name + '_consistency.' + ext + xml_out_file = raw_name + "_consistency." + ext generate_nxdl_or_retrieve_nxdl(yaml_out_file, xml_out_file, verbose) os.remove(yaml_out_file) else: raise ValueError("Provide correct file with extension '.yaml or '.nxdl.xml") -if __name__ == '__main__': +if __name__ == "__main__": launch_tool().parse() # pylint: disable=no-value-for-parameter diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py index 72f5a6c42..faa22cc23 100755 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py @@ -2,6 +2,8 @@ """This file collects the function used in the reverse tool nxdl2yaml. """ +import os + # -*- coding: utf-8 -*- # # Copyright The NOMAD Authors. @@ -21,18 +23,17 @@ # limitations under the License. 
# import sys -from typing import List, Dict import xml.etree.ElementTree as ET -import os +from typing import Dict +from typing import List -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_node_parent_info, - get_yaml_escape_char_dict, - cleaning_empty_lines) from pynxtools.dataconverter.helpers import remove_namespace_from_tag - +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import cleaning_empty_lines +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_node_parent_info +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_yaml_escape_char_dict DEPTH_SIZE = " " -CMNT_TAG = '!--' +CMNT_TAG = "!--" def separate_pi_comments(input_file): @@ -43,24 +44,24 @@ def separate_pi_comments(input_file): comment = [] xml_lines = [] - with open(input_file, "r", encoding='utf-8') as file: + with open(input_file, "r", encoding="utf-8") as file: lines = file.readlines() has_pi = True for line in lines: - c_start = '' - def_tag = ' 0 and has_pi: - comment.append(line.replace(cmnt_end, '')) - comments_list.append(''.join(comment)) + comment.append(line.replace(cmnt_end, "")) + comments_list.append("".join(comment)) comment = [] elif def_tag in line or not has_pi: has_pi = False @@ -69,25 +70,24 @@ def separate_pi_comments(input_file): comment.append(line) else: xml_lines.append(line) - return comments_list, ''.join(xml_lines) + return comments_list, "".join(xml_lines) # Collected: https://dustinoprea.com/2019/01/22/python-parsing-xml-and-retaining-the-comments/ class _CommentedTreeBuilder(ET.TreeBuilder): - def comment(self, text): """ defining comment builder in TreeBuilder """ - self.start('!--', {}) + self.start("!--", {}) self.data(text) - self.end('--') + self.end("--") def parse(filepath): """ - Construct parse function for modified tree builder for including modified TreeBuilder - and rebuilding XMLParser. + Construct parse function for modified tree builder for including modified TreeBuilder + and rebuilding XMLParser. 
""" comments, xml_str = separate_pi_comments(filepath) ctb = _CommentedTreeBuilder() @@ -97,7 +97,7 @@ def parse(filepath): def handle_mapping_char(text, depth=-1, skip_n_line_on_top=False): - """Check for ":" char and replace it by "':'". """ + """Check for ":" char and replace it by "':'".""" escape_char = get_yaml_escape_char_dict() for esc_key, val in escape_char.items(): @@ -119,23 +119,23 @@ def add_new_line_with_pipe_on_top(text, depth): char_list_to_add_new_line_on_top_of_text = [":"] for char in char_list_to_add_new_line_on_top_of_text: if char in text: - return '|' + '\n' + depth * DEPTH_SIZE + text + return "|" + "\n" + depth * DEPTH_SIZE + text return text # pylint: disable=too-many-instance-attributes -class Nxdl2yaml(): +class Nxdl2yaml: """ - Parse XML file and print a YML file + Parse XML file and print a YML file """ def __init__( - self, - symbol_list: List[str], - root_level_definition: List[str], - root_level_doc='', - root_level_symbols=''): - + self, + symbol_list: List[str], + root_level_definition: List[str], + root_level_doc="", + root_level_symbols="", + ): # updated part of yaml_dict self.found_definition = False self.root_level_doc = root_level_doc @@ -157,7 +157,7 @@ def __init__( def print_yml(self, input_file, output_yml, verbose): """ - Parse an XML file provided as input and print a YML file + Parse an XML file provided as input and print a YML file """ if os.path.isfile(output_yml): os.remove(output_yml) @@ -165,7 +165,7 @@ def print_yml(self, input_file, output_yml, verbose): depth = 0 self.pi_comments, root = parse(input_file) - xml_tree = {'tree': root, 'node': root} + xml_tree = {"tree": root, "node": root} self.xmlparse(output_yml, xml_tree, depth, verbose) def handle_symbols(self, depth, node): @@ -177,47 +177,55 @@ def handle_symbols(self, depth, node): f"{node.text.strip() if node.text else ''}" ) depth += 1 - last_comment = '' + last_comment = "" sbl_doc_cmnt_list = [] # Comments that come above symbol tag symbol_cmnt_list 
= [] for child in list(node): tag = remove_namespace_from_tag(child.tag) if tag == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) - if tag == 'doc': + last_comment = self.comvert_to_ymal_comment( + depth * DEPTH_SIZE, child.text + ) + if tag == "doc": symbol_cmnt_list.append(last_comment) # The bellow line is for handling lenth of 'symbol_comments' and # 'symbol_doc_comments'. Otherwise print_root_level_info() gets inconsistency # over for the loop while writting comment on file - sbl_doc_cmnt_list.append('') - last_comment = '' - self.symbol_list.append(self.handle_not_root_level_doc(depth, - text=child.text)) - elif tag == 'symbol': + sbl_doc_cmnt_list.append("") + last_comment = "" + self.symbol_list.append( + self.handle_not_root_level_doc(depth, text=child.text) + ) + elif tag == "symbol": # place holder is symbol name symbol_cmnt_list.append(last_comment) - last_comment = '' - if 'doc' in child.attrib: + last_comment = "" + if "doc" in child.attrib: self.symbol_list.append( - self.handle_not_root_level_doc(depth, - tag=child.attrib['name'], - text=child.attrib['doc'])) + self.handle_not_root_level_doc( + depth, tag=child.attrib["name"], text=child.attrib["doc"] + ) + ) else: for symbol_doc in list(child): tag = remove_namespace_from_tag(symbol_doc.tag) if tag == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, - symbol_doc.text) - if tag == 'doc': + last_comment = self.comvert_to_ymal_comment( + depth * DEPTH_SIZE, symbol_doc.text + ) + if tag == "doc": sbl_doc_cmnt_list.append(last_comment) - last_comment = '' + last_comment = "" self.symbol_list.append( - self.handle_not_root_level_doc(depth, - tag=child.attrib['name'], - text=symbol_doc.text)) - self.store_root_level_comments('symbol_doc_comments', sbl_doc_cmnt_list) - self.store_root_level_comments('symbol_comments', symbol_cmnt_list) + self.handle_not_root_level_doc( + depth, + 
tag=child.attrib["name"], + text=symbol_doc.text, + ) + ) + self.store_root_level_comments("symbol_doc_comments", sbl_doc_cmnt_list) + self.store_root_level_comments("symbol_comments", symbol_cmnt_list) def store_root_level_comments(self, holder, comment): """Store yaml text or section line and the comments inteded for that lines or section""" @@ -226,13 +234,13 @@ def store_root_level_comments(self, holder, comment): def handle_definition(self, node): """ - Handle definition group and its attributes - NOTE: Here we tried to store the order of the xml element attributes. So that we get - exactly the same file in nxdl from yaml. + Handle definition group and its attributes + NOTE: Here we tried to store the order of the xml element attributes. So that we get + exactly the same file in nxdl from yaml. """ # pylint: disable=consider-using-f-string # self.root_level_definition[0] = '' - keyword = '' + keyword = "" # tmp_word for reseving the location tmp_word = "#xx#" attribs = node.attrib @@ -249,15 +257,14 @@ def handle_definition(self, node): if keyword_order == -1: self.root_level_definition.append(tmp_word) keyword_order = self.root_level_definition.index(tmp_word) - elif 'schemaLocation' not in item \ - and 'extends' != item: + elif "schemaLocation" not in item and "extends" != item: text = f"{item}: {attribs[item]}" self.root_level_definition.append(text) self.root_level_definition[keyword_order] = f"{keyword}:" def handle_root_level_doc(self, node): """ - Handle the documentation field found at root level. + Handle the documentation field found at root level. """ # tag = remove_namespace_from_tag(node.tag) text = node.text @@ -265,7 +272,7 @@ def handle_root_level_doc(self, node): self.root_level_doc = text # pylint: disable=too-many-branches - def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): + def handle_not_root_level_doc(self, depth, text, tag="doc", file_out=None): """ Handle docs field along the yaml file. 
In this function we also tried to keep the track of intended indentation. E.g. the bollow doc block. @@ -280,7 +287,7 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): text = handle_mapping_char(text, -1, True) if "\n" in text: # To remove '\n' character as it will be added before text. - text = cleaning_empty_lines(text.split('\n')) + text = cleaning_empty_lines(text.split("\n")) text_tmp = [] yaml_indent_n = len((depth + 1) * DEPTH_SIZE) # Find indentaion in the first text line with alphabet @@ -288,12 +295,12 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): while tmp_i != -1: first_line_indent_n = 0 # Taking care of empty text whitout any character - if len(text) == 1 and text[0] == '': + if len(text) == 1 and text[0] == "": break for ch_ in text[tmp_i]: - if ch_ == ' ': + if ch_ == " ": first_line_indent_n = first_line_indent_n + 1 - elif ch_ != '': + elif ch_ != "": tmp_i = -2 break tmp_i = tmp_i + 1 @@ -314,23 +321,23 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): line_indent_n = 0 # Collect first empty space without alphabate for ch_ in line: - if ch_ == ' ': + if ch_ == " ": line_indent_n = line_indent_n + 1 else: break line_indent_n = line_indent_n + indent_diff if line_indent_n < yaml_indent_n: # if line still under yaml identation - text_tmp.append(yaml_indent_n * ' ' + line.strip()) + text_tmp.append(yaml_indent_n * " " + line.strip()) else: - text_tmp.append(line_indent_n * ' ' + line.strip()) + text_tmp.append(line_indent_n * " " + line.strip()) - text = '\n' + '\n'.join(text_tmp) + text = "\n" + "\n".join(text_tmp) if "}" in tag: tag = remove_namespace_from_tag(tag) indent = depth * DEPTH_SIZE elif text: - text = '\n' + (depth + 1) * DEPTH_SIZE + text.strip() + text = "\n" + (depth + 1) * DEPTH_SIZE + text.strip() if "}" in tag: tag = remove_namespace_from_tag(tag) indent = depth * DEPTH_SIZE @@ -360,31 +367,33 @@ def print_root_level_doc(self, file_out): """ 
indent = 0 * DEPTH_SIZE - if ('root_doc' in self.root_level_comment - and self.root_level_comment['root_doc'] != ''): - text = self.root_level_comment['root_doc'] + if ( + "root_doc" in self.root_level_comment + and self.root_level_comment["root_doc"] != "" + ): + text = self.root_level_comment["root_doc"] self.write_out(indent, text, file_out) text = self.root_level_doc self.write_out(indent, text, file_out) - self.root_level_doc = '' + self.root_level_doc = "" def comvert_to_ymal_comment(self, indent, text): """ - Convert into yaml comment by adding exta '#' char in front of comment lines + Convert into yaml comment by adding exta '#' char in front of comment lines """ - lines = text.split('\n') + lines = text.split("\n") mod_lines = [] for line in lines: line = line.strip() - if line and line[0] != '#': - line = indent + '# ' + line + if line and line[0] != "#": + line = indent + "# " + line mod_lines.append(line) elif line: line = indent + line mod_lines.append(line) # The starting '\n' to keep multiple comments separate - return '\n' + '\n'.join(mod_lines) + return "\n" + "\n".join(mod_lines) def print_root_level_info(self, depth, file_out): """ @@ -403,40 +412,58 @@ def print_root_level_info(self, depth, file_out): has_categoty = True if not has_categoty: - raise ValueError("Definition dose not get any category from 'base or application'.") + raise ValueError( + "Definition dose not get any category from 'base or application'." 
+ ) self.print_root_level_doc(file_out) - if 'symbols' in self.root_level_comment and self.root_level_comment['symbols'] != '': + if ( + "symbols" in self.root_level_comment + and self.root_level_comment["symbols"] != "" + ): indent = depth * DEPTH_SIZE - text = self.root_level_comment['symbols'] + text = self.root_level_comment["symbols"] self.write_out(indent, text, file_out) if self.root_level_symbols: - self.write_out(indent=0 * DEPTH_SIZE, text=self.root_level_symbols, file_out=file_out) + self.write_out( + indent=0 * DEPTH_SIZE, text=self.root_level_symbols, file_out=file_out + ) # symbol_list include 'symbols doc', and all 'symbol' for ind, symbol in enumerate(self.symbol_list): # Taking care of comments that come on to of 'symbols doc' and 'symbol' - if 'symbol_comments' in self.root_level_comment and \ - self.root_level_comment['symbol_comments'][ind] != '': + if ( + "symbol_comments" in self.root_level_comment + and self.root_level_comment["symbol_comments"][ind] != "" + ): indent = depth * DEPTH_SIZE - self.write_out(indent, - self.root_level_comment['symbol_comments'][ind], file_out) - if 'symbol_doc_comments' in self.root_level_comment and \ - self.root_level_comment['symbol_doc_comments'][ind] != '': - + self.write_out( + indent, + self.root_level_comment["symbol_comments"][ind], + file_out, + ) + if ( + "symbol_doc_comments" in self.root_level_comment + and self.root_level_comment["symbol_doc_comments"][ind] != "" + ): indent = depth * DEPTH_SIZE - self.write_out(indent, - self.root_level_comment['symbol_doc_comments'][ind], file_out) + self.write_out( + indent, + self.root_level_comment["symbol_doc_comments"][ind], + file_out, + ) self.write_out(indent=(0 * DEPTH_SIZE), text=symbol, file_out=file_out) if len(self.pi_comments) > 1: indent = DEPTH_SIZE * depth # The first comment is top level copy-right doc string for comment in self.pi_comments[1:]: - self.write_out(indent, self.comvert_to_ymal_comment(indent, comment), file_out) + self.write_out( + 
indent, self.comvert_to_ymal_comment(indent, comment), file_out + ) if self.root_level_definition: # Soring NXname for writting end of the definition attributes - nx_name = '' + nx_name = "" for defs in self.root_level_definition: - if 'NX' in defs and defs[-1] == ':': + if "NX" in defs and defs[-1] == ":": nx_name = defs continue if defs in ("category: application", "category: base"): @@ -447,55 +474,77 @@ def print_root_level_info(self, depth, file_out): def handle_exists(self, exists_dict, key, val): """ - Create exist component as folows: + Create exist component as folows: - {'min' : value for min, - 'max' : value for max, - 'optional' : value for optional} + {'min' : value for min, + 'max' : value for max, + 'optional' : value for optional} - This is created separately so that the keys stays in order. + This is created separately so that the keys stays in order. """ if not val: - val = '' + val = "" else: val = str(val) - if 'minOccurs' == key: - exists_dict['minOccurs'] = ['min', val] - if 'maxOccurs' == key: - exists_dict['maxOccurs'] = ['max', val] - if 'optional' == key: - exists_dict['optional'] = ['optional', val] - if 'recommended' == key: - exists_dict['recommended'] = ['recommended', val] - if 'required' == key: - exists_dict['required'] = ['required', val] + if "minOccurs" == key: + exists_dict["minOccurs"] = ["min", val] + if "maxOccurs" == key: + exists_dict["maxOccurs"] = ["max", val] + if "optional" == key: + exists_dict["optional"] = ["optional", val] + if "recommended" == key: + exists_dict["recommended"] = ["recommended", val] + if "required" == key: + exists_dict["required"] = ["required", val] # pylint: disable=too-many-branches, consider-using-f-string def handle_group_or_field(self, depth, node, file_out): """Handle all the possible attributes that come along a field or group""" - allowed_attr = ['optional', 'recommended', 'name', 'type', 'axes', 'axis', 'data_offset', - 'interpretation', 'long_name', 'maxOccurs', 'minOccurs', 'nameType', 
- 'optional', 'primary', 'signal', 'stride', 'units', 'required', - 'deprecated', 'exists'] + allowed_attr = [ + "optional", + "recommended", + "name", + "type", + "axes", + "axis", + "data_offset", + "interpretation", + "long_name", + "maxOccurs", + "minOccurs", + "nameType", + "optional", + "primary", + "signal", + "stride", + "units", + "required", + "deprecated", + "exists", + ] name_type = "" node_attr = node.attrib rm_key_list = [] # Maintain order: name and type in form name(type) or (type)name that come first for key, val in node_attr.items(): - if key == 'name': + if key == "name": name_type = name_type + val rm_key_list.append(key) - if key == 'type': + if key == "type": name_type = name_type + "(%s)" % val rm_key_list.append(key) if not name_type: - raise ValueError(f"No 'name' or 'type' hase been found. But, 'group' or 'field' " - f"must have at list a nme.We got attributes: {node_attr}") - file_out.write('{indent}{name_type}:\n'.format( - indent=depth * DEPTH_SIZE, - name_type=name_type)) + raise ValueError( + f"No 'name' or 'type' hase been found. 
But, 'group' or 'field' " + f"must have at list a nme.We got attributes: {node_attr}" + ) + file_out.write( + "{indent}{name_type}:\n".format( + indent=depth * DEPTH_SIZE, name_type=name_type + ) + ) for key in rm_key_list: del node_attr[key] @@ -505,31 +554,35 @@ def handle_group_or_field(self, depth, node, file_out): exists_dict = {} for key, val in node_attr.items(): # As both 'minOccurs', 'maxOccurs' and optionality move to the 'exists' - if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']: - if 'exists' not in tmp_dict: - tmp_dict['exists'] = [] + if key in ["minOccurs", "maxOccurs", "optional", "recommended", "required"]: + if "exists" not in tmp_dict: + tmp_dict["exists"] = [] self.handle_exists(exists_dict, key, val) - elif key == 'units': - tmp_dict['unit'] = str(val) + elif key == "units": + tmp_dict["unit"] = str(val) else: tmp_dict[key] = str(val) if key not in allowed_attr: - raise ValueError(f"An attribute ({key}) in 'field' or 'group' has been found " - f"that is not allowed. The allowed attr is {allowed_attr}.") + raise ValueError( + f"An attribute ({key}) in 'field' or 'group' has been found " + f"that is not allowed. The allowed attr is {allowed_attr}." + ) if exists_dict: for key, val in exists_dict.items(): - if key in ['minOccurs', 'maxOccurs']: - tmp_dict['exists'] = tmp_dict['exists'] + val - elif key in ['optional', 'recommended', 'required']: - tmp_dict['exists'] = key + if key in ["minOccurs", "maxOccurs"]: + tmp_dict["exists"] = tmp_dict["exists"] + val + elif key in ["optional", "recommended", "required"]: + tmp_dict["exists"] = key depth_ = depth + 1 for key, val in tmp_dict.items(): # Increase depth size inside handle_map...() for writting text with one # more indentation. 
- file_out.write(f'{depth_ * DEPTH_SIZE}{key}: ' - f'{handle_mapping_char(val, depth_ + 1, False)}\n') + file_out.write( + f"{depth_ * DEPTH_SIZE}{key}: " + f"{handle_mapping_char(val, depth_ + 1, False)}\n" + ) # pylint: disable=too-many-branches, too-many-locals def handle_dimension(self, depth, node, file_out): @@ -540,33 +593,35 @@ def handle_dimension(self, depth, node, file_out): and attributes of dim has been handled inside this function here. """ # pylint: disable=consider-using-f-string - possible_dim_attrs = ['ref', 'required', - 'incr', 'refindex'] - possible_dimemsion_attrs = ['rank'] + possible_dim_attrs = ["ref", "required", "incr", "refindex"] + possible_dimemsion_attrs = ["rank"] # taking care of Dimension tag file_out.write( - '{indent}{tag}:\n'.format( - indent=depth * DEPTH_SIZE, - tag=node.tag.split("}", 1)[1])) + "{indent}{tag}:\n".format( + indent=depth * DEPTH_SIZE, tag=node.tag.split("}", 1)[1] + ) + ) # Taking care of dimension attributes for attr, value in node.attrib.items(): if attr in possible_dimemsion_attrs and not isinstance(value, dict): indent = (depth + 1) * DEPTH_SIZE - file_out.write(f'{indent}{attr}: {value}\n') + file_out.write(f"{indent}{attr}: {value}\n") else: - raise ValueError(f"Dimension has got an attribute {attr} that is not valid." - f"Current the allowd atributes are {possible_dimemsion_attrs}." - f" Please have a look") + raise ValueError( + f"Dimension has got an attribute {attr} that is not valid." + f"Current the allowd atributes are {possible_dimemsion_attrs}." 
+ f" Please have a look" + ) # taking carew of dimension doc for child in list(node): tag = remove_namespace_from_tag(child.tag) - if tag == 'doc': + if tag == "doc": text = self.handle_not_root_level_doc(depth + 1, child.text) file_out.write(text) node.remove(child) - dim_index_value = '' + dim_index_value = "" dim_other_parts = {} dim_cmnt_node = [] # taking care of dim and doc childs of dimension @@ -574,11 +629,12 @@ def handle_dimension(self, depth, node, file_out): tag = remove_namespace_from_tag(child.tag) child_attrs = child.attrib # taking care of index and value attributes - if tag == ('dim'): + if tag == ("dim"): # taking care of index and value in format [[index, value]] - dim_index_value = dim_index_value + '[{index}, {value}], '.format( - index=child_attrs['index'] if "index" in child_attrs else '', - value=child_attrs['value'] if "value" in child_attrs else '') + dim_index_value = dim_index_value + "[{index}, {value}], ".format( + index=child_attrs["index"] if "index" in child_attrs else "", + value=child_attrs["value"] if "value" in child_attrs else "", + ) if "index" in child_attrs: del child_attrs["index"] if "value" in child_attrs: @@ -587,7 +643,7 @@ def handle_dimension(self, depth, node, file_out): # Taking care of doc comes as child of dim for cchild in list(child): ttag = cchild.tag.split("}", 1)[1] - if ttag == ('doc'): + if ttag == ("doc"): if ttag not in dim_other_parts: dim_other_parts[ttag] = [] text = cchild.text @@ -612,25 +668,30 @@ def handle_dimension(self, depth, node, file_out): self.handel_comment(depth + 1, ch_nd, file_out) # index and value attributes of dim elements file_out.write( - '{indent}dim: [{value}]\n'.format( - indent=(depth + 1) * DEPTH_SIZE, - value=dim_index_value[:-2] or '')) + "{indent}dim: [{value}]\n".format( + indent=(depth + 1) * DEPTH_SIZE, value=dim_index_value[:-2] or "" + ) + ) # Write the attributes, except index and value, and doc of dim as child of dim_parameter. 
# But tthe doc or attributes for each dim come inside list according to the order of dim. if dim_other_parts: file_out.write( - '{indent}dim_parameters:\n'.format( - indent=(depth + 1) * DEPTH_SIZE)) + "{indent}dim_parameters:\n".format(indent=(depth + 1) * DEPTH_SIZE) + ) # depth = depth + 2 dim_paramerter has child such as doc of dim indent = (depth + 2) * DEPTH_SIZE for key, value in dim_other_parts.items(): - if key == 'doc': - value = self.handle_not_root_level_doc(depth + 2, str(value), key, file_out) + if key == "doc": + value = self.handle_not_root_level_doc( + depth + 2, str(value), key, file_out + ) else: # Increase depth size inside handle_map...() for writting text with one # more indentation. - file_out.write(f"{indent}{key}: " - f"{handle_mapping_char(value, depth + 3, False)}\n") + file_out.write( + f"{indent}{key}: " + f"{handle_mapping_char(value, depth + 3, False)}\n" + ) def handle_enumeration(self, depth, node, file_out): """ @@ -642,7 +703,7 @@ def handle_enumeration(self, depth, node, file_out): If no doc are inherited in the enumeration items, a list of the items is given for the enumeration list. 
- """ + """ # pylint: disable=consider-using-f-string check_doc = [] @@ -652,37 +713,46 @@ def handle_enumeration(self, depth, node, file_out): # pylint: disable=too-many-nested-blocks if check_doc: file_out.write( - '{indent}{tag}: \n'.format( - indent=depth * DEPTH_SIZE, - tag=node.tag.split("}", 1)[1])) + "{indent}{tag}: \n".format( + indent=depth * DEPTH_SIZE, tag=node.tag.split("}", 1)[1] + ) + ) for child in list(node): tag = remove_namespace_from_tag(child.tag) itm_depth = depth + 1 - if tag == ('item'): + if tag == ("item"): file_out.write( - '{indent}{value}: \n'.format( - indent=(itm_depth) * DEPTH_SIZE, - value=child.attrib['value'])) + "{indent}{value}: \n".format( + indent=(itm_depth) * DEPTH_SIZE, value=child.attrib["value"] + ) + ) if list(child): for item_doc in list(child): - if remove_namespace_from_tag(item_doc.tag) == 'doc': + if remove_namespace_from_tag(item_doc.tag) == "doc": item_doc_depth = itm_depth + 1 - self.handle_not_root_level_doc(item_doc_depth, item_doc.text, - item_doc.tag, file_out) - if (remove_namespace_from_tag(item_doc.tag) == CMNT_TAG - and self.include_comment): + self.handle_not_root_level_doc( + item_doc_depth, + item_doc.text, + item_doc.tag, + file_out, + ) + if ( + remove_namespace_from_tag(item_doc.tag) == CMNT_TAG + and self.include_comment + ): self.handel_comment(itm_depth + 1, item_doc, file_out) if tag == CMNT_TAG and self.include_comment: self.handel_comment(itm_depth + 1, child, file_out) else: - enum_list = '' + enum_list = "" remove_nodes = [] for item_child in list(node): tag = remove_namespace_from_tag(item_child.tag) - if tag == ('item'): - enum_list = enum_list + '{value}, '.format( - value=item_child.attrib['value']) + if tag == ("item"): + enum_list = enum_list + "{value}, ".format( + value=item_child.attrib["value"] + ) if tag == CMNT_TAG and self.include_comment: self.handel_comment(depth, item_child, file_out) remove_nodes.append(item_child) @@ -690,134 +760,162 @@ def handle_enumeration(self, depth, 
node, file_out): node.remove(ch_node) file_out.write( - '{indent}{tag}: [{enum_list}]\n'.format( + "{indent}{tag}: [{enum_list}]\n".format( indent=depth * DEPTH_SIZE, tag=remove_namespace_from_tag(node.tag), - enum_list=enum_list[:-2] or '')) + enum_list=enum_list[:-2] or "", + ) + ) def handle_attributes(self, depth, node, file_out): """Handle the attributes parsed from the xml file""" - allowed_attr = ['name', 'type', 'units', 'nameType', 'recommended', 'optional', - 'minOccurs', 'maxOccurs', 'deprecated'] + allowed_attr = [ + "name", + "type", + "units", + "nameType", + "recommended", + "optional", + "minOccurs", + "maxOccurs", + "deprecated", + ] name = "" node_attr = node.attrib - if 'name' in node_attr: + if "name" in node_attr: pass else: raise ValueError("Attribute must have an name key.") rm_key_list = [] # Maintain order: name and type in form name(type) or (type)name that come first for key, val in node_attr.items(): - if key == 'name': + if key == "name": name = val rm_key_list.append(key) for key in rm_key_list: del node_attr[key] - file_out.write('{indent}{escapesymbol}{name}:\n'.format( - indent=depth * DEPTH_SIZE, - escapesymbol=r'\@', - name=name)) + file_out.write( + "{indent}{escapesymbol}{name}:\n".format( + indent=depth * DEPTH_SIZE, escapesymbol=r"\@", name=name + ) + ) tmp_dict = {} exists_dict = {} for key, val in node_attr.items(): # As both 'minOccurs', 'maxOccurs' and optionality move to the 'exists' - if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']: - if 'exists' not in tmp_dict: - tmp_dict['exists'] = [] + if key in ["minOccurs", "maxOccurs", "optional", "recommended", "required"]: + if "exists" not in tmp_dict: + tmp_dict["exists"] = [] self.handle_exists(exists_dict, key, val) - elif key == 'units': - tmp_dict['unit'] = val + elif key == "units": + tmp_dict["unit"] = val else: tmp_dict[key] = val if key not in allowed_attr: - raise ValueError(f"An attribute ({key}) has been found that is not allowed." 
- f"The allowed attr is {allowed_attr}.") + raise ValueError( + f"An attribute ({key}) has been found that is not allowed." + f"The allowed attr is {allowed_attr}." + ) has_min_max = False has_opt_reco_requ = False if exists_dict: for key, val in exists_dict.items(): - if key in ['minOccurs', 'maxOccurs']: - tmp_dict['exists'] = tmp_dict['exists'] + val + if key in ["minOccurs", "maxOccurs"]: + tmp_dict["exists"] = tmp_dict["exists"] + val has_min_max = True - elif key in ['optional', 'recommended', 'required']: - tmp_dict['exists'] = key + elif key in ["optional", "recommended", "required"]: + tmp_dict["exists"] = key has_opt_reco_requ = True if has_min_max and has_opt_reco_requ: - raise ValueError("Optionality 'exists' can take only either from ['minOccurs'," - " 'maxOccurs'] or from ['optional', 'recommended', 'required']" - ". But not from both of the groups together. Please check in" - " attributes") + raise ValueError( + "Optionality 'exists' can take only either from ['minOccurs'," + " 'maxOccurs'] or from ['optional', 'recommended', 'required']" + ". But not from both of the groups together. Please check in" + " attributes" + ) depth_ = depth + 1 for key, val in tmp_dict.items(): # Increase depth size inside handle_map...() for writting text with one # more indentation. 
- file_out.write(f'{depth_ * DEPTH_SIZE}{key}: ' - f'{handle_mapping_char(val, depth_ + 1, False)}\n') + file_out.write( + f"{depth_ * DEPTH_SIZE}{key}: " + f"{handle_mapping_char(val, depth_ + 1, False)}\n" + ) def handel_link(self, depth, node, file_out): """ - Handle link elements of nxdl + Handle link elements of nxdl """ - possible_link_attrs = ['name', 'target', 'napimount'] + possible_link_attrs = ["name", "target", "napimount"] node_attr = node.attrib # Handle special cases - if 'name' in node_attr: - file_out.write('{indent}{name}(link):\n'.format( - indent=depth * DEPTH_SIZE, - name=node_attr['name'] or '')) - del node_attr['name'] + if "name" in node_attr: + file_out.write( + "{indent}{name}(link):\n".format( + indent=depth * DEPTH_SIZE, name=node_attr["name"] or "" + ) + ) + del node_attr["name"] depth_ = depth + 1 # Handle general cases for attr_key, val in node_attr.items(): if attr_key in possible_link_attrs: - file_out.write('{indent}{attr}: {value}\n'.format( - indent=depth_ * DEPTH_SIZE, - attr=attr_key, - value=val)) + file_out.write( + "{indent}{attr}: {value}\n".format( + indent=depth_ * DEPTH_SIZE, attr=attr_key, value=val + ) + ) else: - raise ValueError(f"An anexpected attribute '{attr_key}' of link has found." - f"At this moment the alloed keys are {possible_link_attrs}") + raise ValueError( + f"An anexpected attribute '{attr_key}' of link has found." + f"At this moment the alloed keys are {possible_link_attrs}" + ) def handel_choice(self, depth, node, file_out): """ - Handle choice element which is a parent node of group. + Handle choice element which is a parent node of group. 
""" possible_attr = [] node_attr = node.attrib # Handle special casees - if 'name' in node_attr: - file_out.write('{indent}{attr}(choice): \n'.format( - indent=depth * DEPTH_SIZE, - attr=node_attr['name'])) - del node_attr['name'] + if "name" in node_attr: + file_out.write( + "{indent}{attr}(choice): \n".format( + indent=depth * DEPTH_SIZE, attr=node_attr["name"] + ) + ) + del node_attr["name"] depth_ = depth + 1 # Taking care of general attrinutes. Though, still no attrinutes have found, # but could be used for future for attr in node_attr.items(): if attr in possible_attr: - file_out.write('{indent}{attr}: {value}\n'.format( - indent=depth_ * DEPTH_SIZE, - attr=attr, - value=node_attr[attr])) + file_out.write( + "{indent}{attr}: {value}\n".format( + indent=depth_ * DEPTH_SIZE, attr=attr, value=node_attr[attr] + ) + ) else: - raise ValueError(f"An unexpected attribute '{attr}' of 'choice' has been found." - f"At this moment attributes for choice {possible_attr}") + raise ValueError( + f"An unexpected attribute '{attr}' of 'choice' has been found." + f"At this moment attributes for choice {possible_attr}" + ) def handel_comment(self, depth, node, file_out): """ - Collect comment element and pass to write_out function + Collect comment element and pass to write_out function """ indent = depth * DEPTH_SIZE if self.is_last_element_comment: @@ -834,10 +932,10 @@ def recursion_in_xml_tree(self, depth, xml_tree, output_yml, verbose): behaviour is not triggered as we already handled the symbols' childs. """ - tree = xml_tree['tree'] - node = xml_tree['node'] + tree = xml_tree["tree"] + node = xml_tree["node"] for child in list(node): - xml_tree_children = {'tree': tree, 'node': child} + xml_tree_children = {"tree": tree, "node": child} self.xmlparse(output_yml, xml_tree_children, depth, verbose) # pylint: disable=too-many-branches, too-many-statements @@ -846,63 +944,65 @@ def xmlparse(self, output_yml, xml_tree, depth, verbose): Main of the nxdl2yaml converter. 
It parses XML tree, then prints recursively each level of the tree """ - tree = xml_tree['tree'] - node = xml_tree['node'] + tree = xml_tree["tree"] + node = xml_tree["node"] if verbose: - sys.stdout.write(f'Node tag: {remove_namespace_from_tag(node.tag)}\n') - sys.stdout.write(f'Attributes: {node.attrib}\n') + sys.stdout.write(f"Node tag: {remove_namespace_from_tag(node.tag)}\n") + sys.stdout.write(f"Attributes: {node.attrib}\n") with open(output_yml, "a", encoding="utf-8") as file_out: tag = remove_namespace_from_tag(node.tag) - if tag == 'definition': + if tag == "definition": self.found_definition = True self.handle_definition(node) # Taking care of root level doc and symbols remove_cmnt_n = None - last_comment = '' + last_comment = "" for child in list(node): tag_tmp = remove_namespace_from_tag(child.tag) if tag_tmp == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) + last_comment = self.comvert_to_ymal_comment( + depth * DEPTH_SIZE, child.text + ) remove_cmnt_n = child - if tag_tmp == 'doc': - self.store_root_level_comments('root_doc', last_comment) - last_comment = '' + if tag_tmp == "doc": + self.store_root_level_comments("root_doc", last_comment) + last_comment = "" self.handle_root_level_doc(child) node.remove(child) if remove_cmnt_n is not None: node.remove(remove_cmnt_n) remove_cmnt_n = None - if tag_tmp == 'symbols': - self.store_root_level_comments('symbols', last_comment) - last_comment = '' + if tag_tmp == "symbols": + self.store_root_level_comments("symbols", last_comment) + last_comment = "" self.handle_symbols(depth, child) node.remove(child) if remove_cmnt_n is not None: node.remove(remove_cmnt_n) remove_cmnt_n = None - if tag == ('doc') and depth != 1: + if tag == ("doc") and depth != 1: parent = get_node_parent_info(tree, node)[0] doc_parent = remove_namespace_from_tag(parent.tag) - if doc_parent != 'item': - self.handle_not_root_level_doc(depth, text=node.text, - tag=node.tag, - 
file_out=file_out) + if doc_parent != "item": + self.handle_not_root_level_doc( + depth, text=node.text, tag=node.tag, file_out=file_out + ) if self.found_definition is True and self.root_level_doc: self.print_root_level_info(depth, file_out) # End of print root-level definitions in file - if tag in ('field', 'group') and depth != 0: + if tag in ("field", "group") and depth != 0: self.handle_group_or_field(depth, node, file_out) - if tag == ('enumeration'): + if tag == ("enumeration"): self.handle_enumeration(depth, node, file_out) - if tag == ('attribute'): + if tag == ("attribute"): self.handle_attributes(depth, node, file_out) - if tag == ('dimensions'): + if tag == ("dimensions"): self.handle_dimension(depth, node, file_out) - if tag == ('link'): + if tag == ("link"): self.handel_link(depth, node, file_out) - if tag == ('choice'): + if tag == ("choice"): self.handel_choice(depth, node, file_out) if tag == CMNT_TAG and self.include_comment: self.handel_comment(depth, node, file_out) @@ -913,24 +1013,23 @@ def xmlparse(self, output_yml, xml_tree, depth, verbose): def compare_niac_and_my(tree, tree2, verbose, node, root_no_duplicates): """This function creates two trees with Niac XML file and My XML file. -The main aim is to compare the two trees and create a new one that is the -union of the two initial trees. - -""" + The main aim is to compare the two trees and create a new one that is the + union of the two initial trees. + """ root = tree.getroot() root2 = tree2.getroot() attrs_list_niac = [] for nodo in root.iter(node): attrs_list_niac.append(nodo.attrib) if verbose: - sys.stdout.write('Attributes found in Niac file: \n') - sys.stdout.write(str(attrs_list_niac) + '\n') - sys.stdout.write(' \n') - sys.stdout.write('Started merging of Niac and My file... \n') + sys.stdout.write("Attributes found in Niac file: \n") + sys.stdout.write(str(attrs_list_niac) + "\n") + sys.stdout.write(" \n") + sys.stdout.write("Started merging of Niac and My file... 
\n") for elem in root.iter(node): if verbose: - sys.stdout.write('- Niac element inserted: \n') - sys.stdout.write(str(elem.attrib) + '\n') + sys.stdout.write("- Niac element inserted: \n") + sys.stdout.write(str(elem.attrib) + "\n") index = get_node_parent_info(tree, elem)[1] root_no_duplicates.insert(index, elem) @@ -938,10 +1037,10 @@ def compare_niac_and_my(tree, tree2, verbose, node, root_no_duplicates): index = get_node_parent_info(tree2, elem2)[1] if elem2.attrib not in attrs_list_niac: if verbose: - sys.stdout.write('- My element inserted: \n') - sys.stdout.write(str(elem2.attrib) + '\n') + sys.stdout.write("- My element inserted: \n") + sys.stdout.write(str(elem2.attrib) + "\n") root_no_duplicates.insert(index, elem2) if verbose: - sys.stdout.write(' \n') + sys.stdout.write(" \n") return root_no_duplicates diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index db4d4c464..ca0435e37 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -21,65 +21,67 @@ # limitations under the License. 
# +import os import sys +import textwrap import xml.etree.ElementTree as ET from xml.dom import minidom -import os -import textwrap import yaml - +from pynxtools.dataconverter.helpers import remove_namespace_from_tag from pynxtools.nexus import nexus from pynxtools.nyaml2nxdl.comment_collector import CommentCollector -from pynxtools.dataconverter.helpers import remove_namespace_from_tag -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_yaml_escape_char_reverter_dict, - nx_name_type_resolving, - cleaning_empty_lines, LineLoader) - +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import cleaning_empty_lines +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_yaml_escape_char_reverter_dict +from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import nx_name_type_resolving # pylint: disable=too-many-lines, global-statement, invalid-name -DOM_COMMENT = ("\n" - "# NeXus - Neutron and X-ray Common Data Format\n" - "# \n" - "# Copyright (C) 2014-2022 NeXus International Advisory Committee (NIAC)\n" - "# \n" - "# This library is free software; you can redistribute it and/or\n" - "# modify it under the terms of the GNU Lesser General Public\n" - "# License as published by the Free Software Foundation; either\n" - "# version 3 of the License, or (at your option) any later version.\n" - "#\n" - "# This library is distributed in the hope that it will be useful,\n" - "# but WITHOUT ANY WARRANTY; without even the implied warranty of\n" - "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n" - "# Lesser General Public License for more details.\n" - "#\n" - "# You should have received a copy of the GNU Lesser General Public\n" - "# License along with this library; if not, write to the Free Software\n" - "# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n" - "#\n" - "# For further information, see http://www.nexusformat.org\n") +DOM_COMMENT = ( + "\n" + "# NeXus - Neutron and X-ray Common Data Format\n" + "# \n" + "# Copyright (C) 2014-2022 NeXus International Advisory Committee (NIAC)\n" + "# \n" + "# This library is free software; you can redistribute it and/or\n" + "# modify it under the terms of the GNU Lesser General Public\n" + "# License as published by the Free Software Foundation; either\n" + "# version 3 of the License, or (at your option) any later version.\n" + "#\n" + "# This library is distributed in the hope that it will be useful,\n" + "# but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n" + "# Lesser General Public License for more details.\n" + "#\n" + "# You should have received a copy of the GNU Lesser General Public\n" + "# License along with this library; if not, write to the Free Software\n" + "# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n" + "#\n" + "# For further information, see http://www.nexusformat.org\n" +) NX_CLSS = nexus.get_nx_classes() -NX_NEW_DEFINED_CLASSES = ['NX_COMPLEX'] +NX_NEW_DEFINED_CLASSES = ["NX_COMPLEX"] NX_TYPE_KEYS = nexus.get_nx_attribute_type() -NX_ATTR_IDNT = '\\@' -NX_UNIT_IDNT = 'unit' +NX_ATTR_IDNT = "\\@" +NX_UNIT_IDNT = "unit" DEPTH_SIZE = " " NX_UNIT_TYPES = nexus.get_nx_units() COMMENT_BLOCKS: CommentCollector -CATEGORY = '' # Definition would be either 'base' or 'application' +CATEGORY = "" # Definition would be either 'base' or 'application' def check_for_dom_comment_in_yaml(): - """Check the yaml file has dom comment or dom comment needed to be hard coded. - """ - dignature_keyword_list = ['NeXus', - 'GNU Lesser General Public', - 'Free Software Foundation', - 'Copyright (C)', - 'WITHOUT ANY WARRANTY'] + """Check the yaml file has dom comment or dom comment needed to be hard coded.""" + dignature_keyword_list = [ + "NeXus", + "GNU Lesser General Public", + "Free Software Foundation", + "Copyright (C)", + "WITHOUT ANY WARRANTY", + ] # Check for dom comments in first three comments - dom_comment = '' + dom_comment = "" dom_comment_ind = 1 for ind, comnt in enumerate(COMMENT_BLOCKS[0:5]): cmnt_list = comnt.get_comment_text() @@ -91,7 +93,7 @@ def check_for_dom_comment_in_yaml(): dom_comment_ind = ind for keyword in dignature_keyword_list: if keyword not in text: - dom_comment = '' + dom_comment = "" break if dom_comment: break @@ -120,11 +122,13 @@ def yml_reader(inputfile): if dom_cmnt_frm_yaml: DOM_COMMENT = dom_cmnt_frm_yaml - if 'category' not in loaded_yaml.keys(): - raise ValueError("All definitions should be either 'base' or 'application' category. 
" - "No category has been found.") + if "category" not in loaded_yaml.keys(): + raise ValueError( + "All definitions should be either 'base' or 'application' category. " + "No category has been found." + ) global CATEGORY - CATEGORY = loaded_yaml['category'] + CATEGORY = loaded_yaml["category"] return loaded_yaml @@ -134,25 +138,27 @@ def check_for_default_attribute_and_value(xml_element): """ # base:Default attributes and value for all elements of base class except dimension element - base_attr_to_val = {'optional': 'true'} + base_attr_to_val = {"optional": "true"} # application: Default attributes and value for all elements of application class except # dimension element - application_attr_to_val = {'optional': 'false'} + application_attr_to_val = {"optional": "false"} # Default attributes and value for dimension element - base_dim_attr_to_val = {'required': 'false'} - application_dim_attr_to_val = {'required': 'true'} + base_dim_attr_to_val = {"required": "false"} + application_dim_attr_to_val = {"required": "true"} # Eligible tag for default attr and value - elegible_tag = ['group', 'field', 'attribute'] + elegible_tag = ["group", "field", "attribute"] def set_default_attribute(xml_elem, default_attr_to_val): for deflt_attr, deflt_val in default_attr_to_val.items(): - if deflt_attr not in xml_elem.attrib \ - and 'maxOccurs' not in xml_elem.attrib \ - and 'minOccurs' not in xml_elem.attrib \ - and 'recommended' not in xml_elem.attrib: + if ( + deflt_attr not in xml_elem.attrib + and "maxOccurs" not in xml_elem.attrib + and "minOccurs" not in xml_elem.attrib + and "recommended" not in xml_elem.attrib + ): xml_elem.set(deflt_attr, deflt_val) for child in list(xml_element): @@ -161,14 +167,13 @@ def set_default_attribute(xml_elem, default_attr_to_val): continue tag = remove_namespace_from_tag(child.tag) - if tag == 'dim' and CATEGORY == 'base': + if tag == "dim" and CATEGORY == "base": set_default_attribute(child, base_dim_attr_to_val) - if tag == 'dim' and 
CATEGORY == 'application': + if tag == "dim" and CATEGORY == "application": set_default_attribute(child, application_dim_attr_to_val) - if tag in elegible_tag and CATEGORY == 'base': + if tag in elegible_tag and CATEGORY == "base": set_default_attribute(child, base_attr_to_val) - if tag in elegible_tag and CATEGORY == 'application': - + if tag in elegible_tag and CATEGORY == "application": set_default_attribute(child, application_attr_to_val) check_for_default_attribute_and_value(child) @@ -177,50 +182,53 @@ def yml_reader_nolinetag(inputfile): """ pyyaml based parsing of yaml file in python dict """ - with open(inputfile, 'r', encoding="utf-8") as stream: + with open(inputfile, "r", encoding="utf-8") as stream: parsed_yaml = yaml.safe_load(stream) return parsed_yaml def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=False): """ - Check for any attributes have been skipped or not. - NOTE: We should keep in mind about 'doc' + Check for any attributes have been skipped or not. + NOTE: We should keep in mind about 'doc' """ - block_tag = ['enumeration'] + block_tag = ["enumeration"] if value: for attr, val in value.items(): - if attr in ['doc']: + if attr in ["doc"]: continue - if '__line__' in attr or attr in block_tag: + if "__line__" in attr or attr in block_tag: continue - line_number = f'__line__{attr}' + line_number = f"__line__{attr}" if verbose: print(f"__line__ : {value[line_number]}") - if not isinstance(val, dict) \ - and '\\@' not in attr\ - and attr not in allowed_attr\ - and 'NX' not in attr and val: - - raise ValueError(f"An attribute '{attr}' in part '{component}' has been found" - f". Please check arround line '{value[line_number]}. At this " - f"moment. The allowed attrbutes are {allowed_attr}") + if ( + not isinstance(val, dict) + and "\\@" not in attr + and attr not in allowed_attr + and "NX" not in attr + and val + ): + raise ValueError( + f"An attribute '{attr}' in part '{component}' has been found" + f". 
Please check arround line '{value[line_number]}. At this " + f"moment. The allowed attrbutes are {allowed_attr}" + ) def format_nxdl_doc(string): - """NeXus format for doc string - """ + """NeXus format for doc string""" string = check_for_mapping_char_other(string) - formatted_doc = '' + formatted_doc = "" if "\n" not in string: if len(string) > 80: - wrapped = textwrap.TextWrapper(width=80, - break_long_words=False, - replace_whitespace=False) - string = '\n'.join(wrapped.wrap(string)) - formatted_doc = '\n' + f"{string}" + wrapped = textwrap.TextWrapper( + width=80, break_long_words=False, replace_whitespace=False + ) + string = "\n".join(wrapped.wrap(string)) + formatted_doc = "\n" + f"{string}" else: - text_lines = string.split('\n') + text_lines = string.split("\n") text_lines = cleaning_empty_lines(text_lines) formatted_doc += "\n" + "\n".join(text_lines) if not formatted_doc.endswith("\n"): @@ -234,12 +242,12 @@ def check_for_mapping_char_other(text): Then replace it by ':'. """ if not text: - text = '' + text = "" text = str(text) - if text == 'True': - text = 'true' - if text == 'False': - text = 'false' + if text == "True": + text = "true" + if text == "False": + text = "false" # Some escape char is not valid in yaml libray which is written while writting # yaml file. In the time of writting nxdl revert to that escape char. 
escape_reverter = get_yaml_escape_char_reverter_dict() @@ -249,26 +257,20 @@ def check_for_mapping_char_other(text): return str(text).strip() -def xml_handle_doc(obj, value: str, - line_number=None, line_loc=None): - """This function creates a 'doc' element instance, and appends it to an existing element - - """ +def xml_handle_doc(obj, value: str, line_number=None, line_loc=None): + """This function creates a 'doc' element instance, and appends it to an existing element""" # global comment_bolcks - doc_elemt = ET.SubElement(obj, 'doc') + doc_elemt = ET.SubElement(obj, "doc") text = format_nxdl_doc(check_for_mapping_char_other(value)).strip() # To keep the doc middle of doc tag. doc_elemt.text = f"\n{text}\n" if line_loc is not None and line_number is not None: - xml_handle_comment(obj, line_number, - line_loc, doc_elemt) + xml_handle_comment(obj, line_number, line_loc, doc_elemt) def xml_handle_units(obj, value): - """This function creates a 'units' element instance, and appends it to an existing element - - """ - obj.set('units', str(value)) + """This function creates a 'units' element instance, and appends it to an existing element""" + obj.set("units", str(value)) # pylint: disable=too-many-branches @@ -276,46 +278,52 @@ def xml_handle_exists(dct, obj, keyword, value): """ This function creates an 'exists' element instance, and appends it to an existing element """ - line_number = f'__line__{keyword}' - assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !' + line_number = f"__line__{keyword}" + assert ( + value is not None + ), f"Line {dct[line_number]}: exists argument must not be None !" 
if isinstance(value, list): - if len(value) == 4 and value[0] == 'min' and value[2] == 'max': - obj.set('minOccurs', str(value[1])) - if str(value[3]) != 'infty': - obj.set('maxOccurs', str(value[3])) + if len(value) == 4 and value[0] == "min" and value[2] == "max": + obj.set("minOccurs", str(value[1])) + if str(value[3]) != "infty": + obj.set("maxOccurs", str(value[3])) else: - obj.set('maxOccurs', 'unbounded') - elif len(value) == 2 and value[0] == 'min': - obj.set('minOccurs', str(value[1])) - elif len(value) == 2 and value[0] == 'max': - obj.set('maxOccurs', str(value[1])) - elif len(value) == 4 and value[0] == 'max' and value[2] == 'min': - obj.set('minOccurs', str(value[3])) - if str(value[1]) != 'infty': - obj.set('maxOccurs', str(value[3])) + obj.set("maxOccurs", "unbounded") + elif len(value) == 2 and value[0] == "min": + obj.set("minOccurs", str(value[1])) + elif len(value) == 2 and value[0] == "max": + obj.set("maxOccurs", str(value[1])) + elif len(value) == 4 and value[0] == "max" and value[2] == "min": + obj.set("minOccurs", str(value[3])) + if str(value[1]) != "infty": + obj.set("maxOccurs", str(value[3])) else: - obj.set('maxOccurs', 'unbounded') - elif len(value) == 4 and (value[0] != 'min' or value[2] != 'max'): - raise ValueError(f'Line {dct[line_number]}: exists keyword' - f'needs to go either with an optional [recommended] list with two ' - f'entries either [min, ] or [max, ], or a list of four ' - f'entries [min, , max, ] !') + obj.set("maxOccurs", "unbounded") + elif len(value) == 4 and (value[0] != "min" or value[2] != "max"): + raise ValueError( + f"Line {dct[line_number]}: exists keyword" + f"needs to go either with an optional [recommended] list with two " + f"entries either [min, ] or [max, ], or a list of four " + f"entries [min, , max, ] !" 
+ ) else: - raise ValueError(f'Line {dct[line_number]}: exists keyword ' - f'needs to go either with optional, recommended, a list with two ' - f'entries either [min, ] or [max, ], or a list of four ' - f'entries [min, , max, ] !') + raise ValueError( + f"Line {dct[line_number]}: exists keyword " + f"needs to go either with optional, recommended, a list with two " + f"entries either [min, ] or [max, ], or a list of four " + f"entries [min, , max, ] !" + ) else: # This clause take optional in all concept except dimension where 'required' key is allowed # not the 'optional' key. - if value == 'optional': - obj.set('optional', 'true') - elif value == 'recommended': - obj.set('recommended', 'true') - elif value == 'required': - obj.set('optional', 'false') + if value == "optional": + obj.set("optional", "true") + elif value == "recommended": + obj.set("recommended", "true") + elif value == "required": + obj.set("optional", "false") else: - obj.set('minOccurs', '0') + obj.set("minOccurs", "0") # pylint: disable=too-many-branches, too-many-locals, too-many-statements @@ -323,52 +331,59 @@ def xml_handle_group(dct, obj, keyword, value, verbose=False): """ The function deals with group instances """ - line_number = f'__line__{keyword}' + line_number = f"__line__{keyword}" line_loc = dct[line_number] xml_handle_comment(obj, line_number, line_loc) - list_of_attr = ['name', 'type', 'nameType', 'deprecated', 'optional', 'recommended', - 'exists', 'unit'] + list_of_attr = [ + "name", + "type", + "nameType", + "deprecated", + "optional", + "recommended", + "exists", + "unit", + ] l_bracket = -1 r_bracket = -1 - if keyword.count('(') == 1: - l_bracket = keyword.index('(') - if keyword.count(')') == 1: - r_bracket = keyword.index(')') + if keyword.count("(") == 1: + l_bracket = keyword.index("(") + if keyword.count(")") == 1: + r_bracket = keyword.index(")") keyword_name, keyword_type = nx_name_type_resolving(keyword) if not keyword_name and not keyword_type: raise ValueError("A 
group must have both value and name. Check for group.") - grp = ET.SubElement(obj, 'group') + grp = ET.SubElement(obj, "group") if l_bracket == 0 and r_bracket > 0: - grp.set('type', keyword_type) + grp.set("type", keyword_type) if keyword_name: - grp.set('name', keyword_name) + grp.set("name", keyword_name) elif l_bracket > 0: - grp.set('name', keyword_name) + grp.set("name", keyword_name) if keyword_type: - grp.set('type', keyword_type) + grp.set("type", keyword_type) else: - grp.set('name', keyword_name) + grp.set("name", keyword_name) if value: rm_key_list = [] for attr, vval in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] - if attr == 'doc': + if attr == "doc": xml_handle_doc(grp, vval, line_number, line_loc) rm_key_list.append(attr) rm_key_list.append(line_number) - elif attr == 'exists' and vval: + elif attr == "exists" and vval: xml_handle_exists(value, grp, attr, vval) rm_key_list.append(attr) rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, line_loc, grp) - elif attr == 'unit': + xml_handle_comment(obj, line_number, line_loc, grp) + elif attr == "unit": xml_handle_units(grp, vval) xml_handle_comment(obj, line_number, line_loc, grp) elif attr in list_of_attr and not isinstance(vval, dict) and vval: @@ -381,7 +396,7 @@ def xml_handle_group(dct, obj, keyword, value, verbose=False): for key in rm_key_list: del value[key] # Check for skipped attrinutes - check_for_skiped_attributes('group', value, list_of_attr, verbose) + check_for_skiped_attributes("group", value, list_of_attr, verbose) if isinstance(value, dict) and value != {}: recursive_build(grp, value, verbose) @@ -403,34 +418,37 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict): incr:[...]' """ - possible_dimension_attrs = ['rank'] # nxdl attributes - line_number = f'__line__{keyword}' + possible_dimension_attrs = ["rank"] # nxdl attributes + line_number = f"__line__{keyword}" 
line_loc = dct[line_number] - assert 'dim' in value.keys(), (f"Line {line_loc}: No dim as child of dimension has " - f"been found.") + assert "dim" in value.keys(), ( + f"Line {line_loc}: No dim as child of dimension has " f"been found." + ) xml_handle_comment(obj, line_number, line_loc) - dims = ET.SubElement(obj, 'dimensions') + dims = ET.SubElement(obj, "dimensions") # Consider all the childs under dimension is dim element and # its attributes rm_key_list = [] - rank = '' + rank = "" for key, val in value.items(): - if '__line__' in key: + if "__line__" in key: continue line_number = f"__line__{key}" line_loc = value[line_number] - if key == 'rank': - rank = val or '' + if key == "rank": + rank = val or "" if isinstance(rank, int) and rank < 0: - raise ValueError(f"Dimension must have some info about rank which is not " - f"available. Please check arround Line: {dct[line_number]}") + raise ValueError( + f"Dimension must have some info about rank which is not " + f"available. Please check arround Line: {dct[line_number]}" + ) dims.set(key, str(val)) rm_key_list.append(key) rm_key_list.append(line_number) xml_handle_comment(obj, line_number, line_loc, dims) # Check dimension doc and handle it - elif key == 'doc' and isinstance(val, str): + elif key == "doc" and isinstance(val, str): xml_handle_doc(dims, val, line_number, line_loc) rm_key_list.append(key) rm_key_list.append(line_number) @@ -450,18 +468,20 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict): # pylint: disable=too-many-locals, too-many-arguments -def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verbose=False): +def xml_handle_dim_from_dimension_dict( + dct, dims_obj, keyword, value, rank, verbose=False +): """ - Handling dim element. - NOTE: The inputs 'keyword' and 'value' are as input for xml_handle_dimensions - function. please also read note in xml_handle_dimensions. + Handling dim element. 
+ NOTE: The inputs 'keyword' and 'value' are as input for xml_handle_dimensions + function. please also read note in xml_handle_dimensions. """ - possible_dim_attrs = ['ref', 'incr', 'refindex', 'required'] + possible_dim_attrs = ["ref", "incr", "refindex", "required"] # Some attributes might have equivalent name e.g. 'required' is correct one and # 'optional' could be another name. Then change attribute to the correct one. - wrong_to_correct_attr = [('optional', 'required')] + wrong_to_correct_attr = [("optional", "required")] header_line_number = f"__line__{keyword}" dim_list = [] rm_key_list = [] @@ -469,51 +489,55 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb # under dim_parameters if not value: return - rank = '' + rank = "" # pylint: disable=too-many-nested-blocks for attr, vvalue in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] # dim comes in precedence - if attr == 'dim': + if attr == "dim": # dim consists of list of [index, value] llist_ind_value = vvalue - assert isinstance(llist_ind_value, list), (f'Line {value[line_number]}: dim' - f'argument not a list !') + assert isinstance(llist_ind_value, list), ( + f"Line {value[line_number]}: dim" f"argument not a list !" + ) xml_handle_comment(dims_obj, line_number, line_loc) if isinstance(rank, int) and rank > 0: assert rank == len(llist_ind_value), ( f"Wrong dimension rank check around Line {dct[header_line_number]}.\n" f"Line {[dct[header_line_number]]} rank value {rank} " f"is not the same as dim array = " - f"{len(llist_ind_value)}.") + f"{len(llist_ind_value)}." 
+ ) # Taking care of ind and value that comes as list of list for dim_ind_val in llist_ind_value: - dim = ET.SubElement(dims_obj, 'dim') + dim = ET.SubElement(dims_obj, "dim") # Taking care of multidimensions or rank if len(dim_ind_val) >= 1 and dim_ind_val[0]: - dim.set('index', str(dim_ind_val[0])) + dim.set("index", str(dim_ind_val[0])) if len(dim_ind_val) == 2 and dim_ind_val[1]: - dim.set('value', str(dim_ind_val[1])) + dim.set("value", str(dim_ind_val[1])) dim_list.append(dim) rm_key_list.append(attr) rm_key_list.append(line_number) - elif attr == 'dim_parameters' and isinstance(vvalue, dict): + elif attr == "dim_parameters" and isinstance(vvalue, dict): xml_handle_comment(dims_obj, line_number, line_loc) for kkkey, vvval in vvalue.items(): - if '__line__' in kkkey: + if "__line__" in kkkey: continue - cmnt_number = f'__line__{kkkey}' + cmnt_number = f"__line__{kkkey}" cmnt_loc = vvalue[cmnt_number] # Check whether any optional attributes added for tuple_wng_crt in wrong_to_correct_attr: if kkkey == tuple_wng_crt[0]: - raise ValueError(f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use " - f"'{tuple_wng_crt[1]}") - if kkkey == 'doc' and dim_list: + raise ValueError( + f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use " + f"'{tuple_wng_crt[1]}" + ) + if kkkey == "doc" and dim_list: # doc comes as list of doc for i, dim in enumerate(dim_list): if isinstance(vvval, list) and i < len(vvval): @@ -539,13 +563,15 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb rm_key_list.append(attr) rm_key_list.append(line_number) else: - raise ValueError(f"Got unexpected block except 'dim' and 'dim_parameters'." - f"Please check arround line {line_number}") + raise ValueError( + f"Got unexpected block except 'dim' and 'dim_parameters'." 
+ f"Please check arround line {line_number}" + ) for key in rm_key_list: del value[key] - check_for_skiped_attributes('dim', value, possible_dim_attrs, verbose) + check_for_skiped_attributes("dim", value, possible_dim_attrs, verbose) def xml_handle_enumeration(dct, obj, keyword, value, verbose): @@ -555,24 +581,27 @@ def xml_handle_enumeration(dct, obj, keyword, value, verbose): 1) the items are in a list 2) the items are dictionaries and may contain a nested doc """ - line_number = f'__line__{keyword}' + line_number = f"__line__{keyword}" line_loc = dct[line_number] xml_handle_comment(obj, line_number, line_loc) - enum = ET.SubElement(obj, 'enumeration') - - assert value is not None, f'Line {line_loc}: enumeration must \ -bear at least an argument !' - assert len( - value) >= 1, f'Line {dct[line_number]}: enumeration must not be an empty list!' + enum = ET.SubElement(obj, "enumeration") + + assert ( + value is not None + ), f"Line {line_loc}: enumeration must \ +bear at least an argument !" + assert ( + len(value) >= 1 + ), f"Line {dct[line_number]}: enumeration must not be an empty list!" 
if isinstance(value, list): for element in value: - itm = ET.SubElement(enum, 'item') - itm.set('value', str(element)) + itm = ET.SubElement(enum, "item") + itm.set("value", str(element)) if isinstance(value, dict) and value != {}: for element in value.keys(): - if '__line__' not in element: - itm = ET.SubElement(enum, 'item') - itm.set('value', str(element)) + if "__line__" not in element: + itm = ET.SubElement(enum, "item") + itm.set("value", str(element)) if isinstance(value[element], dict): recursive_build(itm, value[element], verbose) @@ -580,25 +609,25 @@ def xml_handle_enumeration(dct, obj, keyword, value, verbose): # pylint: disable=unused-argument def xml_handle_link(dct, obj, keyword, value, verbose): """ - If we have an NXDL link we decode the name attribute from (link)[:-6] + If we have an NXDL link we decode the name attribute from (link)[:-6] """ line_number = f"__line__{keyword}" line_loc = dct[line_number] xml_handle_comment(obj, line_number, line_loc) - possible_attrs = ['name', 'target', 'napimount'] + possible_attrs = ["name", "target", "napimount"] name = keyword[:-6] - link_obj = ET.SubElement(obj, 'link') - link_obj.set('name', str(name)) + link_obj = ET.SubElement(obj, "link") + link_obj.set("name", str(name)) if value: rm_key_list = [] for attr, vval in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] - if attr == 'doc': + if attr == "doc": xml_handle_doc(link_obj, vval, line_number, line_loc) rm_key_list.append(attr) rm_key_list.append(line_number) @@ -612,7 +641,7 @@ def xml_handle_link(dct, obj, keyword, value, verbose): for key in rm_key_list: del value[key] # Check for skipped attrinutes - check_for_skiped_attributes('link', value, possible_attrs, verbose) + check_for_skiped_attributes("link", value, possible_attrs, verbose) if isinstance(value, dict) and value != {}: recursive_build(link_obj, value, verbose=None) @@ -620,26 +649,26 @@ def 
xml_handle_link(dct, obj, keyword, value, verbose): def xml_handle_choice(dct, obj, keyword, value, verbose=False): """ - Build choice xml elements. That consists of groups. + Build choice xml elements. That consists of groups. """ - line_number = f'__line__{keyword}' + line_number = f"__line__{keyword}" line_loc = dct[line_number] xml_handle_comment(obj, line_number, line_loc) # Add attributes in possible if new attributs have been added nexus definition. possible_attr = [] - choice_obj = ET.SubElement(obj, 'choice') + choice_obj = ET.SubElement(obj, "choice") # take care of special attributes name = keyword[:-8] - choice_obj.set('name', name) + choice_obj.set("name", name) if value: rm_key_list = [] for attr, vval in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] - if attr == 'doc': + if attr == "doc": xml_handle_doc(choice_obj, vval, line_number, line_loc) rm_key_list.append(attr) rm_key_list.append(line_number) @@ -653,40 +682,40 @@ def xml_handle_choice(dct, obj, keyword, value, verbose=False): for key in rm_key_list: del value[key] # Check for skipped attrinutes - check_for_skiped_attributes('choice', value, possible_attr, verbose) + check_for_skiped_attributes("choice", value, possible_attr, verbose) if isinstance(value, dict) and value != {}: recursive_build(choice_obj, value, verbose=None) def xml_handle_symbols(dct, obj, keyword, value: dict): - """Handle a set of NXDL symbols as a child to obj - - """ - line_number = f'__line__{keyword}' + """Handle a set of NXDL symbols as a child to obj""" + line_number = f"__line__{keyword}" line_loc = dct[line_number] - assert len(list(value.keys()) - ) >= 1, f'Line {line_loc}: symbols table must not be empty !' + assert ( + len(list(value.keys())) >= 1 + ), f"Line {line_loc}: symbols table must not be empty !" 
xml_handle_comment(obj, line_number, line_loc) - syms = ET.SubElement(obj, 'symbols') - if 'doc' in value.keys(): - line_number = '__line__doc' + syms = ET.SubElement(obj, "symbols") + if "doc" in value.keys(): + line_number = "__line__doc" line_loc = value[line_number] xml_handle_comment(syms, line_number, line_loc) - doctag = ET.SubElement(syms, 'doc') - doctag.text = '\n' + textwrap.fill(value['doc'], width=70) + '\n' + doctag = ET.SubElement(syms, "doc") + doctag.text = "\n" + textwrap.fill(value["doc"], width=70) + "\n" rm_key_list = [] for kkeyword, vvalue in value.items(): - if '__line__' in kkeyword: + if "__line__" in kkeyword: continue - if kkeyword != 'doc': - line_number = f'__line__{kkeyword}' + if kkeyword != "doc": + line_number = f"__line__{kkeyword}" line_loc = value[line_number] xml_handle_comment(syms, line_number, line_loc) assert vvalue is not None and isinstance( - vvalue, str), f'Line {line_loc}: put a comment in doc string !' - sym = ET.SubElement(syms, 'symbol') - sym.set('name', str(kkeyword)) + vvalue, str + ), f"Line {line_loc}: put a comment in doc string !" 
+ sym = ET.SubElement(syms, "symbol") + sym.set("name", str(kkeyword)) # sym_doc = ET.SubElement(sym, 'doc') xml_handle_doc(sym, vvalue) rm_key_list.append(kkeyword) @@ -704,15 +733,16 @@ def check_keyword_variable(verbose, dct, keyword, value): keyword_name, keyword_type = nx_name_type_resolving(keyword) if verbose: sys.stdout.write( - f'{keyword_name}({keyword_type}): value type is {type(value)}\n') - if keyword_name == '' and keyword_type == '': - line_number = f'__line__{keyword}' - raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !') + f"{keyword_name}({keyword_type}): value type is {type(value)}\n" + ) + if keyword_name == "" and keyword_type == "": + line_number = f"__line__{keyword}" + raise ValueError(f"Line {dct[line_number]}: found an improper yaml key !") def helper_keyword_type(kkeyword_type): """ - This function is returning a value of keyword_type if it belong to NX_TYPE_KEYS + This function is returning a value of keyword_type if it belong to NX_TYPE_KEYS """ if kkeyword_type in NX_TYPE_KEYS: return kkeyword_type @@ -721,10 +751,10 @@ def helper_keyword_type(kkeyword_type): def verbose_flag(verbose, keyword, value): """ - Verbose stdout printing for nested levels of yaml file, if verbose flag is active + Verbose stdout printing for nested levels of yaml file, if verbose flag is active """ if verbose: - sys.stdout.write(f' key:{keyword}; value type is {type(value)}\n') + sys.stdout.write(f" key:{keyword}; value type is {type(value)}\n") def xml_handle_attributes(dct, obj, keyword, value, verbose): @@ -734,43 +764,53 @@ def xml_handle_attributes(dct, obj, keyword, value, verbose): line_loc = dct[line_number] xml_handle_comment(obj, line_number, line_loc) # list of possible attribute of xml attribute elementsa - attr_attr_list = ['name', 'type', 'unit', 'nameType', - 'optional', 'recommended', 'minOccurs', - 'maxOccurs', 'deprecated', 'exists'] + attr_attr_list = [ + "name", + "type", + "unit", + "nameType", + "optional", + 
"recommended", + "minOccurs", + "maxOccurs", + "deprecated", + "exists", + ] # as an attribute identifier keyword_name, keyword_typ = nx_name_type_resolving(keyword) - line_number = f'__line__{keyword}' + line_number = f"__line__{keyword}" if verbose: print(f"__line__ : {dct[line_number]}") - if keyword_name == '' and keyword_typ == '': - raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !') - elemt_obj = ET.SubElement(obj, 'attribute') - elemt_obj.set('name', keyword_name[2:]) + if keyword_name == "" and keyword_typ == "": + raise ValueError(f"Line {dct[line_number]}: found an improper yaml key !") + elemt_obj = ET.SubElement(obj, "attribute") + elemt_obj.set("name", keyword_name[2:]) if keyword_typ: - elemt_obj.set('type', keyword_typ) + elemt_obj.set("type", keyword_typ) rm_key_list = [] if value and value: # taking care of attributes of attributes for attr, attr_val in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] - if attr in ['doc', *attr_attr_list] and not isinstance(attr_val, dict): - if attr == 'unit': + if attr in ["doc", *attr_attr_list] and not isinstance(attr_val, dict): + if attr == "unit": elemt_obj.set(f"{attr}s", str(value[attr])) rm_key_list.append(attr) rm_key_list.append(line_number) xml_handle_comment(obj, line_number, line_loc, elemt_obj) - elif attr == 'exists' and attr_val: + elif attr == "exists" and attr_val: xml_handle_exists(value, elemt_obj, attr, attr_val) rm_key_list.append(attr) rm_key_list.append(line_number) xml_handle_comment(obj, line_number, line_loc, elemt_obj) - elif attr == 'doc': - xml_handle_doc(elemt_obj, format_nxdl_doc(attr_val), - line_number, line_loc) + elif attr == "doc": + xml_handle_doc( + elemt_obj, format_nxdl_doc(attr_val), line_number, line_loc + ) rm_key_list.append(attr) rm_key_list.append(line_number) else: @@ -782,7 +822,7 @@ def xml_handle_attributes(dct, obj, keyword, value, verbose): for key in 
rm_key_list: del value[key] # Check cor skiped attribute - check_for_skiped_attributes('Attribute', value, attr_attr_list, verbose) + check_for_skiped_attributes("Attribute", value, attr_attr_list, verbose) if value: recursive_build(elemt_obj, value, verbose) @@ -794,25 +834,28 @@ def validate_field_attribute_and_value(v_attr, vval, allowed_attribute, value): """ # check for empty val - if (not isinstance(vval, dict) - and not str(vval)): # check for empty value - + if not isinstance(vval, dict) and not str(vval): # check for empty value line_number = f"__line__{v_attr}" - raise ValueError(f"In a field a valid attrbute ('{v_attr}') found that is not stored." - f" Please check arround line {value[line_number]}") + raise ValueError( + f"In a field a valid attrbute ('{v_attr}') found that is not stored." + f" Please check arround line {value[line_number]}" + ) # The bellow elements might come as child element - skipped_child_name = ['doc', 'dimension', 'enumeration', 'choice', 'exists'] + skipped_child_name = ["doc", "dimension", "enumeration", "choice", "exists"] # check for invalid key or attributes - if (v_attr not in [*skipped_child_name, *allowed_attribute] - and '__line__' not in v_attr + if ( + v_attr not in [*skipped_child_name, *allowed_attribute] + and "__line__" not in v_attr and not isinstance(vval, dict) - and '(' not in v_attr # skip only groups and field that has name and type - and '\\@' not in v_attr): # skip nexus attributes - + and "(" not in v_attr # skip only groups and field that has name and type + and "\\@" not in v_attr + ): # skip nexus attributes line_number = f"__line__{v_attr}" - raise ValueError(f"In a field or group a invalid attribute ('{v_attr}') or child has found." - f" Please check arround line {value[line_number]}.") + raise ValueError( + f"In a field or group a invalid attribute ('{v_attr}') or child has found." + f" Please check arround line {value[line_number]}." 
+ ) def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False): @@ -830,84 +873,101 @@ def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False): This simple function will define a new node of xml tree """ # List of possible attributes of xml elements - allowed_attr = ['name', 'type', 'nameType', 'unit', 'minOccurs', 'long_name', - 'axis', 'signal', 'deprecated', 'axes', 'exists', - 'data_offset', 'interpretation', 'maxOccurs', - 'primary', 'recommended', 'optional', 'stride'] + allowed_attr = [ + "name", + "type", + "nameType", + "unit", + "minOccurs", + "long_name", + "axis", + "signal", + "deprecated", + "axes", + "exists", + "data_offset", + "interpretation", + "maxOccurs", + "primary", + "recommended", + "optional", + "stride", + ] xml_handle_comment(obj, line_annot, line_loc) l_bracket = -1 r_bracket = -1 - if keyword.count('(') == 1: - l_bracket = keyword.index('(') - if keyword.count(')') == 1: - r_bracket = keyword.index(')') + if keyword.count("(") == 1: + l_bracket = keyword.index("(") + if keyword.count(")") == 1: + r_bracket = keyword.index(")") keyword_name, keyword_type = nx_name_type_resolving(keyword) if not keyword_type and not keyword_name: raise ValueError("Check for name or type in field.") - elemt_obj = ET.SubElement(obj, 'field') + elemt_obj = ET.SubElement(obj, "field") # type come first if l_bracket == 0 and r_bracket > 0: - elemt_obj.set('type', keyword_type) + elemt_obj.set("type", keyword_type) if keyword_name: - elemt_obj.set('name', keyword_name) + elemt_obj.set("name", keyword_name) elif l_bracket > 0: - elemt_obj.set('name', keyword_name) + elemt_obj.set("name", keyword_name) if keyword_type: - elemt_obj.set('type', keyword_type) + elemt_obj.set("type", keyword_type) else: - elemt_obj.set('name', keyword_name) + elemt_obj.set("name", keyword_name) if value: rm_key_list = [] # In each each if clause apply xml_handle_comment(), to collect # comments on that yaml line. 
for attr, vval in value.items(): - if '__line__' in attr: + if "__line__" in attr: continue line_number = f"__line__{attr}" line_loc = value[line_number] - if attr == 'doc': - xml_handle_doc(elemt_obj, vval, line_number, line_loc,) + if attr == "doc": + xml_handle_doc( + elemt_obj, + vval, + line_number, + line_loc, + ) rm_key_list.append(attr) rm_key_list.append(line_number) - elif attr == 'exists' and vval: + elif attr == "exists" and vval: xml_handle_exists(value, elemt_obj, attr, vval) rm_key_list.append(attr) rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) - elif attr == 'unit': + xml_handle_comment(obj, line_number, line_loc, elemt_obj) + elif attr == "unit": xml_handle_units(elemt_obj, vval) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) + xml_handle_comment(obj, line_number, line_loc, elemt_obj) elif attr in allowed_attr and not isinstance(vval, dict) and vval: validate_field_attribute_and_value(attr, vval, allowed_attr, value) elemt_obj.set(attr, check_for_mapping_char_other(vval)) rm_key_list.append(attr) rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) + xml_handle_comment(obj, line_number, line_loc, elemt_obj) for key in rm_key_list: del value[key] # Check for skipped attrinutes - check_for_skiped_attributes('field', value, allowed_attr, verbose) + check_for_skiped_attributes("field", value, allowed_attr, verbose) if isinstance(value, dict) and value != {}: recursive_build(elemt_obj, value, verbose) -def xml_handle_comment(obj: ET.Element, - line_annotation: str, - line_loc_no: int, - xml_ele: ET.Element = None, - is_def_cmnt: bool = False): +def xml_handle_comment( + obj: ET.Element, + line_annotation: str, + line_loc_no: int, + xml_ele: ET.Element = None, + is_def_cmnt: bool = False, +): """ Add xml comment: check for comments that has the same 'line_annotation' (e.g. __line__data) and the same line_loc_no (e.g. 30). 
After that, i @@ -936,7 +996,7 @@ def xml_handle_comment(obj: ET.Element, obj.append(si_comnt) else: raise ValueError("Provied correct parameter values.") - return '' + return "" def recursive_build(obj, dct, verbose): @@ -948,7 +1008,7 @@ def recursive_build(obj, dct, verbose): """ for keyword, value in iter(dct.items()): - if '__line__' in keyword: + if "__line__" in keyword: continue line_number = f"__line__{keyword}" line_loc = dct[line_number] @@ -956,44 +1016,46 @@ def recursive_build(obj, dct, verbose): check_keyword_variable(verbose, dct, keyword, value) if verbose: sys.stdout.write( - f'keyword_name:{keyword_name} keyword_type {keyword_type}\n') + f"keyword_name:{keyword_name} keyword_type {keyword_type}\n" + ) - if keyword[-6:] == '(link)': + if keyword[-6:] == "(link)": xml_handle_link(dct, obj, keyword, value, verbose) - elif keyword[-8:] == '(choice)': + elif keyword[-8:] == "(choice)": xml_handle_choice(dct, obj, keyword, value) # The bellow xml_symbol clause is for the symbols that come ubde filed or attributes # Root level symbols has been inside nyaml2nxdl() - elif keyword_type == '' and keyword_name == 'symbols': + elif keyword_type == "" and keyword_name == "symbols": xml_handle_symbols(dct, obj, keyword, value) - elif ((keyword_type in NX_CLSS) or (keyword_type not in - [*NX_TYPE_KEYS, '', *NX_NEW_DEFINED_CLASSES])): + elif (keyword_type in NX_CLSS) or ( + keyword_type not in [*NX_TYPE_KEYS, "", *NX_NEW_DEFINED_CLASSES] + ): # we can be sure we need to instantiate a new group xml_handle_group(dct, obj, keyword, value, verbose) elif keyword_name[0:2] == NX_ATTR_IDNT: # check if obj qualifies xml_handle_attributes(dct, obj, keyword, value, verbose) - elif keyword == 'doc': + elif keyword == "doc": xml_handle_doc(obj, value, line_number, line_loc) elif keyword == NX_UNIT_IDNT: xml_handle_units(obj, value) - elif keyword == 'enumeration': + elif keyword == "enumeration": xml_handle_enumeration(dct, obj, keyword, value, verbose) - elif keyword == 
'dimensions': + elif keyword == "dimensions": xml_handle_dimensions(dct, obj, keyword, value) - elif keyword == 'exists': + elif keyword == "exists": xml_handle_exists(dct, obj, keyword, value) # Handles fileds e.g. AXISNAME - elif keyword_name != '' and '__line__' not in keyword_name: - xml_handle_fields(obj, keyword, - value, line_number, - line_loc, verbose) + elif keyword_name != "" and "__line__" not in keyword_name: + xml_handle_fields(obj, keyword, value, line_number, line_loc, verbose) else: - raise ValueError(f"An unfamiliar type of element {keyword} has been found which is " - f"not be able to be resolved. Chekc arround line {dct[line_number]}") + raise ValueError( + f"An unfamiliar type of element {keyword} has been found which is " + f"not be able to be resolved. Chekc arround line {dct[line_number]}" + ) def pretty_print_xml(xml_root, output_xml, def_comments=None): @@ -1001,10 +1063,10 @@ def pretty_print_xml(xml_root, output_xml, def_comments=None): Print better human-readable indented and formatted xml file using built-in libraries and preceding XML processing instruction """ - dom = minidom.parseString(ET.tostring( - xml_root, encoding='utf-8', method='xml')) + dom = minidom.parseString(ET.tostring(xml_root, encoding="utf-8", method="xml")) proc_instractionn = dom.createProcessingInstruction( - 'xml-stylesheet', 'type="text/xsl" href="nxdlformat.xsl"') + "xml-stylesheet", 'type="text/xsl" href="nxdlformat.xsl"' + ) dom_comment = dom.createComment(DOM_COMMENT) root = dom.firstChild dom.insertBefore(proc_instractionn, root) @@ -1015,27 +1077,27 @@ def pretty_print_xml(xml_root, output_xml, def_comments=None): def_comt_ele = dom.createComment(string) dom.insertBefore(def_comt_ele, root) - xml_string = dom.toprettyxml(indent=1 * DEPTH_SIZE, newl='\n', encoding='UTF-8') - with open('tmp.xml', "wb") as file_tmp: + xml_string = dom.toprettyxml(indent=1 * DEPTH_SIZE, newl="\n", encoding="UTF-8") + with open("tmp.xml", "wb") as file_tmp: 
file_tmp.write(xml_string) flag = False - with open('tmp.xml', "r", encoding="utf-8") as file_out: + with open("tmp.xml", "r", encoding="utf-8") as file_out: with open(output_xml, "w", encoding="utf-8") as file_out_mod: for i in file_out.readlines(): - if '' not in i and '' not in i and flag is False: + if "" not in i and "" not in i and flag is False: file_out_mod.write(i) - elif '' in i and '' in i: + elif "" in i and "" in i: file_out_mod.write(i) - elif '' in i and '' not in i: + elif "" in i and "" not in i: flag = True white_spaces = len(i) - len(i.lstrip()) file_out_mod.write(i) - elif '' not in i and '' not in i and flag is True: - file_out_mod.write((white_spaces + 5) * ' ' + i) - elif '' not in i and '' in i and flag is True: - file_out_mod.write(white_spaces * ' ' + i) + elif "" not in i and "" not in i and flag is True: + file_out_mod.write((white_spaces + 5) * " " + i) + elif "" not in i and "" in i and flag is True: + file_out_mod.write(white_spaces * " " + i) flag = False - os.remove('tmp.xml') + os.remove("tmp.xml") # pylint: disable=too-many-statements @@ -1046,102 +1108,120 @@ def nyaml2nxdl(input_file: str, out_file, verbose: bool): fields or (their) attributes as childs of the groups """ - def_attributes = ['deprecated', 'ignoreExtraGroups', 'category', 'type', - 'ignoreExtraFields', 'ignoreExtraAttributes', 'restricts'] + def_attributes = [ + "deprecated", + "ignoreExtraGroups", + "category", + "type", + "ignoreExtraFields", + "ignoreExtraAttributes", + "restricts", + ] yml_appdef = yml_reader(input_file) def_cmnt_text = [] if verbose: - sys.stdout.write(f'input-file: {input_file}\n') - sys.stdout.write('application/base contains the following root-level entries:\n') + sys.stdout.write(f"input-file: {input_file}\n") + sys.stdout.write( + "application/base contains the following root-level entries:\n" + ) sys.stdout.write(str(yml_appdef.keys())) - xml_root = ET.Element('definition', {}) - assert 'category' in yml_appdef.keys( - ), 'Required 
root-level keyword category is missing!' - assert yml_appdef['category'] in ['application', 'base'], 'Only \ -application and base are valid categories!' - assert 'doc' in yml_appdef.keys(), 'Required root-level keyword doc is missing!' - - name_extends = '' + xml_root = ET.Element("definition", {}) + assert ( + "category" in yml_appdef.keys() + ), "Required root-level keyword category is missing!" + assert yml_appdef["category"] in [ + "application", + "base", + ], "Only \ +application and base are valid categories!" + assert "doc" in yml_appdef.keys(), "Required root-level keyword doc is missing!" + + name_extends = "" yml_appdef_copy = yml_appdef.copy() for kkey, vvalue in yml_appdef_copy.items(): - if '__line__' in kkey: + if "__line__" in kkey: continue line_number = f"__line__{kkey}" line_loc_no = yml_appdef[line_number] if not isinstance(vvalue, dict) and kkey in def_attributes: - xml_root.set(kkey, str(vvalue) or '') - cmnt_text = xml_handle_comment(xml_root, - line_number, line_loc_no, - is_def_cmnt=True) + xml_root.set(kkey, str(vvalue) or "") + cmnt_text = xml_handle_comment( + xml_root, line_number, line_loc_no, is_def_cmnt=True + ) def_cmnt_text += cmnt_text if cmnt_text else [] del yml_appdef[line_number] del yml_appdef[kkey] # Taking care or name and extends - elif 'NX' in kkey: + elif "NX" in kkey: # Tacking the attribute order but the correct value will be stored later # check for name first or type first if (NXobject)NXname then type first - l_bracket_ind = kkey.rfind('(') - r_bracket_ind = kkey.rfind(')') + l_bracket_ind = kkey.rfind("(") + r_bracket_ind = kkey.rfind(")") if l_bracket_ind == 0: extend = kkey[1:r_bracket_ind] - name = kkey[r_bracket_ind + 1:] - xml_root.set('extends', extend) - xml_root.set('name', name) + name = kkey[r_bracket_ind + 1 :] + xml_root.set("extends", extend) + xml_root.set("name", name) elif l_bracket_ind > 0: name = kkey[0:l_bracket_ind] - extend = kkey[l_bracket_ind + 1: r_bracket_ind] - xml_root.set('name', name) 
- xml_root.set('extends', extend) + extend = kkey[l_bracket_ind + 1 : r_bracket_ind] + xml_root.set("name", name) + xml_root.set("extends", extend) else: name = kkey - xml_root.set('name', name) - xml_root.set('extends', 'NXobject') - cmnt_text = xml_handle_comment(xml_root, - line_number, line_loc_no, - is_def_cmnt=True) + xml_root.set("name", name) + xml_root.set("extends", "NXobject") + cmnt_text = xml_handle_comment( + xml_root, line_number, line_loc_no, is_def_cmnt=True + ) def_cmnt_text += cmnt_text if cmnt_text else [] name_extends = kkey - if 'type' not in xml_root.attrib: - xml_root.set('type', "group") + if "type" not in xml_root.attrib: + xml_root.set("type", "group") # Taking care of namespaces - namespaces = {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1', - 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation': 'http://definition.nexusformat.org/nxdl/3.1 ../nxdl.xsd'} + namespaces = { + "xmlns": "http://definition.nexusformat.org/nxdl/3.1", + "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "xsi:schemaLocation": "http://definition.nexusformat.org/nxdl/3.1 ../nxdl.xsd", + } for key, ns_ in namespaces.items(): xml_root.attrib[key] = ns_ # Taking care of Symbols elements - if 'symbols' in yml_appdef.keys(): - xml_handle_symbols(yml_appdef, - xml_root, - 'symbols', - yml_appdef['symbols']) + if "symbols" in yml_appdef.keys(): + xml_handle_symbols(yml_appdef, xml_root, "symbols", yml_appdef["symbols"]) - del yml_appdef['symbols'] + del yml_appdef["symbols"] del yml_appdef["__line__symbols"] - assert isinstance(yml_appdef['doc'], str) and yml_appdef['doc'] != '', 'Doc \ -has to be a non-empty string!' + assert ( + isinstance(yml_appdef["doc"], str) and yml_appdef["doc"] != "" + ), "Doc \ +has to be a non-empty string!" 
- line_number = '__line__doc' + line_number = "__line__doc" line_loc_no = yml_appdef[line_number] - xml_handle_doc(xml_root, yml_appdef['doc'], line_number, line_loc_no) + xml_handle_doc(xml_root, yml_appdef["doc"], line_number, line_loc_no) - del yml_appdef['doc'] + del yml_appdef["doc"] root_keys = 0 for key in yml_appdef.keys(): - if '__line__' not in key: + if "__line__" not in key: root_keys += 1 extra_key = key - assert root_keys == 1, (f"Accepting at most keywords: category, doc, symbols, and NX... " - f"at root-level! check key at root level {extra_key}") + assert root_keys == 1, ( + f"Accepting at most keywords: category, doc, symbols, and NX... " + f"at root-level! check key at root level {extra_key}" + ) - assert ('NX' in name_extends and len(name_extends) > 2), 'NX \ -keyword has an invalid pattern, or is too short!' + assert ( + "NX" in name_extends and len(name_extends) > 2 + ), "NX \ +keyword has an invalid pattern, or is too short!" # Taking care if definition has empty content if yml_appdef[name_extends]: recursive_build(xml_root, yml_appdef[name_extends], verbose) @@ -1158,4 +1238,4 @@ def nyaml2nxdl(input_file: str, out_file, verbose: bool): check_for_default_attribute_and_value(xml_root) pretty_print_xml(xml_root, out_file, def_cmnt_text) if verbose: - sys.stdout.write('Parsed YAML to NXDL successfully\n') + sys.stdout.write("Parsed YAML to NXDL successfully\n") diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py index 58d634c9d..c55f5da7a 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_helper.py @@ -28,19 +28,17 @@ # So the corresponding value is to skip them and # and also carefull about this order import hashlib + from yaml.composer import Composer from yaml.constructor import Constructor - +from yaml.loader import Loader from yaml.nodes import ScalarNode from yaml.resolver import BaseResolver -from yaml.loader import Loader # NOTE: If any one change 
one of the bellow dict please change it for both -ESCAPE_CHAR_DICT_IN_YAML = {"\t": " ", - "\':\'": ":"} +ESCAPE_CHAR_DICT_IN_YAML = {"\t": " ", "':'": ":"} -ESCAPE_CHAR_DICT_IN_XML = {" ": "\t", - "\':\'": ":"} +ESCAPE_CHAR_DICT_IN_XML = {" ": "\t", "':'": ":"} def remove_namespace_from_tag(tag): @@ -67,11 +65,13 @@ def construct_mapping(self, node, deep=False): for key_node in node_pair_lst: shadow_key_node = ScalarNode( - tag=BaseResolver.DEFAULT_SCALAR_TAG, value='__line__' + key_node[0].value) + tag=BaseResolver.DEFAULT_SCALAR_TAG, + value="__line__" + key_node[0].value, + ) shadow_value_node = ScalarNode( - tag=BaseResolver.DEFAULT_SCALAR_TAG, value=key_node[0].__line__) - node_pair_lst_for_appending.append( - (shadow_key_node, shadow_value_node)) + tag=BaseResolver.DEFAULT_SCALAR_TAG, value=key_node[0].__line__ + ) + node_pair_lst_for_appending.append((shadow_key_node, shadow_value_node)) node.value = node_pair_lst + node_pair_lst_for_appending return Constructor.construct_mapping(self, node, deep=deep) @@ -90,11 +90,11 @@ def get_yaml_escape_char_reverter_dict(): def type_check(nx_type): """ - Check for nexus type if type is NX_CHAR get '' or get as it is. + Check for nexus type if type is NX_CHAR get '' or get as it is. """ - if nx_type in ['NX_CHAR', '']: - nx_type = '' + if nx_type in ["NX_CHAR", ""]: + nx_type = "" else: nx_type = f"({nx_type})" return nx_type @@ -114,10 +114,10 @@ def get_node_parent_info(tree, node): def cleaning_empty_lines(line_list): """ - Cleaning up empty lines on top and bottom. + Cleaning up empty lines on top and bottom. 
""" if not isinstance(line_list, list): - line_list = line_list.split('\n') if '\n' in line_list else [''] + line_list = line_list.split("\n") if "\n" in line_list else [""] # Clining up top empty lines while True: @@ -125,7 +125,7 @@ def cleaning_empty_lines(line_list): break line_list = line_list[1:] if len(line_list) == 0: - line_list.append('') + line_list.append("") return line_list # Clining bottom empty lines @@ -134,7 +134,7 @@ def cleaning_empty_lines(line_list): break line_list = line_list[0:-1] if len(line_list) == 0: - line_list.append('') + line_list.append("") return line_list return line_list @@ -146,45 +146,44 @@ def nx_name_type_resolving(tmp): and type {nexus_type} from a YML section string. YML section string syntax: optional_string(nexus_type) """ - if tmp.count('(') == 1 and tmp.count(')') == 1: + if tmp.count("(") == 1 and tmp.count(")") == 1: # we can safely assume that every valid YML key resolves # either an nx_ (type, base, candidate) class contains only 1 '(' and ')' - index_start = tmp.index('(') - index_end = tmp.index(')', index_start + 1) - typ = tmp[index_start + 1:index_end] - nam = tmp.replace('(' + typ + ')', '') + index_start = tmp.index("(") + index_end = tmp.index(")", index_start + 1) + typ = tmp[index_start + 1 : index_end] + nam = tmp.replace("(" + typ + ")", "") return nam, typ # or a name for a member - typ = '' + typ = "" nam = tmp return nam, typ def get_sha256_hash(file_name): - """Generate a sha256_hash for a given file. - """ + """Generate a sha256_hash for a given file.""" sha_hash = hashlib.sha256() - with open(file=file_name, mode='rb',) as file_obj: + with open( + file=file_name, + mode="rb", + ) as file_obj: # Update hash for each 4k block of bytes for b_line in iter(lambda: file_obj.read(4096), b""): sha_hash.update(b_line) return sha_hash.hexdigest() -def extend_yamlfile_with_comment(yaml_file, - file_to_be_appended, - top_lines_list=None): - """Extend yaml file by the file_to_be_appended as comment. 
- """ +def extend_yamlfile_with_comment(yaml_file, file_to_be_appended, top_lines_list=None): + """Extend yaml file by the file_to_be_appended as comment.""" - with open(yaml_file, mode='a+', encoding='utf-8') as f1_obj: + with open(yaml_file, mode="a+", encoding="utf-8") as f1_obj: if top_lines_list: for line in top_lines_list: f1_obj.write(line) - with open(file_to_be_appended, mode='r', encoding='utf-8') as f2_obj: + with open(file_to_be_appended, mode="r", encoding="utf-8") as f2_obj: lines = f2_obj.readlines() for line in lines: f1_obj.write(f"# {line}") @@ -197,30 +196,30 @@ def separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, sep_xml): ++++++++++++++++++++++++++++++++++\n' # ' """ - sha_hash = '' - with open(yaml_file, 'r', encoding='utf-8') as inp_file: + sha_hash = "" + with open(yaml_file, "r", encoding="utf-8") as inp_file: lines = inp_file.readlines() # file to write yaml part - with open(sep_yaml, 'w', encoding='utf-8') as yml_f_ob, \ - open(sep_xml, 'w', encoding='utf-8') as xml_f_ob: - - last_line = '' + with open(sep_yaml, "w", encoding="utf-8") as yml_f_ob, open( + sep_xml, "w", encoding="utf-8" + ) as xml_f_ob: + last_line = "" write_on_yaml = True for ind, line in enumerate(lines): if ind == 0: last_line = line # Write in file when ensured that the nest line is not with '++ SHA HASH ++' - elif '++ SHA HASH ++' not in line and write_on_yaml: + elif "++ SHA HASH ++" not in line and write_on_yaml: yml_f_ob.write(last_line) last_line = line - elif '++ SHA HASH ++' in line: + elif "++ SHA HASH ++" in line: write_on_yaml = False - last_line = '' + last_line = "" elif not write_on_yaml and not last_line: # The first line of xml file has been found. Onward write lines directly # into xml file. if not sha_hash: - sha_hash = line.split('# ', 1)[-1].strip() + sha_hash = line.split("# ", 1)[-1].strip() else: xml_f_ob.write(line[2:]) # If the yaml fiile does not contain any hash for nxdl then we may have last line. 
From f76a1a96ac19c1253b81f5e3f44675c6e6702bf4 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 15:12:21 +0200 Subject: [PATCH 25/32] linting --- dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index ca0435e37..56a33a453 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -978,8 +978,8 @@ def xml_handle_comment( """ line_info = (line_annotation, int(line_loc_no)) - if line_info in COMMENT_BLOCKS: - cmnt = COMMENT_BLOCKS.get_coment_by_line_info(line_info) + if line_info in COMMENT_BLOCKS: # noqa: F821 + cmnt = COMMENT_BLOCKS.get_coment_by_line_info(line_info) # noqa: F821 cmnt_text = cmnt.get_comment_text() if is_def_cmnt: @@ -1227,8 +1227,8 @@ def nyaml2nxdl(input_file: str, out_file, verbose: bool): recursive_build(xml_root, yml_appdef[name_extends], verbose) # Taking care of comments that comes at the end of file that is might not be intended for # any nxdl elements. 
- if COMMENT_BLOCKS[-1].has_post_comment: - post_comment = COMMENT_BLOCKS[-1] + if COMMENT_BLOCKS[-1].has_post_comment: # noqa: F821 + post_comment = COMMENT_BLOCKS[-1] # noqa: F821 (lin_annot, line_loc) = post_comment.get_line_info() xml_handle_comment(xml_root, lin_annot, line_loc) From 9462a1d2ca120f9722463b2db59d8325ab2a0ee3 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 15:48:06 +0200 Subject: [PATCH 26/32] imports --- dev_tools/nyaml2nxdl/comment_collector.py | 2 +- .../nyaml2nxdl/nyaml2nxdl_backward_tools.py | 8 ++++---- .../nyaml2nxdl/nyaml2nxdl_forward_tools.py | 20 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dev_tools/nyaml2nxdl/comment_collector.py b/dev_tools/nyaml2nxdl/comment_collector.py index dcb21021b..0041c14ec 100644 --- a/dev_tools/nyaml2nxdl/comment_collector.py +++ b/dev_tools/nyaml2nxdl/comment_collector.py @@ -38,7 +38,7 @@ from typing import Type from typing import Union -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader +from .nyaml2nxdl_helper import LineLoader __all__ = ["Comment", "CommentCollector", "XMLComment", "YAMLComment"] diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py index faa22cc23..c0f672305 100755 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py @@ -27,10 +27,10 @@ from typing import Dict from typing import List -from pynxtools.dataconverter.helpers import remove_namespace_from_tag -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import cleaning_empty_lines -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_node_parent_info -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_yaml_escape_char_dict +from .nyaml2nxdl_helper import remove_namespace_from_tag +from .nyaml2nxdl_helper import cleaning_empty_lines +from .nyaml2nxdl_helper import get_node_parent_info +from .nyaml2nxdl_helper import get_yaml_escape_char_dict 
DEPTH_SIZE = " " CMNT_TAG = "!--" diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index 56a33a453..85b3ece55 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -28,13 +28,13 @@ from xml.dom import minidom import yaml -from pynxtools.dataconverter.helpers import remove_namespace_from_tag -from pynxtools.nexus import nexus -from pynxtools.nyaml2nxdl.comment_collector import CommentCollector -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import cleaning_empty_lines -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_yaml_escape_char_reverter_dict -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import nx_name_type_resolving +from .nyaml2nxdl_helper import remove_namespace_from_tag +from ..utils import nexus as pynxtools_nxlib +from .nyaml2nxdl.comment_collector import CommentCollector +from .nyaml2nxdl_helper import LineLoader +from .nyaml2nxdl_helper import cleaning_empty_lines +from .nyaml2nxdl_helper import get_yaml_escape_char_reverter_dict +from .nyaml2nxdl_helper import nx_name_type_resolving # pylint: disable=too-many-lines, global-statement, invalid-name DOM_COMMENT = ( @@ -59,13 +59,13 @@ "#\n" "# For further information, see http://www.nexusformat.org\n" ) -NX_CLSS = nexus.get_nx_classes() +NX_CLSS = pynxtools_nxlib.get_nx_classes() NX_NEW_DEFINED_CLASSES = ["NX_COMPLEX"] -NX_TYPE_KEYS = nexus.get_nx_attribute_type() +NX_TYPE_KEYS = pynxtools_nxlib.get_nx_attribute_type() NX_ATTR_IDNT = "\\@" NX_UNIT_IDNT = "unit" DEPTH_SIZE = " " -NX_UNIT_TYPES = nexus.get_nx_units() +NX_UNIT_TYPES = pynxtools_nxlib.get_nx_units() COMMENT_BLOCKS: CommentCollector CATEGORY = "" # Definition would be either 'base' or 'application' From b65bf235d284b99c27fa6003bf46e6984c2f6121 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 15:57:55 +0200 Subject: [PATCH 27/32] 
fixing imports --- dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py | 2 +- dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py index c0f672305..dcf56b998 100755 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_backward_tools.py @@ -27,10 +27,10 @@ from typing import Dict from typing import List -from .nyaml2nxdl_helper import remove_namespace_from_tag from .nyaml2nxdl_helper import cleaning_empty_lines from .nyaml2nxdl_helper import get_node_parent_info from .nyaml2nxdl_helper import get_yaml_escape_char_dict +from .nyaml2nxdl_helper import remove_namespace_from_tag DEPTH_SIZE = " " CMNT_TAG = "!--" diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index 85b3ece55..d54aa9f93 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -28,13 +28,14 @@ from xml.dom import minidom import yaml -from .nyaml2nxdl_helper import remove_namespace_from_tag + from ..utils import nexus as pynxtools_nxlib from .nyaml2nxdl.comment_collector import CommentCollector from .nyaml2nxdl_helper import LineLoader from .nyaml2nxdl_helper import cleaning_empty_lines from .nyaml2nxdl_helper import get_yaml_escape_char_reverter_dict from .nyaml2nxdl_helper import nx_name_type_resolving +from .nyaml2nxdl_helper import remove_namespace_from_tag # pylint: disable=too-many-lines, global-statement, invalid-name DOM_COMMENT = ( From 90a9e45a070018f45e2a9ec5fe5d9660594c15bb Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Mon, 19 Jun 2023 18:19:29 +0200 Subject: [PATCH 28/32] test case added --- dev_tools/nyaml2nxdl/nyaml2nxdl.py | 24 ++++++++++------- .../nyaml2nxdl/nyaml2nxdl_forward_tools.py | 2 +- dev_tools/tests/test_nyaml2nxdl.py | 27 
+++++++++++++++++++ 3 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 dev_tools/tests/test_nyaml2nxdl.py diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl.py b/dev_tools/nyaml2nxdl/nyaml2nxdl.py index 815b015e6..dccfff6e4 100755 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl.py @@ -26,13 +26,14 @@ import xml.etree.ElementTree as ET import click -from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import Nxdl2yaml -from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import compare_niac_and_my -from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import nyaml2nxdl -from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import pretty_print_xml -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import extend_yamlfile_with_comment -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import get_sha256_hash -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import separate_hash_yaml_and_nxdl + +from .nyaml2nxdl_backward_tools import Nxdl2yaml +from .nyaml2nxdl_backward_tools import compare_niac_and_my +from .nyaml2nxdl_forward_tools import nyaml2nxdl +from .nyaml2nxdl_forward_tools import pretty_print_xml +from .nyaml2nxdl_helper import extend_yamlfile_with_comment +from .nyaml2nxdl_helper import get_sha256_hash +from .nyaml2nxdl_helper import separate_hash_yaml_and_nxdl DEPTH_SIZE = 4 * " " @@ -152,15 +153,18 @@ def split_name_and_extension(file_name): Split file name into extension and rest of the file name. 
return file raw nam and extension """ - parts = file_name.rsplit(".", 3) + path = file_name.rsplit("/", 1) + (pathn, filen) = ["", path[0]] if len(path) == 1 else [path[0] + "/", path[1]] + parts = filen.rsplit(".", 2) + raw = ext = "" if len(parts) == 2: raw = parts[0] ext = parts[1] - if len(parts) == 3: + elif len(parts) == 3: raw = parts[0] ext = ".".join(parts[1:]) - return raw, ext + return pathn + raw, ext @click.command() diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index d54aa9f93..984d7674f 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -30,7 +30,7 @@ import yaml from ..utils import nexus as pynxtools_nxlib -from .nyaml2nxdl.comment_collector import CommentCollector +from .comment_collector import CommentCollector from .nyaml2nxdl_helper import LineLoader from .nyaml2nxdl_helper import cleaning_empty_lines from .nyaml2nxdl_helper import get_yaml_escape_char_reverter_dict diff --git a/dev_tools/tests/test_nyaml2nxdl.py b/dev_tools/tests/test_nyaml2nxdl.py new file mode 100644 index 000000000..2722cf475 --- /dev/null +++ b/dev_tools/tests/test_nyaml2nxdl.py @@ -0,0 +1,27 @@ +import os + +from click.testing import CliRunner + +from ..nyaml2nxdl import nyaml2nxdl as conv +from ..utils.nexus import find_definition_file + +# import subprocess + + +def test_conversion(): + root = find_definition_file("NXentry") + # subprocess.run(["python3","-m","dev_tools.nyaml2nxdl.nyaml2nxdl","--input-file",root]) + result = CliRunner().invoke(conv.launch_tool, ["--input-file", root]) + assert result.exit_code == 0 + yaml = root[:-9] + "_parsed.yaml" + # subprocess.run(["python3","-m","dev_tools.nyaml2nxdl.nyaml2nxdl","--input-file",yaml]) + result = CliRunner().invoke(conv.launch_tool, ["--input-file", yaml]) + assert result.exit_code == 0 + new_root = yaml[:-4] + "nxdl.xml" + with open(root, encoding="utf-8", mode="r") as tmp_f: + 
root_content = tmp_f.readlines() + with open(new_root, encoding="utf-8", mode="r") as tmp_f: + new_root_content = tmp_f.readlines() + assert root_content == new_root_content + os.remove(yaml) + os.remove(new_root) From 015aa7761b1814e2dadb4f30aebe0ac39ef04b32 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Wed, 21 Jun 2023 12:40:56 +0200 Subject: [PATCH 29/32] removing h5py dependency --- dev_tools/docs/nxdl.py | 2 +- dev_tools/utils/{nexus.py => nxdl_utils.py} | 761 +------------------- 2 files changed, 5 insertions(+), 758 deletions(-) rename dev_tools/utils/{nexus.py => nxdl_utils.py} (51%) diff --git a/dev_tools/docs/nxdl.py b/dev_tools/docs/nxdl.py index 815e8b777..4c36ef277 100644 --- a/dev_tools/docs/nxdl.py +++ b/dev_tools/docs/nxdl.py @@ -12,7 +12,7 @@ from ..globals.errors import NXDLParseError from ..globals.nxdl import NXDL_NAMESPACE from ..globals.urls import REPO_URL -from ..utils import nexus as pynxtools_nxlib +from ..utils import nxdl_utils as pynxtools_nxlib from ..utils.types import PathLike from .anchor_list import AnchorRegistry diff --git a/dev_tools/utils/nexus.py b/dev_tools/utils/nxdl_utils.py similarity index 51% rename from dev_tools/utils/nexus.py rename to dev_tools/utils/nxdl_utils.py index 4bcb1c9e9..efba439be 100644 --- a/dev_tools/utils/nexus.py +++ b/dev_tools/utils/nxdl_utils.py @@ -1,18 +1,13 @@ # pylint: disable=too-many-lines -"""Read files from different format and print it in a standard NeXus format +"""Parse NeXus definition files """ -import logging import os -import sys import textwrap import xml.etree.ElementTree as ET from functools import lru_cache from glob import glob -import click -import h5py - class NxdlAttributeError(Exception): """An exception for throwing an error when an Nxdl attribute is not found.""" @@ -89,49 +84,6 @@ def get_hdf_info_parent(hdf_info): return {"hdf_node": node, "hdf_path": get_parent_path(hdf_info["hdf_path"])} -def get_nx_class_path(hdf_info): - """Get the full path of an HDF5 node 
using nexus classes - in case of a field, end with the field name""" - hdf_node = hdf_info["hdf_node"] - if hdf_node.name == "/": - return "" - if isinstance(hdf_node, h5py.Group): - return ( - get_nx_class_path(get_hdf_info_parent(hdf_info)) - + "/" - + ( - hdf_node.attrs["NX_class"] - if "NX_class" in hdf_node.attrs.keys() - else hdf_node.name.split("/")[-1] - ) - ) - if isinstance(hdf_node, h5py.Dataset): - return ( - get_nx_class_path(get_hdf_info_parent(hdf_info)) - + "/" - + hdf_node.name.split("/")[-1] - ) - return "" - - -def get_nxdl_entry(hdf_info): - """Get the nxdl application definition for an HDF5 node""" - entry = hdf_info - while ( - isinstance(entry["hdf_node"], h5py.Dataset) - or "NX_class" not in entry["hdf_node"].attrs.keys() - or entry["hdf_node"].attrs["NX_class"] != "NXentry" - ): - entry = get_hdf_info_parent(entry) - if entry["hdf_node"].name == "/": - return "NO NXentry found" - try: - nxdef = entry["hdf_node"]["definition"][()] - return nxdef.decode() - except KeyError: # 'NO Definition referenced' - return "NXentry" - - def get_nx_class(nxdl_elem): """Get the nexus class for a NXDL node""" if "category" in nxdl_elem.attrib.keys(): @@ -476,65 +428,6 @@ def get_required_string(nxdl_elem): return "<>" -def chk_nxdataaxis_v2(hdf_node, name, logger): - """Check if dataset is an axis""" - own_signal = hdf_node.attrs.get("signal") # check for being a Signal - if own_signal is str and own_signal == "1": - logger.debug("Dataset referenced (v2) as NXdata SIGNAL") - own_axes = hdf_node.attrs.get("axes") # check for being an axis - if own_axes is str: - axes = own_axes.split(":") - for i in len(axes): - if axes[i] and name == axes[i]: - logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", i) - return None - ownpaxis = hdf_node.attrs.get("primary") - own_axis = hdf_node.attrs.get("axis") - if own_axis is int: - # also convention v1 - if ownpaxis is int and ownpaxis == 1: - logger.debug("Dataset referenced (v2) as NXdata AXIS #%d", own_axis - 1) 
- else: - logger.debug( - "Dataset referenced (v2) as NXdata (primary/alternative) AXIS #%d", - own_axis - 1, - ) - return None - - -def chk_nxdataaxis(hdf_node, name, logger): - """NEXUS Data Plotting Standard v3: new version from 2014""" - if not isinstance( - hdf_node, h5py.Dataset - ): # check if it is a field in an NXdata node - return None - parent = hdf_node.parent - if not parent or (parent and not parent.attrs.get("NX_class") == "NXdata"): - return None - signal = parent.attrs.get("signal") # chk for Signal - if signal and name == signal: - logger.debug("Dataset referenced as NXdata SIGNAL") - return None - axes = parent.attrs.get("axes") # check for default Axes - if axes is str: - if name == axes: - logger.debug("Dataset referenced as NXdata AXIS") - return None - elif axes is not None: - for i, j in enumerate(axes): - if name == j: - indices = parent.attrs.get(j + "_indices") - if indices is int: - logger.debug(f"Dataset referenced as NXdata AXIS #{indices}") - else: - logger.debug(f"Dataset referenced as NXdata AXIS #{i}") - return None - indices = parent.attrs.get(name + "_indices") # check for alternative Axes - if indices is int: - logger.debug(f"Dataset referenced as NXdata alternative AXIS #{indices}") - return chk_nxdataaxis_v2(hdf_node, name, logger) # check for older conventions - - # below there are some functions used in get_nxdl_doc function: def write_doc_string(logger, doc, attr): """Simple function that prints a line in the logger if doc exists""" @@ -670,150 +563,11 @@ def other_attrs( return logger, elem, nxdl_path, doc, attr -def check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node): - """Check for several attributes. 
- deprecation - enums - nxdataaxis""" - logger, elem, path = variables - dep_str = elem.attrib.get("deprecated") # check for deprecation - if dep_str: - if doc: - logger.debug("DEPRECATED - " + dep_str) - for base_elem in elist if not attr else [elem]: # check for enums - sdoc = get_nxdl_child(base_elem, "enumeration", go_base=False) - if sdoc is not None: - if doc: - logger.debug("enumeration (" + get_node_concept_path(base_elem) + "):") - for item in sdoc: - if get_local_name_from_xml(item) == "item": - if doc: - logger.debug("-> " + item.attrib["value"]) - chk_nxdataaxis( - hdf_node, path.split("/")[-1], logger - ) # look for NXdata reference (axes/signal) - for base_elem in elist if not attr else [elem]: # check for doc - sdoc = get_nxdl_child(base_elem, "doc", go_base=False) - if doc: - logger.debug("documentation (" + get_node_concept_path(base_elem) + "):") - logger.debug(sdoc.text if sdoc is not None else "") - return logger, elem, path, doc, elist, attr, hdf_node - - def get_node_concept_path(elem): """get the short version of nxdlbase:nxdlpath""" return str(elem.get("nxdlbase").split("/")[-1] + ":" + elem.get("nxdlpath")) -def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals - elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info -): - """Get nxdl documentation for an attribute""" - new_elem = [] - old_elem = elem - for elem_index, act_elem1 in enumerate(elist): - act_elem = act_elem1 - # NX_class is a compulsory attribute for groups in a nexus file - # which should match the type of the corresponding NXDL element - if ( - attr == "NX_class" - and not isinstance(hdf_node, h5py.Dataset) - and elem_index == 0 - ): - elem = None - logger, doc, attr = write_doc_string(logger, doc, attr) - new_elem = elem - break - # units category is a compulsory attribute for any fields - if attr == "units" and isinstance(hdf_node, h5py.Dataset): - req_str = "<>" - logger, act_elem, nxdl_path, doc, attr = try_find_units( - 
logger, act_elem, nxdl_path, doc, attr - ) - # units for attributes can be given as ATTRIBUTENAME_units - elif attr.endswith("_units"): - logger, act_elem, nxdl_path, doc, attr, req_str = check_attr_name_nxdl( - (logger, act_elem, nxdl_path, doc, attr, req_str) - ) - # default is allowed for groups - elif attr == "default" and not isinstance(hdf_node, h5py.Dataset): - req_str = "<>" - # try to find if default is defined as a child of the NXDL element - act_elem = get_nxdl_child( - act_elem, attr, nexus_type="attribute", go_base=False - ) - logger, act_elem, nxdl_path, doc, attr = try_find_default( - logger, act_elem1, act_elem, nxdl_path, doc, attr - ) - else: # other attributes - act_elem = get_nxdl_child( - act_elem, attr, nexus_type="attribute", go_base=False - ) - if act_elem is not None: - logger, act_elem, nxdl_path, doc, attr = other_attrs( - logger, act_elem1, act_elem, nxdl_path, doc, attr - ) - if act_elem is not None: - new_elem.append(act_elem) - if req_str is None: - req_str = get_required_string(act_elem) # check for being required - if doc: - logger.debug(req_str) - variables = [logger, act_elem, path] - ( - logger, - elem, - path, - doc, - elist, - attr, - hdf_node, - ) = check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node) - elem = old_elem - if req_str is None and doc: - if attr != "NX_class": - logger.debug("@" + attr + " - IS NOT IN SCHEMA") - logger.debug("") - return (req_str, get_nxdl_entry(hdf_info), nxdl_path) - - -def get_nxdl_doc(hdf_info, logger, doc, attr=False): - """Get nxdl documentation for an HDF5 node (or its attribute)""" - hdf_node = hdf_info["hdf_node"] - # new way: retrieve multiple inherited base classes - (class_path, nxdl_path, elist) = get_inherited_nodes( - None, - nx_name=get_nxdl_entry(hdf_info), - hdf_node=hdf_node, - hdf_path=hdf_info["hdf_path"] if "hdf_path" in hdf_info else None, - hdf_root=hdf_info["hdf_root"] if "hdf_root" in hdf_info else None, - ) - elem = elist[0] if class_path and elist else 
None - if doc: - logger.debug("classpath: " + str(class_path)) - logger.debug( - "NOT IN SCHEMA" - if elem is None - else "classes:\n" + "\n".join(get_node_concept_path(e) for e in elist) - ) - # old solution with a single elem instead of using elist - path = get_nx_class_path(hdf_info) - req_str = None - if elem is None: - if doc: - logger.debug("") - return ("None", None, None) - if attr: - return get_nxdl_attr_doc( - elem, elist, attr, hdf_node, logger, doc, nxdl_path, req_str, path, hdf_info - ) - req_str = get_required_string(elem) # check for being required - if doc: - logger.debug(req_str) - variables = [logger, elem, path] - logger, elem, path, doc, elist, attr, hdf_node = check_deprecation_enum_axis( - variables, doc, elist, attr, hdf_node - ) - return (req_str, get_nxdl_entry(hdf_info), nxdl_path) - - def get_doc(node, ntype, nxhtml, nxpath): """Get documentation""" # URL for html documentation @@ -1044,44 +798,11 @@ def walk_elist(elist, html_name): return elist, html_name -def helper_get_inherited_nodes(hdf_info2, elist, pind, attr): - """find the best fitting name in all children""" - hdf_path, hdf_node, hdf_class_path = hdf_info2 - hdf_name = hdf_path[pind] - hdf_class_name = hdf_class_path[pind] - if pind < len(hdf_path) - (2 if attr else 1): - act_nexus_type = "group" - elif pind == len(hdf_path) - 1 and attr: - act_nexus_type = "attribute" - else: - act_nexus_type = "field" if isinstance(hdf_node, h5py.Dataset) else "group" - # find the best fitting name in all children - bestfit = -1 - html_name = None - for ind in range(len(elist) - 1, -1, -1): - newelem, fit = get_best_child( - elist[ind], hdf_node, hdf_name, hdf_class_name, act_nexus_type - ) - if fit >= bestfit and newelem is not None: - html_name = get_node_name(newelem) - return hdf_path, hdf_node, hdf_class_path, elist, pind, attr, html_name - - -def get_hdf_path(hdf_info): - """Get the hdf_path from an hdf_info""" - if "hdf_path" in hdf_info: - return hdf_info["hdf_path"].split("/")[1:] - 
return hdf_info["hdf_node"].name.split("/")[1:] - - @lru_cache(maxsize=None) def get_inherited_nodes( nxdl_path: str = None, # pylint: disable=too-many-arguments,too-many-locals nx_name: str = None, elem: ET.Element = None, - hdf_node=None, - hdf_path=None, - hdf_root=None, attr=False, ): """Returns a list of ET.Element for the given path.""" @@ -1091,38 +812,10 @@ def get_inherited_nodes( nxdl_elem_path = [elist[0]] class_path = [] # type: ignore[var-annotated] - if hdf_node is not None: - hdf_info = {"hdf_node": hdf_node} - if hdf_path: - hdf_info["hdf_path"] = hdf_path - if hdf_root: - hdf_root["hdf_root"] = hdf_root - hdf_node = hdf_info["hdf_node"] - hdf_path = get_hdf_path(hdf_info) - hdf_class_path = get_nx_class_path(hdf_info).split("/")[1:] - if attr: - hdf_path.append(attr) - hdf_class_path.append(attr) - path = hdf_path - else: - html_path = nxdl_path.split("/")[1:] - path = html_path + html_path = nxdl_path.split("/")[1:] + path = html_path for pind in range(len(path)): - if hdf_node is not None: - hdf_info2 = [hdf_path, hdf_node, hdf_class_path] - [ - hdf_path, - hdf_node, - hdf_class_path, - elist, - pind, - attr, - html_name, - ] = helper_get_inherited_nodes(hdf_info2, elist, pind, attr) - if html_name is None: # return if NOT IN SCHEMA - return (class_path, nxdl_elem_path, None) - else: - html_name = html_path[pind] + html_name = html_path[pind] elist, html_name = walk_elist(elist, html_name) if elist: class_path.append(get_nx_class(elist[0])) @@ -1159,449 +852,3 @@ def get_node_at_nxdl_path( "Please check this entry in the template dictionary." ) return elem - - -def process_node(hdf_node, hdf_path, parser, logger, doc=True): - """Processes an hdf5 node. 
- - it logs the node found and also checks for its attributes - - retrieves the corresponding nxdl documentation - TODO: - - follow variants - - NOMAD parser: store in NOMAD""" - hdf_info = {"hdf_path": hdf_path, "hdf_node": hdf_node} - if isinstance(hdf_node, h5py.Dataset): - logger.debug(f"===== FIELD (/{hdf_path}): {hdf_node}") - val = ( - str(hdf_node[()]).split("\n") - if len(hdf_node.shape) <= 1 - else str(hdf_node[0]).split("\n") - ) - logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') - else: - logger.debug( - f"===== GROUP (/{hdf_path} " - f"[{get_nxdl_entry(hdf_info)}" - f"::{get_nx_class_path(hdf_info)}]): {hdf_node}" - ) - (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc) - if parser is not None and isinstance(hdf_node, h5py.Dataset): - parser( - { - "hdf_info": hdf_info, - "nxdef": nxdef, - "nxdl_path": nxdl_path, - "val": val, - "logger": logger, - } - ) - for key, value in hdf_node.attrs.items(): - logger.debug(f"===== ATTRS (/{hdf_path}@{key})") - val = str(value).split("\n") - logger.debug(f'value: {val[0]} {"..." 
if len(val) > 1 else ""}') - (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc, attr=key) - if ( - parser is not None - and req_str is not None - and "NOT IN SCHEMA" not in req_str - and "None" not in req_str - ): - parser( - { - "hdf_info": hdf_info, - "nxdef": nxdef, - "nxdl_path": nxdl_path, - "val": val, - "logger": logger, - }, - attr=key, - ) - - -def logger_auxiliary_signal(logger, nxdata): - """Handle the presence of auxiliary signal""" - aux = nxdata.attrs.get("auxiliary_signals") - if aux is not None: - if isinstance(aux, str): - aux = [aux] - for asig in aux: - logger.debug(f"Further auxiliary signal has been identified: {asig}") - return logger - - -def print_default_plotable_header(logger): - """Print a three-lines header""" - logger.debug("========================") - logger.debug("=== Default Plotable ===") - logger.debug("========================") - - -def get_default_plotable(root, logger): - """Get default plotable""" - print_default_plotable_header(logger) - # v3 from 2014 - # nxentry - nxentry = None - default_nxentry_group_name = root.attrs.get("default") - if default_nxentry_group_name: - try: - nxentry = root[default_nxentry_group_name] - except KeyError: - nxentry = None - if not nxentry: - nxentry = entry_helper(root) - if not nxentry: - logger.debug("No NXentry has been found") - return - logger.debug("") - logger.debug("NXentry has been identified: " + nxentry.name) - # nxdata - nxdata = None - nxgroup = nxentry - default_group_name = nxgroup.attrs.get("default") - while default_group_name: - try: - nxgroup = nxgroup[default_group_name] - default_group_name = nxgroup.attrs.get("default") - except KeyError: - pass - if nxgroup == nxentry: - nxdata = nxdata_helper(nxentry) - else: - nxdata = nxgroup - if not nxdata: - logger.debug("No NXdata group has been found") - return - logger.debug("") - logger.debug("NXdata group has been identified: " + nxdata.name) - process_node(nxdata, nxdata.name, None, logger, False) - # signal 
- signal = None - signal_dataset_name = nxdata.attrs.get("signal") - try: - signal = nxdata[signal_dataset_name] - except (TypeError, KeyError): - signal = None - if not signal: - signal = signal_helper(nxdata) - if not signal: - logger.debug("No Signal has been found") - return - logger.debug("") - logger.debug("Signal has been identified: " + signal.name) - process_node(signal, signal.name, None, logger, False) - logger = logger_auxiliary_signal(logger, nxdata) # check auxiliary_signals - dim = len(signal.shape) - axes = [] # axes - axis_helper(dim, nxdata, signal, axes, logger) - - -def entry_helper(root): - """Check entry related data""" - nxentries = [] - for key in root.keys(): - if ( - isinstance(root[key], h5py.Group) - and root[key].attrs.get("NX_class") - and root[key].attrs["NX_class"] == "NXentry" - ): - nxentries.append(root[key]) - if len(nxentries) >= 1: - return nxentries[0] - return None - - -def nxdata_helper(nxentry): - """Check if nxentry hdf5 object has a NX_class and, if it contains NXdata, - return its value""" - lnxdata = [] - for key in nxentry.keys(): - if ( - isinstance(nxentry[key], h5py.Group) - and nxentry[key].attrs.get("NX_class") - and nxentry[key].attrs["NX_class"] == "NXdata" - ): - lnxdata.append(nxentry[key]) - if len(lnxdata) >= 1: - return lnxdata[0] - return None - - -def signal_helper(nxdata): - """Check signal related data""" - signals = [] - for key in nxdata.keys(): - if isinstance(nxdata[key], h5py.Dataset): - signals.append(nxdata[key]) - if ( - len(signals) == 1 - ): # v3: as there was no selection given, only 1 data field shall exists - return signals[0] - if len(signals) > 1: # v2: select the one with an attribute signal="1" attribute - for sig in signals: - if ( - sig.attrs.get("signal") - and sig.attrs.get("signal") is str - and sig.attrs.get("signal") == "1" - ): - return sig - return None - - -def find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list): - """Finds axis that have defined dimensions""" - # find 
those with attribute axis= actual dimension number - lax = [] - for key in nxdata.keys(): - if isinstance(nxdata[key], h5py.Dataset): - try: - if nxdata[key].attrs["axis"] == a_item + 1: - lax.append(nxdata[key]) - except KeyError: - pass - if len(lax) == 1: - ax_list.append(lax[0]) - # if there are more alternatives, prioritise the one with an attribute primary="1" - elif len(lax) > 1: - for sax in lax: - if sax.attrs.get("primary") and sax.attrs.get("primary") == 1: - ax_list.insert(0, sax) - else: - ax_list.append(sax) - - -def get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list): - """Gets either single or multiple axes from the NXDL""" - try: - if isinstance(ax_datasets, str): # single axis is defined - # explicite definition of dimension number - ind = nxdata.attrs.get(ax_datasets + "_indices") - if ind and ind is int: - if ind == a_item: - ax_list.append(nxdata[ax_datasets]) - elif a_item == 0: # positional determination of the dimension number - ax_list.append(nxdata[ax_datasets]) - else: # multiple axes are listed - # explicite definition of dimension number - for aax in ax_datasets: - ind = nxdata.attrs.get(aax + "_indices") - if ind and isinstance(ind, int): - if ind == a_item: - ax_list.append(nxdata[aax]) - if not ax_list: # positional determination of the dimension number - ax_list.append(nxdata[ax_datasets[a_item]]) - except KeyError: - pass - return ax_list - - -def axis_helper(dim, nxdata, signal, axes, logger): - """Check axis related data""" - for a_item in range(dim): - ax_list = [] - ax_datasets = nxdata.attrs.get("axes") # primary axes listed in attribute axes - ax_list = get_single_or_multiple_axes(nxdata, ax_datasets, a_item, ax_list) - for attr in nxdata.attrs.keys(): # check for corresponding AXISNAME_indices - if ( - attr.endswith("_indices") - and nxdata.attrs[attr] == a_item - and nxdata[attr.split("_indices")[0]] not in ax_list - ): - ax_list.append(nxdata[attr.split("_indices")[0]]) - # v2 # check for ':' separated axes 
defined in Signal - if not ax_list: - try: - ax_datasets = signal.attrs.get("axes").split(":") - ax_list.append(nxdata[ax_datasets[a_item]]) - except (KeyError, AttributeError): - pass - if not ax_list: # check for axis/primary specifications - find_attrib_axis_actual_dim_num(nxdata, a_item, ax_list) - axes.append(ax_list) - logger.debug("") - logger.debug( - f"For Axis #{a_item}, {len(ax_list)} axes have been identified: {str(ax_list)}" - ) - - -def get_all_is_a_rel_from_hdf_node(hdf_node, hdf_path): - """Return list of nxdl concept paths for a nxdl element which corresponds to - hdf node. - """ - hdf_info = {"hdf_path": hdf_path, "hdf_node": hdf_node} - (_, _, elist) = get_inherited_nodes( - None, - nx_name=get_nxdl_entry(hdf_info), - hdf_node=hdf_node, - hdf_path=hdf_info["hdf_path"] if "hdf_path" in hdf_info else None, - hdf_root=hdf_info["hdf_root"] if "hdf_root" in hdf_info else None, - ) - return elist - - -def hdf_node_to_self_concept_path(hdf_info, logger): - """Get concept or nxdl path from given hdf_node.""" - # The bellow logger is for deactivatine unnecessary debug message above - if logger is None: - logger = logging.getLogger(__name__) - logger.setLevel(logging.INFO) - (_, _, nxdl_path) = get_nxdl_doc(hdf_info, logger, None) - con_path = "" - if nxdl_path: - for nd_ in nxdl_path: - con_path = con_path + "/" + get_node_name(nd_) - return con_path - - -class HandleNexus: - """documentation""" - - def __init__(self, logger, nexus_file, d_inq_nd=None, c_inq_nd=None): - self.logger = logger - - if nexus_file is None: - raise ValueError("Nexus file not specified. Cannot proceed.") - - self.input_file_name = nexus_file - self.parser = None - self.in_file = None - self.d_inq_nd = d_inq_nd - self.c_inq_nd = c_inq_nd - # Aggregating hdf path corresponds to concept query node - self.hdf_path_list_for_c_inq_nd = [] - - def visit_node(self, hdf_name, hdf_node): - """Function called by h5py that iterates on each node of hdf5file. 
- It allows h5py visititems function to visit nodes.""" - if self.d_inq_nd is None and self.c_inq_nd is None: - process_node(hdf_node, "/" + hdf_name, self.parser, self.logger) - elif self.d_inq_nd is not None and hdf_name in ( - self.d_inq_nd, - self.d_inq_nd[1:], - ): - process_node(hdf_node, "/" + hdf_name, self.parser, self.logger) - elif self.c_inq_nd is not None: - attributed_concept = self.c_inq_nd.split("@") - attr = attributed_concept[1] if len(attributed_concept) > 1 else None - elist = get_all_is_a_rel_from_hdf_node(hdf_node, "/" + hdf_name) - if elist is None: - return - fnd_superclass = False - fnd_superclass_attr = False - for elem in reversed(elist): - tmp_path = elem.get("nxdlbase").split(".nxdl")[0] - con_path = "/NX" + tmp_path.split("NX")[-1] + elem.get("nxdlpath") - if fnd_superclass or con_path == attributed_concept[0]: - fnd_superclass = True - if attr is None: - self.hdf_path_list_for_c_inq_nd.append(hdf_name) - break - for attribute in hdf_node.attrs.keys(): - attr_concept = get_nxdl_child( - elem, attribute, nexus_type="attribute", go_base=False - ) - if attr_concept is not None and attr_concept.get( - "nxdlpath" - ).endswith(attr): - fnd_superclass_attr = True - con_path = ( - "/NX" - + tmp_path.split("NX")[-1] - + attr_concept.get("nxdlpath") - ) - self.hdf_path_list_for_c_inq_nd.append( - hdf_name + "@" + attribute - ) - break - if fnd_superclass_attr: - break - - def not_yet_visited(self, root, name): - """checking if a new node has already been visited in its path""" - path = name.split("/") - for i in range(1, len(path)): - act_path = "/".join(path[:i]) - # print(act_path+' - '+name) - if root["/" + act_path] == root["/" + name]: - return False - return True - - def full_visit(self, root, hdf_node, name, func): - """visiting recursivly all children, but avoiding endless cycles""" - # print(name) - if len(name) > 0: - func(name, hdf_node) - if isinstance(hdf_node, h5py.Group): - for ch_name, child in hdf_node.items(): - full_name = 
ch_name if len(name) == 0 else name + "/" + ch_name - if self.not_yet_visited(root, full_name): - self.full_visit(root, child, full_name, func) - - def process_nexus_master_file(self, parser): - """Process a nexus master file by processing all its nodes and their attributes""" - self.parser = parser - self.in_file = h5py.File( - self.input_file_name[0] - if isinstance(self.input_file_name, list) - else self.input_file_name, - "r", - ) - self.full_visit(self.in_file, self.in_file, "", self.visit_node) - if self.d_inq_nd is None and self.c_inq_nd is None: - get_default_plotable(self.in_file, self.logger) - # To log the provided concept and concepts founded - if self.c_inq_nd is not None: - for hdf_path in self.hdf_path_list_for_c_inq_nd: - self.logger.info(hdf_path) - self.in_file.close() - - -@click.command() -@click.argument( - "nexus_file", -) -@click.option( - "-d", - "--documentation", - required=False, - default=None, - help=( - "Definition path in nexus output (.nxs) file. Returns debug" - "log relavent with that definition path. Example: /entry/data/delays" - ), -) -@click.option( - "-c", - "--concept", - required=False, - default=None, - help=( - "Concept path from application definition file (.nxdl,xml). Finds out" - "all the available concept definition (IS-A realation) for rendered" - "concept path. 
Example: /NXarpes/ENTRY/INSTRUMENT/analyser" - ), -) -def main(nexus_file, documentation, concept): - """The main function to call when used as a script.""" - logging_format = "%(levelname)s: %(message)s" - stdout_handler = logging.StreamHandler(sys.stdout) - stdout_handler.setLevel(logging.DEBUG) - logging.basicConfig( - level=logging.INFO, format=logging_format, handlers=[stdout_handler] - ) - logger = logging.getLogger(__name__) - logger.addHandler(stdout_handler) - logger.setLevel(logging.DEBUG) - logger.propagate = False - if documentation and concept: - raise ValueError( - "Only one option either documentation (-d) or is_a relation " - "with a concept (-c) can be requested." - ) - nexus_helper = HandleNexus( - logger, nexus_file, d_inq_nd=documentation, c_inq_nd=concept - ) - nexus_helper.process_nexus_master_file(None) - - -if __name__ == "__main__": - main() # pylint: disable=no-value-for-parameter From f2cd2ac51c37bfd4144961cbe1e5ab901fe43303 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Wed, 21 Jun 2023 12:53:20 +0200 Subject: [PATCH 30/32] remove dependencies also from pypi configuration --- pyproject.toml | 3 --- requirements.txt | 2 -- 2 files changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d4cf990c8..97430f09b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,6 @@ classifiers = [ dependencies = [ "lxml", "pyyaml", - "click>=7.1.2", - "h5py>=3.6.0", "sphinx>=5", "sphinx-tabs", "pytest", @@ -32,7 +30,6 @@ dependencies = [ "Homepage" = "https://nexusformat.org" [project.scripts] -read_nexus = "dev_tools.utils.nexus:main" [tools.setuptools_scm] version_scheme = "guess-next-dev" diff --git a/requirements.txt b/requirements.txt index bbfd892f7..54b7bb86f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ # Prepare for Documentation lxml pyyaml -click>=7.1.2 -h5py>=3.6.0 # Documentation building sphinx>=5 From 5e934d642db2f326164c1d80dab9405f78cc5353 Mon Sep 17 00:00:00 2001 From: Sandor 
Brockhauser Date: Wed, 21 Jun 2023 13:04:34 +0200 Subject: [PATCH 31/32] fixing imports --- dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py | 2 +- dev_tools/tests/test_nyaml2nxdl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py index 984d7674f..664f68748 100644 --- a/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ b/dev_tools/nyaml2nxdl/nyaml2nxdl_forward_tools.py @@ -29,7 +29,7 @@ import yaml -from ..utils import nexus as pynxtools_nxlib +from ..utils import nxdl_utils as pynxtools_nxlib from .comment_collector import CommentCollector from .nyaml2nxdl_helper import LineLoader from .nyaml2nxdl_helper import cleaning_empty_lines diff --git a/dev_tools/tests/test_nyaml2nxdl.py b/dev_tools/tests/test_nyaml2nxdl.py index 2722cf475..792d8d462 100644 --- a/dev_tools/tests/test_nyaml2nxdl.py +++ b/dev_tools/tests/test_nyaml2nxdl.py @@ -3,7 +3,7 @@ from click.testing import CliRunner from ..nyaml2nxdl import nyaml2nxdl as conv -from ..utils.nexus import find_definition_file +from ..utils.nxdl_utils import find_definition_file # import subprocess From ac5b156a4345ea375840ad66bbae83851544e9a9 Mon Sep 17 00:00:00 2001 From: Sandor Brockhauser Date: Wed, 21 Jun 2023 13:12:09 +0200 Subject: [PATCH 32/32] removing the unnecessary ignoring of unknowns --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitignore b/.gitignore index 50408db4b..7867d7665 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,6 @@ __pycache__/ build/ makelog.txt -# Unknown -/python/ -__github_creds__.txt - # Distribution / packaging .Python build/