From 7b01a78c929ed5028bf9cf674561c0e82177cf33 Mon Sep 17 00:00:00 2001 From: valentin-gauthier-geosiris <88202743+valentin-gauthier-geosiris@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:36:34 +0200 Subject: [PATCH] Uri + OSDU json serialization * bugfix for date * exception will not be raised for a notFound CRS during mesh reading. * An error is now printed for UnknownProperty exception during xml parsing (case of xsi:type filled without namespace specified) * epc improvement, bugfix in mesh reading * new tests * adding uri class * json serialization/deserialization for OSDU official version * using logging --- .../docs/src/energyml/utils/manager.html | 16 +- .../docs/src/energyml/utils/xml.html | 56 +- energyml-utils/example/main.py | 127 +++- energyml-utils/example/main201.py | 1 + energyml-utils/example/main_data.py | 179 +++-- energyml-utils/example/mainjson.py | 45 ++ ...s_9789c24e-9b5a-4c87-82e8-32dc3f751681.xml | 14 + ...e_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml | 11 + ...s_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml | 27 + ...n_2f8778ca-6a09-446b-b25d-b725ec759a70.xml | 238 +++++++ ..._7194be4d-169d-420c-98a5-d3ec4671f0cc.json | 478 +++++++++++++ energyml-utils/src/energyml/utils/__init__.py | 2 +- .../src/energyml/utils/constants.py | 242 +++++++ .../src/energyml/utils/data/__init__.py | 2 +- energyml-utils/src/energyml/utils/data/hdf.py | 172 +++-- .../src/energyml/utils/data/helper.py | 420 +++++++---- .../src/energyml/utils/data/mesh.py | 428 +++++++---- energyml-utils/src/energyml/utils/epc.py | 376 ++++++---- .../src/energyml/utils/exception.py | 15 +- .../src/energyml/utils/introspection.py | 668 ++++++++++++------ energyml-utils/src/energyml/utils/manager.py | 55 +- .../src/energyml/utils/serialization.py | 417 ++++++++++- energyml-utils/src/energyml/utils/uri.py | 94 +++ .../src/energyml/utils/validation.py | 39 +- energyml-utils/src/energyml/utils/xml.py | 119 +--- energyml-utils/tests/test_epc.py | 143 ++++ 
energyml-utils/tests/test_introspection.py | 31 +- energyml-utils/tests/test_uri.py | 90 +++ energyml-utils/tests/test_xml.py | 225 ++++++ 29 files changed, 3767 insertions(+), 963 deletions(-) create mode 100644 energyml-utils/example/mainjson.py create mode 100644 energyml-utils/rc/VerticalCrs_9789c24e-9b5a-4c87-82e8-32dc3f751681.xml create mode 100644 energyml-utils/rc/obj_EpcExternalPartReference_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml create mode 100644 energyml-utils/rc/obj_LocalDepth3dCrs_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml create mode 100644 energyml-utils/rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml create mode 100644 energyml-utils/rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json create mode 100644 energyml-utils/src/energyml/utils/constants.py create mode 100644 energyml-utils/src/energyml/utils/uri.py create mode 100644 energyml-utils/tests/test_epc.py create mode 100644 energyml-utils/tests/test_uri.py create mode 100644 energyml-utils/tests/test_xml.py diff --git a/energyml-utils/docs/src/energyml/utils/manager.html b/energyml-utils/docs/src/energyml/utils/manager.html index 016a54e..6dbad1a 100644 --- a/energyml-utils/docs/src/energyml/utils/manager.html +++ b/energyml-utils/docs/src/energyml/utils/manager.html @@ -34,8 +34,8 @@

Module src.energyml.utils.manager

import re from typing import List, Union, Any -REGEX_ENERGYML_MODULE_NAME = r"energyml\.(?P<pkg>.*)\.v(?P<version>(?P<versionNumber>\d+(_\d+)*)(_dev(?P<versionDev>.*))?)\..*" -REGEX_PROJECT_VERSION = r"(?P<n0>[\d]+)(.(?P<n1>[\d]+)(.(?P<n2>[\d]+))?)?" +RGX_ENERGYML_MODULE_NAME = r"energyml\.(?P<pkg>.*)\.v(?P<version>(?P<versionNumber>\d+(_\d+)*)(_dev(?P<versionDev>.*))?)\..*" +RGX_PROJECT_VERSION = r"(?P<n0>[\d]+)(.(?P<n1>[\d]+)(.(?P<n2>[\d]+))?)?" ENERGYML_MODULES_NAMES = ["eml", "prodml", "witsml", "resqml"] @@ -196,7 +196,7 @@

Module src.energyml.utils.manager

def get_class_pkg(cls): try: - p = re.compile(REGEX_ENERGYML_MODULE_NAME) + p = re.compile(RGX_ENERGYML_MODULE_NAME) m = p.search(cls.__module__) return m.group("pkg") except AttributeError as e: @@ -210,7 +210,7 @@

Module src.energyml.utils.manager

else, the original version is returned. Example : reshapeVersion("v2.0.1", 2) ==> "2.0" and reshapeVersion("version2.0.1.3.2.5", 4) ==> "version2.0.1.3.2.5" """ - p = re.compile(REGEX_PROJECT_VERSION) + p = re.compile(RGX_PROJECT_VERSION) m = p.search(version) if m is not None: n0 = m.group("n0") @@ -233,7 +233,7 @@

Module src.energyml.utils.manager

def get_class_pkg_version( cls, print_dev_version: bool = True, nb_max_version_digits: int = 2 ): - p = re.compile(REGEX_ENERGYML_MODULE_NAME) + p = re.compile(RGX_ENERGYML_MODULE_NAME) m = p.search( cls.__module__ if isinstance(cls, type) else type(cls).__module__ ) @@ -364,7 +364,7 @@

Functions

def get_class_pkg(cls):
     try:
-        p = re.compile(REGEX_ENERGYML_MODULE_NAME)
+        p = re.compile(RGX_ENERGYML_MODULE_NAME)
         m = p.search(cls.__module__)
         return m.group("pkg")
     except AttributeError as e:
@@ -384,7 +384,7 @@ 

Functions

def get_class_pkg_version(
     cls, print_dev_version: bool = True, nb_max_version_digits: int = 2
 ):
-    p = re.compile(REGEX_ENERGYML_MODULE_NAME)
+    p = re.compile(RGX_ENERGYML_MODULE_NAME)
     m = p.search(
         cls.__module__ if isinstance(cls, type) else type(cls).__module__
     )
@@ -554,7 +554,7 @@ 

Functions

else, the original version is returned. Example : reshapeVersion("v2.0.1", 2) ==> "2.0" and reshapeVersion("version2.0.1.3.2.5", 4) ==> "version2.0.1.3.2.5" """ - p = re.compile(REGEX_PROJECT_VERSION) + p = re.compile(RGX_PROJECT_VERSION) m = p.search(version) if m is not None: n0 = m.group("n0") diff --git a/energyml-utils/docs/src/energyml/utils/xml.html b/energyml-utils/docs/src/energyml/utils/xml.html index 2740f59..a2100c4 100644 --- a/energyml-utils/docs/src/energyml/utils/xml.html +++ b/energyml-utils/docs/src/energyml/utils/xml.html @@ -58,46 +58,46 @@

Module src.energyml.utils.xml

dict of all energyml namespace packages """ # pylint: disable=W0105 -REGEX_UUID_NO_GRP = ( +RGX_UUID_NO_GRP = ( r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" ) -REGEX_UUID = r"(?P<uuid>" + REGEX_UUID_NO_GRP + ")" -REGEX_DOMAIN_VERSION = r"(?P<domainVersion>(?P<versionNum>([\d]+[\._])*\d)\s*(?P<dev>dev\s*(?P<devNum>[\d]+))?)" -REGEX_DOMAIN_VERSION_FLAT = r"(?P<domainVersion>(?P<versionNumFlat>([\d]+)*\d)\s*(?P<dev>dev\s*(?P<devNum>[\d]+))?)" +RGX_UUID = r"(?P<uuid>" + RGX_UUID_NO_GRP + ")" +RGX_DOMAIN_VERSION = r"(?P<domainVersion>(?P<versionNum>([\d]+[\._])*\d)\s*(?P<dev>dev\s*(?P<devNum>[\d]+))?)" +RGX_DOMAIN_VERSION_FLAT = r"(?P<domainVersion>(?P<versionNumFlat>([\d]+)*\d)\s*(?P<dev>dev\s*(?P<devNum>[\d]+))?)" # ContentType -REGEX_MIME_TYPE_MEDIA = r"(?P<media>application|audio|font|example|image|message|model|multipart|text|video)" -REGEX_CT_ENERGYML_DOMAIN = r"(?P<energymlDomain>x-(?P<domain>[\w]+)\+xml)" -REGEX_CT_XML_DOMAIN = r"(?P<xmlRawDomain>(x\-)?(?P<xmlDomain>.+)\+xml)" -REGEX_CT_TOKEN_VERSION = r"version=" + REGEX_DOMAIN_VERSION -REGEX_CT_TOKEN_TYPE = r"type=(?P<type>[\w\_]+)" - -REGEX_CONTENT_TYPE = ( - REGEX_MIME_TYPE_MEDIA + "/" - + "(?P<rawDomain>(" + REGEX_CT_ENERGYML_DOMAIN + ")|(" + REGEX_CT_XML_DOMAIN + r")|([\w-]+\.?)+)" - + "(;((" + REGEX_CT_TOKEN_VERSION + ")|(" + REGEX_CT_TOKEN_TYPE + ")))*" +RGX_MIME_TYPE_MEDIA = r"(?P<media>application|audio|font|example|image|message|model|multipart|text|video)" +RGX_CT_ENERGYML_DOMAIN = r"(?P<energymlDomain>x-(?P<domain>[\w]+)\+xml)" +RGX_CT_XML_DOMAIN = r"(?P<xmlRawDomain>(x\-)?(?P<xmlDomain>.+)\+xml)" +RGX_CT_TOKEN_VERSION = r"version=" + RGX_DOMAIN_VERSION +RGX_CT_TOKEN_TYPE = r"type=(?P<type>[\w\_]+)" + +RGX_CONTENT_TYPE = ( + RGX_MIME_TYPE_MEDIA + "/" + + "(?P<rawDomain>(" + RGX_CT_ENERGYML_DOMAIN + ")|(" + RGX_CT_XML_DOMAIN + r")|([\w-]+\.?)+)" + + "(;((" + RGX_CT_TOKEN_VERSION + ")|(" + RGX_CT_TOKEN_TYPE + ")))*" ) -REGEX_QUALIFIED_TYPE = ( - 
r"(?P<domain>[a-zA-Z]+)" + REGEX_DOMAIN_VERSION_FLAT + r"\.(?P<type>[\w_]+)" +RGX_QUALIFIED_TYPE = ( + r"(?P<domain>[a-zA-Z]+)" + RGX_DOMAIN_VERSION_FLAT + r"\.(?P<type>[\w_]+)" ) # ========= -REGEX_SCHEMA_VERSION = ( +RGX_SCHEMA_VERSION = ( r"(?P<name>[eE]ml|[cC]ommon|[rR]esqml|[wW]itsml|[pP]rodml)?\s*v?" - + REGEX_DOMAIN_VERSION + + RGX_DOMAIN_VERSION + r"\s*$" ) -REGEX_ENERGYML_FILE_NAME_OLD = r"(?P<type>[\w]+)_" + REGEX_UUID_NO_GRP + r"\.xml$" -REGEX_ENERGYML_FILE_NAME_NEW = ( - REGEX_UUID_NO_GRP + r"\.(?P<objectVersion>\d+(\.\d+)*)\.xml$" +RGX_ENERGYML_FILE_NAME_OLD = r"(?P<type>[\w]+)_" + RGX_UUID_NO_GRP + r"\.xml$" +RGX_ENERGYML_FILE_NAME_NEW = ( + RGX_UUID_NO_GRP + r"\.(?P<objectVersion>\d+(\.\d+)*)\.xml$" ) -REGEX_ENERGYML_FILE_NAME = ( - rf"^(.*/)?({REGEX_ENERGYML_FILE_NAME_OLD})|({REGEX_ENERGYML_FILE_NAME_NEW})" +RGX_ENERGYML_FILE_NAME = ( + rf"^(.*/)?({RGX_ENERGYML_FILE_NAME_OLD})|({RGX_ENERGYML_FILE_NAME_NEW})" ) -REGEX_XML_HEADER = r"^\s*\<\?xml\s+((encoding\s*=\s*\"(?P<encoding>[^\"]+)\"|version\s*=\s*\"(?P<version>[^\"]+)\"|standalone\s*=\s*\"(?P<standalone>[^\"]+)\")\s+)+" +RGX_XML_HEADER = r"^\s*\<\?xml\s+((encoding\s*=\s*\"(?P<encoding>[^\"]+)\"|version\s*=\s*\"(?P<version>[^\"]+)\"|standalone\s*=\s*\"(?P<standalone>[^\"]+)\")\s+)+" def get_pkg_from_namespace(namespace: str) -> Optional[str]: @@ -138,7 +138,7 @@

Module src.energyml.utils.xml

def get_xml_encoding(xml_content: str) -> Optional[str]: try: - m = re.search(REGEX_XML_HEADER, xml_content) + m = re.search(RGX_XML_HEADER, xml_content) return m.group("encoding") except AttributeError: return "utf-8" @@ -207,7 +207,7 @@

Module src.energyml.utils.xml

def parse_content_type(ct: str): - return re.search(REGEX_CONTENT_TYPE, ct)
+ return re.search(RGX_CONTENT_TYPE, ct)
@@ -402,7 +402,7 @@

Functions

def get_xml_encoding(xml_content: str) -> Optional[str]:
     try:
-        m = re.search(REGEX_XML_HEADER, xml_content)
+        m = re.search(RGX_XML_HEADER, xml_content)
         return m.group("encoding")
     except AttributeError:
         return "utf-8"
@@ -432,7 +432,7 @@

Functions

Expand source code
def parse_content_type(ct: str):
-    return re.search(REGEX_CONTENT_TYPE, ct)
+ return re.search(RGX_CONTENT_TYPE, ct)
diff --git a/energyml-utils/example/main.py b/energyml-utils/example/main.py index 0e7e007..3d2d376 100644 --- a/energyml-utils/example/main.py +++ b/energyml-utils/example/main.py @@ -1,12 +1,16 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 +import json from dataclasses import fields from energyml.eml.v2_3.commonv2 import * +from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array from energyml.resqml.v2_2.resqmlv2 import ( TriangulatedSetRepresentation, FaultInterpretation, - ContactElement, AbstractPoint3DArray, AbstractColorMap, + ContactElement, + AbstractPoint3DArray, + AbstractColorMap, ) from src.energyml.utils.data.hdf import * @@ -15,8 +19,10 @@ from src.energyml.utils.manager import * from src.energyml.utils.serialization import * from src.energyml.utils.validation import ( - patterns_verification, - dor_verification, validate_epc, correct_dor, + patterns_validation, + dor_validation, + validate_epc, + correct_dor, ) from src.energyml.utils.xml import * @@ -52,6 +58,13 @@ qualified_type="a wrong qualified type", ) +dor_correct = DataObjectReference( + uuid=fi.uuid, + title="a DOR title", + object_version="0", + qualified_type="resqml23.TriangulatedSetRepresentation", +) + tr = TriangulatedSetRepresentation( citation=tr_cit, uuid=gen_uuid(), @@ -72,7 +85,7 @@ def tests_0(): print(serialize_xml(tr_cit)) - print(serialize_json(tr)) + print(serialize_json(tr, JSON_VERSION.XSDATA)) print(tr.citation) print(get_obj_uuid(tr)) print("path: ", gen_energyml_object_path(tr)) @@ -126,7 +139,7 @@ def file_test(): def tests_content_type(): - print(REGEX_CONTENT_TYPE) + print(RGX_CONTENT_TYPE) print( parse_content_type( @@ -164,6 +177,18 @@ def tests_content_type(): print(get_content_type_from_class(tr)) print(get_qualified_type_from_class(tr)) + print( + get_qualified_type_from_class(DoubleHdf5Array()), + get_class_from_qualified_type( + get_qualified_type_from_class(DoubleHdf5Array()) + ), + ) + print( + 
get_qualified_type_from_class(dor_correct), + get_class_from_qualified_type( + get_qualified_type_from_class(dor_correct) + ), + ) print(gen_energyml_object_path(tr, EpcExportVersion.EXPANDED)) print(gen_energyml_object_path(tr)) @@ -173,7 +198,7 @@ def tests_epc(): epc = Epc.read_file( "D:/Geosiris/Github/energyml/#data/Volve_Horizons_and_Faults_Depth_originEQN_v2.2_colorised.epc" ) - print(serialize_json(epc.gen_opc_content_type())) + print(serialize_json(epc.gen_opc_content_type(), JSON_VERSION.XSDATA)) print(epc) epc.export_file("D:/Geosiris/Github/energyml/energyml-python/test.epc") epc.export_version = EpcExportVersion.EXPANDED @@ -225,15 +250,15 @@ def tests_dor(): def test_verif(): print(get_class_fields(tr)) - for err in patterns_verification(tr): + for err in patterns_validation(tr): print(err) print("DOR verif no fi") - for err in dor_verification([tr]): + for err in dor_validation([tr]): print(err) print("DOR verif with fi") - for err in dor_verification([tr, fi]): + for err in dor_validation([tr, fi]): print(err) @@ -298,13 +323,15 @@ def test_introspection(): print(get_class_fields(tr)["citation"]) print(EPCRelsRelationshipType._member_names_) - print(EPCRelsRelationshipType['DESTINATION_OBJECT'].value) + print(EPCRelsRelationshipType["DESTINATION_OBJECT"].value) print(random_value_from_class(EPCRelsRelationshipType)) print(random_value_from_class(EPCRelsRelationshipType)) print(TriangulatedSetRepresentation.__dataclass_params__) # print(random_value_from_class(int)) - print(serialize_xml(random_value_from_class(TriangulatedSetRepresentation))) + print( + serialize_xml(random_value_from_class(TriangulatedSetRepresentation)) + ) # print(serialize_json(random_value_from_class(TriangulatedSetRepresentation))) print(search_attribute_matching_name_with_path(tr, "[tT]it.*")) @@ -313,7 +340,13 @@ def test_introspection(): print(AbstractPoint3DArray.__dict__) print(TriangulatedSetRepresentation.__dict__) print(get_sub_classes(AbstractObject)) - 
print(list(filter(lambda _c: not is_abstract(_c), get_sub_classes(AbstractObject)))) + print( + list( + filter( + lambda _c: not is_abstract(_c), get_sub_classes(AbstractObject) + ) + ) + ) print(AbstractColorMap.__name__.startswith("Abstract")) print(is_abstract(AbstractColorMap)) @@ -331,13 +364,17 @@ def test_introspection(): print(f"object: {is_abstract(object)}") print(f"HDF5FileReader: {is_abstract(HDF5FileReader)}") - print(f"TriangulatedSetRepresentation: {is_abstract(TriangulatedSetRepresentation)}") + print( + f"TriangulatedSetRepresentation: {is_abstract(TriangulatedSetRepresentation)}" + ) # print("HDF5FileReader") # for func in dir(HDF5FileReader): # if callable(getattr(HDF5FileReader, func)) and not func.startswith("__"): # print(f"\t{func} {type(getattr(HDF5FileReader, func))}") - print(get_classes_matching_name(TriangulatedSetRepresentation, "Abstract.*")) + print( + get_classes_matching_name(TriangulatedSetRepresentation, "Abstract.*") + ) # print(get_matching_class_attribute_name(ExternalDataArrayPart, "(PathInHdfFile|PathInExternalFile)")) # print(object.__module__) # print(serialize_xml(random_value_from_class(PointSetRepresentation))) @@ -351,15 +388,21 @@ def test_introspection(): # print(serialize_xml(poly)) print("=====] ", r"ClosedPolylines.\d+") - for array_path, array_value in search_attribute_matching_name_with_path(poly, r"ClosedPolylines.\d+"): + for array_path, array_value in search_attribute_matching_name_with_path( + poly, r"ClosedPolylines.\d+" + ): print(f"{array_path}\n\t{array_value}") print("=====] ", r"ClosedPolylines.values.\d+") - for array_path, array_value in search_attribute_matching_name_with_path(poly, r"ClosedPolylines.values.\d+"): + for array_path, array_value in search_attribute_matching_name_with_path( + poly, r"ClosedPolylines.values.\d+" + ): print(f"{array_path}\n\t{array_value}") print("=====] ", r"LinePatch.\d+") - for array_path, array_value in search_attribute_matching_name_with_path(poly, r"LinePatch.\d+"): 
+ for array_path, array_value in search_attribute_matching_name_with_path( + poly, r"LinePatch.\d+" + ): print(f"{array_path}\n\t{array_value}") @@ -368,7 +411,12 @@ def tests_hdf(): "D:/Geosiris/Github/energyml/#data/Volve_Horizons_and_Faults_Depth_originEQN_v2.2_colorised.epc" ) - tr_list = list(filter(lambda e: e.__class__.__name__ == "TriangulatedSetRepresentation", epc.energyml_objects)) + tr_list = list( + filter( + lambda e: e.__class__.__name__ == "TriangulatedSetRepresentation", + epc.energyml_objects, + ) + ) print(len(epc.energyml_objects)) # print(tr_list) @@ -378,6 +426,44 @@ def tests_hdf(): exit(0) +def test_local_depth_crs(): + # Fails because the xsi:type="VerticalCrsEpsgCode" doesn't + # contain the namespace : xsi:type="eml:VerticalCrsEpsgCode" + try: + depth3d = read_energyml_xml_file( + "../rc/obj_LocalDepth3dCrs_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml" + ) + print(serialize_json(depth3d, JSON_VERSION.XSDATA)) + print(serialize_xml(depth3d)) + except Exception as e: + print(e) + + +def test_wellbore_marker_frame_representation(): + # Fails because the xsi:type="VerticalCrsEpsgCode" doesn't + # contain the namespace : xsi:type="eml:VerticalCrsEpsgCode" + try: + depth3d = read_energyml_xml_file( + "../rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml" + ) + print("read_success") + # print(serialize_json(depth3d, JSON_VERSION.XSDATA)) + print(serialize_json(depth3d, JSON_VERSION.OSDU_OFFICIAL)) + # print(serialize_xml(depth3d)) + except Exception as e: + print(e) + raise e + # print(traceback.print_stack()) + + +def test_obj_attribs(): + print(get_obj_pkg_pkgv_type_uuid_version(dor_correct)) + print(get_obj_pkg_pkgv_type_uuid_version(tr)) + + print(get_obj_uri(dor_correct, "coucou")) + print(get_obj_uri(tr, "coucou")) + + if __name__ == "__main__": tests_0() tests_content_type() @@ -389,4 +475,7 @@ def tests_hdf(): test_introspection() tests_hdf() - print(get_object_attribute("")) + test_local_depth_crs() + 
test_wellbore_marker_frame_representation() + + test_obj_attribs() diff --git a/energyml-utils/example/main201.py b/energyml-utils/example/main201.py index 7557585..d18c097 100644 --- a/energyml-utils/example/main201.py +++ b/energyml-utils/example/main201.py @@ -17,6 +17,7 @@ def import_modules(): if __name__ == "__main__": import ast + mod = importlib.import_module("energyml.eml.v2_3.commonv2") oa = getattr(mod, "ObjectAlias") print(exec("from energyml.eml.v2_3.commonv2 import *")) diff --git a/energyml-utils/example/main_data.py b/energyml-utils/example/main_data.py index 489511d..6544a29 100644 --- a/energyml-utils/example/main_data.py +++ b/energyml-utils/example/main_data.py @@ -1,26 +1,51 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 -from energyml.eml.v2_3.commonv2 import JaggedArray, AbstractValueArray, AbstractIntegerArray, StringXmlArray, \ - IntegerXmlArray - -from src.energyml.utils.data.hdf import get_hdf_reference_with_path, get_hdf5_path_from_external_path -from src.energyml.utils.data.helper import get_array_reader_function, get_supported_array, get_not_supported_array +from energyml.eml.v2_3.commonv2 import ( + JaggedArray, + AbstractValueArray, + AbstractIntegerArray, + StringXmlArray, + IntegerXmlArray, +) +from energyml.resqml.v2_0_1.resqmlv2 import WellboreMarkerFrameRepresentation + +from src.energyml.utils.data.hdf import ( + get_hdf_reference_with_path, + get_hdf5_path_from_external_path, +) +from src.energyml.utils.data.helper import ( + get_array_reader_function, + get_supported_array, + get_not_supported_array, +) from src.energyml.utils.data.mesh import * from src.energyml.utils.epc import gen_energyml_object_path -from src.energyml.utils.introspection import is_abstract, get_obj_uuid +from src.energyml.utils.introspection import is_abstract, get_obj_uuid, get_class_fields from src.energyml.utils.manager import get_sub_classes -from src.energyml.utils.serialization import read_energyml_xml_file, 
read_energyml_xml_str, read_energyml_xml_bytes +from src.energyml.utils.serialization import ( + read_energyml_xml_file, + read_energyml_xml_str, + read_energyml_xml_bytes, +) from src.energyml.utils.validation import validate_epc -from src.energyml.utils.xml import REGEX_CONTENT_TYPE +from src.energyml.utils.xml import RGX_CONTENT_TYPE def test_array(): hdf5filereader = HDF5FileReader() - hdf5filereader.read_array("../../../#data/Volve_Horizons_and_Faults_Depth_originEQN_Plus.h5", "/RESQML/d9b95bb5-019d-4341-bcf6-df392338187f/points_patch0") - print(hdf5filereader.get_array_dimension("../../../#data/Volve_Horizons_and_Faults_Depth_originEQN_Plus.h5", "/RESQML/d9b95bb5-019d-4341-bcf6-df392338187f/points_patch0")) + hdf5filereader.read_array( + "../../../#data/Volve_Horizons_and_Faults_Depth_originEQN_Plus.h5", + "/RESQML/d9b95bb5-019d-4341-bcf6-df392338187f/points_patch0", + ) + print( + hdf5filereader.get_array_dimension( + "../../../#data/Volve_Horizons_and_Faults_Depth_originEQN_Plus.h5", + "/RESQML/d9b95bb5-019d-4341-bcf6-df392338187f/points_patch0", + ) + ) # print(hdf5filereader.read_array("../../../#data/Volve_Horizons_and_Faults_Depth_originEQN_Plus.h5", "/RESQML/d9b95bb5-019d-4341-bcf6-df392338187f/points_patch0")) @@ -32,12 +57,16 @@ def test_h5_path(): ref_obj = epc.get_object_by_uuid("2bbac140-ff17-4649-ae85-52a9285a4373")[0] for refer_path, refer_value in get_hdf_reference_with_path(ref_obj): try: - print(get_hdf5_path_from_external_path( - external_path_obj=get_object_attribute(ref_obj, refer_path), - path_in_root=refer_path, - root_obj=ref_obj, - epc=epc, - )) + print( + get_hdf5_path_from_external_path( + external_path_obj=get_object_attribute( + ref_obj, refer_path + ), + path_in_root=refer_path, + root_obj=ref_obj, + epc=epc, + ) + ) # print("CRS:", get_crs_obj( # context_obj=get_object_attribute(ref_obj, refer_path), # path_in_root=refer_path, @@ -56,15 +85,19 @@ def test_h5_path(): print(epc201.additional_rels) - ref_obj = 
epc201.get_object_by_uuid("2bbac140-ff17-4649-ae85-52a9285a4373")[0] + ref_obj = epc201.get_object_by_uuid( + "2bbac140-ff17-4649-ae85-52a9285a4373" + )[0] for refer_path, refer_value in get_hdf_reference_with_path(ref_obj): try: - print(get_hdf5_path_from_external_path( - external_path_obj=refer_value, - path_in_root=refer_path, - root_obj=ref_obj, - epc=epc201, - )) + print( + get_hdf5_path_from_external_path( + external_path_obj=refer_value, + path_in_root=refer_path, + root_obj=ref_obj, + epc=epc201, + ) + ) # crs = get_crs_obj( # context_obj=refer_value, # path_in_root=refer_path, @@ -87,8 +120,7 @@ def read_h5_datasets(): print(epc201.epc_file_path) pt_set_list = read_point_representation( - energyml_object=psr, - workspace=EPCWorkspace(epc=epc201) + energyml_object=psr, workspace=EPCWorkspace(epc=epc201) ) with open("../example/result/file_point_set.off", "wb") as f: @@ -202,12 +234,16 @@ def read_meshes(): workspace=EPCWorkspace(epc22), ) print("Exporting") - with open(f"result/{gen_energyml_object_path(energyml_obj)}.obj", "wb") as f: + with open( + f"result/{gen_energyml_object_path(energyml_obj)}.obj", "wb" + ) as f: export_obj( mesh_list=mesh_list, out=f, ) - with open(f"result/{gen_energyml_object_path(energyml_obj)}.off", "wb") as f: + with open( + f"result/{gen_energyml_object_path(energyml_obj)}.off", "wb" + ) as f: export_off( mesh_list=mesh_list, out=f, @@ -221,7 +257,9 @@ def read_arrays(): print(get_array_reader_function("BooleanConstantArray")) print("=====] ", r"LinePatch.\d+") - for array_path, array_value in search_attribute_matching_name_with_path(poly, r"LinePatch.\d+.ClosedPolylines"): + for array_path, array_value in search_attribute_matching_name_with_path( + poly, r"LinePatch.\d+.ClosedPolylines" + ): # print(f"{array_path}\n\t{array_value}") try: val = read_array( @@ -234,8 +272,16 @@ def read_arrays(): except Exception as e: print(e) - print([x for x in get_sub_classes(AbstractValueArray) if not is_abstract(x)]) - print([x for x in 
get_sub_classes(AbstractIntegerArray) if not is_abstract(x)]) + print( + [x for x in get_sub_classes(AbstractValueArray) if not is_abstract(x)] + ) + print( + [ + x + for x in get_sub_classes(AbstractIntegerArray) + if not is_abstract(x) + ] + ) jagged_array = JaggedArray( elements=StringXmlArray( @@ -265,7 +311,6 @@ def test_export_multiple(): "8659a66c-8727-420a-badf-578819698239", # TrSet "4e23ee3e-54a7-427a-83f9-1473de6c56a4", # polyline "38bf3283-9514-43ab-81e3-17080dc5826f", # polyline - ] export_multiple_data( epc_path="D:/Geosiris/Cloud/Resqml_Tools/2023-DATA/03_VOLVE/V2.0.1/EQN_ORIGIN_PLUS_TRIANG_SET/" @@ -325,7 +370,6 @@ def test_export_closed_poly(): uuid_list=[ "4e23ee3e-54a7-427a-83f9-1473de6c56a4", # polyline "38bf3283-9514-43ab-81e3-17080dc5826f", # polyline - ], output_folder_path="../example/result/export-energyml-utils", # output_folder_path="D:/Geosiris/OSDU/manifestTranslation/#Data/export-energyml-utils", @@ -338,7 +382,6 @@ def test_export_closed_poly(): uuid_list=[ "4e23ee3e-54a7-427a-83f9-1473de6c56a4", # polyline "38bf3283-9514-43ab-81e3-17080dc5826f", # polyline - ], output_folder_path="../example/result/export-energyml-utils", # output_folder_path="D:/Geosiris/OSDU/manifestTranslation/#Data/export-energyml-utils", @@ -381,23 +424,61 @@ def test_read_resqml22dev3(): ) -if __name__ == "__main__": - test_array() - test_h5_path() - read_h5_datasets() - read_h5_polyline() - read_arrays() +def test_read_external_part_with_xsi(): + path = "../rc/obj_EpcExternalPartReference_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml" + + with open(path, "rb") as f: + xml_content = f.read() + # print(xml_content) + + print(read_energyml_xml_bytes(xml_content)) + + path = "../rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml" + + with open(path, "rb") as f: + xml_content = f.read() + # print(xml_content) + + print(read_energyml_xml_bytes(xml_content)) + - print("Supported : ", get_supported_array()) - print("Not supported : ", 
get_not_supported_array()) +def read_unreferenced_h5_file(): + epc_path = "D:/Geosiris/#Data/RDDMS/F2F_Demo.epc" + # epc_path = "D:/Geosiris/Cloud/Resqml_Tools/OSDU/OSDU_RESERVOIR_DDMS/F2F_Demo.epc" + # epc = Epc.read_file(epc_path) - read_h5_grid2d() - read_h5_grid2d_bis() - print(REGEX_CONTENT_TYPE) + uuid_list = ["3f8ee378-f3d2-40ab-9980-abb0853f69c3"] - read_meshes() + export_multiple_data( + epc_path=epc_path, + uuid_list=uuid_list, + output_folder_path="../example/result/notReferencedH5/", + # output_folder_path="D:/Geosiris/Clients/Egis/Documents/Data/4 MNT Trojena/", + file_format=MeshFileFormat.OBJ, + ) - test_export_multiple() - test_export_closed_poly() - test_export_multiple_testing_package() - test_read_resqml22dev3() + +if __name__ == "__main__": + print(get_class_fields(WellboreMarkerFrameRepresentation)) + # test_array() + # test_h5_path() + # read_h5_datasets() + # read_h5_polyline() + # read_arrays() + # + # print("Supported : ", get_supported_array()) + # print("Not supported : ", get_not_supported_array()) + # + # read_h5_grid2d() + # read_h5_grid2d_bis() + # print(RGX_CONTENT_TYPE) + # + # read_meshes() + # + # test_export_multiple() + # test_export_closed_poly() + # test_export_multiple_testing_package() + # test_read_resqml22dev3() + # + # test_read_external_part_with_xsi() + # read_unreferenced_h5_file() diff --git a/energyml-utils/example/mainjson.py b/energyml-utils/example/mainjson.py new file mode 100644 index 0000000..bf66f45 --- /dev/null +++ b/energyml-utils/example/mainjson.py @@ -0,0 +1,45 @@ +import json + +from src.energyml.utils.introspection import get_obj_identifier +from src.energyml.utils.serialization import ( + read_energyml_json_str, + JSON_VERSION, +) +from src.energyml.utils.serialization import to_json_dict + + +def read_json0(): + filePath = "../rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json" + with open(filePath, "r") as f: + f_content = f.read() + for o in 
read_energyml_json_str(f_content, JSON_VERSION.OSDU_OFFICIAL): + print("> ", o) + + +# import energyml.resqml.v2_0_1.resqmlv2 + + +def write_json0(): + filePath = "../rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json" + with open(filePath, "r") as f: + f_content = f.read() + objs = read_energyml_json_str(f_content, JSON_VERSION.OSDU_OFFICIAL) + result = json.dumps( + to_json_dict(objs[0], {get_obj_identifier(o): o for o in objs}), + indent=4, + ) + + print(json.dumps(json.loads(f_content), sort_keys=True)) + print(json.dumps(json.loads(result), sort_keys=True)) + + assert json.dumps(json.loads(f_content), sort_keys=True) == json.dumps( + json.loads(result), sort_keys=True + ) + + +if __name__ == "__main__": + # read_json0() + write_json0() + # print(sys.modules["energyml.resqml.v2_0_1.resqmlv2"].__dict__) + # for name, obj in inspect.getmembers(sys.modules["energyml.resqml.v2_0_1.resqmlv2"], inspect.isclass): + # print(obj) diff --git a/energyml-utils/rc/VerticalCrs_9789c24e-9b5a-4c87-82e8-32dc3f751681.xml b/energyml-utils/rc/VerticalCrs_9789c24e-9b5a-4c87-82e8-32dc3f751681.xml new file mode 100644 index 0000000..d5e1416 --- /dev/null +++ b/energyml-utils/rc/VerticalCrs_9789c24e-9b5a-4c87-82e8-32dc3f751681.xml @@ -0,0 +1,14 @@ + + + + VerticalCrs of 9a71ab6c-a21a-4d11-aa41-873417b01f3d + ResqmlConverter (Geosiris) + 2023-05-23T09:30:21.918+02:00 + ResqmlConverter v1.3.11 + 2023-05-23T09:30:21.918+02:00 + + down + + Unknown + + diff --git a/energyml-utils/rc/obj_EpcExternalPartReference_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml b/energyml-utils/rc/obj_EpcExternalPartReference_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml new file mode 100644 index 0000000..4144417 --- /dev/null +++ b/energyml-utils/rc/obj_EpcExternalPartReference_61fa2fdf-46ab-4c02-ab72-7895cce58e37.xml @@ -0,0 +1,11 @@ + + + + Hdf5File + A User + 2024-04-29T15:45:33Z + Format fot test + 2024-04-29T15:45:33Z + + application/x-hdf5 + \ No newline at end of file diff --git 
a/energyml-utils/rc/obj_LocalDepth3dCrs_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml b/energyml-utils/rc/obj_LocalDepth3dCrs_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml new file mode 100644 index 0000000..c007c07 --- /dev/null +++ b/energyml-utils/rc/obj_LocalDepth3dCrs_716f6472-18a3-4f19-a57c-d4f5642ccc53.xml @@ -0,0 +1,27 @@ + + + + Default + aplougoulen + 2019-03-22T10:29:55Z + Paradigm SKUA-GOCAD 19 Alpha 2 Build://skua-gocad/Production/trunk - 20190322-cl867561 for Win_x64_6.1_v15 + + + pdgm/dx/resqml/project + 79ae8a84-c896-46f8-81cf-c9a689c5352d + + 6470000.0 + -0.0 + 0.0 + easting northing + m + m + 420000.0 + true + + 6230 + + + 23031 + + diff --git a/energyml-utils/rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml b/energyml-utils/rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml new file mode 100644 index 0000000..806f371 --- /dev/null +++ b/energyml-utils/rc/obj_WellboreMarkerFrameRepresentation_2f8778ca-6a09-446b-b25d-b725ec759a70.xml @@ -0,0 +1,238 @@ + + + + Imported_Markers + maap + 2024-02-06T09:35:00Z + PDGM-DX ETP Client 2.0.1 + 2024-02-06T09:35:00Z + + + + + 0 + + + + + + pdgm/dx/resqml/creatorGroup + maap + + + pdgm/dx/resqml/project + e18f74cf-ba12-4fa0-bcde-a74b38b0d2df + + + pdgm/dx/gocad/ScenarioName + |UNNAMED| + + + pdgm/dx/gocad/ScenarioUid + 9ddb3435-474e-428b-aad6-e669624eed55 + + + pdgm/dx/gocad/activeInScenarios + 
9ddb3435-474e-428b-aad6-e669624eed55,12aa74a8-0576-4da9-8f7c-5be74a38a6ec,9318fe4d-15af-4b43-85e8-96cbcd11c4ab,85c87f18-d214-44fc-b8f0-8c9be7aa87ee,a98c2cdd-4d1b-495c-8fd3-6dd1186556e0,4851161a-c674-4563-89e3-159a455542c9,9d7fa43b-ca4a-4c58-a1b9-d92efbe91a53,ebee3c6a-2445-40f2-9397-786fd13772db,e83cc775-cc60-4255-afcc-69188b25fb22,1e6472c9-4afa-4851-b650-c55028534cba,b0344013-ef5f-481c-9fa0-8ad3715c75a8,68470459-cd52-4548-8318-9e506f07b766,974b809a-578f-4101-8074-28b81c03987a,1da35488-74a1-45aa-a83a-4b060adfddd4,624b5763-a642-4bd5-8afb-72b16473d21c,987a86e9-75e1-48eb-9779-7014a997a171,bc12289f-0661-4e45-bb6b-8349058c8ac1,5cf954d7-85c0-4e06-a0dd-f32b50006627,45e28153-50bc-475d-a8a9-daf22b5b3399,2462cb77-df7d-4cc3-9dba-e0749976c296,3e2f6626-4346-40de-bc80-6db6b7b56010,73b9f9fd-cd9c-47f2-a7dc-74fff4e29b5d,2900c744-0ae9-4569-bcb9-0f57a3c9172f,759d6e22-af75-4865-9804-bce7d1eeedba,9e3d6d42-b5da-4006-85b2-789f7bbc1217,aea0dccc-5f61-43d2-b461-7fabb02abab5,0bed2161-5779-421b-871a-afc85398397f,dd662540-5f0d-4822-88e9-5fd6b2228f2f,fd985491-5e87-4e60-b766-3e742e20f98f,edbe0a57-120d-4393-83ae-b2a39a2a951d,4276ccfc-f121-4afc-8b59-d0768d6ee89d,a02ed0a8-23e1-4962-92cd-fcff40930623,a0f51fa7-afb0-4058-8645-a814e773dfb0,295dcf7e-bbab-4042-bd9d-059de3df3cc2,4c7ae317-521b-4c0d-a6cd-3f6aff771454,6ea5e895-762d-4017-a013-051e84404b26 + + + pdgm/dx/resqml/markerSetsCount + 1 + + + pdgm/dx/resqml/markerSet0/Uid + 82965533-4338-47c9-8a91-8169902d5908 + + + pdgm/dx/resqml/markerSet0/Title + Imported_Markers + + + pdgm/dx/resqml/markerSet0/Creation + 2024-02-06T09:35:00Z + + + pdgm/dx/resqml/markerSet0/Originator + maap + + + pdgm/dx/resqml/markerSet0/LastUpdate + 2024-02-06T09:35:00Z + + + pdgm/dx/resqml/markerSet0/MetaDataKeyCount + 2 + + + pdgm/dx/resqml/markerSet0/MetaData0/Name + pdgm/dx/gocad/ScenarioName + + + pdgm/dx/resqml/markerSet0/MetaData0/Value + |UNNAMED| + + + pdgm/dx/resqml/markerSet0/MetaData1/Name + pdgm/dx/gocad/ScenarioUid + + + pdgm/dx/resqml/markerSet0/MetaData1/Value + 
9ddb3435-474e-428b-aad6-e669624eed55 + + + pdgm/dx/resqml/isActive + true + + + application/x-resqml+xml;version=2.0;type=obj_WellboreInterpretation + 15/9-19 SR + a25df2d2-dd32-45d3-acda-128fa33d5933 + pdgm + + 3 + + + /RESQML/2f8778ca-6a09-446b-b25d-b725ec759a70/mdValues + + application/x-eml+xml;version=2.0;type=obj_EpcExternalPartReference + Hdf5File + 61fa2fdf-46ab-4c02-ab72-7895cce58e37 + pdgm + 2024-04-29T15:45:33Z + + + + + application/x-resqml+xml;version=2.0;type=obj_WellboreTrajectoryRepresentation + well_trajectory_15_9-19 SR + b04c5f27-6cd0-4fc2-99a5-06662674c96d + pdgm + + + + TY + maap + 2024-02-06T09:35:00Z + PDGM-DX ETP Client 2.0.1 + maap + 2024-02-06T09:35:00Z + + + pdgm/dx/resqml/markerSet + 82965533-4338-47c9-8a91-8169902d5908 + + + pdgm/dx/resqml/creatorGroup + maap + + + pdgm/dx/resqml/markerConfidenceFactor + 1 + + + pdgm/dx/gocad/ScenarioName + |UNNAMED| + + + pdgm/dx/gocad/ScenarioUid + 9ddb3435-474e-428b-aad6-e669624eed55 + + horizon + + application/x-resqml+xml;version=2.0;type=obj_HorizonInterpretation + |UNNAMED| + 4b9cbd53-3c42-42ec-acf5-cd3eae517b3a + + + + + TOR + maap + 2024-02-06T09:35:00Z + PDGM-DX ETP Client 2.0.1 + maap + 2024-02-06T09:35:00Z + + + pdgm/dx/resqml/markerSet + 82965533-4338-47c9-8a91-8169902d5908 + + + pdgm/dx/resqml/creatorGroup + maap + + + pdgm/dx/resqml/markerConfidenceFactor + 1 + + + pdgm/dx/gocad/ScenarioName + |UNNAMED| + + + pdgm/dx/gocad/ScenarioUid + 9ddb3435-474e-428b-aad6-e669624eed55 + + horizon + + application/x-resqml+xml;version=2.0;type=obj_HorizonInterpretation + |UNNAMED| + 47ef11f5-4934-4d5e-ae01-8d1ff9d366bb + + + + + Sleipner + maap + 2024-02-06T09:35:00Z + PDGM-DX ETP Client 2.0.1 + maap + 2024-02-06T09:35:00Z + + + pdgm/dx/resqml/markerSet + 82965533-4338-47c9-8a91-8169902d5908 + + + pdgm/dx/resqml/creatorGroup + maap + + + pdgm/dx/resqml/markerConfidenceFactor + 1 + + + pdgm/dx/gocad/ScenarioName + |UNNAMED| + + + pdgm/dx/gocad/ScenarioUid + 9ddb3435-474e-428b-aad6-e669624eed55 + + 
horizon + + application/x-resqml+xml;version=2.0;type=obj_HorizonInterpretation + |UNNAMED| + 20517815-5f1f-4eab-9054-cdd45b03efd1 + + + \ No newline at end of file diff --git a/energyml-utils/rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json b/energyml-utils/rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json new file mode 100644 index 0000000..e79bad6 --- /dev/null +++ b/energyml-utils/rc/resqml20.obj_Grid2dRepresentation_7194be4d-169d-420c-98a5-d3ec4671f0cc.json @@ -0,0 +1,478 @@ +{ + "$type": "resqml20.obj_Grid2dRepresentation", + "SchemaVersion": "2.0", + "Uuid": "7194be4d-169d-420c-98a5-d3ec4671f0cc", + "Citation": { + "$type": "eml20.Citation", + "Title": "Sleipner", + "Originator": "dalsaab", + "Creation": "2024-03-21T13:22:19.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2024-03-21T13:31:16.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/creatorGroup", + "Value": "dalsaab" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/Epos/MultiValued", + "Value": "false" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/gocad/default_dimension", + "Value": "Nodes" + } + ], + "RepresentedInterpretation": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_HorizonInterpretation", + "Title": "|UNNAMED|", + "UUID": "7266fda0-f5a4-416c-93e3-aa7417581057", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_HorizonInterpretation", + "SchemaVersion": "2.0", + "Uuid": 
"7266fda0-f5a4-416c-93e3-aa7417581057", + "Citation": { + "$type": "eml20.Citation", + "Title": "|UNNAMED|", + "Originator": "dalsaab", + "Creation": "2024-03-20T17:51:49.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2024-03-26T19:30:34.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/creatorGroup", + "Value": "dalsaab" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/pdgm/InterpretationColor", + "Value": "1 0 1" + } + ], + "Domain": "depth", + "InterpretedFeature": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_GeneticBoundaryFeature", + "Title": "Sleipner", + "UUID": "814d6805-9fef-441e-8c1f-29ca64692410", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_GeneticBoundaryFeature", + "SchemaVersion": "2.0", + "Uuid": "814d6805-9fef-441e-8c1f-29ca64692410", + "Citation": { + "$type": "eml20.Citation", + "Title": "Sleipner", + "Originator": "dalsaab2", + "Creation": "2024-03-20T15:44:40.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2024-03-20T15:44:40.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + }, + { + "$type": 
"resqml20.NameValuePair", + "Name": "pdgm/gocad/classification", + "Value": "Horizon" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/geologicProvince", + "Value": null + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/color", + "Value": "1 0 1 1" + } + ], + "GeneticBoundaryKind": "horizon" + } + }, + "BoundaryRelation": [ + "conformable" + ] + } + }, + "SurfaceRole": "pick", + "Grid2dPatch": { + "$type": "resqml20.Grid2dPatch", + "PatchIndex": 0, + "FastestAxisCount": 433, + "SlowestAxisCount": 761, + "Geometry": { + "$type": "resqml20.PointGeometry", + "LocalCrs": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_LocalDepth3dCrs", + "Title": "SKUA Local", + "UUID": "946ae9b2-4adb-48ef-bb39-b9984907bd1c", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_LocalDepth3dCrs", + "SchemaVersion": "2.0", + "Uuid": "946ae9b2-4adb-48ef-bb39-b9984907bd1c", + "Citation": { + "$type": "eml20.Citation", + "Title": "SKUA Local", + "Originator": "dalsaab", + "Creation": "2023-11-28T19:33:55.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2023-12-15T08:12:17.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + } + ], + "YOffset": 6470000, + "ZOffset": 0, + "ArealRotation": { + "$type": "eml20.PlaneAngleMeasure", + "Uom": "rad", + "_": 0 + }, + "ProjectedAxisOrder": "easting northing", + "ProjectedUom": "m", + "VerticalUom": "m", + "XOffset": 420000, + "ZIncreasingDownward": true, + "VerticalCrs": { + "$type": "eml20.VerticalUnknownCrs", + "Unknown": "Unknown" + }, + "ProjectedCrs": { + "$type": "eml20.ProjectedCrsEpsgCode", + "EpsgCode": 23031 + } + } + }, + "Points": { + "$type": "resqml20.Point3dZValueArray", + "SupportingGeometry": { + "$type": "resqml20.Point3dFromRepresentationLatticeArray", + 
"NodeIndicesOnSupportingRepresentation": { + "$type": "resqml20.IntegerLatticeArray", + "StartValue": 0, + "Offset": [ + { + "$type": "resqml20.IntegerConstantArray", + "Value": 1, + "Count": 760 + }, + { + "$type": "resqml20.IntegerConstantArray", + "Value": 1, + "Count": 432 + } + ] + }, + "SupportingRepresentation": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_Grid2dRepresentation", + "Title": "volve3D", + "UUID": "975c6d8d-6eed-4025-a3a7-7078380f4695", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_Grid2dRepresentation", + "SchemaVersion": "2.0", + "Uuid": "975c6d8d-6eed-4025-a3a7-7078380f4695", + "Citation": { + "$type": "eml20.Citation", + "Title": "volve3D", + "Originator": "mrobertson", + "Creation": "2018-08-21T02:16:12.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2018-08-21T02:16:12.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + } + ], + "RepresentedInterpretation": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_GenericFeatureInterpretation", + "Title": "Interpretation volve3D", + "UUID": "4103b4ae-7c78-44cb-bcf5-218b9385c281", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_GenericFeatureInterpretation", + "SchemaVersion": "2.0", + "Uuid": "4103b4ae-7c78-44cb-bcf5-218b9385c281", + "Citation": { + "$type": "eml20.Citation", + "Title": "Interpretation volve3D", + "Originator": "dalsaab", + "Creation": "2024-04-19T06:24:01.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "LastUpdate": "2024-04-19T06:24:01.000Z" + 
}, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/creatorGroup", + "Value": "dalsaab" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + } + ], + "Domain": "depth", + "InterpretedFeature": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_SeismicLatticeFeature", + "Title": "Feature volve3D", + "UUID": "93c42801-eb01-451e-9bfd-605d5643d61c", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_SeismicLatticeFeature", + "SchemaVersion": "2.0", + "Uuid": "93c42801-eb01-451e-9bfd-605d5643d61c", + "Citation": { + "$type": "eml20.Citation", + "Title": "Feature volve3D", + "Originator": "dalsaab", + "Creation": "2024-04-19T06:24:01.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "LastUpdate": "2024-04-19T06:24:01.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/creatorGroup", + "Value": "dalsaab" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioName", + "Value": "|UNNAMED|" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/gocad/ScenarioUid", + "Value": "3b808d54-c9b1-47cb-a631-5ae34b8c8bc6" + }, + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/geologicProvince", + "Value": null + } + ], + "CrosslineCount": 761, + "CrosslineIndexIncrement": 1, + "FirstCrosslineIndex": 1920, + "FirstInlineIndex": 9963, + "InlineCount": 433, + "InlineIndexIncrement": 1 + } + } + } + }, + "SurfaceRole": "map", + 
"Grid2dPatch": { + "$type": "resqml20.Grid2dPatch", + "PatchIndex": 0, + "FastestAxisCount": 433, + "SlowestAxisCount": 761, + "Geometry": { + "$type": "resqml20.PointGeometry", + "LocalCrs": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-resqml+xml;version=2.0;type=obj_LocalDepth3dCrs", + "Title": "SKUA Local", + "UUID": "946ae9b2-4adb-48ef-bb39-b9984907bd1c", + "UuidAuthority": "pdgm", + "_data": { + "$type": "resqml20.obj_LocalDepth3dCrs", + "SchemaVersion": "2.0", + "Uuid": "946ae9b2-4adb-48ef-bb39-b9984907bd1c", + "Citation": { + "$type": "eml20.Citation", + "Title": "SKUA Local", + "Originator": "dalsaab", + "Creation": "2023-11-28T19:33:55.000Z", + "Format": "PDGM-DX ETP Client 2.0.1", + "Editor": "dalsaab", + "LastUpdate": "2023-12-15T08:12:17.000Z" + }, + "ExtraMetadata": [ + { + "$type": "resqml20.NameValuePair", + "Name": "pdgm/dx/resqml/project", + "Value": "da497110-8fba-455f-adac-f2b495bc707e" + } + ], + "YOffset": 6470000, + "ZOffset": 0, + "ArealRotation": { + "$type": "eml20.PlaneAngleMeasure", + "Uom": "rad", + "_": 0 + }, + "ProjectedAxisOrder": "easting northing", + "ProjectedUom": "m", + "VerticalUom": "m", + "XOffset": 420000, + "ZIncreasingDownward": true, + "VerticalCrs": { + "$type": "eml20.VerticalUnknownCrs", + "Unknown": "Unknown" + }, + "ProjectedCrs": { + "$type": "eml20.ProjectedCrsEpsgCode", + "EpsgCode": 23031 + } + } + }, + "Points": { + "$type": "resqml20.Point3dLatticeArray", + "Origin": { + "$type": "resqml20.Point3d", + "Coordinate1": 18806.001953125, + "Coordinate2": 5211.10009765625, + "Coordinate3": 0 + }, + "Offset": [ + { + "$type": "resqml20.Point3dOffset", + "Offset": { + "$type": "resqml20.Point3d", + "Coordinate1": -0.9702886507378544, + "Coordinate2": 0.24195027226542634, + "Coordinate3": 0 + }, + "Spacing": { + "$type": "resqml20.DoubleConstantArray", + "Value": 12.499999602066202, + "Count": 760 + } + }, + { + "$type": "resqml20.Point3dOffset", + "Offset": { + "$type": "resqml20.Point3d", 
+ "Coordinate1": 0.24194989863410435, + "Coordinate2": 0.9702887439061357, + "Coordinate3": 0 + }, + "Spacing": { + "$type": "resqml20.DoubleConstantArray", + "Value": 12.499999431383065, + "Count": 432 + } + } + ] + } + } + } + } + } + }, + "ZValues": { + "$type": "resqml20.DoubleHdf5Array", + "Values": { + "$type": "eml20.Hdf5Dataset", + "PathInHdfFile": "/RESQML/7194be4d-169d-420c-98a5-d3ec4671f0cc/points_patch0", + "HdfProxy": { + "$type": "eml20.DataObjectReference", + "ContentType": "application/x-eml+xml;version=2.0;type=obj_EpcExternalPartReference", + "Title": "Hdf5File", + "UUID": "68659942-81c1-4df3-80b6-e2853a0f4e07", + "UuidAuthority": "pdgm", + "VersionString": "2024-05-02T10:24:30Z" + } + } + } + } + } + } +} \ No newline at end of file diff --git a/energyml-utils/src/energyml/utils/__init__.py b/energyml-utils/src/energyml/utils/__init__.py index 0749f5f..5a7aa68 100644 --- a/energyml-utils/src/energyml/utils/__init__.py +++ b/energyml-utils/src/energyml/utils/__init__.py @@ -18,4 +18,4 @@ - energyml-witsml2-1 - energyml-prodml2-0 - energyml-prodml2-2 -""" \ No newline at end of file +""" diff --git a/energyml-utils/src/energyml/utils/constants.py b/energyml-utils/src/energyml/utils/constants.py new file mode 100644 index 0000000..233c274 --- /dev/null +++ b/energyml-utils/src/energyml/utils/constants.py @@ -0,0 +1,242 @@ +import datetime +import re +import uuid as uuid_mod +from typing import List + +ENERGYML_NAMESPACES = { + "eml": "http://www.energistics.org/energyml/data/commonv2", + "prodml": "http://www.energistics.org/energyml/data/prodmlv2", + "witsml": "http://www.energistics.org/energyml/data/witsmlv2", + "resqml": "http://www.energistics.org/energyml/data/resqmlv2", +} +""" +dict of all energyml namespaces +""" # pylint: disable=W0105 + +ENERGYML_NAMESPACES_PACKAGE = { + "eml": ["http://www.energistics.org/energyml/data/commonv2"], + "prodml": ["http://www.energistics.org/energyml/data/prodmlv2"], + "witsml": 
["http://www.energistics.org/energyml/data/witsmlv2"], + "resqml": ["http://www.energistics.org/energyml/data/resqmlv2"], + "opc": [ + "http://schemas.openxmlformats.org/package/2006/content-types", + "http://schemas.openxmlformats.org/package/2006/metadata/core-properties", + ], +} +""" +dict of all energyml namespace packages +""" # pylint: disable=W0105 + +RGX_ENERGYML_MODULE_NAME = r"energyml\.(?P.*)\.v(?P(?P\d+(_\d+)*)(_dev(?P.*))?)\..*" +RGX_PROJECT_VERSION = r"(?P[\d]+)(.(?P[\d]+)(.(?P[\d]+))?)?" + +ENERGYML_MODULES_NAMES = ["eml", "prodml", "witsml", "resqml"] + +RELATED_MODULES = [ + ["energyml.eml.v2_0.commonv2", "energyml.resqml.v2_0_1.resqmlv2"], + [ + "energyml.eml.v2_1.commonv2", + "energyml.prodml.v2_0.prodmlv2", + "energyml.witsml.v2_0.witsmlv2", + ], + ["energyml.eml.v2_2.commonv2", "energyml.resqml.v2_2_dev3.resqmlv2"], + [ + "energyml.eml.v2_3.commonv2", + "energyml.resqml.v2_2.resqmlv2", + "energyml.prodml.v2_2.prodmlv2", + "energyml.witsml.v2_1.witsmlv2", + ], +] + +RGX_UUID_NO_GRP = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" +RGX_UUID = r"(?P" + RGX_UUID_NO_GRP + ")" +RGX_DOMAIN_VERSION = r"(?P(?P([\d]+[\._])*\d)\s*(?Pdev\s*(?P[\d]+))?)" +RGX_DOMAIN_VERSION_FLAT = r"(?P(?P([\d]+)*\d)\s*(?Pdev\s*(?P[\d]+))?)" + + +# ContentType +RGX_MIME_TYPE_MEDIA = r"(?Papplication|audio|font|example|image|message|model|multipart|text|video)" +RGX_CT_ENERGYML_DOMAIN = r"(?Px-(?P[\w]+)\+xml)" +RGX_CT_XML_DOMAIN = r"(?P(x\-)?(?P.+)\+xml)" +RGX_CT_TOKEN_VERSION = r"version=" + RGX_DOMAIN_VERSION +RGX_CT_TOKEN_TYPE = r"type=(?P[\w\_]+)" + +RGX_CONTENT_TYPE = ( + RGX_MIME_TYPE_MEDIA + + "/" + + "(?P(" + + RGX_CT_ENERGYML_DOMAIN + + ")|(" + + RGX_CT_XML_DOMAIN + + r")|([\w-]+\.?)+)" + + "(;((" + + RGX_CT_TOKEN_VERSION + + ")|(" + + RGX_CT_TOKEN_TYPE + + ")))*" +) +RGX_QUALIFIED_TYPE = ( + r"(?P[a-zA-Z]+)" + RGX_DOMAIN_VERSION_FLAT + r"\.(?P[\w_]+)" +) +# ========= + +RGX_SCHEMA_VERSION = ( + 
r"(?P[eE]ml|[cC]ommon|[rR]esqml|[wW]itsml|[pP]rodml|[oO]pc)?\s*v?" + + RGX_DOMAIN_VERSION + + r"\s*$" +) + +RGX_ENERGYML_FILE_NAME_OLD = r"(?P[\w]+)_" + RGX_UUID_NO_GRP + r"\.xml$" +RGX_ENERGYML_FILE_NAME_NEW = ( + RGX_UUID_NO_GRP + r"\.(?P\d+(\.\d+)*)\.xml$" +) +RGX_ENERGYML_FILE_NAME = ( + rf"^(.*/)?({RGX_ENERGYML_FILE_NAME_OLD})|({RGX_ENERGYML_FILE_NAME_NEW})" +) + +RGX_XML_HEADER = r"^\s*<\?xml(\s+(encoding\s*=\s*\"(?P[^\"]+)\"|version\s*=\s*\"(?P[^\"]+)\"|standalone\s*=\s*\"(?P[^\"]+)\"))+" + +# __ ______ ____ +# / / / / __ \/ _/ +# / / / / /_/ // / +# / /_/ / _, _// / +# \____/_/ |_/___/ + +URI_RGX_GRP_DOMAIN = "domain" +URI_RGX_GRP_DOMAIN_VERSION = "domainVersion" +URI_RGX_GRP_UUID = "uuid" +URI_RGX_GRP_DATASPACE = "dataspace" +URI_RGX_GRP_VERSION = "version" +URI_RGX_GRP_OBJECT_TYPE = "objectType" +URI_RGX_GRP_UUID2 = "uuid2" +URI_RGX_GRP_COLLECTION_DOMAIN = "collectionDomain" +URI_RGX_GRP_COLLECTION_DOMAIN_VERSION = "collectionDomainVersion" +URI_RGX_GRP_COLLECTION_TYPE = "collectionType" +URI_RGX_GRP_QUERY = "query" + +# Patterns +_uri_rgx_pkg_name = "|".join( + ENERGYML_NAMESPACES.keys() +) # "[a-zA-Z]+\w+" //witsml|resqml|prodml|eml +URI_RGX = ( + r"^eml:\/\/\/(?:dataspace\('(?P<" + + URI_RGX_GRP_DATASPACE + + r">[^']*?(?:''[^']*?)*)'\)\/?)?((?P<" + + URI_RGX_GRP_DOMAIN + + r">" + + _uri_rgx_pkg_name + + r")(?P<" + + URI_RGX_GRP_DOMAIN_VERSION + + r">[1-9]\d)\.(?P<" + + URI_RGX_GRP_OBJECT_TYPE + + r">\w+)(\((?:(?P<" + + URI_RGX_GRP_UUID + + r">(uuid=)?" 
+ + RGX_UUID_NO_GRP + + r")|uuid=(?P<" + + URI_RGX_GRP_UUID2 + + r">" + + RGX_UUID_NO_GRP + + r"),\s*version='(?P<" + + URI_RGX_GRP_VERSION + + r">[^']*?(?:''[^']*?)*)')\))?)?(\/(?P<" + + URI_RGX_GRP_COLLECTION_DOMAIN + + r">" + + _uri_rgx_pkg_name + + r")(?P<" + + URI_RGX_GRP_COLLECTION_DOMAIN_VERSION + + r">[1-9]\d)\.(?P<" + + URI_RGX_GRP_COLLECTION_TYPE + + r">\w+))?(?:\?(?P<" + + URI_RGX_GRP_QUERY + + r">[^#]+))?$" +) + +# ================================ +RELS_CONTENT_TYPE = ( + "application/vnd.openxmlformats-package.core-properties+xml" +) +RELS_FOLDER_NAME = "_rels" + +primitives = (bool, str, int, float, type(None)) + + +# ______ __ _ +# / ____/_ ______ _____/ /_(_)___ ____ _____ +# / /_ / / / / __ \/ ___/ __/ / __ \/ __ \/ ___/ +# / __/ / /_/ / / / / /__/ /_/ / /_/ / / / (__ ) +# /_/ \__,_/_/ /_/\___/\__/_/\____/_/ /_/____/ + + +def snake_case(s: str) -> str: + """Transform a str into snake case.""" + s = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", s) + s = re.sub("__([A-Z])", r"_\1", s) + s = re.sub("([a-z0-9])([A-Z])", r"\1_\2", s) + return s.lower() + + +def pascal_case(s: str) -> str: + """Transform a str into pascal case.""" + return snake_case(s).replace("_", " ").title().replace(" ", "") + + +def flatten_concatenation(matrix) -> List: + """ + Flatten a matrix. 
+ + Example : + [ [a,b,c], [d,e,f], [ [x,y,z], [0] ] ] + will be translated in: [a, b, c, d, e, f, [x,y,z], [0]] + :param matrix: + :return: + """ + flat_list = [] + for row in matrix: + flat_list += row + return flat_list + + +def parse_content_type(ct: str): + return re.search(RGX_CONTENT_TYPE, ct) + + +def parse_qualified_type(ct: str): + return re.search(RGX_QUALIFIED_TYPE, ct) + + +def now( + time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC") +) -> float: + """Return an epoch value""" + return datetime.datetime.timestamp(datetime.datetime.now(time_zone)) + + +def epoch( + time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC") +) -> int: + return int(now(time_zone)) + + +def date_to_epoch(date: str) -> int: + """ + Transform a energyml date into an epoch datetime + :return: int + """ + return int(datetime.datetime.fromisoformat(date).timestamp()) + + +def epoch_to_date( + epoch_value: int, + time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC"), +) -> str: + date = datetime.datetime.fromtimestamp(epoch_value, time_zone) + return date.strftime("%Y-%m-%dT%H:%M:%S%z") + + +def gen_uuid() -> str: + """ + Generate a new uuid. + :return: + """ + return str(uuid_mod.uuid4()) diff --git a/energyml-utils/src/energyml/utils/data/__init__.py b/energyml-utils/src/energyml/utils/data/__init__.py index c2ed8e7..be38189 100644 --- a/energyml-utils/src/energyml/utils/data/__init__.py +++ b/energyml-utils/src/energyml/utils/data/__init__.py @@ -5,4 +5,4 @@ Contains functions to help the read of specific entities like Grid2DRepresentation, TriangulatedSetRepresentation etc. It also contains functions to export data into OFF/OBJ format. 
-""" \ No newline at end of file +""" diff --git a/energyml-utils/src/energyml/utils/data/hdf.py b/energyml-utils/src/energyml/utils/data/hdf.py index 2f1839e..bea23f8 100644 --- a/energyml-utils/src/energyml/utils/data/hdf.py +++ b/energyml-utils/src/energyml/utils/data/hdf.py @@ -1,5 +1,6 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 +import logging import os from dataclasses import dataclass from io import BytesIO @@ -8,41 +9,60 @@ import h5py from ..epc import Epc, get_obj_identifier, EPCRelsRelationshipType -from ..introspection import search_attribute_matching_name_with_path, search_attribute_matching_name, \ - get_object_attribute, get_object_attribute_no_verif +from ..introspection import ( + search_attribute_matching_name_with_path, + search_attribute_matching_name, + get_object_attribute, + get_object_attribute_no_verif, +) @dataclass class DatasetReader: - def read_array(self, source: str, path_in_external_file: str) -> Optional[List[Any]]: + def read_array( + self, source: str, path_in_external_file: str + ) -> Optional[List[Any]]: return None - def get_array_dimension(self, source: str, path_in_external_file: str) -> Optional[List[Any]]: + def get_array_dimension( + self, source: str, path_in_external_file: str + ) -> Optional[List[Any]]: return None @dataclass class ETPReader(DatasetReader): - def read_array(self, obj_uri: str, path_in_external_file: str) -> Optional[List[Any]]: + def read_array( + self, obj_uri: str, path_in_external_file: str + ) -> Optional[List[Any]]: return None - def get_array_dimension(self, source: str, path_in_external_file: str) -> Optional[List[Any]]: + def get_array_dimension( + self, source: str, path_in_external_file: str + ) -> Optional[List[Any]]: return None @dataclass class HDF5FileReader(DatasetReader): - def read_array(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[List[Any]]: + def read_array( + self, source: Union[BytesIO, str], path_in_external_file: str 
+ ) -> Optional[List[Any]]: with h5py.File(source, "r") as f: d_group = f[path_in_external_file] return d_group[()].tolist() - def get_array_dimension(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[List[Any]]: + def get_array_dimension( + self, source: Union[BytesIO, str], path_in_external_file: str + ) -> Optional[List[Any]]: with h5py.File(source, "r") as f: return list(f[path_in_external_file].shape) def extract_h5_datasets( - self, input_h5: Union[BytesIO, str], output_h5: Union[BytesIO, str], h5_datasets_paths: List[str] + self, + input_h5: Union[BytesIO, str], + output_h5: Union[BytesIO, str], + h5_datasets_paths: List[str], ) -> None: """ Copy all dataset from :param input_h5 matching with paths in :param h5_datasets_paths into the :param output @@ -64,10 +84,7 @@ def get_hdf_reference(obj) -> List[Any]: :param obj: :return: """ - return [ - val - for path, val in get_hdf_reference_with_path(obj=obj) - ] + return [val for path, val in get_hdf_reference_with_path(obj=obj)] def get_hdf_reference_with_path(obj: any) -> List[Tuple[str, Any]]: @@ -79,8 +96,7 @@ def get_hdf_reference_with_path(obj: any) -> List[Tuple[str, Any]]: :return: [ (Dot_Path_In_Obj, value), ...] 
""" return search_attribute_matching_name_with_path( - obj, - "(PathInHdfFile|PathInExternalFile)" + obj, "(PathInHdfFile|PathInExternalFile)" ) @@ -93,23 +109,28 @@ def get_h5_path_possibilities(value_in_xml: str, epc: Epc) -> List[str]: With our example we will have : 'D:/a_folder/C:/my_file.h5' this function returns (following our example): [ 'C:/my_file.h5', 'D:/a_folder/my_file.h5', 'my_file.h5'] - :param value_in_xml: - :param epc: - :return: + :param value_in_xml: + :param epc: + :return: """ epc_folder = epc.get_epc_file_folder() hdf5_path_respect = value_in_xml hdf5_path_rematch = f"{epc_folder+'/' if epc_folder is not None and len(epc_folder) else ''}{os.path.basename(value_in_xml)}" hdf5_path_no_folder = f"{os.path.basename(value_in_xml)}" - return [hdf5_path_respect, hdf5_path_rematch, hdf5_path_no_folder] + return [ + hdf5_path_respect, + hdf5_path_rematch, + hdf5_path_no_folder, + epc.epc_file_path[:-4] + ".h5", + ] def get_hdf5_path_from_external_path( - external_path_obj: Any, - path_in_root: Optional[str] = None, - root_obj: Optional[Any] = None, - epc: Optional[Epc] = None + external_path_obj: Any, + path_in_root: Optional[str] = None, + root_obj: Optional[Any] = None, + epc: Optional[Epc] = None, ) -> Optional[List[str]]: """ Return the hdf5 file path (Searches for "uri" attribute or in :param:`epc` rels files). 
@@ -119,50 +140,87 @@ def get_hdf5_path_from_external_path( :param epc: :return: """ + result = [] if isinstance(external_path_obj, str): # external_path_obj is maybe an attribute of an ExternalDataArrayPart, now search upper in the object - upper_path = path_in_root[:path_in_root.rindex(".")] - return get_hdf5_path_from_external_path( + upper_path = path_in_root[: path_in_root.rindex(".")] + result = get_hdf5_path_from_external_path( external_path_obj=get_object_attribute(root_obj, upper_path), path_in_root=upper_path, root_obj=root_obj, epc=epc, ) elif type(external_path_obj).__name__ == "ExternalDataArrayPart": - epc_folder = epc.get_epc_file_folder() + # epc_folder = epc.get_epc_file_folder() h5_uri = search_attribute_matching_name(external_path_obj, "uri") if h5_uri is not None and len(h5_uri) > 0: - return get_h5_path_possibilities(value_in_xml=h5_uri[0], epc=epc) - # return f"{epc_folder}/{h5_uri[0]}" - else: - epc_folder = epc.get_epc_file_folder() - hdf_proxy_lst = search_attribute_matching_name(external_path_obj, "HdfProxy") - ext_file_proxy_lst = search_attribute_matching_name(external_path_obj, "ExternalFileProxy") - - # resqml 2.0.1 - if hdf_proxy_lst is not None and len(hdf_proxy_lst) > 0: - hdf_proxy = hdf_proxy_lst - # print("h5Proxy", hdf_proxy) - while isinstance(hdf_proxy, list): - hdf_proxy = hdf_proxy[0] - hdf_proxy_obj = epc.get_object_by_identifier(get_obj_identifier(hdf_proxy)) - if hdf_proxy_obj is not None: - for rel in epc.additional_rels.get(get_obj_identifier(hdf_proxy_obj), []): - if rel.type_value == EPCRelsRelationshipType.EXTERNAL_RESOURCE.get_type(): - return get_h5_path_possibilities(value_in_xml=rel.target, epc=epc) - # return f"{epc_folder}/{rel.target}" - - # resqml 2.2dev3 - if ext_file_proxy_lst is not None and len(ext_file_proxy_lst) > 0: - ext_file_proxy = ext_file_proxy_lst - while isinstance(ext_file_proxy, list): - ext_file_proxy = ext_file_proxy[0] - ext_part_ref_obj = epc.get_object_by_identifier( - get_obj_identifier( 
- get_object_attribute_no_verif(ext_file_proxy, "epc_external_part_reference") + result = get_h5_path_possibilities(value_in_xml=h5_uri[0], epc=epc) + # result = f"{epc_folder}/{h5_uri[0]}" + + # epc_folder = epc.get_epc_file_folder() + hdf_proxy_lst = search_attribute_matching_name( + external_path_obj, "HdfProxy" + ) + ext_file_proxy_lst = search_attribute_matching_name( + external_path_obj, "ExternalFileProxy" + ) + + # resqml 2.0.1 + if hdf_proxy_lst is not None and len(hdf_proxy_lst) > 0: + hdf_proxy = hdf_proxy_lst + # logging.debug("h5Proxy", hdf_proxy) + while isinstance(hdf_proxy, list): + hdf_proxy = hdf_proxy[0] + hdf_proxy_obj = epc.get_object_by_identifier( + get_obj_identifier(hdf_proxy) + ) + logging.debug("hdf_proxy_obj : ", hdf_proxy_obj, " hdf_proxy : ", hdf_proxy) + if hdf_proxy_obj is not None: + for rel in epc.additional_rels.get( + get_obj_identifier(hdf_proxy_obj), [] + ): + if ( + rel.type_value + == EPCRelsRelationshipType.EXTERNAL_RESOURCE.get_type() + ): + result = get_h5_path_possibilities( + value_in_xml=rel.target, epc=epc + ) + # result = f"{epc_folder}/{rel.target}" + + # resqml 2.2dev3 + if ext_file_proxy_lst is not None and len(ext_file_proxy_lst) > 0: + ext_file_proxy = ext_file_proxy_lst + while isinstance(ext_file_proxy, list): + ext_file_proxy = ext_file_proxy[0] + ext_part_ref_obj = epc.get_object_by_identifier( + get_obj_identifier( + get_object_attribute_no_verif( + ext_file_proxy, "epc_external_part_reference" ) ) - return get_h5_path_possibilities(value_in_xml=ext_part_ref_obj.filename, epc=epc) - # return f"{epc_folder}/{ext_part_ref_obj.filename}" + ) + result = get_h5_path_possibilities( + value_in_xml=ext_part_ref_obj.filename, epc=epc + ) + # return f"{epc_folder}/{ext_part_ref_obj.filename}" - return None + result += list( + filter( + lambda p: p.lower().endswith(".h5") or p.lower().endswith(".hdf5"), + epc.external_files_path or [], + ) + ) + + if len(result) == 0: + result = [epc.epc_file_path[:-4] + ".h5"] + + 
logging.debug( + external_path_obj, + result, + "\n\t", + hdf_proxy_lst, + "\n\t", + ext_file_proxy_lst, + ) + return result diff --git a/energyml-utils/src/energyml/utils/data/helper.py b/energyml-utils/src/energyml/utils/data/helper.py index 536e4d3..e9c80ff 100644 --- a/energyml-utils/src/energyml/utils/data/helper.py +++ b/energyml-utils/src/energyml/utils/data/helper.py @@ -1,15 +1,28 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 import inspect +import logging import sys +import traceback from typing import Any, Optional, Callable, List, Union -from .hdf import get_hdf5_path_from_external_path, HDF5FileReader, get_hdf_reference +from .hdf import ( + get_hdf5_path_from_external_path, + HDF5FileReader, + get_hdf_reference, +) +from ..constants import flatten_concatenation from ..epc import Epc, get_obj_identifier from ..exception import ObjectNotFoundNotError -from ..introspection import snake_case, get_object_attribute_no_verif, \ - search_attribute_matching_name_with_path, search_attribute_matching_name, flatten_concatenation, \ - search_attribute_in_upper_matching_name, get_obj_uuid, get_object_attribute +from ..introspection import ( + snake_case, + get_object_attribute_no_verif, + search_attribute_matching_name_with_path, + search_attribute_matching_name, + search_attribute_in_upper_matching_name, + get_obj_uuid, + get_object_attribute, +) _ARRAY_NAMES_ = [ "BooleanArrayFromDiscretePropertyArray", @@ -48,7 +61,7 @@ "StringConstantArray", "StringExternalArray", "StringHdf5Array", - "StringXmlArray" + "StringXmlArray", ] @@ -74,12 +87,16 @@ def is_z_reversed(crs: Optional[Any]) -> bool: reverse_z_values = False if crs is not None: # resqml 201 - zincreasing_downward = search_attribute_matching_name(crs, "ZIncreasingDownward") + zincreasing_downward = search_attribute_matching_name( + crs, "ZIncreasingDownward" + ) if len(zincreasing_downward) > 0: reverse_z_values = zincreasing_downward[0] # resqml >= 22 - vert_axis = 
search_attribute_matching_name(crs, "VerticalAxis.Direction") + vert_axis = search_attribute_matching_name( + crs, "VerticalAxis.Direction" + ) if len(vert_axis) > 0: reverse_z_values = vert_axis[0].lower() == "down" @@ -93,7 +110,7 @@ def prod_n_tab(val: Union[float, int, str], tab: List[Union[float, int, str]]): :param tab: :return: """ - return list(map(lambda x: x*val, tab)) + return list(map(lambda x: x * val, tab)) def sum_lists(l1: List, l2: List): @@ -107,7 +124,9 @@ def sum_lists(l1: List, l2: List): :param l2: :return: """ - return [l1[i] + l2[i] for i in range(min(len(l1), len(l2)))]+max(l1, l2, key=len)[min(len(l1), len(l2)):] + return [l1[i] + l2[i] for i in range(min(len(l1), len(l2)))] + max( + l1, l2, key=len + )[min(len(l1), len(l2)) :] # _ __ __ @@ -117,8 +136,11 @@ def sum_lists(l1: List, l2: List): # |__/|__/\____/_/ /_/|_/____/ .___/\__,_/\___/\___/ # /_/ + class EnergymlWorkspace: - def get_object(self, uuid: str, object_version: Optional[str]) -> Optional[Any]: + def get_object( + self, uuid: str, object_version: Optional[str] + ) -> Optional[Any]: raise NotImplementedError("EnergymlWorkspace.get_object") def get_object_by_identifier(self, identifier: str) -> Optional[Any]: @@ -129,10 +151,10 @@ def get_object_by_uuid(self, uuid: str) -> Optional[Any]: return self.get_object(uuid, None) def read_external_array( - self, - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, + self, + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, ) -> List[Any]: raise NotImplementedError("EnergymlWorkspace.get_object") @@ -141,57 +163,87 @@ class EPCWorkspace(EnergymlWorkspace): def __init__(self, epc: Epc): self.epc = epc - def get_object(self, uuid: str, object_version: Optional[str]) -> Optional[Any]: + def get_object( + self, uuid: str, object_version: Optional[str] + ) -> Optional[Any]: return self.epc.get_object_by_identifier(f"{uuid}.{object_version}") def 
read_external_array( - self, - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, + self, + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + use_epc_io_h5: bool = True, ) -> List[Any]: - hdf5_paths = get_hdf5_path_from_external_path( - external_path_obj=energyml_array, - path_in_root=path_in_root, - root_obj=root_obj, - epc=self.epc, - ) h5_reader = HDF5FileReader() path_in_external = get_hdf_reference(energyml_array)[0] - - result_array = None - for hdf5_path in hdf5_paths: - try: - result_array = h5_reader.read_array(hdf5_path, path_in_external) - break # if succeed, not try with other paths - except OSError as e: - pass - - if result_array is None: - raise Exception(f"Failed to read h5 file. Paths tried : {hdf5_paths}") - - # print(f"\tpath_in_root : {path_in_root}") - if path_in_root.lower().endswith("points") and len(result_array) > 0 and len(result_array[0]) == 3: - crs = get_crs_obj( - context_obj=energyml_array, + if ( + self.epc is not None + and use_epc_io_h5 + and self.epc.h5_io_files is not None + and len(self.epc.h5_io_files) + ): + for h5_io in self.epc.h5_io_files: + try: + return h5_reader.read_array(h5_io, path_in_external) + except Exception as e: + logging.error(traceback.format_exc()) + pass + return self.read_external_array( + energyml_array=energyml_array, + root_obj=root_obj, + path_in_root=path_in_root, + use_epc_io_h5=False, + ) + else: + hdf5_paths = get_hdf5_path_from_external_path( + external_path_obj=energyml_array, path_in_root=path_in_root, root_obj=root_obj, - workspace=self, + epc=self.epc, ) - zincreasing_downward = is_z_reversed(crs) - # print(f"\tzincreasing_downward : {zincreasing_downward}") - if zincreasing_downward: - result_array = list(map(lambda p: [p[0], p[1], -p[2]], result_array)) + result_array = None + for hdf5_path in hdf5_paths: + try: + result_array = h5_reader.read_array( + hdf5_path, path_in_external + ) + break # if succeed, 
not try with other paths + except OSError as e: + pass + + if result_array is None: + raise Exception( + f"Failed to read h5 file. Paths tried : {hdf5_paths}" + ) + + # logging.debug(f"\tpath_in_root : {path_in_root}") + # if path_in_root.lower().endswith("points") and len(result_array) > 0 and len(result_array[0]) == 3: + # crs = None + # try: + # crs = get_crs_obj( + # context_obj=energyml_array, + # path_in_root=path_in_root, + # root_obj=root_obj, + # workspace=self, + # ) + # except ObjectNotFoundNotError as e: + # logging.error("No CRS found, not able to check zIncreasingDownward") + # logging.debug(f"\tzincreasing_downward : {zincreasing_downward}") + # zincreasing_downward = is_z_reversed(crs) - return result_array + # if zincreasing_downward: + # result_array = list(map(lambda p: [p[0], p[1], -p[2]], result_array)) + + return result_array def get_crs_obj( - context_obj: Any, - path_in_root: Optional[str] = None, - root_obj: Optional[Any] = None, - workspace: Optional[EnergymlWorkspace] = None + context_obj: Any, + path_in_root: Optional[str] = None, + root_obj: Optional[Any] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> Optional[Any]: """ Search for the CRS object related to :param:`context_obj` into the :param:`workspace` @@ -202,21 +254,28 @@ def get_crs_obj( :return: """ if workspace is None: - print("@get_crs_obj no Epc file given") + logging.error("@get_crs_obj no Epc file given") else: - crs_list = search_attribute_matching_name(context_obj, r"\.*Crs", search_in_sub_obj=True, deep_search=False) + crs_list = search_attribute_matching_name( + context_obj, r"\.*Crs", search_in_sub_obj=True, deep_search=False + ) if crs_list is not None and len(crs_list) > 0: - # print(crs_list[0]) - crs = workspace.get_object_by_identifier(get_obj_identifier(crs_list[0])) + # logging.debug(crs_list[0]) + crs = workspace.get_object_by_identifier( + get_obj_identifier(crs_list[0]) + ) if crs is None: crs = 
workspace.get_object_by_uuid(get_obj_uuid(crs_list[0])) if crs is None: + logging.error( + f"CRS {crs_list[0]} not found (or not read correctly)" + ) raise ObjectNotFoundNotError(get_obj_identifier(crs_list[0])) if crs is not None: return crs if context_obj != root_obj: - upper_path = path_in_root[:path_in_root.rindex(".")] + upper_path = path_in_root[: path_in_root.rindex(".")] if len(upper_path) > 0: return get_crs_obj( context_obj=get_object_attribute(root_obj, upper_path), @@ -262,7 +321,11 @@ def get_supported_array() -> List[str]: Return a list of the supported arrays for the use of :py:func:`energyml.utils.data.helper.read_array` function. :return: """ - return [x for x in _ARRAY_NAMES_ if get_array_reader_function(_array_name_mapping(x)) is not None] + return [ + x + for x in _ARRAY_NAMES_ + if get_array_reader_function(_array_name_mapping(x)) is not None + ] def get_not_supported_array(): @@ -270,14 +333,18 @@ def get_not_supported_array(): Return a list of the NOT supported arrays for the use of :py:func:`energyml.utils.data.helper.read_array` function. :return: """ - return [x for x in _ARRAY_NAMES_ if get_array_reader_function(_array_name_mapping(x)) is None] + return [ + x + for x in _ARRAY_NAMES_ + if get_array_reader_function(_array_name_mapping(x)) is None + ] def read_external_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List[Any]: """ Read an external array (BooleanExternalArray, BooleanHdf5Array, DoubleHdf5Array, IntegerHdf5Array, StringExternalArray ...) 
@@ -307,10 +374,10 @@ def get_array_reader_function(array_type_name: str) -> Optional[Callable]: def read_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List[Any]: """ Read an array and return a list. The array is read depending on its type. see. :py:func:`energyml.utils.data.helper.get_supported_array` @@ -333,15 +400,19 @@ def read_array( workspace=workspace, ) else: - print(f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found") - raise Exception(f"Type {array_type_name} is not supported\n\t{energyml_array}: \n\tfunction read_{snake_case(array_type_name)} not found") + logging.error( + f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found" + ) + raise Exception( + f"Type {array_type_name} is not supported\n\t{energyml_array}: \n\tfunction read_{snake_case(array_type_name)} not found" + ) def read_constant_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List[Any]: """ Read a constant array ( BooleanConstantArray, DoubleConstantArray, FloatingPointConstantArray, IntegerConstantArray ...) 
@@ -351,21 +422,21 @@ def read_constant_array( :param workspace: :return: """ - # print(f"Reading constant array\n\t{energyml_array}") + # logging.debug(f"Reading constant array\n\t{energyml_array}") value = get_object_attribute_no_verif(energyml_array, "value") count = get_object_attribute_no_verif(energyml_array, "count") - # print(f"\tValue : {[value for i in range(0, count)]}") + # logging.debug(f"\tValue : {[value for i in range(0, count)]}") return [value for i in range(0, count)] def read_xml_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List[Any]: """ Read a xml array ( BooleanXmlArray, FloatingPointXmlArray, IntegerXmlArray, StringXmlArray ...) @@ -381,10 +452,10 @@ def read_xml_array( def read_jagged_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List[Any]: """ Read a jagged array @@ -395,13 +466,17 @@ def read_jagged_array( :return: """ elements = read_array( - energyml_array=get_object_attribute_no_verif(energyml_array, "elements"), + energyml_array=get_object_attribute_no_verif( + energyml_array, "elements" + ), root_obj=root_obj, path_in_root=path_in_root + ".elements", workspace=workspace, ) cumulative_length = read_array( - energyml_array=read_array(get_object_attribute_no_verif(energyml_array, "cumulative_length")), + energyml_array=read_array( + get_object_attribute_no_verif(energyml_array, "cumulative_length") + ), root_obj=root_obj, path_in_root=path_in_root + ".cumulative_length", workspace=workspace, @@ -410,16 +485,16 @@ def read_jagged_array( res 
= [] previous = 0 for cl in cumulative_length: - res.append(elements[previous: cl]) + res.append(elements[previous:cl]) previous = cl return res def read_int_double_lattice_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ): """ Read DoubleLatticeArray or IntegerLatticeArray. @@ -439,16 +514,18 @@ def read_int_double_lattice_array( # elif len(offset) == 2: # pass # else: - raise Exception(f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported") + raise Exception( + f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported" + ) # return result def read_point3d_zvalue_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ): """ Read a Point3D2ValueArray @@ -458,7 +535,9 @@ def read_point3d_zvalue_array( :param workspace: :return: """ - supporting_geometry = get_object_attribute_no_verif(energyml_array, "supporting_geometry") + supporting_geometry = get_object_attribute_no_verif( + energyml_array, "supporting_geometry" + ) sup_geom_array = read_array( energyml_array=supporting_geometry, root_obj=root_obj, @@ -467,12 +546,14 @@ def read_point3d_zvalue_array( ) zvalues = get_object_attribute_no_verif(energyml_array, "zvalues") - zvalues_array = flatten_concatenation(read_array( - energyml_array=zvalues, - root_obj=root_obj, - path_in_root=path_in_root + ".ZValues", - workspace=workspace, - )) + zvalues_array = flatten_concatenation( + read_array( + energyml_array=zvalues, + root_obj=root_obj, + path_in_root=path_in_root + ".ZValues", + 
workspace=workspace, + ) + ) count = 0 @@ -481,16 +562,17 @@ def read_point3d_zvalue_array( sup_geom_array[i][2] = zvalues_array[i] except Exception as e: if count == 0: - print(e, f": {i} is out of bound of {len(zvalues_array)}") + logging.error(e, f": {i} is out of bound of {len(zvalues_array)}") count = count + 1 return sup_geom_array + def read_point3d_from_representation_lattice_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ): """ Read a Point3DFromRepresentationLatticeArray. @@ -503,16 +585,24 @@ def read_point3d_from_representation_lattice_array( :param workspace: :return: """ - supporting_rep_identifier = get_obj_identifier(get_object_attribute_no_verif(energyml_array, "supporting_representation")) - # print(f"energyml_array : {energyml_array}\n\t{supporting_rep_identifier}") - supporting_rep = workspace.get_object_by_identifier(supporting_rep_identifier) + supporting_rep_identifier = get_obj_identifier( + get_object_attribute_no_verif( + energyml_array, "supporting_representation" + ) + ) + # logging.debug(f"energyml_array : {energyml_array}\n\t{supporting_rep_identifier}") + supporting_rep = workspace.get_object_by_identifier( + supporting_rep_identifier + ) # TODO chercher un pattern \.*patch\.*.[d]+ pour trouver le numero du patch dans le path_in_root puis lire le patch - # print(f"path_in_root {path_in_root}") + # logging.debug(f"path_in_root {path_in_root}") result = [] if "grid2d" in str(type(supporting_rep)).lower(): - patch_path, patch = search_attribute_matching_name_with_path(supporting_rep, "Grid2dPatch")[0] + patch_path, patch = search_attribute_matching_name_with_path( + supporting_rep, "Grid2dPatch" + )[0] points = read_grid2d_patch( patch=patch, grid2d=supporting_rep, @@ -523,17 +613,22 @@ def 
read_point3d_from_representation_lattice_array( result = points else: - raise Exception(f"Not supported type {type(energyml_array)} for object {type(root_obj)}") + raise Exception( + f"Not supported type {type(energyml_array)} for object {type(root_obj)}" + ) # pour trouver les infos qu'il faut return result + def read_grid2d_patch( - patch: Any, - grid2d: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + patch: Any, + grid2d: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List: - points_path, points_obj = search_attribute_matching_name_with_path(patch, "Geometry.Points")[0] + points_path, points_obj = search_attribute_matching_name_with_path( + patch, "Geometry.Points" + )[0] return read_array( energyml_array=points_obj, @@ -542,11 +637,12 @@ def read_grid2d_patch( workspace=workspace, ) + def read_point3d_lattice_array( - energyml_array: Any, - root_obj: Optional[Any] = None, - path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None + energyml_array: Any, + root_obj: Optional[Any] = None, + path_in_root: Optional[str] = None, + workspace: Optional[EnergymlWorkspace] = None, ) -> List: """ Read a Point3DLatticeArray. 
@@ -560,7 +656,9 @@ def read_point3d_lattice_array( :return: """ result = [] - origin = _point_as_array(get_object_attribute_no_verif(energyml_array, "origin")) + origin = _point_as_array( + get_object_attribute_no_verif(energyml_array, "origin") + ) offset = get_object_attribute_no_verif(energyml_array, "offset") if len(offset) == 2: @@ -581,31 +679,53 @@ def read_point3d_lattice_array( current_path=path_in_root, ) - crs = get_crs_obj( - context_obj=energyml_array, - path_in_root=path_in_root, - root_obj=root_obj, - workspace=workspace, - ) + crs = None + try: + crs = get_crs_obj( + context_obj=energyml_array, + path_in_root=path_in_root, + root_obj=root_obj, + workspace=workspace, + ) + except ObjectNotFoundNotError as e: + logging.error( + "No CRS found, not able to check zIncreasingDownward" + ) + zincreasing_downward = is_z_reversed(crs) - slowest_vec = _point_as_array(get_object_attribute_no_verif(slowest, "offset")) - slowest_spacing = read_array(get_object_attribute_no_verif(slowest, "spacing")) - slowest_table = list(map(lambda x: prod_n_tab(x, slowest_vec), slowest_spacing)) + slowest_vec = _point_as_array( + get_object_attribute_no_verif(slowest, "offset") + ) + slowest_spacing = read_array( + get_object_attribute_no_verif(slowest, "spacing") + ) + slowest_table = list( + map(lambda x: prod_n_tab(x, slowest_vec), slowest_spacing) + ) - fastest_vec = _point_as_array(get_object_attribute_no_verif(fastest, "offset")) - fastest_spacing = read_array(get_object_attribute_no_verif(fastest, "spacing")) - fastest_table = list(map(lambda x: prod_n_tab(x, fastest_vec), fastest_spacing)) + fastest_vec = _point_as_array( + get_object_attribute_no_verif(fastest, "offset") + ) + fastest_spacing = read_array( + get_object_attribute_no_verif(fastest, "spacing") + ) + fastest_table = list( + map(lambda x: prod_n_tab(x, fastest_vec), fastest_spacing) + ) slowest_size = len(slowest_table) fastest_size = len(fastest_table) if len(crs_sa_count) > 0 and len(crs_fa_count) > 0: 
if ( - (crs_sa_count[0] == fastest_size and crs_fa_count[0] == slowest_size) - or (crs_sa_count[0] == fastest_size - 1 and crs_fa_count[0] == slowest_size - 1) + crs_sa_count[0] == fastest_size + and crs_fa_count[0] == slowest_size + ) or ( + crs_sa_count[0] == fastest_size - 1 + and crs_fa_count[0] == slowest_size - 1 ): - print("reversing order") + logging.debug("reversing order") # if offset were given in the wrong order tmp_table = slowest_table slowest_table = fastest_table @@ -631,21 +751,33 @@ def read_point3d_lattice_array( else: previous_value = result[j - 1] if zincreasing_downward: - result.append(sum_lists(previous_value, slowest_table[i - 1])) + result.append( + sum_lists(previous_value, slowest_table[i - 1]) + ) else: - result.append(sum_lists(previous_value, fastest_table[j - 1])) + result.append( + sum_lists(previous_value, fastest_table[j - 1]) + ) else: if i > 0: - prev_line_idx = (i - 1) * fastest_size # numero de ligne precedent + prev_line_idx = ( + i - 1 + ) * fastest_size # numero de ligne precedent previous_value = result[prev_line_idx] if zincreasing_downward: - result.append(sum_lists(previous_value, fastest_table[j - 1])) + result.append( + sum_lists(previous_value, fastest_table[j - 1]) + ) else: - result.append(sum_lists(previous_value, slowest_table[i - 1])) + result.append( + sum_lists(previous_value, slowest_table[i - 1]) + ) else: result.append(previous_value) else: - raise Exception(f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported") + raise Exception( + f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported" + ) return result @@ -656,4 +788,4 @@ def read_point3d_lattice_array( # path_in_root: Optional[str] = None, # workspace: Optional[EnergymlWorkspace] = None # ): -# print(energyml_array) +# logging.debug(energyml_array) diff --git a/energyml-utils/src/energyml/utils/data/mesh.py b/energyml-utils/src/energyml/utils/data/mesh.py index 4bf51e6..702313f 100644 --- 
a/energyml-utils/src/energyml/utils/data/mesh.py +++ b/energyml-utils/src/energyml/utils/data/mesh.py @@ -1,6 +1,7 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 import inspect +import logging import os import re import sys @@ -10,12 +11,26 @@ from typing import List, Optional, Any, Callable from .hdf import HDF5FileReader -from .helper import read_array, read_grid2d_patch, is_z_reversed, EnergymlWorkspace, get_crs_obj, EPCWorkspace +from .helper import ( + read_array, + read_grid2d_patch, + is_z_reversed, + EnergymlWorkspace, + get_crs_obj, + EPCWorkspace, +) from ..epc import Epc, get_obj_identifier, gen_energyml_object_path -from ..introspection import search_attribute_matching_name, \ - search_attribute_matching_name_with_path, snake_case, get_object_attribute - -_FILE_HEADER: bytes = b"# file exported by energyml-utils python module (Geosiris)\n" +from ..exception import ObjectNotFoundNotError +from ..introspection import ( + search_attribute_matching_name, + search_attribute_matching_name_with_path, + snake_case, + get_object_attribute, +) + +_FILE_HEADER: bytes = ( + b"# file exported by energyml-utils python module (Geosiris)\n" +) Point = list[float] @@ -27,13 +42,9 @@ class MeshFileFormat(Enum): @dataclass class AbstractMesh: - energyml_object: Any = field( - default=None - ) + energyml_object: Any = field(default=None) - crs_object: Any = field( - default=None - ) + crs_object: Any = field(default=None) point_list: List[Point] = field( default_factory=list, @@ -118,8 +129,7 @@ def _mesh_name_mapping(array_type_name: str) -> str: def read_mesh_object( - energyml_object: Any, - workspace: Optional[EnergymlWorkspace] = None + energyml_object: Any, workspace: Optional[EnergymlWorkspace] = None ) -> List[AbstractMesh]: """ Read and "meshable" object. If :param:`energyml_object` is not supported, an exception will be raised. 
@@ -138,12 +148,17 @@ def read_mesh_object( workspace=workspace, ) else: - print(f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found") + logging.error( + f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found" + ) raise Exception( - f"Type {array_type_name} is not supported\n\t{energyml_object}: \n\tfunction read_{snake_case(array_type_name)} not found") + f"Type {array_type_name} is not supported\n\t{energyml_object}: \n\tfunction read_{snake_case(array_type_name)} not found" + ) -def read_point_representation(energyml_object: Any, workspace: EnergymlWorkspace) -> List[PointSetMesh]: +def read_point_representation( + energyml_object: Any, workspace: EnergymlWorkspace +) -> List[PointSetMesh]: # pt_geoms = search_attribute_matching_type(point_set, "AbstractGeometry") h5_reader = HDF5FileReader() @@ -151,8 +166,12 @@ def read_point_representation(energyml_object: Any, workspace: EnergymlWorkspace patch_idx = 0 # resqml 2.0.1 - for points_path_in_obj, points_obj in search_attribute_matching_name_with_path(energyml_object, - "NodePatch.[\d]+.Geometry.Points"): + for ( + points_path_in_obj, + points_obj, + ) in search_attribute_matching_name_with_path( + energyml_object, "NodePatch.[\d]+.Geometry.Points" + ): points = read_array( energyml_array=points_obj, root_obj=energyml_object, @@ -160,25 +179,35 @@ def read_point_representation(energyml_object: Any, workspace: EnergymlWorkspace workspace=workspace, ) - crs = get_crs_obj( - context_obj=points_obj, - path_in_root=points_path_in_obj, - root_obj=energyml_object, - workspace=workspace, - ) + crs = None + try: + crs = get_crs_obj( + context_obj=points_obj, + path_in_root=points_path_in_obj, + root_obj=energyml_object, + workspace=workspace, + ) + except ObjectNotFoundNotError as e: + pass if points is not None: - meshes.append(PointSetMesh( - identifier=f"NodePatch num {patch_idx}", - energyml_object=energyml_object, - crs_object=crs, 
- point_list=points - )) + meshes.append( + PointSetMesh( + identifier=f"NodePatch num {patch_idx}", + energyml_object=energyml_object, + crs_object=crs, + point_list=points, + ) + ) patch_idx = patch_idx + 1 # resqml 2.2 - for points_path_in_obj, points_obj in search_attribute_matching_name_with_path(energyml_object, - "NodePatchGeometry.[\d]+.Points"): + for ( + points_path_in_obj, + points_obj, + ) in search_attribute_matching_name_with_path( + energyml_object, "NodePatchGeometry.[\d]+.Points" + ): points = read_array( energyml_array=points_obj, root_obj=energyml_object, @@ -186,37 +215,48 @@ def read_point_representation(energyml_object: Any, workspace: EnergymlWorkspace workspace=workspace, ) - crs = get_crs_obj( - context_obj=points_obj, - path_in_root=points_path_in_obj, - root_obj=energyml_object, - workspace=workspace, - ) + crs = None + try: + crs = get_crs_obj( + context_obj=points_obj, + path_in_root=points_path_in_obj, + root_obj=energyml_object, + workspace=workspace, + ) + except ObjectNotFoundNotError as e: + pass if points is not None: - meshes.append(PointSetMesh( - identifier=f"NodePatchGeometry num {patch_idx}", - energyml_object=energyml_object, - crs_object=crs, - point_list=points - )) + meshes.append( + PointSetMesh( + identifier=f"NodePatchGeometry num {patch_idx}", + energyml_object=energyml_object, + crs_object=crs, + point_list=points, + ) + ) patch_idx = patch_idx + 1 return meshes -def read_polyline_representation(energyml_object: Any, workspace: EnergymlWorkspace) -> List[PolylineSetMesh]: +def read_polyline_representation( + energyml_object: Any, workspace: EnergymlWorkspace +) -> List[PolylineSetMesh]: # pt_geoms = search_attribute_matching_type(point_set, "AbstractGeometry") h5_reader = HDF5FileReader() meshes = [] patch_idx = 0 - for patch_path_in_obj, patch in ( - search_attribute_matching_name_with_path(energyml_object, "NodePatch") - + search_attribute_matching_name_with_path(energyml_object, "LinePatch.[\\d]+") + for 
patch_path_in_obj, patch in search_attribute_matching_name_with_path( + energyml_object, "NodePatch" + ) + search_attribute_matching_name_with_path( + energyml_object, "LinePatch.[\\d]+" ): - points_path, points_obj = search_attribute_matching_name_with_path(patch, "Geometry.Points")[0] + points_path, points_obj = search_attribute_matching_name_with_path( + patch, "Geometry.Points" + )[0] points = read_array( energyml_array=points_obj, root_obj=energyml_object, @@ -224,16 +264,27 @@ def read_polyline_representation(energyml_object: Any, workspace: EnergymlWorksp workspace=workspace, ) - crs = get_crs_obj( - context_obj=points_obj, - path_in_root=patch_path_in_obj + points_path, - root_obj=energyml_object, - workspace=workspace, - ) + crs = None + try: + crs = get_crs_obj( + context_obj=points_obj, + path_in_root=patch_path_in_obj + points_path, + root_obj=energyml_object, + workspace=workspace, + ) + except ObjectNotFoundNotError as e: + pass close_poly = None try: - close_poly_path, close_poly_obj = search_attribute_matching_name_with_path(patch, "ClosedPolylines")[0] + ( + close_poly_path, + close_poly_obj, + ) = search_attribute_matching_name_with_path( + patch, "ClosedPolylines" + )[ + 0 + ] close_poly = read_array( energyml_array=close_poly_obj, root_obj=energyml_object, @@ -245,19 +296,30 @@ def read_polyline_representation(energyml_object: Any, workspace: EnergymlWorksp point_indices = [] try: - node_count_per_poly_path_in_obj, node_count_per_poly = \ - search_attribute_matching_name_with_path(patch, "NodeCountPerPolyline")[0] + ( + node_count_per_poly_path_in_obj, + node_count_per_poly, + ) = search_attribute_matching_name_with_path( + patch, "NodeCountPerPolyline" + )[ + 0 + ] node_counts_list = read_array( energyml_array=node_count_per_poly, root_obj=energyml_object, - path_in_root=patch_path_in_obj + node_count_per_poly_path_in_obj, + path_in_root=patch_path_in_obj + + node_count_per_poly_path_in_obj, workspace=workspace, ) idx = 0 poly_idx = 0 for 
nb_node in node_counts_list: point_indices.append([x for x in range(idx, idx + nb_node)]) - if close_poly is not None and len(close_poly) > poly_idx and close_poly[poly_idx]: + if ( + close_poly is not None + and len(close_poly) > poly_idx + and close_poly[poly_idx] + ): point_indices[len(point_indices) - 1].append(idx) idx = idx + nb_node poly_idx = poly_idx + 1 @@ -270,33 +332,44 @@ def read_polyline_representation(energyml_object: Any, workspace: EnergymlWorksp point_indices = [list(range(len(points)))] if len(points) > 0: - meshes.append(PolylineSetMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", - energyml_object=energyml_object, - crs_object=crs, - point_list=points, - line_indices=point_indices - )) + meshes.append( + PolylineSetMesh( + identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + energyml_object=energyml_object, + crs_object=crs, + point_list=points, + line_indices=point_indices, + ) + ) patch_idx = patch_idx + 1 return meshes -def read_grid2d_representation(energyml_object: Any, workspace: Optional[EnergymlWorkspace] = None, keep_holes=False) -> List[SurfaceMesh]: +def read_grid2d_representation( + energyml_object: Any, + workspace: Optional[EnergymlWorkspace] = None, + keep_holes=False, +) -> List[SurfaceMesh]: # h5_reader = HDF5FileReader() meshes = [] patch_idx = 0 - for patch_path, patch in search_attribute_matching_name_with_path(energyml_object, "Grid2dPatch"): - crs = get_crs_obj( - context_obj=patch, - path_in_root=patch_path, - root_obj=energyml_object, - workspace=workspace, - ) - - reverse_z_values = is_z_reversed(crs) + for patch_path, patch in search_attribute_matching_name_with_path( + energyml_object, "Grid2dPatch" + ): + reverse_z_values = False + try: + crs = get_crs_obj( + context_obj=patch, + path_in_root=patch_path, + root_obj=energyml_object, + workspace=workspace, + ) + reverse_z_values = is_z_reversed(crs) + except ObjectNotFoundNotError as e: + pass points = read_grid2d_patch( 
patch=patch, @@ -307,16 +380,20 @@ def read_grid2d_representation(energyml_object: Any, workspace: Optional[Energym fa_count = search_attribute_matching_name(patch, "FastestAxisCount") if fa_count is None: - fa_count = search_attribute_matching_name(energyml_object, "FastestAxisCount") + fa_count = search_attribute_matching_name( + energyml_object, "FastestAxisCount" + ) sa_count = search_attribute_matching_name(patch, "SlowestAxisCount") if sa_count is None: - sa_count = search_attribute_matching_name(energyml_object, "SlowestAxisCount") + sa_count = search_attribute_matching_name( + energyml_object, "SlowestAxisCount" + ) fa_count = fa_count[0] sa_count = sa_count[0] - # print(f"sa_count {sa_count} fa_count {fa_count}") + # logging.debug(f"sa_count {sa_count} fa_count {fa_count}") points_no_nan = [] @@ -327,13 +404,13 @@ def read_grid2d_representation(energyml_object: Any, workspace: Optional[Energym if p[2] != p[2]: # a NaN points[i][2] = 0 elif reverse_z_values: - points[i][2] = - points[i][2] + points[i][2] = -points[i][2] else: for i in range(len(points)): p = points[i] if p[2] == p[2]: # not a NaN if reverse_z_values: - points[i][2] = - points[i][2] + points[i][2] = -points[i][2] indice_to_final_indice[i] = len(points_no_nan) points_no_nan.append(p) @@ -347,13 +424,13 @@ def read_grid2d_representation(energyml_object: Any, workspace: Optional[Energym sa_count = sa_count + 1 fa_count = fa_count + 1 - # print(f"sa_count {sa_count} fa_count {fa_count} : {sa_count*fa_count} - {len(points)} ") + # logging.debug(f"sa_count {sa_count} fa_count {fa_count} : {sa_count*fa_count} - {len(points)} ") for sa in range(sa_count - 1): for fa in range(fa_count - 1): line = sa * fa_count # if sa+1 == int(sa_count / 2) and fa == int(fa_count / 2): - # print( + # logging.debug( # "\n\t", (line + fa), " : ", (line + fa) in indice_to_final_indice, # "\n\t", (line + fa + 1), " : ", (line + fa + 1) in indice_to_final_indice, # "\n\t", (line + fa_count + fa + 1), " : ", (line + 
fa_count + fa + 1) in indice_to_final_indice, @@ -369,10 +446,10 @@ def read_grid2d_representation(energyml_object: Any, workspace: Optional[Energym ] ) elif ( - (line + fa) in indice_to_final_indice - and (line + fa + 1) in indice_to_final_indice - and (line + fa_count + fa + 1) in indice_to_final_indice - and (line + fa_count + fa) in indice_to_final_indice + (line + fa) in indice_to_final_indice + and (line + fa + 1) in indice_to_final_indice + and (line + fa_count + fa + 1) in indice_to_final_indice + and (line + fa_count + fa) in indice_to_final_indice ): indices.append( [ @@ -382,34 +459,46 @@ def read_grid2d_representation(energyml_object: Any, workspace: Optional[Energym indice_to_final_indice[line + fa_count + fa], ] ) - # print(indices) - meshes.append(SurfaceMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", - energyml_object=energyml_object, - crs_object=None, - point_list=points if keep_holes else points_no_nan, - faces_indices=indices - )) + # logging.debug(indices) + meshes.append( + SurfaceMesh( + identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + energyml_object=energyml_object, + crs_object=None, + point_list=points if keep_holes else points_no_nan, + faces_indices=indices, + ) + ) patch_idx = patch_idx + 1 return meshes -def read_triangulated_set_representation(energyml_object: Any, workspace: EnergymlWorkspace) -> List[SurfaceMesh]: +def read_triangulated_set_representation( + energyml_object: Any, workspace: EnergymlWorkspace +) -> List[SurfaceMesh]: meshes = [] point_offset = 0 patch_idx = 0 - for patch_path, patch in search_attribute_matching_name_with_path(energyml_object, "\\.*Patch"): - crs = get_crs_obj( - context_obj=patch, - path_in_root=patch_path, - root_obj=energyml_object, - workspace=workspace, - ) + for patch_path, patch in search_attribute_matching_name_with_path( + energyml_object, "\\.*Patch" + ): + crs = None + try: + crs = get_crs_obj( + context_obj=patch, + 
path_in_root=patch_path, + root_obj=energyml_object, + workspace=workspace, + ) + except ObjectNotFoundNotError as e: + pass point_list: List[Point] = [] - for point_path, point_obj in search_attribute_matching_name_with_path(patch, "Geometry.Points"): + for point_path, point_obj in search_attribute_matching_name_with_path( + patch, "Geometry.Points" + ): point_list = point_list + read_array( energyml_array=point_obj, root_obj=energyml_object, @@ -418,21 +507,28 @@ def read_triangulated_set_representation(energyml_object: Any, workspace: Energy ) triangles_list: List[List[int]] = [] - for triangles_path, triangles_obj in search_attribute_matching_name_with_path(patch, "Triangles"): + for ( + triangles_path, + triangles_obj, + ) in search_attribute_matching_name_with_path(patch, "Triangles"): triangles_list = triangles_list + read_array( energyml_array=triangles_obj, root_obj=energyml_object, path_in_root=patch_path + triangles_path, workspace=workspace, ) - triangles_list = list(map(lambda tr: [ti - point_offset for ti in tr], triangles_list)) - meshes.append(SurfaceMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", - energyml_object=energyml_object, - crs_object=crs, - point_list=point_list, - faces_indices=triangles_list - )) + triangles_list = list( + map(lambda tr: [ti - point_offset for ti in tr], triangles_list) + ) + meshes.append( + SurfaceMesh( + identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + energyml_object=energyml_object, + crs_object=crs, + point_list=point_list, + faces_indices=triangles_list, + ) + ) point_offset = point_offset + len(point_list) @@ -455,7 +551,7 @@ def export_off(mesh_list: List[AbstractMesh], out: BytesIO): out.write(b"OFF\n") out.write(_FILE_HEADER) - out.write(f"{nb_points} {nb_faces} {nb_edges}\n".encode('utf-8')) + out.write(f"{nb_points} {nb_faces} {nb_edges}\n".encode("utf-8")) points_io = BytesIO() faces_io = BytesIO() @@ -477,33 +573,41 @@ def export_off(mesh_list: 
List[AbstractMesh], out: BytesIO): def export_off_part( - off_point_part: BytesIO, - off_face_part: BytesIO, - points: List[List[float]], - indices: List[List[int]], - point_offset: Optional[int] = 0, - colors: Optional[List[List[int]]] = None + off_point_part: BytesIO, + off_face_part: BytesIO, + points: List[List[float]], + indices: List[List[int]], + point_offset: Optional[int] = 0, + colors: Optional[List[List[int]]] = None, ) -> None: for p in points: for pi in p: - off_point_part.write(f"{pi} ".encode('utf-8')) + off_point_part.write(f"{pi} ".encode("utf-8")) off_point_part.write(b"\n") cpt = 0 for face in indices: if len(face) > 1: - off_face_part.write(f"{len(face)} ".encode('utf-8')) + off_face_part.write(f"{len(face)} ".encode("utf-8")) for pi in face: - off_face_part.write(f"{pi + point_offset} ".encode('utf-8')) - - if colors is not None and len(colors) > cpt and colors[cpt] is not None and len(colors[cpt]) > 0: + off_face_part.write(f"{pi + point_offset} ".encode("utf-8")) + + if ( + colors is not None + and len(colors) > cpt + and colors[cpt] is not None + and len(colors[cpt]) > 0 + ): for col in colors[cpt]: - off_face_part.write(f"{col} ".encode('utf-8')) + off_face_part.write(f"{col} ".encode("utf-8")) off_face_part.write(b"\n") + cpt += 1 -def export_obj(mesh_list: List[AbstractMesh], out: BytesIO, obj_name: Optional[str] = None): +def export_obj( + mesh_list: List[AbstractMesh], out: BytesIO, obj_name: Optional[str] = None +): """ Export an :class:`AbstractMesh` into obj format. 
@@ -513,14 +617,18 @@ def export_obj(mesh_list: List[AbstractMesh], out: BytesIO, obj_name: Optional[s :param obj_name: :return: """ - out.write(f"# Generated by energyml-utils a Geosiris python module\n\n".encode('utf-8')) + out.write( + f"# Generated by energyml-utils a Geosiris python module\n\n".encode( + "utf-8" + ) + ) if obj_name is not None: - out.write(f"o {obj_name}\n\n".encode('utf-8')) + out.write(f"o {obj_name}\n\n".encode("utf-8")) point_offset = 0 for m in mesh_list: - out.write(f"g {m.identifier}\n\n".encode('utf-8')) + out.write(f"g {m.identifier}\n\n".encode("utf-8")) _export_obj_elt( off_point_part=out, off_face_part=out, @@ -528,20 +636,20 @@ def export_obj(mesh_list: List[AbstractMesh], out: BytesIO, obj_name: Optional[s indices=m.get_indices(), point_offset=point_offset, colors=[], - elt_letter="l" if isinstance(m, PolylineSetMesh) else "f" + elt_letter="l" if isinstance(m, PolylineSetMesh) else "f", ) point_offset = point_offset + len(m.point_list) - out.write("\n".encode('utf-8')) + out.write("\n".encode("utf-8")) def _export_obj_elt( - off_point_part: BytesIO, - off_face_part: BytesIO, - points: List[List[float]], - indices: List[List[int]], - point_offset: Optional[int] = 0, - colors: Optional[List[List[int]]] = None, - elt_letter: str = "f", + off_point_part: BytesIO, + off_face_part: BytesIO, + points: List[List[float]], + indices: List[List[int]], + point_offset: Optional[int] = 0, + colors: Optional[List[List[int]]] = None, + elt_letter: str = "f", ) -> None: """ @@ -557,14 +665,20 @@ def _export_obj_elt( offset_obj = 1 # OBJ point indices starts at 1 not 0 for p in points: if len(p) > 0: - off_point_part.write(f"v {' '.join(list(map(lambda xyz: str(xyz), p)))}\n".encode('utf-8')) + off_point_part.write( + f"v {' '.join(list(map(lambda xyz: str(xyz), p)))}\n".encode( + "utf-8" + ) + ) # cpt = 0 for face in indices: if len(face) > 1: - off_point_part.write( + off_face_part.write( f"{elt_letter} {' '.join(list(map(lambda x: str(x + 
point_offset + offset_obj), face)))}\n".encode( - 'utf-8')) + "utf-8" + ) + ) # if colors is not None and len(colors) > cpt and colors[cpt] is not None and len(colors[cpt]) > 0: # for col in colors[cpt]: @@ -574,14 +688,18 @@ def _export_obj_elt( def export_multiple_data( - epc_path: str, - uuid_list: List[str], - output_folder_path: str, - output_file_path_suffix: str = "", - file_format: MeshFileFormat = MeshFileFormat.OBJ + epc_path: str, + uuid_list: List[str], + output_folder_path: str, + output_file_path_suffix: str = "", + file_format: MeshFileFormat = MeshFileFormat.OBJ, ): epc = Epc.read_file(epc_path) + # with open(epc_path.replace(".epc", ".h5"), "rb") as fh: + # buf = BytesIO(fh.read()) + # epc.h5_io_files.append(buf) + try: os.makedirs(output_folder_path, exist_ok=True) except OSError: @@ -589,12 +707,14 @@ def export_multiple_data( for uuid in uuid_list: energyml_obj = epc.get_object_by_uuid(uuid)[0] - file_name = (f"{gen_energyml_object_path(energyml_obj)}_" - f"[{get_object_attribute(energyml_obj, 'citation.title')}]" - f"{output_file_path_suffix}" - f".{file_format.value}") + file_name = ( + f"{gen_energyml_object_path(energyml_obj)}_" + f"[{get_object_attribute(energyml_obj, 'citation.title')}]" + f"{output_file_path_suffix}" + f".{file_format.value}" + ) file_path = f"{output_folder_path}/{file_name}" - print(f"Exporting : {file_path}") + logging.debug(f"Exporting : {file_path}") mesh_list = read_mesh_object( energyml_object=energyml_obj, workspace=EPCWorkspace(epc=epc), @@ -612,4 +732,4 @@ def export_multiple_data( out=f, ) else: - print(f"Code is not written for format {file_format}") + logging.error(f"Code is not written for format {file_format}") diff --git a/energyml-utils/src/energyml/utils/epc.py b/energyml-utils/src/energyml/utils/epc.py index 115bd19..bc7b3e4 100644 --- a/energyml-utils/src/energyml/utils/epc.py +++ b/energyml-utils/src/energyml/utils/epc.py @@ -5,35 +5,56 @@ """ import datetime +import logging import re +import 
traceback import zipfile from dataclasses import dataclass, field from enum import Enum from io import BytesIO from typing import List, Any, Union, Dict, Callable, Optional, Tuple -from energyml.opc.opc import CoreProperties, Relationships, Types, Default, Relationship, Override, Created, Creator, \ - Identifier, Keywords1 +from energyml.opc.opc import ( + CoreProperties, + Relationships, + Types, + Default, + Relationship, + Override, + Created, + Creator, + Identifier, + Keywords1, +) from xsdata.formats.dataclass.models.generics import DerivedElement +from .constants import RELS_CONTENT_TYPE, RELS_FOLDER_NAME from .introspection import ( get_class_from_content_type, - get_obj_type, search_attribute_matching_type, get_obj_version, get_obj_uuid, - get_object_type_for_file_path_from_class, get_content_type_from_class, get_direct_dor_list, epoch_to_date, epoch, - gen_uuid + get_obj_type, + search_attribute_matching_type, + get_obj_version, + get_obj_uuid, + get_object_type_for_file_path_from_class, + get_content_type_from_class, + get_direct_dor_list, + epoch_to_date, + epoch, + gen_uuid, + get_obj_identifier, ) from .manager import get_class_pkg, get_class_pkg_version from .serialization import ( - serialize_xml, read_energyml_xml_str, read_energyml_xml_bytes + serialize_xml, + read_energyml_xml_str, + read_energyml_xml_bytes, ) from .xml import is_energyml_content_type -RELS_CONTENT_TYPE = "application/vnd.openxmlformats-package.core-properties+xml" -RELS_FOLDER_NAME = "_rels" - class EpcExportVersion(Enum): """EPC export version.""" + #: Classical export CLASSIC = 1 #: Export with objet path sorted by package (eml/resqml/witsml/prodml) @@ -95,13 +116,12 @@ class Epc: """ A class that represent an EPC file content """ + # content_type: List[str] = field( # default_factory=list, # ) - export_version: EpcExportVersion = field( - default=EpcExportVersion.CLASSIC - ) + export_version: EpcExportVersion = field(default=EpcExportVersion.CLASSIC) core_props: 
CoreProperties = field(default=None) @@ -115,11 +135,16 @@ class Epc: default_factory=list, ) - """ A list of external files. It ca be used to link hdf5 files """ + """ A list of external files. It can be used to link hdf5 files """ external_files_path: List[str] = field( default_factory=list, ) + """ A list of h5 files stored in memory. (Usefull for Cloud services that doesn't work with local files """ + h5_io_files: List[BytesIO] = field( + default_factory=list, + ) + """ Additional rels for objects. Key is the object (same than in @energyml_objects) and value is a list of RelationShip. This can be used to link an HDF5 to an ExternalPartReference in resqml 2.0.1 @@ -132,14 +157,14 @@ class Epc: """ Epc file path. Used when loaded from a local file or for export """ - epc_file_path: Optional[str] = field( - default=None - ) + epc_file_path: Optional[str] = field(default=None) def __str__(self): return ( - "EPC file (" + str(self.export_version) + ") " - + f"{len(self.energyml_objects)} energyml objects and {len(self.raw_files)} other files {[f.path for f in self.raw_files]}" + "EPC file (" + + str(self.export_version) + + ") " + + f"{len(self.energyml_objects)} energyml objects and {len(self.raw_files)} other files {[f.path for f in self.raw_files]}" # + f"\n{[serialize_json(ar) for ar in self.additional_rels]}" ) @@ -159,16 +184,22 @@ def gen_opc_content_type(self) -> Types: ct.override = [] for e_obj in self.energyml_objects: - ct.override.append(Override( - content_type=get_content_type_from_class(type(e_obj)), - part_name=gen_energyml_object_path(e_obj, self.export_version), - )) + ct.override.append( + Override( + content_type=get_content_type_from_class(type(e_obj)), + part_name=gen_energyml_object_path( + e_obj, self.export_version + ), + ) + ) if self.core_props is not None: - ct.override.append(Override( - content_type=get_content_type_from_class(self.core_props), - part_name=gen_core_props_path(self.export_version), - )) + ct.override.append( + Override( 
+ content_type=get_content_type_from_class(self.core_props), + part_name=gen_core_props_path(self.export_version), + ) + ) return ct @@ -191,41 +222,65 @@ def export_io(self) -> BytesIO: """ zip_buffer = BytesIO() - with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file: + with zipfile.ZipFile( + zip_buffer, "a", zipfile.ZIP_DEFLATED, False + ) as zip_file: # CoreProps if self.core_props is None: self.core_props = CoreProperties( created=Created(any_element=epoch_to_date(epoch())), - creator=Creator(any_element="energyml-utils python module (Geosiris)"), - identifier=Identifier(any_element=f"urn:uuid:{gen_uuid()}"), + creator=Creator( + any_element="energyml-utils python module (Geosiris)" + ), + identifier=Identifier( + any_element=f"urn:uuid:{gen_uuid()}" + ), keywords=Keywords1( lang="en", - content=["generated;Geosiris;python;energyml-utils"] + content=["generated;Geosiris;python;energyml-utils"], ), - version="1.0" + version="1.0", ) - zip_info_core = zipfile.ZipInfo(filename=gen_core_props_path(self.export_version), - date_time=datetime.datetime.now().timetuple()[:6]) + zip_info_core = zipfile.ZipInfo( + filename=gen_core_props_path(self.export_version), + date_time=datetime.datetime.now().timetuple()[:6], + ) data = serialize_xml(self.core_props) zip_file.writestr(zip_info_core, data) # Energyml objects for e_obj in self.energyml_objects: e_path = gen_energyml_object_path(e_obj, self.export_version) - zip_info = zipfile.ZipInfo(filename=e_path, date_time=datetime.datetime.now().timetuple()[:6]) + zip_info = zipfile.ZipInfo( + filename=e_path, + date_time=datetime.datetime.now().timetuple()[:6], + ) data = serialize_xml(e_obj) zip_file.writestr(zip_info, data) # Rels for rels_path, rels in self.compute_rels().items(): - zip_info = zipfile.ZipInfo(filename=rels_path, date_time=datetime.datetime.now().timetuple()[:6]) + zip_info = zipfile.ZipInfo( + filename=rels_path, + date_time=datetime.datetime.now().timetuple()[:6], + ) data = 
serialize_xml(rels) zip_file.writestr(zip_info, data) + # Other files: + for raw in self.raw_files: + zip_info = zipfile.ZipInfo( + filename=raw.path, + date_time=datetime.datetime.now().timetuple()[:6], + ) + zip_file.writestr(zip_info, raw.content.read()) + # ContentType - zip_info_ct = zipfile.ZipInfo(filename=get_epc_content_type_path(), - date_time=datetime.datetime.now().timetuple()[:6]) + zip_info_ct = zipfile.ZipInfo( + filename=get_epc_content_type_path(), + date_time=datetime.datetime.now().timetuple()[:6], + ) data = serialize_xml(self.gen_opc_content_type()) zip_file.writestr(zip_info_ct, data) @@ -242,10 +297,13 @@ def compute_rels(self) -> Dict[str, Relationships]: rels = { obj_id: [ Relationship( - target=gen_energyml_object_path(target_obj, self.export_version), + target=gen_energyml_object_path( + target_obj, self.export_version + ), type_value=EPCRelsRelationshipType.DESTINATION_OBJECT.get_type(), id=f"_{obj_id}_{get_obj_type(target_obj)}_{get_obj_identifier(target_obj)}", - ) for target_obj in target_obj_list + ) + for target_obj in target_obj_list ] for obj_id, target_obj_list in dor_relation.items() } @@ -255,22 +313,31 @@ def compute_rels(self) -> Dict[str, Relationships]: if obj_id not in rels: rels[obj_id] = [] for target_obj in get_direct_dor_list(obj): - rels[obj_id].append(Relationship( - target=gen_energyml_object_path(target_obj, self.export_version), - type_value=EPCRelsRelationshipType.SOURCE_OBJECT.get_type(), - id=f"_{obj_id}_{get_obj_type(target_obj)}_{get_obj_identifier(target_obj)}", - )) + rels[obj_id].append( + Relationship( + target=gen_energyml_object_path( + target_obj, self.export_version + ), + type_value=EPCRelsRelationshipType.SOURCE_OBJECT.get_type(), + id=f"_{obj_id}_{get_obj_type(target_obj)}_{get_obj_identifier(target_obj)}", + ) + ) map_obj_id_to_obj = { - get_obj_identifier(obj): obj - for obj in self.energyml_objects + get_obj_identifier(obj): obj for obj in self.energyml_objects } obj_rels = { - 
gen_rels_path(energyml_object=map_obj_id_to_obj.get(obj_id), - export_version=self.export_version): Relationships( - relationship=obj_rels + (self.additional_rels[obj_id] if obj_id in self.additional_rels else []), - + gen_rels_path( + energyml_object=map_obj_id_to_obj.get(obj_id), + export_version=self.export_version, + ): Relationships( + relationship=obj_rels + + ( + self.additional_rels[obj_id] + if obj_id in self.additional_rels + else [] + ), ) for obj_id, obj_rels in rels.items() } @@ -282,7 +349,7 @@ def compute_rels(self) -> Dict[str, Relationships]: Relationship( target=gen_core_props_path(), type_value=EPCRelsRelationshipType.CORE_PROPERTIES.get_type(), - id="CoreProperties" + id="CoreProperties", ) ] ) @@ -297,7 +364,9 @@ def get_object_by_uuid(self, uuid: str) -> List[Any]: :param uuid: :return: """ - return list(filter(lambda o: get_obj_uuid(o) == uuid, self.energyml_objects)) + return list( + filter(lambda o: get_obj_uuid(o) == uuid, self.energyml_objects) + ) def get_object_by_identifier(self, identifier: str) -> Optional[Any]: """ @@ -340,49 +409,71 @@ def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance raw_file_list = [] additional_rels = {} core_props = None - with zipfile.ZipFile(epc_file_io, "r", zipfile.ZIP_DEFLATED) as epc_file: + with zipfile.ZipFile( + epc_file_io, "r", zipfile.ZIP_DEFLATED + ) as epc_file: content_type_file_name = get_epc_content_type_path() content_type_info = None try: - content_type_info = epc_file.getinfo(content_type_file_name) + content_type_info = epc_file.getinfo( + content_type_file_name + ) except KeyError: for info in epc_file.infolist(): - if info.filename.lower() == content_type_file_name.lower(): + if ( + info.filename.lower() + == content_type_file_name.lower() + ): content_type_info = info break _read_files.append(content_type_file_name) if content_type_info is None: - print(f"No {content_type_file_name} file found") + logging.error(f"No {content_type_file_name} file found") else: - 
content_type_obj: Types = read_energyml_xml_bytes(epc_file.read(content_type_file_name)) + content_type_obj: Types = read_energyml_xml_bytes( + epc_file.read(content_type_file_name) + ) path_to_obj = {} for ov in content_type_obj.override: ov_ct = ov.content_type ov_path = ov.part_name - # print(ov_ct) - while ov_path.startswith("/") or ov_path.startswith("\\"): + # logging.debug(ov_ct) + while ov_path.startswith("/") or ov_path.startswith( + "\\" + ): ov_path = ov_path[1:] if is_energyml_content_type(ov_ct): _read_files.append(ov_path) try: ov_obj = read_energyml_xml_bytes( epc_file.read(ov_path), - get_class_from_content_type(ov_ct) + get_class_from_content_type(ov_ct), ) if isinstance(ov_obj, DerivedElement): ov_obj = ov_obj.value path_to_obj[ov_path] = ov_obj obj_list.append(ov_obj) except Exception as e: - print( - f"Epc.@read_stream failed to parse file {ov_path} for content-type: {ov_ct} => {get_class_from_content_type(ov_ct)}") - print(e) + logging.error(traceback.format_exc()) + logging.error( + f"Epc.@read_stream failed to parse file {ov_path} for content-type: {ov_ct} => {get_class_from_content_type(ov_ct)}\n\n", + get_class_from_content_type(ov_ct), + ) + try: + logging.debug(epc_file.read(ov_path)) + except: + pass # raise e - elif get_class_from_content_type(ov_ct) == CoreProperties: + elif ( + get_class_from_content_type(ov_ct) + == CoreProperties + ): _read_files.append(ov_path) - core_props = read_energyml_xml_bytes(epc_file.read(ov_path), CoreProperties) + core_props = read_energyml_xml_bytes( + epc_file.read(ov_path), CoreProperties + ) path_to_obj[ov_path] = core_props for f_info in epc_file.infolist(): @@ -393,54 +484,92 @@ def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance raw_file_list.append( RawFile( path=f_info.filename, - content=BytesIO(epc_file.read(f_info.filename)), + content=BytesIO( + epc_file.read(f_info.filename) + ), ) ) except IOError as e: - print(e) - elif f_info.filename != '_rels/.rels': # CoreProperties 
rels file + logging.error(traceback.format_exc()) + elif ( + f_info.filename != "_rels/.rels" + ): # CoreProperties rels file # RELS FILES READING START - # print(f"reading rels {f_info.filename}") - rels_folder, rels_file_name = get_file_folder_and_name_from_path(f_info.filename) + # logging.debug(f"reading rels {f_info.filename}") + ( + rels_folder, + rels_file_name, + ) = get_file_folder_and_name_from_path( + f_info.filename + ) while rels_folder.endswith("/"): rels_folder = rels_folder[:-1] - obj_folder = rels_folder[:rels_folder.rindex("/") + 1] if "/" in rels_folder else "" - obj_file_name = rels_file_name[:-5] # removing the ".rels" - rels_file: Relationships = read_energyml_xml_bytes( - epc_file.read(f_info.filename), - Relationships + obj_folder = ( + rels_folder[: rels_folder.rindex("/") + 1] + if "/" in rels_folder + else "" + ) + obj_file_name = rels_file_name[ + :-5 + ] # removing the ".rels" + rels_file: Relationships = ( + read_energyml_xml_bytes( + epc_file.read(f_info.filename), + Relationships, + ) ) obj_path = obj_folder + obj_file_name if obj_path in path_to_obj: try: - additional_rels_key = get_obj_identifier(path_to_obj[obj_path]) + additional_rels_key = ( + get_obj_identifier( + path_to_obj[obj_path] + ) + ) for rel in rels_file.relationship: - # print(f"\t\t{rel.type_value}") - if (rel.type_value != EPCRelsRelationshipType.DESTINATION_OBJECT.get_type() - and rel.type_value != EPCRelsRelationshipType.SOURCE_OBJECT.get_type() - and rel.type_value != EPCRelsRelationshipType.EXTENDED_CORE_PROPERTIES.get_type() + # logging.debug(f"\t\t{rel.type_value}") + if ( + rel.type_value + != EPCRelsRelationshipType.DESTINATION_OBJECT.get_type() + and rel.type_value + != EPCRelsRelationshipType.SOURCE_OBJECT.get_type() + and rel.type_value + != EPCRelsRelationshipType.EXTENDED_CORE_PROPERTIES.get_type() ): # not a computable relation - if additional_rels_key not in additional_rels: - additional_rels[additional_rels_key] = [] - 
additional_rels[additional_rels_key].append(rel) - except AttributeError as e: + if ( + additional_rels_key + not in additional_rels + ): + additional_rels[ + additional_rels_key + ] = [] + additional_rels[ + additional_rels_key + ].append(rel) + except AttributeError: + logging.error(traceback.format_exc()) pass # 'CoreProperties' object has no attribute 'object_version' except Exception as e: - print(f"Error with obj path {obj_path} {path_to_obj[obj_path]}") + logging.error( + f"Error with obj path {obj_path} {path_to_obj[obj_path]}" + ) raise e else: - print(f"xml file '{f_info.filename}' is not associate to any readable object " - f"(or the object type is not supported because" - f" of a lack of a dependency module) ") - - return Epc(energyml_objects=obj_list, - raw_files=raw_file_list, - core_props=core_props, - additional_rels=additional_rels - ) + logging.error( + f"xml file '{f_info.filename}' is not associate to any readable object " + f"(or the object type is not supported because" + f" of a lack of a dependency module) " + ) + + return Epc( + energyml_objects=obj_list, + raw_files=raw_file_list, + core_props=core_props, + additional_rels=additional_rels, + ) except zipfile.BadZipFile as error: - print(error) + logging.error(error) return None @@ -453,21 +582,9 @@ def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance # /____//____/ -def get_obj_identifier(obj: Any) -> str: - """ - Generates an objet identifier as : 'OBJ_UUID.OBJ_VERSION' - If the object version is None, the result is 'OBJ_UUID.' 
- :param obj: - :return: str - """ - obj_obj_version = get_obj_version(obj) - if obj_obj_version is None: - obj_obj_version = "" - obj_uuid = get_obj_uuid(obj) - return f"{obj_uuid}.{obj_obj_version}" - - -def get_reverse_dor_list(obj_list: List[Any], key_func: Callable = get_obj_identifier) -> Dict[str, List[Any]]: +def get_reverse_dor_list( + obj_list: List[Any], key_func: Callable = get_obj_identifier +) -> Dict[str, List[Any]]: """ Compute a dict with 'OBJ_UUID.OBJ_VERSION' as Key, and list of DOR that reference it. If the object version is None, key is 'OBJ_UUID.' @@ -477,7 +594,9 @@ def get_reverse_dor_list(obj_list: List[Any], key_func: Callable = get_obj_ident """ rels = {} for obj in obj_list: - for dor in search_attribute_matching_type(obj, "DataObjectReference", return_self=False): + for dor in search_attribute_matching_type( + obj, "DataObjectReference", return_self=False + ): key = key_func(dor) if key not in rels: rels[key] = [] @@ -488,12 +607,16 @@ def get_reverse_dor_list(obj_list: List[Any], key_func: Callable = get_obj_ident # PATHS -def gen_core_props_path(export_version: EpcExportVersion = EpcExportVersion.CLASSIC): +def gen_core_props_path( + export_version: EpcExportVersion = EpcExportVersion.CLASSIC, +): return "docProps/core.xml" -def gen_energyml_object_path(energyml_object: Union[str, Any], - export_version: EpcExportVersion = EpcExportVersion.CLASSIC): +def gen_energyml_object_path( + energyml_object: Union[str, Any], + export_version: EpcExportVersion = EpcExportVersion.CLASSIC, +): """ Generate a path to store the :param:`energyml_object` into an epc file (depending on the :param:`export_version`) :param energyml_object: @@ -503,7 +626,9 @@ def gen_energyml_object_path(energyml_object: Union[str, Any], if isinstance(energyml_object, str): energyml_object = read_energyml_xml_str(energyml_object) - obj_type = get_object_type_for_file_path_from_class(energyml_object.__class__) + obj_type = get_object_type_for_file_path_from_class( + 
energyml_object.__class__ + ) pkg = get_class_pkg(energyml_object) pkg_version = get_class_pkg_version(energyml_object) @@ -514,7 +639,7 @@ def gen_energyml_object_path(energyml_object: Union[str, Any], # object_version = "0" if export_version == EpcExportVersion.EXPANDED: - return f"namespace_{pkg}{pkg_version.replace('.', '')}/{uuid}{('/version_' + object_version) if object_version is not None else ''}/{obj_type}_{uuid}.xml" + return f"namespace_{pkg}{pkg_version.replace('.', '')}/{uuid}{(('/version_' + object_version) if object_version is not None else '')}/{obj_type}_{uuid}.xml" else: return obj_type + "_" + uuid + ".xml" @@ -525,14 +650,15 @@ def get_file_folder_and_name_from_path(path: str) -> Tuple[str, str]: :param path: :return: """ - obj_folder = path[:path.rindex("/") + 1] if "/" in path else "" - obj_file_name = path[path.rindex("/") + 1:] if "/" in path else path + obj_folder = path[: path.rindex("/") + 1] if "/" in path else "" + obj_file_name = path[path.rindex("/") + 1 :] if "/" in path else path return obj_folder, obj_file_name -def gen_rels_path(energyml_object: Any, - export_version: EpcExportVersion = EpcExportVersion.CLASSIC - ) -> str: +def gen_rels_path( + energyml_object: Any, + export_version: EpcExportVersion = EpcExportVersion.CLASSIC, +) -> str: """ Generate a path to store the :param:`energyml_object` rels file into an epc file (depending on the :param:`export_version`) @@ -544,11 +670,15 @@ def gen_rels_path(energyml_object: Any, return f"{RELS_FOLDER_NAME}/.rels" else: obj_path = gen_energyml_object_path(energyml_object, export_version) - obj_folder, obj_file_name = get_file_folder_and_name_from_path(obj_path, ) + obj_folder, obj_file_name = get_file_folder_and_name_from_path( + obj_path + ) return f"{obj_folder}{RELS_FOLDER_NAME}/{obj_file_name}.rels" -def get_epc_content_type_path(export_version: EpcExportVersion = EpcExportVersion.CLASSIC) -> str: +def get_epc_content_type_path( + export_version: EpcExportVersion = 
EpcExportVersion.CLASSIC, +) -> str: """ Generate a path to store the "[Content_Types].xml" file into an epc file (depending on the :param:`export_version`) diff --git a/energyml-utils/src/energyml/utils/exception.py b/energyml-utils/src/energyml/utils/exception.py index cbc0819..14cf4cb 100644 --- a/energyml-utils/src/energyml/utils/exception.py +++ b/energyml-utils/src/energyml/utils/exception.py @@ -1,5 +1,7 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 +from typing import Optional + class NotImplementedError(Exception): """Exception for not implemented functions""" @@ -11,6 +13,17 @@ def __init__(self, msg): class NoCrsError(Exception): pass + class ObjectNotFoundNotError(Exception): def __init__(self, obj_id): - super().__init__(f"Object id: {obj_id}") \ No newline at end of file + super().__init__(f"Object id: {obj_id}") + + +class UnknownTypeFromQualifiedType(Exception): + def __init__(self, qt: Optional[str] = None): + super().__init__(f"not matchable qualified type: {qt}") + + +class NotParsableType(Exception): + def __init__(self, t: Optional[str] = None): + super().__init__(f"type: {t}") diff --git a/energyml-utils/src/energyml/utils/introspection.py b/energyml-utils/src/energyml/utils/introspection.py index 6b2fdc0..eba87e2 100644 --- a/energyml-utils/src/energyml/utils/introspection.py +++ b/energyml-utils/src/energyml/utils/introspection.py @@ -1,21 +1,36 @@ # Copyright (c) 2023-2024 Geosiris. 
# SPDX-License-Identifier: Apache-2.0 -import datetime +import inspect +import logging import random import re import sys import typing -import uuid as uuid_mod from dataclasses import Field from enum import Enum from importlib import import_module +from types import ModuleType from typing import Any, List, Optional, Union, Dict, Tuple -from .manager import get_class_pkg, get_class_pkg_version, RELATED_MODULES, \ - get_related_energyml_modules_name, get_sub_classes, get_classes_matching_name, dict_energyml_modules -from .xml import parse_content_type, ENERGYML_NAMESPACES - -primitives = (bool, str, int, float, type(None)) +from .constants import ( + primitives, + epoch_to_date, + epoch, + gen_uuid, + snake_case, + pascal_case, +) +from .manager import ( + get_class_pkg, + get_class_pkg_version, + RELATED_MODULES, + get_related_energyml_modules_name, + get_sub_classes, + get_classes_matching_name, + dict_energyml_modules, +) +from .uri import Uri +from .xml import parse_content_type, ENERGYML_NAMESPACES, parse_qualified_type def is_enum(cls: Union[type, Any]): @@ -47,17 +62,56 @@ def is_abstract(cls: Union[type, Any]) -> bool: :return: bool """ if isinstance(cls, type): - return not is_primitive(cls) and (cls.__name__.startswith("Abstract") or (hasattr(cls, "__dataclass_fields__") and len(cls.__dataclass_fields__)) == 0) and len(get_class_methods(cls)) == 0 + return ( + not is_primitive(cls) + and ( + cls.__name__.startswith("Abstract") + or ( + hasattr(cls, "__dataclass_fields__") + and len(cls.__dataclass_fields__) + ) + == 0 + ) + and len(get_class_methods(cls)) == 0 + ) return is_abstract(type(cls)) +def get_module_classes_from_name(mod_name: str) -> List: + return get_module_classes(sys.modules[mod_name]) + + +def get_module_classes(mod: ModuleType) -> List: + return inspect.getmembers(mod, inspect.isclass) + + +def find_class_in_module(module_name, class_name): + try: + return getattr(sys.modules[module_name], class_name) + except: + for cls_name, cls in 
get_module_classes_from_name(module_name): + try: + if cls_name == class_name or cls.Meta.name == class_name: + return cls + except Exception as e: + pass + logging.error(f"Not Found : {module_name}; {class_name}") + return None + + def get_class_methods(cls: Union[type, Any]) -> List[str]: """ Returns the list of the methods names for a specific class. :param cls: :return: """ - return [func for func in dir(cls) if callable(getattr(cls, func)) and not func.startswith("__") and not isinstance(getattr(cls, func), type)] + return [ + func + for func in dir(cls) + if callable(getattr(cls, func)) + and not func.startswith("__") + and not isinstance(getattr(cls, func), type) + ] def get_class_from_name(class_name_and_module: str) -> Optional[type]: @@ -68,30 +122,48 @@ def get_class_from_name(class_name_and_module: str) -> Optional[type]: """ module_name = class_name_and_module[: class_name_and_module.rindex(".")] last_ns_part = class_name_and_module[ - class_name_and_module.rindex(".") + 1: - ] + class_name_and_module.rindex(".") + 1 : + ] try: # Required to read "CustomData" on eml objects that may contain resqml values # ==> we need to import all modules related to the same version of the common import_related_module(module_name) - return getattr(sys.modules[module_name], last_ns_part) + # return getattr(sys.modules[module_name], last_ns_part) + return find_class_in_module(module_name, last_ns_part) except AttributeError as e: - if "2d" in last_ns_part: - return get_class_from_name( - class_name_and_module.replace("2d", "2D") - ) - elif "3d" in last_ns_part: - return get_class_from_name( - class_name_and_module.replace("3d", "3D") - ) - elif last_ns_part[0].islower(): - return get_class_from_name( - module_name + "." 
+ last_ns_part[0].upper() + last_ns_part[1:] - ) - else: - print(e) + # if "2d" in last_ns_part: + # logging.debug("replace 2D") + # return get_class_from_name( + # class_name_and_module.replace("2d", "2D") + # ) + # elif "3d" in last_ns_part: + # return get_class_from_name( + # class_name_and_module.replace("3d", "3D") + # ) + # elif last_ns_part[0].islower(): + # return get_class_from_name( + # module_name + "." + last_ns_part[0].upper() + last_ns_part[1:] + # ) + # elif "2D" in last_ns_part or "3D" in last_ns_part: + # idx = -1 + # logging.debug(class_name_and_module) + # try: + # idx = class_name_and_module.rindex("2D") + 2 + # except: + # idx = class_name_and_module.rindex("3D") + 2 + # if class_name_and_module[idx].isupper(): + # reformated = ( + # class_name_and_module[:idx] + # + class_name_and_module[idx].lower() + # + class_name_and_module[idx + 1:] + # ) + # logging.debug(f"reformated {reformated}") + # return get_class_from_name(reformated) + # else: + # logging.debug(e) + logging.error(e) except KeyError: - print(f"[ERR] module not found : '{module_name}'") + logging.error(f"[ERR] module not found : '{module_name}'") return None @@ -103,10 +175,13 @@ def get_energyml_module_dev_version(pkg: str, current_version: str): current_version = current_version.replace("-", "_").replace(".", "_") res = [] if pkg in accessible_modules: - # print("\t", pkg, current_version) + # logging.debug("\t", pkg, current_version) for am_pkg_version in accessible_modules[pkg]: - if am_pkg_version != current_version and am_pkg_version.startswith(current_version): - # print("\t\t", am_pkg_version) + if ( + am_pkg_version != current_version + and am_pkg_version.startswith(current_version) + ): + # logging.debug("\t\t", am_pkg_version) res.append(get_module_name(pkg, am_pkg_version)) return res @@ -119,32 +194,48 @@ def get_energyml_class_in_related_dev_pkg(cls: type): res = [] - for dev_module_name in get_energyml_module_dev_version(class_pkg, class_pkg_version): + for 
dev_module_name in get_energyml_module_dev_version( + class_pkg, class_pkg_version + ): try: res.append(get_class_from_name(f"{dev_module_name}.{class_name}")) except Exception as e: - print(f"FAILED {dev_module_name}.{class_name}") - print(e) + logging.error(f"FAILED {dev_module_name}.{class_name}") + logging.error(e) pass return res +def get_class_from_qualified_type(qualified_type: str) -> Optional[type]: + return get_class_from_content_type(qualified_type) + + def get_class_from_content_type(content_type: str) -> Optional[type]: """ Return a :class:`type` object matching with the content-type :param:`content_type`. :param content_type: :return: """ - ct = parse_content_type(content_type) + ct = None + try: + ct = parse_content_type(content_type) + except AttributeError: + pass + if ct is None: + try: + ct = parse_qualified_type(content_type) + except AttributeError: + pass + domain = ct.group("domain") if domain is None: - # print(f"\tdomain {domain} xmlDomain {ct.group('xmlDomain')} ") + # logging.debug(f"\tdomain {domain} xmlDomain {ct.group('xmlDomain')} ") domain = "opc" if domain == "opc": xml_domain = ct.group("xmlDomain") if "." in xml_domain: - xml_domain = xml_domain[xml_domain.rindex(".") + 1:] + xml_domain = xml_domain[xml_domain.rindex(".") + 1 :] # Don't know what to do with http://schemas.f2i-consulting.com/package/2014/metadata/extended-core-properties # if "extended" in xml_domain: # xml_domain = xml_domain.replace("extended", "") @@ -152,20 +243,25 @@ def get_class_from_content_type(content_type: str) -> Optional[type]: # xml_domain = xml_domain[1:] # opc_type = pascal_case(xml_domain).replace("-", "") - # print("\tenergyml.opc.opc." + opc_type) + # logging.debug("\tenergyml.opc.opc." + opc_type) return get_class_from_name("energyml.opc.opc." 
+ opc_type) else: domain = ct.group("domain") obj_type = ct.group("type") if obj_type.lower().startswith("obj_"): # for resqml201 - obj_type = "Obj" + obj_type[4:] + # obj_type = "Obj" + obj_type[4:] + obj_type = obj_type[4:] version_num = str(ct.group("domainVersion")).replace(".", "_") + if "_" not in version_num: + version_num = re.sub(r"(\d)(\d)", r"\1_\2", version_num) if domain.lower() == "resqml" and version_num.startswith("2_0"): version_num = "2_0_1" + + # logging.debug(get_module_name(domain, version_num) + # + "." + # + obj_type) return get_class_from_name( - get_module_name(domain, version_num) - + "." - + obj_type + get_module_name(domain, version_num) + "." + obj_type ) @@ -176,35 +272,6 @@ def get_module_name(domain: str, domain_version: str): return f"energyml.{domain}.{domain_version}.{ns[ns.rindex('/') + 1:]}" -def snake_case(s: str) -> str: - """ Transform a str into snake case. """ - s = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s) - s = re.sub('__([A-Z])', r'_\1', s) - s = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s) - return s.lower() - - -def pascal_case(s: str) -> str: - """ Transform a str into pascal case. """ - return snake_case(s).replace("_", " ").title().replace(" ", "") - - -def flatten_concatenation(matrix) -> List: - """ - Flatten a matrix. - - Example : - [ [a,b,c], [d,e,f], [ [x,y,z], [0] ] ] - will be translated in: [a, b, c, d, e, f, [x,y,z], [0]] - :param matrix: - :return: - """ - flat_list = [] - for row in matrix: - flat_list += row - return flat_list - - def import_related_module(energyml_module_name: str) -> None: """ Import related modules for a specific energyml module. (See. 
:const:`RELATED_MODULES`) @@ -218,7 +285,7 @@ def import_related_module(energyml_module_name: str) -> None: import_module(m) except Exception as e: pass - # print(e) + # logging.error(e) def get_class_fields(cls: Union[type, Any]) -> Dict[str, Field]: @@ -246,7 +313,9 @@ def get_class_attributes(cls: Union[type, Any]) -> List[str]: def get_matching_class_attribute_name( - cls: Union[type, Any], attribute_name: str, re_flags=re.IGNORECASE, + cls: Union[type, Any], + attribute_name: str, + re_flags=re.IGNORECASE, ) -> Optional[str]: """ From an object and an attribute name, returns the correct attribute name of the class. @@ -257,30 +326,33 @@ def get_matching_class_attribute_name( # a search with the exact value for name, cf in class_fields.items(): - if ( - snake_case(name) == snake_case(attribute_name) - or ('name' in cf.metadata and cf.metadata['name'] == attribute_name) + if snake_case(name) == snake_case(attribute_name) or ( + "name" in cf.metadata and cf.metadata["name"] == attribute_name ): return name # search regex after to avoid shadowing perfect match pattern = re.compile(attribute_name, flags=re_flags) for name, cf in class_fields.items(): - # print(f"\t->{name} : {attribute_name} {pattern.match(name)} {('name' in cf.metadata and pattern.match(cf.metadata['name']))}") - if pattern.match(name) or ('name' in cf.metadata and pattern.match(cf.metadata['name'])): + # logging.error(f"\t->{name} : {attribute_name} {pattern.match(name)} {('name' in cf.metadata and pattern.match(cf.metadata['name']))}") + if pattern.match(name) or ( + "name" in cf.metadata and pattern.match(cf.metadata["name"]) + ): return name return None def get_object_attribute( - obj: Any, attr_dot_path: str, force_snake_case=True + obj: Any, attr_dot_path: str, force_snake_case=True ) -> Any: """ returns the value of an attribute given by a dot representation of its path in the object example "Citation.Title" """ - while attr_dot_path.startswith("."): # avoid '.Citation.Title' to take an 
empty attribute name before the first '.' + while attr_dot_path.startswith( + "." + ): # avoid '.Citation.Title' to take an empty attribute name before the first '.' attr_dot_path = attr_dot_path[1:] current_attrib_name = attr_dot_path @@ -301,7 +373,7 @@ def get_object_attribute( if "." in attr_dot_path: return get_object_attribute( - value, attr_dot_path[len(current_attrib_name) + 1:] + value, attr_dot_path[len(current_attrib_name) + 1 :] ) else: return value @@ -330,7 +402,7 @@ def get_object_attribute_advanced(obj: Any, attr_dot_path: str) -> Any: if "." in attr_dot_path: return get_object_attribute_advanced( - value, attr_dot_path[len(current_attrib_name) + 1:] + value, attr_dot_path[len(current_attrib_name) + 1 :] ) else: return value @@ -377,7 +449,7 @@ def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any: if len(attrib_list) > 1: return get_object_attribute_rgx( - value, attr_dot_path_rgx[len(current_attrib_name) + 1:] + value, attr_dot_path_rgx[len(current_attrib_name) + 1 :] ) else: return value @@ -385,17 +457,17 @@ def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any: def get_obj_type(obj: Any) -> str: - """ Return the type name of an object. If obj is already a :class:`type`, return its __name__""" + """Return the type name of an object. 
If obj is already a :class:`type`, return its __name__""" if isinstance(obj, type): return str(obj.__name__) return get_obj_type(type(obj)) def class_match_rgx( - cls: Union[type, Any], - rgx: str, - super_class_search: bool = True, - re_flags=re.IGNORECASE, + cls: Union[type, Any], + rgx: str, + super_class_search: bool = True, + re_flags=re.IGNORECASE, ): if not isinstance(cls, type): cls = type(cls) @@ -411,13 +483,13 @@ def class_match_rgx( def search_attribute_matching_type_with_path( - obj: Any, - type_rgx: str, - re_flags=re.IGNORECASE, - return_self: bool = True, # test directly on input object and not only in its attributes - deep_search: bool = True, # Search inside a matching object - super_class_search: bool = True, # Search inside in super classes of the object - current_path: str = "", + obj: Any, + type_rgx: str, + re_flags=re.IGNORECASE, + return_self: bool = True, # test directly on input object and not only in its attributes + deep_search: bool = True, # Search inside a matching object + super_class_search: bool = True, # Search inside in super classes of the object + current_path: str = "", ) -> List[Tuple[str, Any]]: """ Returns a list of tuple (path, value) for each sub attribute with type matching param "type_rgx". @@ -434,7 +506,7 @@ def search_attribute_matching_type_with_path( res = [] if obj is not None: if return_self and class_match_rgx( - obj, type_rgx, super_class_search, re_flags + obj, type_rgx, super_class_search, re_flags ): res.append((current_path, obj)) if not deep_search: @@ -480,14 +552,14 @@ def search_attribute_matching_type_with_path( def search_attribute_in_upper_matching_name( - obj: Any, - name_rgx: str, - root_obj: Optional[Any] = None, - re_flags=re.IGNORECASE, - current_path: str = "", + obj: Any, + name_rgx: str, + root_obj: Optional[Any] = None, + re_flags=re.IGNORECASE, + current_path: str = "", ) -> Optional[Any]: """ - See :func:`search_attribute_matching_type_with_path`. 
It only returns the value not the path + See :func:`search_attribute_matching_name_with_path`. It only returns the value not the path :param obj: :param name_rgx: :param root_obj: @@ -495,12 +567,14 @@ def search_attribute_in_upper_matching_name( :param current_path: :return: """ - elt_list = search_attribute_matching_name(obj, name_rgx, search_in_sub_obj=False, deep_search=False) + elt_list = search_attribute_matching_name( + obj, name_rgx, search_in_sub_obj=False, deep_search=False + ) if elt_list is not None and len(elt_list) > 0: return elt_list if obj != root_obj: - upper_path = current_path[:current_path.rindex(".")] + upper_path = current_path[: current_path.rindex(".")] if len(upper_path) > 0: return search_attribute_in_upper_matching_name( obj=get_object_attribute(root_obj, upper_path), @@ -514,12 +588,12 @@ def search_attribute_in_upper_matching_name( def search_attribute_matching_type( - obj: Any, - type_rgx: str, - re_flags=re.IGNORECASE, - return_self: bool = True, # test directly on input object and not only in its attributes - deep_search: bool = True, # Search inside a matching object - super_class_search: bool = True, # Search inside in super classes of the object + obj: Any, + type_rgx: str, + re_flags=re.IGNORECASE, + return_self: bool = True, # test directly on input object and not only in its attributes + deep_search: bool = True, # Search inside a matching object + super_class_search: bool = True, # Search inside in super classes of the object ) -> List[Any]: """ See :func:`search_attribute_matching_type_with_path`. 
It only returns the value not the path @@ -545,12 +619,12 @@ def search_attribute_matching_type( def search_attribute_matching_name_with_path( - obj: Any, - name_rgx: str, - re_flags=re.IGNORECASE, - current_path: str = "", - deep_search: bool = True, # Search inside a matching object - search_in_sub_obj: bool = True, # Search in obj attributes + obj: Any, + name_rgx: str, + re_flags=re.IGNORECASE, + current_path: str = "", + deep_search: bool = True, # Search inside a matching object + search_in_sub_obj: bool = True, # Search in obj attributes ) -> List[Tuple[str, Any]]: """ Returns a list of tuple (path, value) for each sub attribute with type matching param "name_rgx". @@ -567,10 +641,10 @@ def search_attribute_matching_name_with_path( name_rgx = name_rgx[1:] current_match = name_rgx next_match = current_match - if '.' in current_match: + if "." in current_match: attrib_list = re.split(r"(? 0: # next_match is different, match is not final + if ( + next_match != current_match and len(next_match) > 0 + ): # next_match is different, match is not final res = res + search_attribute_matching_name_with_path( obj=matched, name_rgx=next_match, @@ -613,7 +705,7 @@ def search_attribute_matching_name_with_path( search_in_sub_obj=False, # no partial search in sub obj with no match ) else: # a complete match - res.append( (matched_path, matched) ) + res.append((matched_path, matched)) if deep_search: res = res + search_attribute_matching_name_with_path( obj=matched, @@ -638,11 +730,11 @@ def search_attribute_matching_name_with_path( def search_attribute_matching_name( - obj: Any, - name_rgx: str, - re_flags=re.IGNORECASE, - deep_search: bool = True, # Search inside a matching object - search_in_sub_obj: bool = True, # Search in obj attributes + obj: Any, + name_rgx: str, + re_flags=re.IGNORECASE, + deep_search: bool = True, # Search inside a matching object + search_in_sub_obj: bool = True, # Search in obj attributes ) -> List[Any]: """ See 
:func:`search_attribute_matching_name_with_path`. It only returns the value not the path @@ -661,7 +753,7 @@ def search_attribute_matching_name( name_rgx=name_rgx, re_flags=re_flags, deep_search=deep_search, - search_in_sub_obj=search_in_sub_obj + search_in_sub_obj=search_in_sub_obj, ) ] @@ -669,14 +761,6 @@ def search_attribute_matching_name( # Utility functions -def gen_uuid() -> str: - """ - Generate a new uuid. - :return: - """ - return str(uuid_mod.uuid4()) - - def get_obj_uuid(obj: Any) -> str: """ Return the object uuid (attribute must match the following regex : "[Uu]u?id|UUID"). @@ -698,10 +782,102 @@ def get_obj_version(obj: Any) -> str: try: return get_object_attribute_no_verif(obj, "version_string") except Exception: - print(f"Error with {type(obj)}") + logging.error(f"Error with {type(obj)}") # raise e +def get_obj_pkg_pkgv_type_uuid_version( + obj: Any, +) -> Tuple[ + Optional[str], Optional[str], Optional[str], Optional[str], Optional[str] +]: + """ + return from an energyml object or a DOR a tuple : + - package : e.g. resqml|eml|witsml|prodml + - package version : e.g. 20 + - type : e.g. 
obj_TriangulatedSetRepresentation + - uuid + - object version + :param obj: + :return: + """ + pkg: Optional[str] = get_class_pkg(obj) + pkg_v: Optional[str] = get_class_pkg_version(obj) + obj_type: Optional[str] = get_object_type_for_file_path_from_class(obj) + obj_uuid = get_obj_uuid(obj) + obj_version = get_obj_version(obj) + + ct = None + try: + ct = get_object_attribute_no_verif(obj, "content_type") + except: + pass + + if ct is not None: + ct_match = parse_content_type(ct) + logging.debug("ct : ", ct_match) + if ct_match is not None: + pkg = ct_match.group("domain") + pkg_v = ct_match.group("domainVersion") + obj_type = ct_match.group("type") + else: + try: + qt = get_object_attribute_no_verif(obj, "qualified_type") + qt_match = parse_qualified_type(qt) + logging.debug("qt : ", qt, obj.__dict__, qt_match) + if qt_match is not None: + pkg = qt_match.group("domain") + pkg_v = qt_match.group("domainVersion") + obj_type = qt_match.group("type") + except: + pass + + # flattening version + if pkg_v is not None: + pkg_v = pkg_v.replace(".", "") + + return pkg, pkg_v, obj_type, obj_uuid, obj_version + + +def get_obj_identifier(obj: Any) -> str: + """ + Generates an objet identifier as : 'OBJ_UUID.OBJ_VERSION' + If the object version is None, the result is 'OBJ_UUID.' 
+ :param obj: + :return: str + """ + obj_obj_version = get_obj_version(obj) + if obj_obj_version is None: + obj_obj_version = "" + obj_uuid = get_obj_uuid(obj) + return f"{obj_uuid}.{obj_obj_version}" + + +def get_obj_uri(obj: Any, dataspace: Optional[str] = None) -> Uri: + """ + Generates an objet etp Uri from an objet or a DOR + :param obj: + :param dataspace: the etp dataspace + :return: str + """ + ( + domain, + domain_version, + object_type, + obj_uuid, + obj_version, + ) = get_obj_pkg_pkgv_type_uuid_version(obj) + + return Uri( + dataspace=dataspace, + domain=domain, + domain_version=domain_version, + object_type=object_type, + uuid=obj_uuid, + version=obj_version, + ) + + def get_direct_dor_list(obj: Any) -> List[Any]: """ Search all sub attribute of type "DataObjectreference". @@ -711,18 +887,29 @@ def get_direct_dor_list(obj: Any) -> List[Any]: return search_attribute_matching_type(obj, "DataObjectreference") -def get_data_object_type(cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2): - return get_class_pkg(cls) + "." + get_class_pkg_version(cls, print_dev_version, nb_max_version_digits) +def get_data_object_type( + cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2 +): + return ( + get_class_pkg(cls) + + "." + + get_class_pkg_version(cls, print_dev_version, nb_max_version_digits) + ) -def get_qualified_type_from_class(cls: Union[type, Any], print_dev_version=True): +def get_qualified_type_from_class( + cls: Union[type, Any], print_dev_version=True +): return ( - get_data_object_type(cls, print_dev_version, 2) - .replace(".", "") + "." + get_object_type_for_file_path_from_class(cls) + get_data_object_type(cls, print_dev_version, 2).replace(".", "") + + "." 
+ + get_object_type_for_file_path_from_class(cls) ) -def get_content_type_from_class(cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2): +def get_content_type_from_class( + cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2 +): if not isinstance(cls, type): cls = type(cls) @@ -730,54 +917,48 @@ def get_content_type_from_class(cls: Union[type, Any], print_dev_version=True, n if cls.__name__.lower() == "coreproperties": return "application/vnd.openxmlformats-package.core-properties+xml" else: - return ("application/x-" + get_class_pkg(cls) - + "+xml;version=" + get_class_pkg_version(cls, print_dev_version, nb_max_version_digits) + ";type=" - + get_object_type_for_file_path_from_class(cls)) + return ( + "application/x-" + + get_class_pkg(cls) + + "+xml;version=" + + get_class_pkg_version( + cls, print_dev_version, nb_max_version_digits + ) + + ";type=" + + get_object_type_for_file_path_from_class(cls) + ) - print(f"@get_content_type_from_class not supported type : {cls}") + logging.error(f"@get_content_type_from_class not supported type : {cls}") return None def get_object_type_for_file_path_from_class(cls) -> str: - # obj_type = get_obj_type(cls) - # pkg = get_class_pkg(cls) - # if re.match(r"Obj[A-Z].*", obj_type) is not None and pkg == "resqml": - # return "obj_" + obj_type[3:] - # return obj_type + classic_type = get_obj_type(cls) + for parent_cls in cls.__class__.__bases__: + try: + if ( + classic_type.lower() in parent_cls.Meta.name.lower() + ): # to work with 3d transformed in 3D and Obj[A-Z] in obj_[A-Z] + return parent_cls.Meta.name + except AttributeError: + pass try: - return cls.Meta.name # to work with 3d transformed in 3D and Obj[A-Z] in obj_[A-Z] + return ( + cls.Meta.name + ) # to work with 3d transformed in 3D and Obj[A-Z] in obj_[A-Z] except AttributeError: - pkg = get_class_pkg(cls) - return get_obj_type(cls) - - -def now(time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC")) -> float: - """ Return 
an epoch value """ - return datetime.datetime.timestamp(datetime.datetime.now(time_zone)) - + pass -def epoch(time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC")) -> int: - return int(now(time_zone)) - - -def date_to_epoch(date: str) -> int: - """ - Transform a energyml date into an epoch datetime - :return: int - """ - return int(datetime.datetime.fromisoformat(date).timestamp()) - - -def epoch_to_date(epoch_value: int, time_zone=datetime.timezone(datetime.timedelta(hours=1), "UTC")) -> str: - date = datetime.datetime.fromtimestamp(epoch_value / 1e3, time_zone) - return date.strftime("%Y-%m-%dT%H:%M:%S%z") + return classic_type # RANDOM -def get_class_from_simple_name(simple_name: str, energyml_module_context=None) -> type: +def get_class_from_simple_name( + simple_name: str, energyml_module_context=None +) -> type: """ Search for a :class:`type` depending on the simple class name :param:`simple_name`. :param simple_name: @@ -799,7 +980,9 @@ def get_class_from_simple_name(simple_name: str, energyml_module_context=None) - return eval(simple_name) -def _gen_str_from_attribute_name(attribute_name: Optional[str], _parent_class: Optional[type]=None) -> str: +def _gen_str_from_attribute_name( + attribute_name: Optional[str], _parent_class: Optional[type] = None +) -> str: """ Generate a str from the attribute name. The result is not the same for an attribute named "Uuid" than for an attribute named "mime_type" for example. 
@@ -812,8 +995,15 @@ def _gen_str_from_attribute_name(attribute_name: Optional[str], _parent_class: O if attribute_name_lw == "uuid" or attribute_name_lw == "uid": return gen_uuid() elif attribute_name_lw == "title": - return f"{_parent_class.__name__} title (" + str(random_value_from_class(int)) + ")" - elif attribute_name_lw == "schema_version" and get_class_pkg_version(_parent_class) is not None: + return ( + f"{_parent_class.__name__} title (" + + str(random_value_from_class(int)) + + ")" + ) + elif ( + attribute_name_lw == "schema_version" + and get_class_pkg_version(_parent_class) is not None + ): return get_class_pkg_version(_parent_class) elif re.match(r"\w*version$", attribute_name_lw): return str(random_value_from_class(int)) @@ -821,15 +1011,27 @@ def _gen_str_from_attribute_name(attribute_name: Optional[str], _parent_class: O return epoch_to_date(epoch()) elif re.match(r"path_in_.*", attribute_name_lw): return f"/FOLDER/{gen_uuid()}/a_patch{random.randint(0, 30)}" - elif "mime_type" in attribute_name_lw and ("external" in _parent_class.__name__.lower() and "part" in _parent_class.__name__.lower()): + elif "mime_type" in attribute_name_lw and ( + "external" in _parent_class.__name__.lower() + and "part" in _parent_class.__name__.lower() + ): return f"application/x-hdf5" elif "type" in attribute_name_lw: if attribute_name_lw.startswith("qualified"): - return get_qualified_type_from_class(get_classes_matching_name(_parent_class, "Abstract")[0]) + return get_qualified_type_from_class( + get_classes_matching_name(_parent_class, "Abstract")[0] + ) if attribute_name_lw.startswith("content"): - return get_content_type_from_class(get_classes_matching_name(_parent_class, "Abstract")[0]) - return "A random str " + (f"[{attribute_name}] " if attribute_name is not None else "") + "(" + str( - random_value_from_class(int)) + ")" + return get_content_type_from_class( + get_classes_matching_name(_parent_class, "Abstract")[0] + ) + return ( + "A random str " + + 
(f"[{attribute_name}] " if attribute_name is not None else "") + + "(" + + str(random_value_from_class(int)) + + ")" + ) def random_value_from_class(cls: type): @@ -842,10 +1044,19 @@ def random_value_from_class(cls: type): if not is_primitive(cls): # import_related_module(cls.__module__) energyml_module_context = get_related_energyml_modules_name(cls) - return _random_value_from_class(cls=cls, energyml_module_context=energyml_module_context, attribute_name=None) + return _random_value_from_class( + cls=cls, + energyml_module_context=energyml_module_context, + attribute_name=None, + ) -def _random_value_from_class(cls: Any, energyml_module_context: List[str], attribute_name: Optional[str] = None, _parent_class: Optional[type]=None): +def _random_value_from_class( + cls: Any, + energyml_module_context: List[str], + attribute_name: Optional[str] = None, + _parent_class: Optional[type] = None, +): """ Generate a random value for a :class:`type`. All attributes should be filled with random values. :param cls: @@ -861,52 +1072,89 @@ def _random_value_from_class(cls: Any, energyml_module_context: List[str], attri elif isinstance(cls, int) or cls == int: return random.randint(0, 10000) elif isinstance(cls, float) or cls == float: - return random.randint(0, 1000000) / 100. 
+ return random.randint(0, 1000000) / 100.0 elif isinstance(cls, bool) or cls == bool: return random.randint(0, 1) == 1 elif is_enum(cls): - return cls[cls._member_names_[random.randint(0, len(cls._member_names_) - 1)]] + return cls[ + cls._member_names_[ + random.randint(0, len(cls._member_names_) - 1) + ] + ] elif isinstance(cls, typing.Union.__class__): type_list = list(cls.__args__) if type(None) in type_list: - type_list.remove(type(None)) # we don't want to generate none value + type_list.remove( + type(None) + ) # we don't want to generate none value chosen_type = type_list[random.randint(0, len(type_list))] - return _random_value_from_class(chosen_type, energyml_module_context, attribute_name, cls) - elif cls.__module__ == 'typing': + return _random_value_from_class( + chosen_type, energyml_module_context, attribute_name, cls + ) + elif cls.__module__ == "typing": nb_value_for_list = random.randint(2, 3) type_list = list(cls.__args__) if type(None) in type_list: - type_list.remove(type(None)) # we don't want to generate none value + type_list.remove( + type(None) + ) # we don't want to generate none value if cls._name == "List": lst = [] for i in range(nb_value_for_list): - chosen_type = type_list[random.randint(0, len(type_list) - 1)] - lst.append(_random_value_from_class(chosen_type, energyml_module_context, attribute_name, list)) + chosen_type = type_list[ + random.randint(0, len(type_list) - 1) + ] + lst.append( + _random_value_from_class( + chosen_type, + energyml_module_context, + attribute_name, + list, + ) + ) return lst else: chosen_type = type_list[random.randint(0, len(type_list) - 1)] - return _random_value_from_class(chosen_type, energyml_module_context, attribute_name, _parent_class) + return _random_value_from_class( + chosen_type, + energyml_module_context, + attribute_name, + _parent_class, + ) else: - potential_classes = list(filter(lambda _c: not is_abstract(_c), [cls] + get_sub_classes(cls))) + potential_classes = list( + filter( + 
lambda _c: not is_abstract(_c), + [cls] + get_sub_classes(cls), + ) + ) if len(potential_classes) > 0: - chosen_type = potential_classes[random.randint(0, len(potential_classes) - 1)] + chosen_type = potential_classes[ + random.randint(0, len(potential_classes) - 1) + ] args = {} for k, v in get_class_fields(chosen_type).items(): - # print(f"get_class_fields {k} : {v}") + # logging.debug(f"get_class_fields {k} : {v}") args[k] = _random_value_from_class( - cls=get_class_from_simple_name(simple_name=v.type, energyml_module_context=energyml_module_context), + cls=get_class_from_simple_name( + simple_name=v.type, + energyml_module_context=energyml_module_context, + ), energyml_module_context=energyml_module_context, attribute_name=k, - _parent_class=chosen_type) + _parent_class=chosen_type, + ) if not isinstance(chosen_type, type): chosen_type = type(chosen_type) return chosen_type(**args) except Exception as e: - print(f"exception on attribute '{attribute_name}' for class {cls} :") + logging.error(f"exception on attribute '{attribute_name}' for class {cls} :") raise e - print(f"@_random_value_from_class Not supported object type generation {cls}") - return None \ No newline at end of file + logging.error( + f"@_random_value_from_class Not supported object type generation {cls}" + ) + return None diff --git a/energyml-utils/src/energyml/utils/manager.py b/energyml-utils/src/energyml/utils/manager.py index fd37023..eb0926a 100644 --- a/energyml-utils/src/energyml/utils/manager.py +++ b/energyml-utils/src/energyml/utils/manager.py @@ -2,30 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 import importlib import inspect +import logging import pkgutil -import re -from typing import List, Union, Any - -REGEX_ENERGYML_MODULE_NAME = r"energyml\.(?P.*)\.v(?P(?P\d+(_\d+)*)(_dev(?P.*))?)\..*" -REGEX_PROJECT_VERSION = r"(?P[\d]+)(.(?P[\d]+)(.(?P[\d]+))?)?" 
- -ENERGYML_MODULES_NAMES = ["eml", "prodml", "witsml", "resqml"] - -RELATED_MODULES = [ - ["energyml.eml.v2_0.commonv2", "energyml.resqml.v2_0_1.resqmlv2"], - [ - "energyml.eml.v2_1.commonv2", - "energyml.prodml.v2_0.prodmlv2", - "energyml.witsml.v2_0.witsmlv2", - ], - ["energyml.eml.v2_2.commonv2", "energyml.resqml.v2_2_dev3.resqmlv2"], - [ - "energyml.eml.v2_3.commonv2", - "energyml.resqml.v2_2.resqmlv2", - "energyml.prodml.v2_2.prodmlv2", - "energyml.witsml.v2_1.witsmlv2", - ], -] +from typing import Union, Any, Dict + +from .constants import * def get_related_energyml_modules_name(cls: Union[type, Any]) -> List[str]: @@ -44,7 +25,7 @@ def get_related_energyml_modules_name(cls: Union[type, Any]) -> List[str]: return [] -def dict_energyml_modules() -> List: +def dict_energyml_modules() -> Dict: """ List all accessible energyml python modules :return: @@ -52,10 +33,10 @@ def dict_energyml_modules() -> List: modules = {} energyml_module = importlib.import_module("energyml") - # print("> energyml") + # logging.debug("> energyml") for mod in pkgutil.iter_modules(energyml_module.__path__): - # print(f"{mod.name}") + # logging.debug(f"{mod.name}") if mod.name in ENERGYML_MODULES_NAMES: energyml_sub_module = importlib.import_module( f"energyml.{mod.name}" @@ -74,7 +55,7 @@ def list_energyml_modules(): energyml_module = importlib.import_module("energyml") modules = [] for obj in pkgutil.iter_modules(energyml_module.__path__): - # print(f"{obj.name}") + # logging.debug(f"{obj.name}") if obj.name in ENERGYML_MODULES_NAMES: modules.append(obj.name) return modules @@ -96,7 +77,7 @@ def list_classes(module_path: str) -> List: class_list.append(obj) return class_list except ModuleNotFoundError: - print(f"Err : module {module_path} not found") + logging.error(f"Err : module {module_path} not found") return [] @@ -119,7 +100,11 @@ def get_sub_classes(cls: type) -> List[type]: return list(dict.fromkeys(sub_classes)) -def get_classes_matching_name(cls: type, name_rgx: str, 
re_flags=re.IGNORECASE,) -> List[type]: +def get_classes_matching_name( + cls: type, + name_rgx: str, + re_flags=re.IGNORECASE, +) -> List[type]: """ Search a class matching the regex @re_flags. The search is the energyml packages related to the objet type @cls. :param cls: @@ -132,7 +117,9 @@ def get_classes_matching_name(cls: type, name_rgx: str, re_flags=re.IGNORECASE,) try: module = importlib.import_module(related) for _, obj in inspect.getmembers(module): - if inspect.isclass(obj) and re.match(name_rgx, obj.__name__, re_flags): + if inspect.isclass(obj) and re.match( + name_rgx, obj.__name__, re_flags + ): match_classes.append(obj) except ModuleNotFoundError: pass @@ -168,11 +155,11 @@ def get_all_classes(module_name: str, version: str) -> dict: def get_class_pkg(cls): try: - p = re.compile(REGEX_ENERGYML_MODULE_NAME) + p = re.compile(RGX_ENERGYML_MODULE_NAME) m = p.search(cls.__module__) return m.group("pkg") except AttributeError as e: - print(f"Exception to get class package for '{cls}'") + logging.error(f"Exception to get class package for '{cls}'") raise e @@ -182,7 +169,7 @@ def reshape_version(version: str, nb_digit: int) -> str: else, the original version is returned. 
Example : reshapeVersion("v2.0.1", 2) ==> "2.0" and reshapeVersion("version2.0.1.3.2.5", 4) ==> "version2.0.1.3.2.5" """ - p = re.compile(REGEX_PROJECT_VERSION) + p = re.compile(RGX_PROJECT_VERSION) m = p.search(version) if m is not None: n0 = m.group("n0") @@ -205,7 +192,7 @@ def reshape_version(version: str, nb_digit: int) -> str: def get_class_pkg_version( cls, print_dev_version: bool = True, nb_max_version_digits: int = 2 ): - p = re.compile(REGEX_ENERGYML_MODULE_NAME) + p = re.compile(RGX_ENERGYML_MODULE_NAME) class_module = None if isinstance(cls, type): class_module = cls.__module__ diff --git a/energyml-utils/src/energyml/utils/serialization.py b/energyml-utils/src/energyml/utils/serialization.py index 5705491..373667e 100644 --- a/energyml-utils/src/energyml/utils/serialization.py +++ b/energyml-utils/src/energyml/utils/serialization.py @@ -1,40 +1,80 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 +import json +import logging +import traceback +from enum import Enum from io import BytesIO -from typing import Optional, Any +from typing import Optional, Any, Union, List, Dict, Callable, Type -import energyml import xsdata from xsdata.exceptions import ParserError from xsdata.formats.dataclass.context import XmlContext -from xsdata.formats.dataclass.parsers import XmlParser +from xsdata.formats.dataclass.models.generics import DerivedElement +from xsdata.formats.dataclass.parsers import XmlParser, JsonParser +from xsdata.formats.dataclass.parsers.config import ParserConfig from xsdata.formats.dataclass.serializers import JsonSerializer from xsdata.formats.dataclass.serializers import XmlSerializer from xsdata.formats.dataclass.serializers.config import SerializerConfig -from .introspection import get_class_from_name, get_energyml_class_in_related_dev_pkg -from .manager import dict_energyml_modules, get_class_pkg_version, get_class_pkg -from .xml import get_class_name_from_xml, get_tree, get_xml_encoding +from .exception import 
UnknownTypeFromQualifiedType, NotParsableType +from .introspection import ( + get_class_from_name, + get_energyml_class_in_related_dev_pkg, + get_class_from_content_type, + get_qualified_type_from_class, + get_class_fields, + get_obj_identifier, + is_primitive, + search_attribute_matching_name, + get_class_from_qualified_type, + get_matching_class_attribute_name, is_enum, +) +from .xml import ( + get_class_name_from_xml, + get_tree, + get_xml_encoding, + ENERGYML_NAMESPACES, +) -def _read_energyml_xml_bytes_as_class(file: bytes, obj_class: type) -> Any: +class JSON_VERSION(Enum): + XSDATA = "XSDATA" + OSDU_OFFICIAL = "OSDU_OFFICIAL" + + +def _read_energyml_xml_bytes_as_class(file: bytes, obj_class: Type, fail_on_unknown_properties=True, fail_on_unknown_attributes=True) -> Any: """ - Read an xml file into the instance of type :param:`obj_class`. + Read a xml file into the instance of type :param:`obj_class`. :param file: :param obj_class: :return: """ - parser = XmlParser() + config = ParserConfig( + fail_on_unknown_properties=fail_on_unknown_properties, + fail_on_unknown_attributes=fail_on_unknown_attributes, + # process_xinclude=True, + ) + parser = XmlParser(config=config) try: return parser.from_bytes(file, obj_class) except ParserError as e: - print(f"Failed to parse file {file} as class {obj_class}") + logging.error(f"Failed to parse file {file} as class {obj_class}") + if len(e.args) > 0: + if "unknown property" in e.args[0].lower(): + logging.error(e) + logging.error( + "A property has not been found, please check if your 'xsi::type' values contains " + "the xml namespace (e.g. 'xsi:type=\"eml:VerticalCrsEpsgCode\"')." + ) raise e -def read_energyml_xml_bytes(file: bytes, obj_type: Optional[type] = None) -> Any: +def read_energyml_xml_bytes( + file: bytes, obj_type: Optional[type] = None +) -> Any: """ - Read an xml file. The type of object is searched from the xml root name if not given. + Read a xml file. 
The type of object is searched from the xml root name if not given. :param obj_type: :param file: :return: @@ -44,23 +84,32 @@ def read_energyml_xml_bytes(file: bytes, obj_type: Optional[type] = None) -> Any try: return _read_energyml_xml_bytes_as_class(file, obj_type) except xsdata.exceptions.ParserError as e: - print(f"Failed to read file with type {obj_type}: {get_energyml_class_in_related_dev_pkg(obj_type)}") + if len(e.args) > 0: + if "unknown property" in e.args[0].lower(): + logging.error(f"Trying reading without fail on unknown attribute/property") + try: + return _read_energyml_xml_bytes_as_class(file, obj_type, False, False) + except Exception as e: + logging.error(traceback.print_stack()) + pass + + # Otherwise for obj_type_dev in get_energyml_class_in_related_dev_pkg(obj_type): try: - print(f"Trying with class : {obj_type_dev}") - obj = _read_energyml_xml_bytes_as_class( - file, obj_type_dev - ) - print(f" ==> succeed read with {obj_type_dev}") + logging.debug(f"Trying with class : {obj_type_dev}") + obj = _read_energyml_xml_bytes_as_class(file, obj_type_dev) + logging.debug(f" ==> succeed read with {obj_type_dev}") return obj except Exception: pass raise e -def read_energyml_xml_io(file: BytesIO, obj_class: Optional[type] = None) -> Any: +def read_energyml_xml_io( + file: BytesIO, obj_class: Optional[type] = None +) -> Any: if obj_class is not None: - return read_energyml_xml_bytes_as_class(file.getbuffer(), obj_class) + return _read_energyml_xml_bytes_as_class(file.getbuffer(), obj_class) else: return read_energyml_xml_bytes(file.getbuffer()) @@ -77,6 +126,103 @@ def read_energyml_xml_file(file_path: str) -> Any: return read_energyml_xml_bytes(xml_content_b) +def _read_energyml_json_bytes_as_class( + file: bytes, json_version: JSON_VERSION, obj_class: type +) -> Union[List, Any]: + """ + Read a json file into energyml object. If json_version==JSON_VERSION.XSDATA the instance will be of type :param:`obj_class`. 
+ For json_version==JSON_VERSION.OSDU_OFFICIAL a list of read objects is returned + :param file: + :param json_version: + :param obj_class: + :return: + """ + if json_version == JSON_VERSION.XSDATA: + config = ParserConfig( + # fail_on_unknown_properties=False, + # fail_on_unknown_attributes=False, + # process_xinclude=True, + ) + parser = JsonParser(config=config) + try: + return parser.from_bytes(file, obj_class) + except ParserError as e: + logging.error(f"Failed to parse file {file} as class {obj_class}") + raise e + elif json_version == JSON_VERSION.OSDU_OFFICIAL: + return read_json_dict(json.loads(file)) + + +def read_energyml_json_bytes( + file: bytes, json_version: JSON_VERSION, obj_type: Optional[type] = None +) -> Union[List, Any]: + """ + Read a json file into energyml object. If json_version==JSON_VERSION.XSDATA the instance will be of type :param:`obj_class`. + For json_version==JSON_VERSION.OSDU_OFFICIAL a list of read objects is returned + :param file: + :param json_version: + :param obj_type: + :return: + """ + if obj_type is None: + obj_type = get_class_from_content_type(get_class_from_json_dict(file)) + if json_version == JSON_VERSION.XSDATA: + try: + return _read_energyml_json_bytes_as_class(file, obj_type) + except xsdata.exceptions.ParserError as e: + logging.error( + f"Failed to read file with type {obj_type}: {get_energyml_class_in_related_dev_pkg(obj_type)}" + ) + for obj_type_dev in get_energyml_class_in_related_dev_pkg( + obj_type + ): + try: + logging.debug(f"Trying with class : {obj_type_dev}") + obj = _read_energyml_json_bytes_as_class( + file, obj_type_dev + ) + logging.debug(f" ==> succeed read with {obj_type_dev}") + return obj + except Exception: + pass + raise e + elif json_version == JSON_VERSION.OSDU_OFFICIAL: + return read_json_dict(json.loads(file)) + + +def read_energyml_json_io( + file: BytesIO, json_version: JSON_VERSION, obj_class: Optional[type] = None +) -> Union[List, Any]: + if obj_class is not None: + return 
_read_energyml_json_bytes_as_class( + file.getbuffer(), json_version, obj_class + ) + else: + return read_energyml_json_bytes(file.getbuffer(), json_version) + + +def read_energyml_json_str( + file_content: str, json_version: JSON_VERSION +) -> Union[List, Any]: + return read_energyml_json_bytes(file_content.encode("utf-8"), json_version) + + +def read_energyml_json_file( + file_path: str, json_version: JSON_VERSION +) -> Union[List, Any]: + json_content_b = "" + with open(file_path, "rb") as f: + json_content_b = f.read() + return read_energyml_json_bytes(json_content_b, json_version) + + +# _____ _ ___ __ _ +# / ___/___ _____(_)___ _/ (_)___ ____ _/ /_(_)___ ____ +# \__ \/ _ \/ ___/ / __ `/ / /_ / / __ `/ __/ / __ \/ __ \ +# ___/ / __/ / / / /_/ / / / / /_/ /_/ / /_/ / /_/ / / / / +# /____/\___/_/ /_/\__,_/_/_/ /___/\__,_/\__/_/\____/_/ /_/ + + def serialize_xml(obj) -> str: context = XmlContext( # element_name_generator=text.camel_case, @@ -84,14 +230,229 @@ def serialize_xml(obj) -> str: ) serializer_config = SerializerConfig(indent=" ") serializer = XmlSerializer(context=context, config=serializer_config) - return serializer.render(obj) + return serializer.render(obj, ns_map=ENERGYML_NAMESPACES) -def serialize_json(obj) -> str: - context = XmlContext( - # element_name_generator=text.camel_case, - # attribute_name_generator=text.kebab_case +def serialize_json(obj, json_version: JSON_VERSION) -> str: + if json_version == JSON_VERSION.XSDATA: + context = XmlContext( + # element_name_generator=text.camel_case, + # attribute_name_generator=text.kebab_case + ) + serializer_config = SerializerConfig(indent=" ") + serializer = JsonSerializer(context=context, config=serializer_config) + return serializer.render(obj) + elif json_version == JSON_VERSION.OSDU_OFFICIAL: + return json.dumps(to_json_dict(obj), indent=4, sort_keys=True) + + +def get_class_from_json_dict(o: Union[dict, bytes]) -> Optional[str]: + """ + Searches for the attribute "$type" + :param o: + :return: 
+ """ + if isinstance(o, str) or isinstance(o, bytes): + o = json.loads(o) + for att in ["$type", "dataObjectType"]: + if att in o: + return o[att] + return None + + +# RAW + + +def read_json_dict(obj: Any) -> List: + """ + Reads a json dict valid with the OSDU standard. + This means: + - Any not "primitive" object (not str/number/bool ...) has a "$type" attribute set to its qualified type + - None value are not given, except for mandatory attributes (depending on the energyml standard) + - If an attribute is named 'value' (case-sensitive, this doesn't apply to 'Value'), the name of the attribute + in the dict is "_" + - "_data" attribute is given for DOR (not mandatory) and contains the json representation of the target object + :param obj: + :return: a list of read objects. This is a list due to the "_data" attribute + """ + if "$type" in obj: + sub_obj = [] + obj = _read_json_dict(obj, sub_obj) + return [obj] + sub_obj + else: + raise UnknownTypeFromQualifiedType() + + +def _read_json_dict(obj_json: Any, sub_obj: List) -> Any: + """ + Reads a json dict valid with the OSDU standard. + This means: + - Any not "primitive" object (not str/number/bool ...) has a "$type" attribute set to its qualified type + - None value are not given, except for mandatory attributes (depending on the energyml standard) + - If an attribute is named 'value' (case-sensitive, this doesn't apply to 'Value'), the name of the attribute + in the dict is "_" + - "_data" attribute is given for DOR (not mandatory) and contains the json representation of the target object + :param obj_json: + :param sub_obj: list of contextual external objects given inside the object that references them with a DOR + :return: a list of read objects. 
This is a list due to the "_data" attribute + """ + if isinstance(obj_json, dict) and "$type" in obj_json: + qt = obj_json["$type"] + + obj_class = get_class_from_qualified_type(qt) + if obj_class is None: + raise UnknownTypeFromQualifiedType(qt + " " + json.dumps(obj_json)) + obj = obj_class() + + try: + for att, val in obj_json.items(): # tous les autres attributs + if att.lower() == "_data" and isinstance(val, dict): + for sub in read_json_dict(val): + sub_obj.append(sub) + elif not att.startswith("$"): + if att == "_": + att = "value" + setattr( + obj, + get_matching_class_attribute_name(obj, att), + _read_json_dict(val, sub_obj), + ) + except Exception as e: + logging.error( + f"Err on {att}", + search_attribute_matching_name( + obj=obj, + name_rgx=att, + deep_search=False, + search_in_sub_obj=False, + ), + obj, + ) + raise e + return obj + elif isinstance(obj_json, list): + return [_read_json_dict(o, sub_obj) for o in obj_json] + elif is_primitive(obj_json): + # logging.debug(f"PRIM : {obj_json}") + return obj_json + else: + raise NotParsableType(type(obj_json) + " " + obj_json) + + +def to_json_dict(obj: Any, obj_id_to_obj: Optional[Dict] = None) -> Any: + """ + Transform an object to a dict valid with the OSDU standard + :param obj: + :param obj_id_to_obj: + :return: + """ + return to_json_dict_fn( + obj, + lambda _id: obj_id_to_obj[_id] + if obj_id_to_obj is not None and _id in obj_id_to_obj + else None, ) - serializer_config = SerializerConfig(indent=" ") - serializer = JsonSerializer(context=context, config=serializer_config) - return serializer.render(obj) + + +def to_json_dict_fn(obj: Any, f_identifier_to_obj: Callable) -> Any: + """ + Transform an object to a dict valid with the OSDU standard + :param obj: + :param f_identifier_to_obj: A function that takes an object identifier see :func:`.introspection.get_obj_identifier` + and returns the corresponding object + :return: + """ + assert f_identifier_to_obj is not None + return _to_json_dict_fn(obj, 
f_identifier_to_obj, None) + + +def _fill_dict_with_attribs( + res: Dict, + obj: Any, + f_identifier_to_obj: Optional[Callable] = None, + _parent: Optional[Any] = None, +) -> None: + + for att_name, field in get_class_fields(obj).items(): + field_name = ( + field.metadata["name"] + if "name" in field.metadata + else field.name + ) + if field_name == "value": + field_name = "_" + field_name = field_name[0].upper() + field_name[1:] + mandatory = ( + field.metadata["required"] + if "required" in field.metadata + else False + ) + value = getattr(obj, att_name) + + if "Any_element" in str(field_name): + logging.debug(f"\t> {field_name}, {att_name} : {value}, {type(obj)}") + + if (value is not None or mandatory) and ( + not isinstance(value, list) or len(value) > 0 + ): + res[field_name] = _to_json_dict_fn( + value, f_identifier_to_obj, obj + ) + + if _parent is not None and ( + field_name.lower() == "uuid" or field_name.lower() == "uid" + ): + # adding referenced data + ref_identifier = get_obj_identifier(obj) + if f_identifier_to_obj is not None: + ref_value = f_identifier_to_obj(ref_identifier) + if ref_value is not None: + res["_data"] = to_json_dict_fn( + ref_value, f_identifier_to_obj + ) + else: + logging.debug(f"NotFound : {ref_identifier}") + + +def _to_json_dict_fn( + obj: Any, + f_identifier_to_obj: Optional[Callable] = None, + _parent: Optional[Any] = None, +) -> Any: + """ + Transform an object to a dict valid with the OSDU standard + :param obj: + :param f_identifier_to_obj: A function that takes an object identifier see :func:`.introspection.get_obj_identifier` + and returns the corresponding object + :param _parent: None if :param:`obj` is the one given directly by the user, else the parent object of :param:`obj` + in the original object given by the user + :return: Any + """ + if obj is None: + return None + elif is_enum(obj): + return str(obj) + # return { + # "$type": get_qualified_type_from_class(obj), + # "_": obj.value + # } + elif 
is_primitive(obj): + return obj + elif isinstance(obj, xsdata.models.datatype.XmlDateTime): + return str(obj) + elif isinstance(obj, DerivedElement): + res = {"$type": get_qualified_type_from_class(obj.value)} + # _fill_dict_with_attribs(res, obj.value, f_identifier_to_obj, _parent) + return res + elif isinstance(obj, list): + return [_to_json_dict_fn(o, f_identifier_to_obj, _parent) for o in obj] + else: + try: + res = {"$type": get_qualified_type_from_class(obj)} + _fill_dict_with_attribs(res, obj, f_identifier_to_obj, _parent) + return res + except Exception as e: + logging.error(f"Except on qt: {obj} - {type(obj)}") + raise e + + diff --git a/energyml-utils/src/energyml/utils/uri.py b/energyml-utils/src/energyml/utils/uri.py new file mode 100644 index 0000000..0b82a87 --- /dev/null +++ b/energyml-utils/src/energyml/utils/uri.py @@ -0,0 +1,94 @@ +import re +from dataclasses import dataclass, field +from typing import Optional + +from .constants import * + + +@dataclass +class Uri: + """ + A class to represent an ETP URI + """ + + dataspace: Optional[str] = field(default=None) + domain: Optional[str] = field(default=None) + domain_version: Optional[str] = field(default=None) + object_type: Optional[str] = field(default=None) + uuid: Optional[str] = field(default=None) + version: Optional[str] = field(default=None) + collection_domain: Optional[str] = field(default=None) + collection_domain_version: Optional[str] = field(default=None) + collection_domain_type: Optional[str] = field(default=None) + query: Optional[str] = field(default=None) + + @classmethod + def parse(cls, uri: str): + m = re.match(URI_RGX, uri, re.IGNORECASE) + if m is not None: + res = Uri() + res.dataspace = m.group(URI_RGX_GRP_DATASPACE) + res.domain = m.group(URI_RGX_GRP_DOMAIN) + if res.domain is not None and len(res.domain) <= 0: + res.domain = None + res.domain_version = m.group(URI_RGX_GRP_DOMAIN_VERSION) + res.object_type = m.group(URI_RGX_GRP_OBJECT_TYPE) + res.uuid = 
m.group(URI_RGX_GRP_UUID) or m.group(URI_RGX_GRP_UUID2) + res.version = m.group(URI_RGX_GRP_VERSION) + res.collection_domain = m.group(URI_RGX_GRP_COLLECTION_DOMAIN) + res.collection_domain_version = m.group( + URI_RGX_GRP_COLLECTION_DOMAIN_VERSION + ) + res.collection_domain_type = m.group(URI_RGX_GRP_COLLECTION_TYPE) + res.query = m.group(URI_RGX_GRP_QUERY) + return res + else: + return None + + def is_dataspace_uri(self): + return ( + self.domain is None + and self.object_type is None + and self.query is None + and self.collection_domain_type is None + ) + + def is_object_uri(self): + return ( + self.domain is not None + and self.domain_version is not None + and self.object_type is not None + and self.uuid is not None + ) + + def __str__(self): + res = "eml:///" + if self.dataspace is not None and len(self.dataspace) > 0: + res += f"dataspace('{self.dataspace}')" + if self.domain is not None: + res += "/" + if self.domain is not None and self.domain_version is not None: + res += f"{self.domain}{self.domain_version}.{self.object_type}" + if self.uuid is not None: + res += "(" + if self.version is not None: + res += f"uuid={self.uuid},version='{self.version}'" + else: + res += self.uuid + res += ")" + if ( + self.collection_domain is not None + and self.collection_domain_version + ): + res += f"/{self.collection_domain}{self.collection_domain_version}" + if self.collection_domain_type is not None: + res += f".{self.collection_domain_type}" + + if self.query is not None: + res += f"?{self.query}" + + return res + + +def parse_uri(uri: str) -> Uri: + return Uri.parse(uri) diff --git a/energyml-utils/src/energyml/utils/validation.py b/energyml-utils/src/energyml/utils/validation.py index 5a2129e..87cdaab 100644 --- a/energyml-utils/src/energyml/utils/validation.py +++ b/energyml-utils/src/energyml/utils/validation.py @@ -6,7 +6,8 @@ from typing import Any, List from .epc import ( - get_obj_identifier, Epc, + get_obj_identifier, + Epc, ) from .introspection import ( 
get_class_fields, @@ -14,8 +15,12 @@ search_attribute_matching_type_with_path, get_object_attribute_no_verif, get_object_attribute_rgx, - get_matching_class_attribute_name, get_obj_uuid, get_obj_version, get_content_type_from_class, - get_qualified_type_from_class, is_enum, + get_matching_class_attribute_name, + get_obj_uuid, + get_obj_version, + get_content_type_from_class, + get_qualified_type_from_class, + is_enum, ) @@ -62,14 +67,14 @@ def validate_epc(epc: Epc) -> List[ValidationError]: """ errs = [] for obj in epc.energyml_objects: - errs = errs + patterns_verification(obj) + errs = errs + patterns_validation(obj) - errs = errs + dor_verification(epc.energyml_objects) + errs = errs + dor_validation(epc.energyml_objects) return errs -def dor_verification(energyml_objects: List[Any]) -> List[ValidationError]: +def dor_validation(energyml_objects: List[Any]) -> List[ValidationError]: """ Verification for DOR. An error is raised if DORs contains wrong information, or if a referenced object is unknown in the :param:`epc`. @@ -179,16 +184,16 @@ def dor_verification(energyml_objects: List[Any]) -> List[ValidationError]: return errs -def patterns_verification(obj: Any) -> List[ValidationError]: +def patterns_validation(obj: Any) -> List[ValidationError]: """ Verification on object values, using the patterns defined in the original energyml xsd files. 
:param obj: :return: """ - return _patterns_verification(obj, obj, "") + return _patterns_validation(obj, obj, "") -def _patterns_verification( +def _patterns_validation( obj: Any, root_obj: Any, current_attribute_dot_path: str = "" ) -> List[ValidationError]: """ @@ -203,19 +208,19 @@ def _patterns_verification( if isinstance(obj, list): cpt = 0 for val in obj: - error_list = error_list + _patterns_verification( + error_list = error_list + _patterns_validation( val, root_obj, f"{current_attribute_dot_path}.{cpt}" ) cpt = cpt + 1 elif isinstance(obj, dict): for k, val in obj.items(): - error_list = error_list + _patterns_verification( + error_list = error_list + _patterns_validation( val, root_obj, f"{current_attribute_dot_path}.{k}" ) else: - # print(get_class_fields(obj)) + # logging.debug(get_class_fields(obj)) for att_name, att_field in get_class_fields(obj).items(): - # print(f"att_name : {att_field.metadata}") + # logging.debug(f"att_name : {att_field.metadata}") error_list = error_list + validate_attribute( get_object_attribute(obj, att_name, False), root_obj, @@ -292,9 +297,9 @@ def validate_attribute( ) if isinstance(value, list): for val in value: - if ( - (isinstance(val, str) and len(val) > min_inclusive) - or ((isinstance(val, int) or isinstance(val, float)) and val > min_inclusive) + if (isinstance(val, str) and len(val) > min_inclusive) or ( + (isinstance(val, int) or isinstance(val, float)) + and val > min_inclusive ): errs.append(potential_err) @@ -309,7 +314,7 @@ def validate_attribute( ) ) - return errs + _patterns_verification( + return errs + _patterns_validation( obj=value, root_obj=root_obj, current_attribute_dot_path=path, diff --git a/energyml-utils/src/energyml/utils/xml.py b/energyml-utils/src/energyml/utils/xml.py index da7426e..e6f7c46 100644 --- a/energyml-utils/src/energyml/utils/xml.py +++ b/energyml-utils/src/energyml/utils/xml.py @@ -1,75 +1,12 @@ # Copyright (c) 2023-2024 Geosiris. 
# SPDX-License-Identifier: Apache-2.0 -import re +import logging from io import BytesIO from typing import Optional, Any, Union from lxml import etree as ETREE # type: Any -ENERGYML_NAMESPACES = { - "eml": "http://www.energistics.org/energyml/data/commonv2", - "prodml": "http://www.energistics.org/energyml/data/prodmlv2", - "witsml": "http://www.energistics.org/energyml/data/witsmlv2", - "resqml": "http://www.energistics.org/energyml/data/resqmlv2", -} -""" -dict of all energyml namespaces -""" # pylint: disable=W0105 - -ENERGYML_NAMESPACES_PACKAGE = { - "eml": ["http://www.energistics.org/energyml/data/commonv2"], - "prodml": ["http://www.energistics.org/energyml/data/prodmlv2"], - "witsml": ["http://www.energistics.org/energyml/data/witsmlv2"], - "resqml": ["http://www.energistics.org/energyml/data/resqmlv2"], - "opc": [ - "http://schemas.openxmlformats.org/package/2006/content-types", - "http://schemas.openxmlformats.org/package/2006/metadata/core-properties" - ], -} -""" -dict of all energyml namespace packages -""" # pylint: disable=W0105 - -REGEX_UUID_NO_GRP = ( - r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" -) -REGEX_UUID = r"(?P" + REGEX_UUID_NO_GRP + ")" -REGEX_DOMAIN_VERSION = r"(?P(?P([\d]+[\._])*\d)\s*(?Pdev\s*(?P[\d]+))?)" -REGEX_DOMAIN_VERSION_FLAT = r"(?P(?P([\d]+)*\d)\s*(?Pdev\s*(?P[\d]+))?)" - - -# ContentType -REGEX_MIME_TYPE_MEDIA = r"(?Papplication|audio|font|example|image|message|model|multipart|text|video)" -REGEX_CT_ENERGYML_DOMAIN = r"(?Px-(?P[\w]+)\+xml)" -REGEX_CT_XML_DOMAIN = r"(?P(x\-)?(?P.+)\+xml)" -REGEX_CT_TOKEN_VERSION = r"version=" + REGEX_DOMAIN_VERSION -REGEX_CT_TOKEN_TYPE = r"type=(?P[\w\_]+)" - -REGEX_CONTENT_TYPE = ( - REGEX_MIME_TYPE_MEDIA + "/" - + "(?P(" + REGEX_CT_ENERGYML_DOMAIN + ")|(" + REGEX_CT_XML_DOMAIN + r")|([\w-]+\.?)+)" - + "(;((" + REGEX_CT_TOKEN_VERSION + ")|(" + REGEX_CT_TOKEN_TYPE + ")))*" -) -REGEX_QUALIFIED_TYPE = ( - r"(?P[a-zA-Z]+)" + REGEX_DOMAIN_VERSION_FLAT + 
r"\.(?P[\w_]+)" -) -# ========= - -REGEX_SCHEMA_VERSION = ( - r"(?P[eE]ml|[cC]ommon|[rR]esqml|[wW]itsml|[pP]rodml)?\s*v?" - + REGEX_DOMAIN_VERSION - + r"\s*$" -) - -REGEX_ENERGYML_FILE_NAME_OLD = r"(?P[\w]+)_" + REGEX_UUID_NO_GRP + r"\.xml$" -REGEX_ENERGYML_FILE_NAME_NEW = ( - REGEX_UUID_NO_GRP + r"\.(?P\d+(\.\d+)*)\.xml$" -) -REGEX_ENERGYML_FILE_NAME = ( - rf"^(.*/)?({REGEX_ENERGYML_FILE_NAME_OLD})|({REGEX_ENERGYML_FILE_NAME_NEW})" -) - -REGEX_XML_HEADER = r"^\s*\<\?xml\s+((encoding\s*=\s*\"(?P[^\"]+)\"|version\s*=\s*\"(?P[^\"]+)\"|standalone\s*=\s*\"(?P[^\"]+)\")\s+)+" +from .constants import * def get_pkg_from_namespace(namespace: str) -> Optional[str]: @@ -81,7 +18,8 @@ def get_pkg_from_namespace(namespace: str) -> Optional[str]: def is_energyml_content_type(content_type: str) -> bool: ct = parse_content_type(content_type) - return ct.group("domain") is not None + domain = ct.group("domain") + return domain is not None and domain in ENERGYML_NAMESPACES_PACKAGE.keys() def get_root_namespace(tree: ETREE.Element) -> str: @@ -92,27 +30,35 @@ def get_class_name_from_xml(tree: ETREE.Element) -> str: root_namespace = get_root_namespace(tree) pkg = get_pkg_from_namespace(root_namespace) if pkg is None: - print(f"No pkg found for elt {tree}") + logging.error(f"No pkg found for elt {tree}") else: if pkg == "opc": return "energyml.opc.opc." + get_root_type(tree) else: - schema_version = find_schema_version_in_element(tree).replace(".", "_").replace("-", "_") - # print(schema_version) + schema_version = ( + find_schema_version_in_element(tree) + .replace(".", "_") + .replace("-", "_") + ) + # logging.debug(schema_version) if pkg == "resqml" and schema_version == "2_0": schema_version = "2_0_1" - return ("energyml." + pkg - + ".v" + schema_version - + "." - + root_namespace[root_namespace.rindex("/") + 1:] - + "." + get_root_type(tree) - ) + return ( + "energyml." + + pkg + + ".v" + + schema_version + + "." + + root_namespace[root_namespace.rindex("/") + 1:] + + "." 
+ + get_root_type(tree) + ) def get_xml_encoding(xml_content: str) -> Optional[str]: try: - m = re.search(REGEX_XML_HEADER, xml_content) + m = re.search(RGX_XML_HEADER, xml_content) return m.group("encoding") except AttributeError: return "utf-8" @@ -121,8 +67,13 @@ def get_xml_encoding(xml_content: str) -> Optional[str]: def get_tree(xml_content: Union[bytes, str]) -> ETREE.Element: xml_bytes = xml_content if isinstance(xml_bytes, str): + # return ETREE.fromstring(xml_content) encoding = get_xml_encoding(xml_content) - xml_bytes = xml_content.encode(encoding=encoding.strip().lower() if encoding is not None else "utf-8") + xml_bytes = xml_content.encode( + encoding=encoding.strip().lower() + if encoding is not None + else "utf-8" + ) return ETREE.parse(BytesIO(xml_bytes)).getroot() @@ -135,7 +86,9 @@ def energyml_xpath(tree: ETREE.Element, xpath: str) -> Optional[list]: return None -def search_element_has_child_xpath(tree: ETREE.Element, child_name: str) -> list: +def search_element_has_child_xpath( + tree: ETREE.Element, child_name: str +) -> list: """ Search elements that has a child named (xml tag) as 'child_name'. Warning : child_name must contain the namespace (see. 
ENERGYML_NAMESPACES) @@ -147,15 +100,19 @@ def get_uuid(tree: ETREE.Element) -> str: _uuids = tree.xpath("@uuid") if len(_uuids) <= 0: _uuids = tree.xpath("@UUID") + if len(_uuids) <= 0: + _uuids = tree.xpath("@Uuid") if len(_uuids) <= 0: _uuids = tree.xpath("@uid") + if len(_uuids) <= 0: + _uuids = tree.xpath("@Uid") if len(_uuids) <= 0: _uuids = tree.xpath("@UID") return _uuids[0] def get_root_type(tree: ETREE.Element) -> str: - """ Returns the type (xml tag) of the element without the namespace """ + """Returns the type (xml tag) of the element without the namespace""" return tree.xpath("local-name()") @@ -179,7 +136,3 @@ def find_schema_version_in_element(tree: ETREE.ElementTree) -> str: if match_version is not None: return match_version.group(0).replace("dev", "-dev") return "" - - -def parse_content_type(ct: str): - return re.search(REGEX_CONTENT_TYPE, ct) diff --git a/energyml-utils/tests/test_epc.py b/energyml-utils/tests/test_epc.py new file mode 100644 index 0000000..cb01e18 --- /dev/null +++ b/energyml-utils/tests/test_epc.py @@ -0,0 +1,143 @@ +# Copyright (c) 2023-2024 Geosiris. 
+# SPDX-License-Identifier: Apache-2.0 +from energyml.eml.v2_0.commonv2 import Citation as Citation20 +from energyml.eml.v2_0.commonv2 import ( + DataObjectReference as DataObjectReference201, +) +from energyml.eml.v2_3.commonv2 import Citation +from energyml.eml.v2_3.commonv2 import DataObjectReference +from energyml.resqml.v2_0_1.resqmlv2 import FaultInterpretation +from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation + +from src.energyml.utils.epc import ( + get_obj_identifier, + gen_energyml_object_path, + EpcExportVersion, +) +from src.energyml.utils.introspection import ( + epoch_to_date, + epoch, + gen_uuid, + get_obj_pkg_pkgv_type_uuid_version, + get_obj_uri, +) + +fi_cit = Citation20( + title="An interpretation", + originator="Valentin", + creation=epoch_to_date(epoch()), + editor="test", + format="Geosiris", + last_update=epoch_to_date(epoch()), +) + +fi = FaultInterpretation( + citation=fi_cit, + uuid=gen_uuid(), + object_version="0", +) + +tr_cit = Citation( + title="--", + # title="test title", + originator="Valentin", + creation=epoch_to_date(epoch()), + editor="test", + format="Geosiris", + last_update=epoch_to_date(epoch()), +) + +dor = DataObjectReference( + uuid=fi.uuid, + title="a DOR title", + object_version="0", + qualified_type="a wrong qualified type", +) + +dor_correct20 = DataObjectReference201( + uuid=fi.uuid, + title="a DOR title", + content_type="application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation", + version_string="0", +) + +dor_correct23 = DataObjectReference( + uuid=fi.uuid, + title="a DOR title", + object_version="0", + qualified_type="resqml20.obj_FaultInterpretation", +) + +tr = TriangulatedSetRepresentation( + citation=tr_cit, + uuid=gen_uuid(), + represented_object=dor_correct23, +) + + +def test_get_obj_identifier(): + assert get_obj_identifier(tr) == tr.uuid + "." 
+ assert get_obj_identifier(fi) == fi.uuid + ".0" + assert get_obj_identifier(dor_correct20) == dor_correct20.uuid + ".0" + assert get_obj_identifier(dor_correct23) == dor_correct23.uuid + ".0" + + +def test_get_obj_pkg_pkgv_type_uuid_version_obj_201(): + ( + domain, + domain_version, + object_type, + obj_uuid, + obj_version, + ) = get_obj_pkg_pkgv_type_uuid_version(fi) + assert domain == "resqml" + assert domain_version == "20" + assert object_type == "obj_FaultInterpretation" + assert obj_uuid == fi.uuid + assert obj_version == fi.object_version + + +def test_get_obj_pkg_pkgv_type_uuid_version_obj_22(): + ( + domain, + domain_version, + object_type, + obj_uuid, + obj_version, + ) = get_obj_pkg_pkgv_type_uuid_version(tr) + assert domain == "resqml" + assert domain_version == "22" + assert object_type == "TriangulatedSetRepresentation" + assert obj_uuid == tr.uuid + assert obj_version == tr.object_version + + +def test_get_obj_uri(): + assert ( + str(get_obj_uri(tr)) + == f"eml:///resqml22.TriangulatedSetRepresentation({tr.uuid})" + ) + assert ( + str(get_obj_uri(tr, "/MyDataspace/")) + == f"eml:///dataspace('/MyDataspace/')/resqml22.TriangulatedSetRepresentation({tr.uuid})" + ) + + assert ( + str(get_obj_uri(fi)) + == f"eml:///resqml20.obj_FaultInterpretation(uuid={fi.uuid},version='{fi.object_version}')" + ) + assert ( + str(get_obj_uri(fi, "/MyDataspace/")) + == f"eml:///dataspace('/MyDataspace/')/resqml20.obj_FaultInterpretation(uuid={fi.uuid},version='{fi.object_version}')" + ) + + +def test_gen_energyml_object_path(): + assert ( + gen_energyml_object_path(tr) + == f"TriangulatedSetRepresentation_{tr.uuid}.xml" + ) + assert ( + gen_energyml_object_path(tr, EpcExportVersion.EXPANDED) + == f"namespace_resqml22/{tr.uuid}/TriangulatedSetRepresentation_{tr.uuid}.xml" + ) diff --git a/energyml-utils/tests/test_introspection.py b/energyml-utils/tests/test_introspection.py index d540ebe..2e0293e 100644 --- a/energyml-utils/tests/test_introspection.py +++ 
b/energyml-utils/tests/test_introspection.py @@ -1,14 +1,20 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 +import energyml.resqml.v2_0_1.resqmlv2 +from energyml.opc.opc import Dcmitype1, Contributor -from energyml.opc.opc import ( - Dcmitype1, - Contributor +from src.energyml.utils.constants import ( + date_to_epoch, + pascal_case, + epoch, + epoch_to_date, + snake_case, ) - from src.energyml.utils.introspection import ( - is_primitive, is_enum, get_class_from_name, - snake_case, pascal_case + is_primitive, + is_enum, + get_class_from_name, + get_class_from_content_type, ) @@ -43,3 +49,16 @@ def test_pascal_case(): assert pascal_case("This_IsASnakecase") == "ThisIsASnakecase" assert pascal_case("This_isASnakecase") == "ThisIsASnakecase" assert pascal_case("this_is_a_snakecase") == "ThisIsASnakecase" + + +def test_epoch(): + now = epoch() + assert date_to_epoch(epoch_to_date(now)) == now + + +def test_get_class_from_content_type(): + found_type = get_class_from_content_type( + "resqml20.obj_Grid2dRepresentation" + ) + assert found_type is not None + assert found_type == energyml.resqml.v2_0_1.resqmlv2.Grid2DRepresentation diff --git a/energyml-utils/tests/test_uri.py b/energyml-utils/tests/test_uri.py new file mode 100644 index 0000000..b878af7 --- /dev/null +++ b/energyml-utils/tests/test_uri.py @@ -0,0 +1,90 @@ +# Copyright (c) 2023-2024 Geosiris. 
+# SPDX-License-Identifier: Apache-2.0 + +from src.energyml.utils.uri import Uri, parse_uri + + +def test_uri_constructor(): + assert ( + str( + Uri( + dataspace="/folder-name/project-name", + domain="resqml", + domain_version="20", + object_type="obj_HorizonInterpretation", + uuid="421a7a05-033a-450d-bcef-051352023578", + version="2.0", + collection_domain=None, + collection_domain_version=None, + collection_domain_type=None, + query="query", + ) + ) + == "eml:///dataspace('/folder-name/project-name')/resqml20.obj_HorizonInterpretation(uuid=421a7a05-033a-450d-bcef-051352023578,version='2.0')?query" + ) + + +def test_uri_error(): + assert parse_uri("eml//") is None + assert parse_uri("a random text") is None + + +def test_uri_default_dataspace(): + uri = "eml:///" + assert uri == str(parse_uri(uri)) + assert uri == str(Uri()) + + +def test_uri_empty_dataspace(): + uri = "eml:///dataspace('')" + assert "eml:///" == str(parse_uri(uri)) + + +def test_uri_dataspace(): + uri = "eml:///dataspace('rdms-db')" + assert uri == str(parse_uri(uri)) + + +def test_uri_dataspace_bis(): + uri = "eml:///dataspace('/folder-name/project-name')" + assert uri == str(parse_uri(uri)) + + +def test_uri_dataspace_query(): + uri = "eml:///dataspace('rdms-db')?$filter=Name eq 'mydb'" + assert uri == str(parse_uri(uri)) + + +def test_uri_collection(): + uri = "eml:///witsml20.Well/witsml20.Wellbore" + assert uri == str(parse_uri(uri)) + + +def test_uri_data_object_resqml(): + uri = "eml:///resqml20.obj_HorizonInterpretation(421a7a05-033a-450d-bcef-051352023578)" + assert uri == str(parse_uri(uri)) + + +def test_uri_data_object_witsml(): + uri = "eml:///witsml21.Well(uuid=ec8c3f16-1454-4f36-ae10-27d2a2680cf2)" + assert uri == str(parse_uri(uri)) + + +def test_uri_dataspace_data_object(): + uri = "eml:///dataspace('/folder-name/project-name')/resqml20.obj_HorizonInterpretation?query" + assert uri == str(parse_uri(uri)) + + +def test_uri_dataspace_data_object_query(): + uri = 
"eml:///dataspace('/folder-name/project-name')/resqml20.obj_HorizonInterpretation(uuid=421a7a05-033a-450d-bcef-051352023578,version='2.0')?query" + assert uri == str(parse_uri(uri)) + + +def test_uri_dataspace_data_object_collection_query(): + uri = "eml:///dataspace('test')/witsml20.Well(ec8c3f16-1454-4f36-ae10-27d2a2680cf2)/witsml20.Wellbore?query" + assert uri == str(parse_uri(uri)) + + +def test_uri_full(): + uri = "eml:///witsml20.Well(uuid=ec8c3f16-1454-4f36-ae10-27d2a2680cf2,version='1.0')/witsml20.Wellbore?query" + assert uri == str(parse_uri(uri)) diff --git a/energyml-utils/tests/test_xml.py b/energyml-utils/tests/test_xml.py new file mode 100644 index 0000000..12dd163 --- /dev/null +++ b/energyml-utils/tests/test_xml.py @@ -0,0 +1,225 @@ +# Copyright (c) 2023-2024 Geosiris. +# SPDX-License-Identifier: Apache-2.0 + +import logging + +from src.energyml.utils.xml import * + +CT_20 = "application/x-resqml+xml;version=2.0;type=obj_TriangulatedSetRepresentation" +CT_22 = ( + "application/x-resqml+xml;version=2.2;type=TriangulatedSetRepresentation" +) +CT_22_DEV = "application/x-resqml+xml;version=2.2dev3;type=TriangulatedSetRepresentation" + +QT_20 = "resqml20.obj_TriangulatedSetRepresentation" +QT_22 = "resqml22.TriangulatedSetRepresentation" +QT_22_DEV = "resqml22dev3.TriangulatedSetRepresentation" + +# Following xml are not correct but are modified to make some test on the different ways to write attributes +# like the UUID that can be written (depending on the package) ["UUID", "Uuid", "Uid" ...] 
+tr_xml20 = """ + + + A title + An originator + 2020-01-08T13:41:24Z + A description + A name + 2020-01-08T13:41:25Z + + + A title + + +""" + +tr_xml22 = """ + + + A title + An originator + 2020-01-08T13:41:24Z + A description + A name + 2020-01-08T13:41:25Z + + + A title + + +""" + +tr_xml22dev3 = """ + + + A title + An originator + 2020-01-08T13:41:24Z + A description + A name + 2020-01-08T13:41:25Z + + + A title + + +""" + + +def test_parse_content_type_20(): + match = parse_content_type(CT_20) + assert match is not None + assert match.group("media") == "application" + assert match.group("domain") == "resqml" + assert match.group("rawDomain") == "x-resqml+xml" + assert match.group("domainVersion") == "2.0" + assert match.group("versionNum") == "2.0" + assert match.group("devNum") is None + assert match.group("dev") is None + assert match.group("type") == "obj_TriangulatedSetRepresentation" + + +def test_parse_content_type_22(): + match = parse_content_type(CT_22) + logging.error(match.groupdict()) + assert match is not None + assert match.group("media") == "application" + assert match.group("domain") == "resqml" + assert match.group("rawDomain") == "x-resqml+xml" + assert match.group("domainVersion") == "2.2" + assert match.group("versionNum") == "2.2" + assert match.group("devNum") is None + assert match.group("dev") is None + assert match.group("type") == "TriangulatedSetRepresentation" + + +def test_parse_content_type_22_dev(): + match = parse_content_type(CT_22_DEV) + logging.error(match.groupdict()) + assert match is not None + assert match.group("media") == "application" + assert match.group("domain") == "resqml" + assert match.group("rawDomain") == "x-resqml+xml" + assert match.group("domainVersion") == "2.2dev3" + assert match.group("versionNum") == "2.2" + assert match.group("devNum") == "3" + assert match.group("dev") == "dev3" + assert match.group("type") == "TriangulatedSetRepresentation" + + +def test_parse_qualified_type_20(): + match = 
parse_qualified_type(QT_20) + assert match is not None + assert match.group("domain") == "resqml" + assert match.group("domainVersion") == "20" + assert match.group("devNum") is None + assert match.group("dev") is None + assert match.group("type") == "obj_TriangulatedSetRepresentation" + + +def test_parse_qualified_type_22(): + match = parse_qualified_type(QT_22) + logging.error(match.groupdict()) + assert match is not None + assert match.group("domain") == "resqml" + assert match.group("domainVersion") == "22" + assert match.group("devNum") is None + assert match.group("dev") is None + assert match.group("type") == "TriangulatedSetRepresentation" + + +def test_parse_qualified_type_22_dev(): + match = parse_qualified_type(QT_22_DEV) + logging.error(match.groupdict()) + assert match is not None + assert match.group("domain") == "resqml" + assert match.group("domainVersion") == "22dev3" + assert match.group("devNum") == "3" + assert match.group("dev") == "dev3" + assert match.group("type") == "TriangulatedSetRepresentation" + + +def test_is_energyml_content_type(): + for d in ENERGYML_NAMESPACES_PACKAGE.keys(): + assert is_energyml_content_type( + f"application/x-{d}+xml;version=2.0;type=obj_XXX" + ) + assert not is_energyml_content_type( + f"application/x-randomValue+xml;version=2.0;type=obj_XXX" + ) + + +def test_get_root_type_20(): + tree = get_tree(tr_xml20) + assert get_root_type(tree) == "TriangulatedSetRepresentation" + + +def test_get_root_type_22dev3(): + tree = get_tree(tr_xml22dev3) + assert get_root_type(tree) == "TriangulatedSetRepresentation" + + +def test_get_uuid_20(): + tree = get_tree(tr_xml20) + assert get_uuid(tree) == "3d2af068-bd16-4b53-932c-d1c2ff6913c3" + + +def test_get_uuid_22(): + tree = get_tree(tr_xml22) + assert get_uuid(tree) == "3d2af068-bd16-4b53-932c-d1c2ff6913c3" + + +def test_get_uuid_22dev3(): + tree = get_tree(tr_xml22dev3) + assert get_uuid(tree) == "3d2af068-bd16-4b53-932c-d1c2ff6913c3" + + +def 
test_find_schema_version_in_element_20(): + tree = get_tree(tr_xml20) + assert find_schema_version_in_element(tree) == "2.0" + + +def test_find_schema_version_in_element_22(): + tree = get_tree(tr_xml22) + assert find_schema_version_in_element(tree) == "2.2" + + +def test_find_schema_version_in_element_22dev3(): + tree = get_tree(tr_xml22dev3) + assert find_schema_version_in_element(tree) == "2.2-dev3" + + +def test_find_search_element_has_child_xpath(): + tree = get_tree(tr_xml22dev3) + assert len(search_element_has_child_xpath(tree, "eml:Citation")) == 1 + assert len(search_element_has_child_xpath(tree, "eml:Title")) == 2 + + +def test_get_xml_encoding(): + assert get_xml_encoding(tr_xml20) == "us-ascii" + assert get_xml_encoding(tr_xml22) == "windows-1250" + assert get_xml_encoding(tr_xml22dev3) == "UTF-8"