From b0c687544ddc213d8dcd4a056139b63451938b21 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Fri, 24 Feb 2023 10:15:35 -0500 Subject: [PATCH] use only lxml for XML handling (#863) --- .github/workflows/main.yml | 4 -- docs/en/installation.rst | 2 +- etc/debian/control | 2 +- owslib/catalogue/csw2.py | 15 +----- owslib/catalogue/csw3.py | 10 +--- owslib/etree.py | 32 ++++++------- owslib/feature/schema.py | 2 +- owslib/util.py | 98 ++++++++++++++------------------------ requirements.txt | 5 +- 9 files changed, 58 insertions(+), 112 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1085256ea..816be089f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,9 +8,7 @@ jobs: strategy: matrix: python-version: ['3.7', '3.8', '3.9'] - lxml: [true, false] env: - LXML: ${{ matrix.lxml }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_SERVICE_NAME: github steps: @@ -28,8 +26,6 @@ jobs: pip3 install -e . pip3 install -r requirements.txt pip3 install -r requirements-dev.txt - echo "LXML => $LXML" - if [ "$LXML" == "true" ]; then pip install lxml; fi - name: run tests ⚙️ run: python3 -m pytest - name: run coveralls ⚙️ diff --git a/docs/en/installation.rst b/docs/en/installation.rst index 132928d23..cd4a8dde1 100644 --- a/docs/en/installation.rst +++ b/docs/en/installation.rst @@ -4,7 +4,7 @@ Installation Requirements ------------ -OWSLib requires a Python interpreter, as well as `ElementTree `_ or `lxml `_ for XML parsing. +OWSLib requires a Python interpreter, as well as `lxml `_ for XML parsing. Install ------- diff --git a/etc/debian/control b/etc/debian/control index aa4cd5d87..704cdf53e 100644 --- a/etc/debian/control +++ b/etc/debian/control @@ -9,5 +9,5 @@ Homepage: http://geopython.github.com/OWSLib/ Package: python-owslib Architecture: all -Depends: ${misc:Depends}, debconf, python (>=2.7), python-lxml +Depends: ${misc:Depends}, debconf, python (>=3), python-lxml Description: OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models. diff --git a/owslib/catalogue/csw2.py b/owslib/catalogue/csw2.py index 339633664..690e5782a 100644 --- a/owslib/catalogue/csw2.py +++ b/owslib/catalogue/csw2.py @@ -226,11 +226,6 @@ def getrecords(self, qtype=None, keywords=[], typenames='csw:Record', propertyna else: # construct request node0 = self._setrootelement('csw:GetRecords') - if etree.__name__ != 'lxml.etree': # apply nsmap manually - node0.set('xmlns:ows', namespaces['ows']) - node0.set('xmlns:gmd', namespaces['gmd']) - node0.set('xmlns:dif', namespaces['dif']) - node0.set('xmlns:fgdc', namespaces['fgdc']) node0.set('outputSchema', outputschema) node0.set('outputFormat', format) node0.set('version', self.version) @@ -354,11 +349,6 @@ def getrecords2(self, constraints=[], sortby=None, typenames='csw:Record', esn=' else: # construct request node0 = self._setrootelement('csw:GetRecords') - if etree.__name__ != 'lxml.etree': # apply nsmap manually - node0.set('xmlns:ows', namespaces['ows']) - node0.set('xmlns:gmd', namespaces['gmd']) - node0.set('xmlns:dif', namespaces['dif']) - node0.set('xmlns:fgdc', namespaces['fgdc']) node0.set('outputSchema', outputschema) node0.set('outputFormat', format) node0.set('version', self.version) @@ -622,10 +612,7 @@ def _setidentifierkey(self, el): return el def _setrootelement(self, el): - if etree.__name__ == 'lxml.etree': # apply nsmap - return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces) - else: - return etree.Element(util.nspath_eval(el, namespaces)) + return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces) def _setconstraint(self, parent, qtype=None, propertyname='csw:AnyText', keywords=[], bbox=None, cql=None, identifier=None): diff --git a/owslib/catalogue/csw3.py b/owslib/catalogue/csw3.py index 91ca4f096..38fb22e0c 100644 --- a/owslib/catalogue/csw3.py +++ b/owslib/catalogue/csw3.py @@ -242,11 +242,6 @@ def getrecords(self, constraints=[], sortby=None, typenames='csw30:Record', esn= else: # construct request node0 = self._setrootelement('csw30:GetRecords') - if etree.__name__ != 'lxml.etree': # apply nsmap manually - node0.set('xmlns:ows110', namespaces['ows110']) - node0.set('xmlns:gmd', namespaces['gmd']) - node0.set('xmlns:dif', namespaces['dif']) - node0.set('xmlns:fgdc', namespaces['fgdc']) node0.set('outputSchema', outputschema) node0.set('outputFormat', format) node0.set('version', self.version) @@ -516,10 +511,7 @@ def _setidentifierkey(self, el): return el def _setrootelement(self, el): - if etree.__name__ == 'lxml.etree': # apply nsmap - return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces) - else: - return etree.Element(util.nspath_eval(el, namespaces)) + return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces) def _setconstraint(self, parent, qtype=None, propertyname='csw30:AnyText', keywords=[], bbox=None, cql=None, identifier=None): diff --git a/owslib/etree.py b/owslib/etree.py index 590989c67..71dbaa0e4 100644 --- a/owslib/etree.py +++ b/owslib/etree.py @@ -4,37 +4,33 @@ # Contact email: sgillies@frii.com # ============================================================================= + +from lxml import etree +from lxml.etree import ParseError +ElementType = etree._Element + from owslib.namespaces import Namespaces -def patch_well_known_namespaces(etree_module): - """Monkey patches the etree module to add some well-known namespaces.""" +def patch_well_known_namespaces(): + """Monkey patches lxml.etree to add some well-known namespaces.""" ns = Namespaces() try: - register_namespace = etree_module.register_namespace + register_namespace = etree.register_namespace except AttributeError: - etree_module._namespace_map + etree._namespace_map def register_namespace(prefix, uri): - etree_module._namespace_map[uri] = prefix + etree._namespace_map[uri] = prefix for k, v in list(ns.get_namespaces().items()): register_namespace(k, v) + etree.set_default_parser( + parser=etree.XMLParser(resolve_entities=False) + ) -# try to find lxml or elementtree -try: - from lxml import etree - from lxml.etree import ParseError - ElementType = etree._Element -except ImportError: - import xml.etree.ElementTree as etree - ElementType = etree.Element - try: - from xml.etree.ElementTree import ParseError - except ImportError: - from xml.parsers.expat import ExpatError as ParseError -patch_well_known_namespaces(etree) +patch_well_known_namespaces() diff --git a/owslib/feature/schema.py b/owslib/feature/schema.py index f2b92992a..663eed39b 100644 --- a/owslib/feature/schema.py +++ b/owslib/feature/schema.py @@ -13,7 +13,7 @@ from urllib.parse import urlencode, parse_qsl from owslib.etree import etree from owslib.namespaces import Namespaces -from owslib.util import which_etree, findall, Authentication, openURL +from owslib.util import findall, Authentication, openURL MYNS = Namespaces() XS_NAMESPACE = MYNS.get_namespace("xs") diff --git a/owslib/util.py b/owslib/util.py index bcd5e6eb0..68551f3e3 100644 --- a/owslib/util.py +++ b/owslib/util.py @@ -277,11 +277,8 @@ def nspath_eval(xpath, namespaces): def cleanup_namespaces(element): """ Remove unused namespaces from an element """ - if etree.__name__ == 'lxml.etree': - etree.cleanup_namespaces(element) - return element - else: - return etree.fromstring(etree.tostring(element)) + etree.cleanup_namespaces(element) + return element def add_namespaces(root, ns_keys): @@ -292,35 +289,34 @@ def add_namespaces(root, ns_keys): ns_keys = [(x, namespaces.get_namespace(x)) for x in ns_keys] - if etree.__name__ != 'lxml.etree': - # We can just add more namespaces when not using lxml. - # We can't re-add an existing namespaces. Get a list of current - # namespaces in use - existing_namespaces = set() - for elem in root.iter(): - if elem.tag[0] == "{": - uri, tag = elem.tag[1:].split("}") - existing_namespaces.add(namespaces.get_namespace_from_url(uri)) - for key, link in ns_keys: - if link is not None and key not in existing_namespaces: - root.set("xmlns:%s" % key, link) - return root - else: - # lxml does not support setting xmlns attributes - # Update the elements nsmap with new namespaces - new_map = root.nsmap - for key, link in ns_keys: - if link is not None: - new_map[key] = link - # Recreate the root element with updated nsmap - new_root = etree.Element(root.tag, nsmap=new_map) - # Carry over attributes - for a, v in list(root.items()): - new_root.set(a, v) - # Carry over children - for child in root: - new_root.append(deepcopy(child)) - return new_root + # lxml does not support setting xmlns attributes + # Update the elements nsmap with new namespaces + new_map = root.nsmap + for key, link in ns_keys: + if link is not None: + new_map[key] = link + # Recreate the root element with updated nsmap + new_root = etree.Element(root.tag, nsmap=new_map) + # Carry over attributes + for a, v in list(root.items()): + new_root.set(a, v) + # Carry over children + for child in root: + new_root.append(deepcopy(child)) + return new_root + + # We can just add more namespaces when not using lxml. + # We can't re-add an existing namespaces. Get a list of current + # namespaces in use + existing_namespaces = set() + for elem in root.iter(): + if elem.tag[0] == "{": + uri, tag = elem.tag[1:].split("}") + existing_namespaces.add(namespaces.get_namespace_from_url(uri)) + for key, link in ns_keys: + if link is not None and key not in existing_namespaces: + root.set("xmlns:%s" % key, link) + return root def getXMLInteger(elem, tag): @@ -519,21 +515,14 @@ def element_to_string(element, encoding=None, xml_declaration=False): if encoding is None: encoding = "ISO-8859-1" - if etree.__name__ == 'lxml.etree': - if xml_declaration: - if encoding in ['unicode', 'utf-8']: - output = '\n{}'.format( - etree.tostring(element, encoding='unicode')) - else: - output = etree.tostring(element, encoding=encoding, xml_declaration=True) + if xml_declaration: + if encoding in ['unicode', 'utf-8']: + output = '\n{}'.format( + etree.tostring(element, encoding='unicode')) else: - output = etree.tostring(element) + output = etree.tostring(element, encoding=encoding, xml_declaration=True) else: - if xml_declaration: - output = '\n{}'.format( - encoding, etree.tostring(element, encoding=encoding)) - else: - output = etree.tostring(element) + output = etree.tostring(element) return output @@ -777,21 +766,6 @@ def bind_url(url): log.addHandler(NullHandler()) -def which_etree(): - """decipher which etree library is being used by OWSLib""" - - which_etree = None - - if 'lxml' in etree.__file__: - which_etree = 'lxml.etree' - elif 'xml/etree' in etree.__file__: - which_etree = 'xml.etree' - elif 'elementree' in etree.__file__: - which_etree = 'elementtree.ElementTree' - - return which_etree - - def findall(root, xpath, attribute_name=None, attribute_value=None): """Find elements recursively from given root element based on xpath and possibly given attribute diff --git a/requirements.txt b/requirements.txt index 05622e145..c1b2c09bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ +dataclasses; python_version < '3.7' +lxml python-dateutil>=1.5 pytz -requests>=1.0 pyyaml -dataclasses; python_version < '3.7' +requests>=1.0