Skip to content

Commit

Permalink
use only lxml for XML handling (#863)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkralidis committed Feb 24, 2023
1 parent b571118 commit b0c6875
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 112 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ jobs:
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9']
lxml: [true, false]
env:
LXML: ${{ matrix.lxml }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COVERALLS_SERVICE_NAME: github
steps:
Expand All @@ -28,8 +26,6 @@ jobs:
pip3 install -e .
pip3 install -r requirements.txt
pip3 install -r requirements-dev.txt
echo "LXML => $LXML"
if [ "$LXML" == "true" ]; then pip install lxml; fi
- name: run tests ⚙️
run: python3 -m pytest
- name: run coveralls ⚙️
Expand Down
2 changes: 1 addition & 1 deletion docs/en/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Installation
Requirements
------------

OWSLib requires a Python interpreter, as well as `ElementTree <https://docs.python.org/2/library/xml.etree.elementtree.html>`_ or `lxml <http://lxml.de>`_ for XML parsing.
OWSLib requires a Python interpreter, as well as `lxml <https://lxml.de>`_ for XML parsing.

Install
-------
Expand Down
2 changes: 1 addition & 1 deletion etc/debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ Homepage: http://geopython.github.com/OWSLib/

Package: python-owslib
Architecture: all
Depends: ${misc:Depends}, debconf, python (>=2.7), python-lxml
Depends: ${misc:Depends}, debconf, python (>=3), python-lxml
Description: OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models.
15 changes: 1 addition & 14 deletions owslib/catalogue/csw2.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,6 @@ def getrecords(self, qtype=None, keywords=[], typenames='csw:Record', propertyna
else:
# construct request
node0 = self._setrootelement('csw:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows', namespaces['ows'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -354,11 +349,6 @@ def getrecords2(self, constraints=[], sortby=None, typenames='csw:Record', esn='
else:
# construct request
node0 = self._setrootelement('csw:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows', namespaces['ows'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -622,10 +612,7 @@ def _setidentifierkey(self, el):
return el

def _setrootelement(self, el):
if etree.__name__ == 'lxml.etree': # apply nsmap
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
else:
return etree.Element(util.nspath_eval(el, namespaces))
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)

def _setconstraint(self, parent, qtype=None, propertyname='csw:AnyText', keywords=[], bbox=None, cql=None,
identifier=None):
Expand Down
10 changes: 1 addition & 9 deletions owslib/catalogue/csw3.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,6 @@ def getrecords(self, constraints=[], sortby=None, typenames='csw30:Record', esn=
else:
# construct request
node0 = self._setrootelement('csw30:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows110', namespaces['ows110'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -516,10 +511,7 @@ def _setidentifierkey(self, el):
return el

def _setrootelement(self, el):
if etree.__name__ == 'lxml.etree': # apply nsmap
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
else:
return etree.Element(util.nspath_eval(el, namespaces))
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)

def _setconstraint(self, parent, qtype=None, propertyname='csw30:AnyText', keywords=[], bbox=None, cql=None,
identifier=None):
Expand Down
32 changes: 14 additions & 18 deletions owslib/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,33 @@
# Contact email: sgillies@frii.com
# =============================================================================


from lxml import etree
from lxml.etree import ParseError
ElementType = etree._Element

from owslib.namespaces import Namespaces


def patch_well_known_namespaces(etree_module):
"""Monkey patches the etree module to add some well-known namespaces."""
def patch_well_known_namespaces():
"""Monkey patches lxml.etree to add some well-known namespaces."""

ns = Namespaces()

try:
register_namespace = etree_module.register_namespace
register_namespace = etree.register_namespace
except AttributeError:
etree_module._namespace_map
etree._namespace_map

def register_namespace(prefix, uri):
etree_module._namespace_map[uri] = prefix
etree._namespace_map[uri] = prefix

for k, v in list(ns.get_namespaces().items()):
register_namespace(k, v)

etree.set_default_parser(
parser=etree.XMLParser(resolve_entities=False)
)

# try to find lxml or elementtree
try:
from lxml import etree
from lxml.etree import ParseError
ElementType = etree._Element
except ImportError:
import xml.etree.ElementTree as etree
ElementType = etree.Element
try:
from xml.etree.ElementTree import ParseError
except ImportError:
from xml.parsers.expat import ExpatError as ParseError

patch_well_known_namespaces(etree)
patch_well_known_namespaces()
2 changes: 1 addition & 1 deletion owslib/feature/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from urllib.parse import urlencode, parse_qsl
from owslib.etree import etree
from owslib.namespaces import Namespaces
from owslib.util import which_etree, findall, Authentication, openURL
from owslib.util import findall, Authentication, openURL

MYNS = Namespaces()
XS_NAMESPACE = MYNS.get_namespace("xs")
Expand Down
98 changes: 36 additions & 62 deletions owslib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,8 @@ def nspath_eval(xpath, namespaces):

def cleanup_namespaces(element):
""" Remove unused namespaces from an element """
if etree.__name__ == 'lxml.etree':
etree.cleanup_namespaces(element)
return element
else:
return etree.fromstring(etree.tostring(element))
etree.cleanup_namespaces(element)
return element


def add_namespaces(root, ns_keys):
Expand All @@ -292,35 +289,34 @@ def add_namespaces(root, ns_keys):

ns_keys = [(x, namespaces.get_namespace(x)) for x in ns_keys]

if etree.__name__ != 'lxml.etree':
# We can just add more namespaces when not using lxml.
# We can't re-add an existing namespaces. Get a list of current
# namespaces in use
existing_namespaces = set()
for elem in root.iter():
if elem.tag[0] == "{":
uri, tag = elem.tag[1:].split("}")
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
for key, link in ns_keys:
if link is not None and key not in existing_namespaces:
root.set("xmlns:%s" % key, link)
return root
else:
# lxml does not support setting xmlns attributes
# Update the elements nsmap with new namespaces
new_map = root.nsmap
for key, link in ns_keys:
if link is not None:
new_map[key] = link
# Recreate the root element with updated nsmap
new_root = etree.Element(root.tag, nsmap=new_map)
# Carry over attributes
for a, v in list(root.items()):
new_root.set(a, v)
# Carry over children
for child in root:
new_root.append(deepcopy(child))
return new_root
# lxml does not support setting xmlns attributes
# Update the elements nsmap with new namespaces
new_map = root.nsmap
for key, link in ns_keys:
if link is not None:
new_map[key] = link
# Recreate the root element with updated nsmap
new_root = etree.Element(root.tag, nsmap=new_map)
# Carry over attributes
for a, v in list(root.items()):
new_root.set(a, v)
# Carry over children
for child in root:
new_root.append(deepcopy(child))
return new_root

# We can just add more namespaces when not using lxml.
# We can't re-add an existing namespaces. Get a list of current
# namespaces in use
existing_namespaces = set()
for elem in root.iter():
if elem.tag[0] == "{":
uri, tag = elem.tag[1:].split("}")
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
for key, link in ns_keys:
if link is not None and key not in existing_namespaces:
root.set("xmlns:%s" % key, link)
return root


def getXMLInteger(elem, tag):
Expand Down Expand Up @@ -519,21 +515,14 @@ def element_to_string(element, encoding=None, xml_declaration=False):
if encoding is None:
encoding = "ISO-8859-1"

if etree.__name__ == 'lxml.etree':
if xml_declaration:
if encoding in ['unicode', 'utf-8']:
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
etree.tostring(element, encoding='unicode'))
else:
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
if xml_declaration:
if encoding in ['unicode', 'utf-8']:
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
etree.tostring(element, encoding='unicode'))
else:
output = etree.tostring(element)
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
else:
if xml_declaration:
output = '<?xml version="1.0" encoding="{}" standalone="no"?>\n{}'.format(
encoding, etree.tostring(element, encoding=encoding))
else:
output = etree.tostring(element)
output = etree.tostring(element)

return output

Expand Down Expand Up @@ -777,21 +766,6 @@ def bind_url(url):
log.addHandler(NullHandler())


def which_etree():
    """Decipher which etree library is being used by OWSLib.

    Inspects the ``__file__`` path of the module-level ``etree`` object and
    matches it against the known implementations, in this order: lxml, the
    standard library ``xml.etree``, then the standalone ``elementtree``
    package.

    :returns: ``'lxml.etree'``, ``'xml.etree'``,
        ``'elementtree.ElementTree'``, or ``None`` when the path matches
        none of them.
    """

    # Normalise backslash separators so the 'xml/etree' check also matches
    # Windows-style module paths.
    location = etree.__file__.replace('\\', '/')

    if 'lxml' in location:
        return 'lxml.etree'
    if 'xml/etree' in location:
        return 'xml.etree'
    # The previous test looked for 'elementree' (missing a "t"), which can
    # never occur inside an 'elementtree' path, so this result was
    # unreachable.
    if 'elementtree' in location:
        return 'elementtree.ElementTree'
    return None


def findall(root, xpath, attribute_name=None, attribute_value=None):
"""Find elements recursively from given root element based on
xpath and possibly given attribute
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
dataclasses; python_version < '3.7'
lxml
python-dateutil>=1.5
pytz
requests>=1.0
pyyaml
dataclasses; python_version < '3.7'
requests>=1.0

0 comments on commit b0c6875

Please sign in to comment.