-
Notifications
You must be signed in to change notification settings - Fork 0
/
xmlutils.py
322 lines (271 loc) · 9.88 KB
/
xmlutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import xml.etree.ElementTree as ET
import xml.etree as etree
from xml.dom import minidom
import os
import sys
import subprocess
from string import Template
import xml.etree.ElementInclude
import xmldict
import collections
# Terminology used by the helpers in this module:
#   element - an xml tree node
#   key     - an element (tag or path) that is queried
#   value   - the string literal held by an element
# TODO: Phase out usage of XmlDictConfig
# Directory that contains this file; realpath() resolves symlinks first.
module_dir=os.path.dirname(os.path.realpath(__file__))
def check_if_xml_tree(data):
    '''
    Tell whether data is already a parsed xml object.

    Returns True for an ElementTree Element or an ElementTree document,
    False for anything else (file names, None, ...).
    '''
    parsed_types = (etree.ElementTree.Element, etree.ElementTree.ElementTree)
    return isinstance(data, parsed_types)
def read_string(xml_str):
    '''
    Parse xml_str and expand its XInclude directives in place.

    Returns the root element of the parsed document.
    '''
    xmldoc = ET.fromstring(xml_str)
    # The include pass is run twice, exactly as before.
    # NOTE(review): presumably the second pass expands includes that were
    # pulled in by the first one -- confirm before reducing to one pass.
    for _ in range(2):
        xml.etree.ElementInclude.include(xmldoc)
    return xmldoc
def read_file(xml_fn):
    '''
    Load an xml file and return its root element with XIncludes expanded.

    xml_fn: a file name (or file object) accepted by ET.parse. If it is
            already a parsed xml object (see check_if_xml_tree) it is
            returned unchanged, so an ElementTree comes back as an
            ElementTree, not as its root element.

    If the file cannot be loaded the error is printed and the process
    exits with status 1, so calling scripts see the failure.
    '''
    if check_if_xml_tree(xml_fn):
        return xml_fn
    try:
        xmldoc = ET.parse(xml_fn)
    except Exception as e:
        print("file loading failed ", xml_fn)
        print(e)
        # A bare sys.exit() would report success (status 0) to the caller.
        sys.exit(1)
    root = xmldoc.getroot()
    # The include pass is run twice, as in read_string.
    # NOTE(review): presumably to expand includes brought in by the first
    # pass -- confirm before reducing to one pass.
    xml.etree.ElementInclude.include(root)
    xml.etree.ElementInclude.include(root)
    return root
def tostring(root):
    '''
    Serialize root and return it as pretty-printed xml text.

    The element is serialized with ElementTree, then re-parsed with
    minidom, whose toprettyxml() produces the indented output.
    '''
    serialized = ET.tostring(root)
    dom = minidom.parseString(serialized)
    return dom.toprettyxml()
def get_attr_list(root):
    '''
    Return the attribute names of root.

    For <root attr1='' attr2=''></root> the names are attr1 and attr2.
    '''
    attr_names = root.keys()
    return attr_names
def get_value(root):
    '''
    Return the text of root with surrounding whitespace removed.

    Returns None when root is None. When the text cannot be stripped
    (it is None, or a non-string value assigned at runtime) the text is
    returned unchanged.
    '''
    if root is None:
        return None
    try:
        return root.text.strip()
    except AttributeError:
        # text is None or not a string: hand it back untouched.
        return root.text
def get_elem_iter(root, attrname, path_prefix=".//"):
    '''
    Generate every element found at path_prefix + attrname under root.

    Nothing is yielded when no element matches.
    '''
    search_path = path_prefix + attrname
    for match in root.findall(search_path):
        yield match
def get_elems(root, attrname, path_prefix=".//", uniq=False, error_if_not_found=True):
    '''
    Find the elements at path_prefix + attrname under root.

    uniq: when True exactly one element must match and that element is
          returned; otherwise the list of all matches is returned.
    error_if_not_found: when True (default) a missing element raises
          AssertionError; when False, None is returned instead. The
          expected usage is to check has_key before calling this function.

    Raises AssertionError when nothing matches (and error_if_not_found is
    set) or when uniq is set and more than one element matches. The error
    is raised explicitly so the check also runs under "python -O".
    '''
    search_path = path_prefix + attrname
    matches = root.findall(search_path)
    if not matches:
        if error_if_not_found:
            raise AssertionError("no element found for %r" % (search_path,))
        return None
    if uniq:
        if len(matches) != 1:
            raise AssertionError(
                "expected one element for %r, found %d" % (search_path, len(matches)))
        return matches[0]
    return matches
def get_parent_elem(root, attrname, path_prefix=".//", uniq=True):
    '''
    Return the parent of the element(s) located at path_prefix + attrname.

    With uniq equal to True exactly one parent is expected and returned;
    otherwise the list of parents is returned.
    '''
    parent_path = path_prefix + attrname + "/.."
    parents = root.findall(parent_path)
    if not (uniq == True):
        return parents
    assert len(parents) == 1  # attrname should map to a single parent
    return parents[0]
def get_value_elems(root, attrname):
    '''
    Return the values (see get_value) of all elements matching attrname.
    '''
    return [get_value(elem) for elem in get_elems(root, attrname)]
# The most commonly used lookup -- works like reading a key-value pair.
def get_value_elem(root, attrname, path_prefix='.//'):
    '''
    Return the value of the single element found at path_prefix + attrname.
    '''
    return get_value(
        get_elems(root, attrname, path_prefix=path_prefix, uniq=True))
def get_value_of_key(root=None, key=None, path_prefix='.//'):
    '''
    Return value for a fragment of the form <key>value</key>.

    key should be unique in the xml below root.
    '''
    unique_elem = get_elems(root, key, path_prefix=path_prefix, uniq=True)
    value = get_value(unique_elem)
    return value
# This is the one we use most often.
# <attr>value</attr>
def get_value_by_attr(root, attrname):
    '''
    Return the stripped text of the single element matching attrname.

    Returns None when get_elems yields no element. An element created at
    runtime may hold a non-string text; such a value (or None) cannot be
    stripped and is returned unchanged.
    '''
    elem = get_elems(root, attrname, uniq=True)
    if elem is None:
        return None
    try:
        return elem.text.strip()
    except AttributeError:
        # text is None or a non-string runtime value: return it as-is.
        return elem.text
def has_key(root, attr_path, path_prefix='./'):
    '''
    Tell whether the key attr_path exists under root.

    A key is the label (path) of an xml item. For e.g. in this fragment
    <alpha>
    <beta>iota</beta>
    </alpha>
    "alpha/beta" is the key.

    The search path is path_prefix + attr_path. With the default prefix
    './' the path is resolved from root's direct children; pass './/' to
    look anywhere below root.
    '''
    matches = root.findall(path_prefix + attr_path)
    return len(matches) > 0
# <elemn><key>value</key></elemn>
def get_elem_by_key_value(xmldoc, elemn, key,value, uniq=False):
    '''
    Find the elemn elements that have a child <key> whose text is value.

    uniq: when True exactly one element must match and it is returned;
          otherwise the (possibly empty) list of matches is returned.

    Raises AssertionError when uniq is set and the number of matches is
    not one (raised explicitly so it also fires under "python -O").
    '''
    # A value containing an apostrophe would terminate a '...' literal
    # early, so quote it with double quotes when that is possible.
    if "'" in value and '"' not in value:
        quote = '"'
    else:
        quote = "'"
    xpath = './/' + elemn + '[' + key + '=' + quote + value + quote + ']'
    all_elems = xmldoc.findall(xpath)
    if uniq:
        if len(all_elems) != 1:
            raise AssertionError(
                "expected one element for %r, found %d" % (xpath, len(all_elems)))
        return all_elems[0]
    return all_elems
def get_uniq_elem_by_key_value(xmldoc, elemn, key,value):
    '''
    Shorthand for get_elem_by_key_value(..., uniq=True): return the one
    elemn element that has a child <key> with text value.
    '''
    unique_elem = get_elem_by_key_value(xmldoc, elemn, key, value, uniq=True)
    return unique_elem
def get_elems_by_parent_child(root, parent, child):
    '''
    Return all <child> elements that sit directly inside a <parent>,
    i.e. fragments of the form <parent><child></child></parent>.
    '''
    search_path = "".join([".//", parent, "/", child])
    return root.findall(search_path)
# Return the <child> elements located under <parent> that contain a
# <key>value</key> entry.
def get_elems_by_parent_child_key_value(root,parent, child, key, value, uniq=False):
    '''
    Match fragments of this form:
    <parent>
      <child>
        <key>value</key>
      </child>
    </parent>

    Returns the list of matching <child> elements, or, when uniq is set,
    the single match (asserting that there is exactly one).
    '''
    predicate = "[" + key + "='" + value + "']"
    matches = root.findall(".//" + parent + "/" + child + predicate)
    if not uniq:
        return matches
    assert len(matches) == 1
    return matches[0]
def gen_node(node_label, node_text):
    '''
    Build and return a fresh element tagged node_label whose text is
    node_text.
    '''
    new_node = ET.Element(node_label)
    new_node.text = node_text
    return new_node
def dock_elem_value(cfg_root=None, dock_path=None, elem_name=None, elem_value=None):
    '''
    Create <elem_name>str(elem_value)</elem_name> and attach it to cfg_root.

    When dock_path is given the new node is appended to the unique element
    found at that path; otherwise it is appended to cfg_root itself.
    '''
    new_node = ET.Element(elem_name)
    new_node.text = str(elem_value)
    if dock_path is None:
        target = cfg_root
    else:
        target = get_elems(cfg_root, dock_path, uniq=True)
    target.append(new_node)
# Pitfall: both xml elements and plain Python lists provide append(), and
# lists are everywhere, so a list can be passed by mistake where cfg_root
# is expected without append() rejecting it.
# Remedy: stay consistent while coding -- work either with lists
# everywhere or with elements everywhere, never a mix.
def append_elem(dock_root, elem_root):
    '''
    Attach elem_root (the xml element to add) as the last child of
    dock_root (the master xml element).
    '''
    dock_root.append(elem_root)
    return None
def update_elem_value(cfg_root=None, elem_label=None, elem_text=None):
    '''
    Set the text of the unique element matching elem_label under cfg_root
    to elem_text.
    '''
    target = get_elems(cfg_root, elem_label, uniq=True)
    target.text = elem_text
class XmlListConfig(list):
    '''
    List built from a sequence of xml elements (typically the children
    of an element whose sub-elements all share one tag).

    Each item is converted as follows:
    - an element with children becomes an XmlDictConfig when it has one
      child or its first two children carry different tags, otherwise a
      nested XmlListConfig;
    - a childless element contributes its stripped text, if non-empty;
    - None entries and childless elements without text are skipped.
    '''
    def __init__(self, aList):
        super(XmlListConfig, self).__init__()
        for element in aList:
            if element is None:
                continue
            # Test for children explicitly: indexing element[0] on a
            # childless element raises IndexError.
            if len(element):
                # treat like dict
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)
class XmlDictConfig(dict):
    '''
    Dictionary view of an xml element.

    Example usage:
    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, starting from an XML string:
    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    After that xmldict is used like any other dict.
    '''
    def __init__(self, parent_element):
        # The attributes of the parent become plain entries.
        parent_attrs = parent_element.items()
        if parent_attrs:
            self.update(dict(parent_attrs))
        for child in parent_element:
            child_attrs = dict(child.items())
            if len(child) == 0:
                # No child tags: store the attributes when there are any,
                # otherwise the text. This assumes that a tag carrying
                # attributes has no text worth keeping, which holds for
                # the configuration files handled so far.
                self[child.tag] = child_attrs if child_attrs else child.text
                continue
            if len(child) == 1 or child[0].tag != child[1].tag:
                # Dict-like: when the first two tags differ, all tags are
                # assumed to differ.
                converted = XmlDictConfig(child)
            else:
                # List-like: when the first two tags match, the rest are
                # assumed to match too. The list is stored under the tag
                # name its items share.
                converted = {child[0].tag: XmlListConfig(child)}
            # Attributes of the tag are merged into its dict.
            if child_attrs:
                converted.update(child_attrs)
            self[child.tag] = converted
def flatten(d, parent_key='', sep='_'):
    '''
    Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with sep, so
    {'a': {'b': 1}} becomes {'a_b': 1}. parent_key is the prefix applied
    to the keys of d (used by the recursion).
    '''
    # collections.MutableMapping was removed in Python 3.10; the ABC
    # lives in collections.abc. Fall back for interpreters without it.
    try:
        from collections.abc import MutableMapping
    except ImportError:
        from collections import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat
def create_dict_param_attr(xml_root=None):
    '''
    Convert xml_root with xmldict.xml_to_dict and flatten the result,
    joining nested keys with '/'.
    '''
    return flatten(xmldict.xml_to_dict(xml_root), sep='/')
def merge_xml(xml1_path, xml2_path, xmlo_path):
    '''
    Merge two xml files with xsltproc and write the result to xmlo_path.

    A manifest naming both inputs is written to "tmp.xml" in the current
    directory and transformed with the merge.xslt stylesheet located in
    module_dir; the output of xsltproc is stored in xmlo_path.

    The exit status of xsltproc is not checked. If xsltproc cannot be
    started a message is written to stderr and xmlo_path is left empty.
    '''
    # Substitute explicit values: extending the dict returned by locals()
    # does not carry over to a later locals() call on newer interpreters.
    manifest = Template("""<?xml version="1.0"?>
<merge xmlns="http://informatik.hu-berlin.de/merge">
<file1>${xml1_path}</file1>
<file2>${xml2_path}</file2>
</merge>""").substitute(xml1_path=xml1_path, xml2_path=xml2_path)
    with open("tmp.xml", "w") as fh:
        fh.write(manifest)
    stylesheet = "%s/merge.xslt" % (module_dir,)
    # Pass an argument list and redirect stdout ourselves, so that paths
    # containing spaces or shell metacharacters are handled literally.
    with open(xmlo_path, "wb") as out_fh:
        try:
            subprocess.call(["xsltproc", stylesheet, "tmp.xml"], stdout=out_fh)
        except OSError as err:
            sys.stderr.write("xsltproc could not be run: %s\n" % (err,))