forked from rosshadden/sublime-xpath
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sublime_lxml.py
361 lines (307 loc) · 18.6 KB
/
sublime_lxml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
import sublime
from .lxml_parser import *
from .sublime_helper import get_scopes
import re
# Matches the first character that can follow a tag name inside an open tag:
# the closing '>', any whitespace, or the '/' of a self-closing tag.
RE_TAG_NAME_END_POS = re.compile(r'[>\s/]')
# Matches one attribute declaration preceded by whitespace.
#   group 1: the whole declaration (name, '=' and quoted value)
#   group 2: the attribute name, optionally with a single 'prefix:'
#   group 3: the value when it is double-quoted (otherwise None)
#   group 4: the value when it is single-quoted (otherwise None)
# Raw strings are used so that '\s' and '\w' reach the regex engine without
# relying on Python passing unknown string escapes through unchanged.
RE_TAG_ATTRIBUTES = re.compile(r'''\s+((\w+(?::\w+)?)\s*=\s*(?:"([^"]*)"|'([^']*)'))''')
# TODO: consider subclassing etree.ElementBase and adding as methods to that
def getNodeTagRegion(view, node, position_type):
    """Return the region covered by one of the node's tags.

    `position_type` ('open' or 'close') is handed to `getNodeTagRange` together
    with the node, and the range it returns is unpacked into a
    `sublime.Region`. The `view` argument is not used by this function.
    """
    tag_range = getNodeTagRange(node, position_type)
    return sublime.Region(*tag_range)
def getNodePosition(view, node):
    """Return a 2-tuple `(open_region, close_region)` for the node's open and close tags."""
    return tuple(getNodeTagRegion(view, node, tag_kind) for tag_kind in ('open', 'close'))
def getNodePositions(view, node):
    """Generate the distinct spans that make up this node.

    Each item is a tuple `(span_node, start, end, is_final)`:

    - spans owned by `node` itself (its open tag, the gaps between children and
      its close tag) are always flagged as final;
    - each child that is a `LocationAwareElement` produces one span covering the
      whole child, flagged as final only when the child has no children.
    """
    node_open, node_close = getNodePosition(view, node)
    cursor = node_open.begin()

    for child in node.iterchildren():
        if not isinstance(child, LocationAwareElement):  # skip comments
            continue
        child_open, child_close = getNodePosition(view, child)
        child_start = child_open.begin()
        # the part of the parent that precedes this child
        yield (node, cursor, child_start, True)
        cursor = child_close.end()
        # the child itself; only final when there is nothing to descend into
        yield (child, child_start, cursor, len(child) == 0)

    # whatever remains of the parent, up to the end of its close tag
    yield (node, cursor, node_close.end(), True)
def regionIntersects(outer, inner, include_beginning):
    """Tell whether `inner` should be treated as falling within `outer`.

    A real intersection always counts. Otherwise, when `include_beginning` is
    set, an empty `inner` (a bare cursor) counts if `outer` contains its
    position.
    """
    overlap = outer.intersects(inner)
    if overlap:
        return overlap
    # only include beginning if selection size is empty. so can select
    # <hello>text|<world />|</hello> and xpath will show as 'hello/world'
    # rather than '/hello'
    return include_beginning and inner.empty() and outer.contains(inner.begin())
# TODO: consider subclassing tree? and moving function to that class
def getNodesAtPositions(view, roots, positions):
    """Find the nodes that relate to each of the given positions.

    `roots` is a sorted list of trees (entries may be None and are skipped) and
    `positions` a sorted list of non-overlapping regions. The search avoids
    descending into children that cannot match and stops once all positions in
    range have been accounted for.

    Returns a list of tuples
    `(node, matching_position_indexes, span_start, span_end, is_own_span)`,
    where `is_own_span` tells whether the span belongs to the node that was
    being walked rather than to one of its childless children.
    """

    def relevance(span, start_index, max_index, include_beginning):
        """Yield the indexes in `positions[start_index..max_index]` that match the span, stopping at the first gap."""
        found_one = False
        for index in range(start_index, max_index + 1):
            if regionIntersects(span, positions[index], include_beginning):
                yield index
                found_one = True
                continue
            # The positions are sorted, so a miss after a hit ends the run, and
            # with no hit yet nothing beyond start_index + 1 is worth checking.
            if found_one or index > start_index + 1:
                break

    def matchSpan(span, start_index, max_index, include_beginning):
        """Return `(indexes, first, last)` for the span; `first`/`last` fall back to the inputs when nothing matched."""
        hits = list(relevance(span, start_index, max_index, include_beginning))
        if not hits:
            return (hits, start_index, max_index)
        return (hits, hits[0], hits[-1])

    def getMatches(node, next_match_index, max_index, final_matches):
        """Check the node and its children for all matches within the specified range."""
        reached_last_expected = False
        for span_node, pos_start, pos_end, is_final in getNodePositions(view, node):
            is_own_span = span_node == node
            hits, first_hit, last_hit = matchSpan(sublime.Region(pos_start, pos_end), next_match_index, max_index, is_own_span)

            if not hits:
                # Once the last position expected inside this node has matched,
                # the first miss means the node has nothing more to offer.
                if reached_last_expected:
                    break
                continue

            if last_hit == max_index:
                reached_last_expected = True

            if is_final:
                final_matches.append((span_node, hits, pos_start, pos_end, is_own_span))
                next_match_index = last_hit  # resume from the last index that matched
            else:
                # descend into the child, restricted to the indexes it matched
                next_match_index = getMatches(span_node, first_hit, last_hit, final_matches)

        return next_match_index

    found = []
    next_index = 0
    for root in roots:
        if root is None:
            continue
        root_open, root_close = getNodePosition(view, root)
        root_hits, next_index, last_index = matchSpan(root_open.cover(root_close), next_index, len(positions) - 1, True)
        # skip the tree entirely when no position falls inside it (saves
        # iterating through all children of the root element unnecessarily)
        if root_hits:
            next_index = getMatches(root, next_index, last_index, found)

    return found
def get_nodes_from_document(nodes):
    """Filter XPath results down to those that belong to the original document.

    Yields the input nodes unchanged, in order, keeping:

    - string results (`etree._ElementUnicodeResult`, i.e. attribute or text
      nodes etc.) that have a parent. Some string results are not part of the
      parsed document (for example when using the substring function), so
      there is no original node and no location for them;
    - processing instructions, comments and elements.

    Anything else is an unsupported type and is dropped.
    """
    for node in nodes:
        if isinstance(node, etree._ElementUnicodeResult):
            if node.getparent() is None:
                continue  # not attached to the document, cannot be located
        elif not isinstance(node, (etree.PIBase, etree.CommentBase, etree.ElementBase)):
            continue  # unsupported type
        yield node
def get_regions_of_nodes(view, nodes, element_position_type, attribute_position_type):
    """Yield the view regions that correspond to the given XPath result nodes.

    `nodes` may contain elements, comments, processing instructions and string
    results (`etree._ElementUnicodeResult`); string results are resolved via
    their parent element.

    - Text and tail results yield the region between the surrounding tags.
    - Comments and processing instructions yield their open region.
    - Elements (and attribute results when `attribute_position_type` is None,
      'element' or 'parent') yield regions chosen by `element_position_type`:
      'open', 'close', 'names', 'content', 'entire' or 'open_attributes'.
      Any other value yields nothing for that node.
    - Attribute results otherwise yield a region chosen by
      `attribute_position_type`: 'name', 'value'/'content', or the whole
      declaration for any other value except 'none', which yields nothing.
    """

    def ensureTagNameEndPosIsSet(node, open_pos):
        """Cache on the node the view offset where the tag name in its open tag ends."""
        if not hasattr(node, 'tag_name_end_pos'):
            node.tag_name_end_pos = open_pos.begin() + RE_TAG_NAME_END_POS.search(view.substr(open_pos)).start()

    for node in nodes:
        attr_name = None
        is_text = None
        is_tail = None

        if isinstance(node, etree._ElementUnicodeResult):  # if the node is an attribute or text node etc.
            attr_name = node.attrname
            is_text = node.is_text
            is_tail = node.is_tail
            node = node.getparent()  # get the parent

        open_pos = getNodeTagRegion(view, node, 'open')
        close_pos = getNodeTagRegion(view, node, 'close')

        if is_text or is_tail:
            if is_text:
                # text runs from the end of the open tag to the first child, or to the close tag
                text_begin_pos = open_pos.end()
                text_end_pos = close_pos.begin()
                following = node.iterchildren()
            else:
                # tail runs from the end of the close tag to the next sibling, or to the parent's close tag
                text_begin_pos = close_pos.end()
                text_end_pos = getNodeTagRegion(view, node.getparent(), 'close').begin()
                following = node.itersiblings()

            next_node = next(following, None)
            if next_node is not None:
                text_end_pos = getNodeTagRegion(view, next_node, 'open').begin()
            yield sublime.Region(text_begin_pos, text_end_pos)
        elif isinstance(node, (etree.CommentBase, etree.PIBase)):
            yield open_pos
        elif attr_name is None or attribute_position_type is None or attribute_position_type in ('element', 'parent'):
            # position type 'open' <|name| attr1="test"></name> "Goto name in open tag"
            # position type 'close' <name attr1="test"></|name|> "Goto name in close tag"
            # position type 'names' <|name| attr1="test"></|name|> "Goto name in open and close tags"
            # position type 'content' <name>|content<subcontent />|</name> "Goto content"
            # position type 'entire' |<name>content<subcontent /></name>| "Select entire element"
            #   the idea being, that you can even paste it into a single-selection app, and it will
            #   have only the selected elements - useful for filtering out only useful/relevant
            #   parts of a document after a xpath query etc.
            # position type 'open_attributes' <name| attr1="test" attr2="hello" |/>
            if element_position_type in ('open', 'close', 'names', 'open_attributes'):
                # select only the tag name with the prefix
                ensureTagNameEndPosIsSet(node, open_pos)

                if element_position_type == 'open_attributes':
                    chars_before_end = len('>')
                    if node.is_self_closing():
                        chars_before_end += len('/')
                    yield sublime.Region(node.tag_name_end_pos, open_pos.end() - chars_before_end)
                else:
                    chars_before_tag = len('<')
                    # a self-closing element has no close tag, so 'close' falls back to the open tag name
                    if element_position_type in ('open', 'names') or node.is_self_closing():
                        yield sublime.Region(open_pos.begin() + chars_before_tag, node.tag_name_end_pos)
                    if element_position_type in ('close', 'names') and not node.is_self_closing():
                        chars_before_tag += len('/')
                        # the name in the close tag has the same length as in the open tag
                        yield sublime.Region(close_pos.begin() + chars_before_tag, close_pos.begin() + len('/') + (node.tag_name_end_pos - open_pos.begin()))
            elif element_position_type == 'content':
                if node.is_self_closing():
                    yield sublime.Region(open_pos.end(), open_pos.end())
                else:
                    yield sublime.Region(open_pos.end(), close_pos.begin())
            elif element_position_type == 'entire':
                yield sublime.Region(open_pos.begin(), close_pos.end())
        elif attribute_position_type != 'none':
            # position type 'name' <element |attr1|="test"></element> "Goto attribute name in open tag"
            # position type 'content' <element attr1="|test|"></element> "Goto attribute value in open tag"
            # position type 'entire' <element |attr1="test"|></element> "Goto attribute declaration in open tag"
            ensureTagNameEndPosIsSet(node, open_pos)
            attrs = view.substr(sublime.Region(node.tag_name_end_pos, open_pos.end()))
            q = etree.QName(attr_name)
            for match in RE_TAG_ATTRIBUTES.finditer(attrs):
                declared_name = match.group(2)
                is_this = declared_name == attr_name
                if not is_this:
                    prefix, colon, local_name = declared_name.partition(':')
                    if colon and prefix != 'xmlns':
                        # compare by namespace URI; an undeclared prefix simply does not match
                        is_this = (
                            local_name == q.localname
                            and prefix in node.nsmap
                            and node.nsmap[prefix] == q.namespace
                        )
                if not is_this:
                    continue

                if attribute_position_type == 'name':
                    groups = (2,)
                elif attribute_position_type in ('value', 'content'):
                    groups = (3, 4)  # group 3 for a double-quoted value, group 4 for a single-quoted one
                else:
                    groups = (1,)  # the entire declaration
                group = next(g for g in groups if match.group(g) is not None)
                yield sublime.Region(node.tag_name_end_pos + match.start(group), node.tag_name_end_pos + match.end(group))
                break
def move_cursors_to_nodes(view, nodes, element_position_type, attribute_position_type):
    """Replace the view's selection with the regions of the given XPath result nodes.

    The nodes are first filtered with `get_nodes_from_document`, then mapped to
    regions with `get_regions_of_nodes`. When at least one region results, the
    selection is cleared, all regions are added, and the view is scrolled to
    show the first one; otherwise the selection is left alone.

    Returns `(number_of_nodes_kept_by_the_filter, total_number_of_results)`.
    """
    all_results = list(nodes)
    document_nodes = list(get_nodes_from_document(all_results))
    regions = list(get_regions_of_nodes(view, document_nodes, element_position_type, attribute_position_type))

    if regions:
        view.sel().clear()
        view.sel().add_all(regions)
        view.show(regions[0])  # scroll to show the first selection, if it is not already visible

    return (len(document_nodes), len(all_results))
def getElementXMLPreview(view, node, maxlen):
    """Return a preview of the node's XML text as found in the view.

    The text from the start of the open tag to the end of the close tag is read
    from the view and passed, along with `maxlen`, to `collapseWhitespace`.
    NOTE(review): presumably `collapseWhitespace` enforces the `maxlen`
    character limit - confirm against its definition.
    """
    open_pos, close_pos = getNodePosition(view, node)
    whole_element = sublime.Region(open_pos.begin(), close_pos.end())
    return collapseWhitespace(view.substr(whole_element), maxlen)
def parse_xpath_query_for_completions(view, completion_position):
    """Parse the XPath query in the view up to `completion_position` into sub queries.

    The text is cut into parts at the scopes of interest (argument separators,
    subexpression/predicate brackets, function names and operators), the parts
    are arranged into a tree by bracket nesting, closed brackets are folded
    back into plain text, and the trailing operand at each level of still-open
    brackets becomes one sub query.

    Returns a list of stripped sub query strings ordered from the outermost
    level inwards. Empty sub queries are dropped, except the one for the
    deepest level, which is always present.
    """
    selectors = ['punctuation.separator.arguments.xpath', 'punctuation.section.arguments.begin.xpath.subexpression', 'punctuation.section.arguments.end.xpath.subexpression', 'punctuation.section.arguments.begin.xpath.predicate', 'punctuation.section.arguments.end.xpath.predicate', 'variable.function.xpath', 'keyword.operator']

    # Collect (scope or None, region) pairs; None marks the plain text that
    # lies between two scopes of interest.
    selector_regions = []
    pos = 0
    for scope in get_scopes(view, 0, completion_position):
        scope_name, scope_begin, scope_last = scope[0], scope[1], scope[2]
        if not any(selector in scope_name for selector in selectors):
            continue
        if scope_name.endswith('variable.function.xpath punctuation.section.arguments.begin.xpath.subexpression '):
            # combine the function name with the open parenthesis
            previous_region = selector_regions[-1][1]
            selector_regions[-1] = (scope_name, sublime.Region(previous_region.begin(), scope_last + 1))
        else:
            selector_regions.append((None, sublime.Region(pos, scope_begin)))
            selector_regions.append((scope_name, sublime.Region(scope_begin, scope_last + 1)))
        pos = scope_last + 1
    selector_regions.append((None, sublime.Region(pos, completion_position)))

    query_parts = [
        (scope_name, view.substr(region))
        for scope_name, region in selector_regions
        if not region.empty()
    ]

    # parse the xpath expression into a tree
    tree = {'open': '', 'close': '', 'children': [{'value': ''}], 'parent': None}
    node = tree
    for scope_name, part in query_parts:
        if part[-1] in ('[', '('):  # an opening bracket increments the depth
            child = {'open': part, 'parent': node, 'children': [{'value': ''}]}
            node['children'].append(child)
            node = child
        elif part in (']', ')'):  # a closing bracket decrements the depth
            node['close'] = part
            # NOTE(review): for a stray closing bracket at the root, the parent
            # is None and the next line raises - confirm whether callers can
            # pass such a query.
            node = node['parent']
            node['children'].append({'value': ''})
        elif part == ',':
            node['children'].append({'separator': part})
        elif scope_name is not None and scope_name.endswith('keyword.operator.xpath '):
            node['children'].append({'operator': part})
        else:
            siblings = node['children']
            if 'value' not in siblings[-1]:
                siblings.append({'value': ''})
            siblings[-1]['value'] += part

    # flatten the tree where possible
    def flatten(node, everything):
        """Merge the node's children into as few entries as possible.

        Closed brackets are always folded into the preceding text value. With
        `everything` set, operators and separators are folded in as well;
        otherwise they stay as separate entries. Brackets that are still open
        are kept as (recursively flattened) child nodes without their 'parent'.
        """
        merged = [{'value': ''}]
        for child in node['children']:
            if 'value' not in merged[-1]:
                merged.append({'value': ''})

            if 'open' not in child:
                if everything or 'value' in child:
                    # these entries hold a single key, whose value is the text
                    merged[-1]['value'] += next(iter(child.values()))
                else:
                    merged.append(child)
                continue

            if 'close' in child:
                inner_text = flatten(child, True)[0]['value']
                merged[-1]['value'] += child['open'] + inner_text + child['close']
            else:
                newchild = child.copy()
                newchild['children'] = flatten(newchild, False)
                del newchild['parent']
                merged.append(newchild)
        return merged

    flattened = {'children': flatten(tree, False)}

    # split the rest of the tree into subqueries that should be executed on the results of the previous one
    subqueries = {0: ''}

    def split(node, level):
        """Append the trailing operand of `node` to `subqueries[level]`, going one level deeper for each open bracket."""
        relevant = []
        for child in reversed(node['children']):
            # take the children from the end, until we reach an operator or a separator
            if 'operator' in child or 'separator' in child:
                break
            relevant.append(child)

        for child in reversed(relevant):
            if 'open' not in child:
                subqueries[level] += next(iter(child.values()))
                continue

            is_closed = 'close' in child
            if is_closed:
                subqueries[level] += child['open']
            else:
                level += 1
                subqueries.setdefault(level, '')
            split(child, level)
            if is_closed:
                subqueries[level] += child['close']

    split(flattened, 0)

    levels = sorted(subqueries)
    deepest_level = levels[-1]
    stripped = ((level, subqueries[level].strip()) for level in levels)
    return [subquery for level, subquery in stripped if subquery != '' or level == deepest_level]
def chunks(start, end, chunk_size):  # inspired by http://stackoverflow.com/a/18854817/4473405
    """Return a generator of `(begin, end)` pairs that split `start..end` into pieces of at most `chunk_size`.

    The final pair is clamped to `end`, so it may be shorter than the others.
    """
    chunk_starts = range(start, end, chunk_size)
    return ((chunk_start, min(chunk_start + chunk_size, end)) for chunk_start in chunk_starts)
def region_chunks(view, region, chunk_size):
    """Return a generator of the view's text within the region, in consecutive pieces of at most `chunk_size` characters."""
    bounds = chunks(region.begin(), region.end(), chunk_size)
    return (view.substr(sublime.Region(chunk_begin, chunk_end)) for chunk_begin, chunk_end in bounds)