-
Notifications
You must be signed in to change notification settings - Fork 2
/
ere.y
96 lines (83 loc) · 2.94 KB
/
ere.y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
%{
#include "yydefs.h"
%}
/* this is the ERE grammar from
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_05_03
however we removed support for the following things:
1) collation [.xxx.]
2) equivalence [=xxx=]
these 2 are locale-dependent features which are only supported in POSIX,
no other regex library supports those things.
3) posix character classes like [:digit:]
it's trivial to replace those with what they would expand to using some sort
of a preprocessor, e.g. [:digit:] -> 0-9
4) character ranges ending in '-', such as #--
this is a really unusual special case which is very unlikely to be
encountered. if needed though, it can be worked around by ending the range
expression on the character before the '-' character and putting the '-'
as a single character at the start of the bracket expression.
note that the original POSIX grammar rejects an empty set of parens (),
unlike most implementations.
we haven't added a workaround to make this undefined construct work.
*/
/* Terminals supplied by the lexer.  The names follow the POSIX ERE
   grammar; the lexer itself is not part of this file.  Single-character
   tokens such as '|' or '(' appear as character literals in the rules.
   Declaration order is ORD_CHAR, QUOTED_CHAR, DUP_COUNT. */
%token ORD_CHAR
%token QUOTED_CHAR
%token DUP_COUNT

/* A complete input has to reduce to extended_reg_exp. */
%start extended_reg_exp

%%
/* --------------------------------------------
Extended Regular Expression
--------------------------------------------
*/
/* Alternation: one or more ERE_branch operands separated by '|'.
   The rule is left-recursive, so operands group from the left. */
extended_reg_exp
    : ERE_branch
    | extended_reg_exp '|' ERE_branch
    ;
/* Concatenation: a non-empty, left-recursive sequence of ERE_expression
   items.  There is no empty alternative, so a branch cannot be empty. */
ERE_branch
    : ERE_expression
    | ERE_branch ERE_expression
    ;
/* One item of a branch: a single-character atom, an anchor token
   ('^' or '$'), a parenthesised sub-expression, or any ERE_expression
   followed by a duplication symbol.  The last alternative is
   left-recursive, so duplication symbols may follow one another.
   The group requires a full extended_reg_exp between the parentheses,
   so an empty "()" is not accepted. */
ERE_expression
    : one_char_or_coll_elem_ERE
    | '^'
    | '$'
    | '(' extended_reg_exp ')'
    | ERE_expression ERE_dupl_symbol
    ;
/* Single-character atoms: an ORD_CHAR token, a QUOTED_CHAR token,
   the '.' token, or a whole bracket_expression. */
one_char_or_coll_elem_ERE
    : ORD_CHAR
    | QUOTED_CHAR
    | '.'
    | bracket_expression
    ;
/* Duplication suffixes: the single-character operators '*', '+' and
   '?', plus the three brace forms -- one count, one count followed by
   a comma, and two comma-separated counts. */
ERE_dupl_symbol
    : '*'
    | '+'
    | '?'
    | '{' DUP_COUNT '}'
    | '{' DUP_COUNT ',' '}'
    | '{' DUP_COUNT ',' DUP_COUNT '}'
    ;
/* --------------------------------------------
Bracket Expression
-------------------------------------------
*/
/* A bracket expression is a list enclosed in '[' and ']'.  The two
   alternatives differ only in the leading '^' that nonmatching_list
   requires. */
bracket_expression
    : '[' matching_list ']'
    | '[' nonmatching_list ']'
    ;
/* List without a leading '^': simply a bracket_list. */
matching_list
    : bracket_list
    ;
/* List introduced by a '^' token, followed by a bracket_list. */
nonmatching_list
    : '^' bracket_list
    ;
/* Wrapper around follow_list; this is its only alternative. */
bracket_list
    : follow_list
    ;
/* One or more expression_term items, collected left-recursively. */
follow_list
    : expression_term
    | follow_list expression_term
    ;
/* A list member is either a single character or a character range. */
expression_term
    : single_expression
    | range_expression
    ;
/* A list member consisting of exactly one one_char_ERE. */
single_expression
    : one_char_ERE
    ;
/* A range: start_range (which already includes the '-' token)
   followed by the closing end_range. */
range_expression
    : start_range end_range
    ;
/* First endpoint of a range together with the '-' separator.  The
   endpoint reuses end_range, so both ends accept the same tokens. */
start_range
    : end_range '-'
    ;
/* Character tokens accepted inside a bracket list: ORD_CHAR or
   QUOTED_CHAR. */
one_char_ERE
    : ORD_CHAR
    | QUOTED_CHAR
    ;
/* A range endpoint is a single one_char_ERE. */
end_range
    : one_char_ERE
    ;