-
Notifications
You must be signed in to change notification settings - Fork 0
/
RAParser.scala
196 lines (164 loc) · 6.51 KB
/
RAParser.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import scala.util.parsing.combinator.PackratParsers
import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import Absyn._
/** Token-based packrat parser for a small relational-algebra language.
  *
  * The grammar covers scalar expressions ([[expression]]), boolean predicates
  * ([[predicate]]), relational queries ([[query]]), `;`-separated view
  * definitions ([[specification]]) and a small result record ([[result]]).
  * Most operators are accepted as upper-case keywords, lower-case keywords
  * and Unicode symbols.
  *
  * Binary operators are written as left-recursive rules, which is why every
  * rule is a `lazy val` of type [[P]] (a `PackratParser`).
  */
class RAParser extends StandardTokenParsers with PackratParsers {

  /** Shorthand for the packrat parser type used by every grammar rule. */
  type P[+A] = PackratParser[A]

  /** Parses the whole of `input` with `parser`.
    *
    * @param parser grammar rule to apply; it must consume the entire input
    * @param input  text to parse
    * @return the value produced by `parser`
    * @throws java.lang.RuntimeException (raised through `sys.error`) carrying
    *         the parser's failure message when the input does not match
    */
  def parseStr[A](parser: P[A], input: String): A = {
    phrase(parser)(new lexical.Scanner(input)) match {
      case Success(ast, _) => ast
      case e: NoSuccess    => sys.error(e.msg)
    }
  }

  /** Reads the file at path `input` and parses its full contents with `parser`.
    *
    * The file handle is closed whether or not reading succeeds.
    *
    * @param parser grammar rule to apply to the file contents
    * @param input  path of the file to read
    * @return the value produced by `parser`
    */
  def parse[A](parser: P[A], input: String): A = {
    val source = scala.io.Source.fromFile(input)
    val lines = try source.mkString finally source.close()
    parseStr(parser, lines)
  }

  // Keywords, in upper- and lower-case spellings. "e"/"E" are reserved so that
  // posNumber can recognise exponent markers (see the comment there).
  lexical.reserved ++= List("TRUE", "FALSE", "AND", "OR", "NOT",
    "UNION", "DUNION", "INTERSECT", "EXCEPT", "JOIN", "GROUP", "SUM", "COAL", "coal",
    "true", "false", "and", "or", "not",
    "union", "dunion", "intersect", "except", "join", "group", "sum", "e", "E")

  // Punctuation and the Unicode operator symbols accepted by the grammar.
  lexical.delimiters ++= List("=", "*", "/", "+", "-", "(", ")", "[", "]", "{", "}",
    "<>", "<", "<=", ">", ">=", ",", ".", ";", ":", "->", ":=", "^", "⨝", "∩", "∪", "⊎",
    "∧", "∨", "¬", "⊤", "⊥", "'")

  /** Unsigned numeric literal: `1`, `1.`, `1.5` or `1.5e+3` / `1.5E-3`. */
  lazy val posNumber: P[Double] = (
    // This handles signed exponents but not unsigned ones.
    // This seems to be because e1234 gets parsed as an identifier token.
    // The right way to handle this is probably to define a regex-based parser
    // using the standard regular expression for floating point numbers during
    // tokenization, but this is workable as long as we only need to parse
    // floating point numbers with signed exponents, which for example Python
    // prints by default (but Scala doesn't).
    // This approach also means that e/E cannot appear on its own as an
    // identifier token, which is not ideal.
    // TODO: Redo the whole parser using RegexParsers...
    (numericLit ~ "." ~ numericLit ~ ("e" | "E") ~ ("+" | "-") ~ numericLit ^^ {
      case x ~ _ ~ z ~ _ ~ s ~ e => s"$x.${z}e$s$e".toDouble }) |
    (numericLit ~ "." ~ numericLit ^^ { case x ~ _ ~ z => s"$x.$z".toDouble }) |
    (numericLit ~ "." ^^ { case x ~ _ => s"$x.0".toDouble }) |
    (numericLit ^^ { x => x.toDouble })
  )

  /** An identifier token, or the reserved words `e` / `E`, which are let back
    * in here because reserving them for exponents removed them from `ident`.
    */
  lazy val identifier: P[String] = (
    ident | "e" | "E"
  )

  /** A numeric literal with an optional leading minus sign. */
  lazy val number: P[Double] = (
    posNumber |
    "-" ~ posNumber ^^ { case _ ~ x => -x }
  )

  /** Entry point for scalar expressions. */
  lazy val expression: P[Expr] =
    summation

  /** Left-associative `+` and `-` over [[product]]. */
  lazy val summation: P[Expr] =
    summation ~ "+" ~ product ^^ {
      case e1 ~ _ ~ e2 => Plus(e1, e2)
    } | summation ~ "-" ~ product ^^ {
      case e1 ~ _ ~ e2 => Minus(e1, e2)
    } | product

  /** Left-associative `*` and `/` over [[atomic]], plus unary minus. */
  lazy val product: P[Expr] =
    product ~ "*" ~ atomic ^^ {
      case e1 ~ _ ~ e2 => Times(e1, e2)
    } | product ~ "/" ~ atomic ^^ {
      case e1 ~ _ ~ e2 => Div(e1, e2)
    } | "-" ~ atomic ^^ {
      case _ ~ y => UMinus(y)
    } | atomic

  /** String literal, variable, number, or parenthesised expression. */
  lazy val atomic: P[Expr] = (
    (stringLit ^^ Atom) |
    (identifier ^^ Var) |
    (number ^^ { x => Num(x) }) |
    "(" ~> expression <~ ")"
  )

  /** Entry point for boolean predicates. */
  lazy val predicate: P[Pred] = (
    disjPredicate
  )

  /** Left-associative disjunction (`OR`, `or`, `∨`). */
  lazy val disjPredicate: P[Pred] =
    disjPredicate ~ ("OR" | "or" | "∨") ~ conjPredicate ^^ {
      case p1 ~ _ ~ p2 => Or(p1, p2)
    } | conjPredicate

  /** Left-associative conjunction (`AND`, `and`, `∧`); binds tighter than OR. */
  lazy val conjPredicate: P[Pred] =
    conjPredicate ~ ("AND" | "and" | "∧") ~ atomicPredicate ^^ {
      case p1 ~ _ ~ p2 => And(p1, p2)
    } | atomicPredicate

  /** Boolean constant, parenthesised predicate, negation, or a comparison
    * between two expressions (`=`, `<>`, `<`, `<=`, `>`, `>=`).
    */
  lazy val atomicPredicate: P[Pred] = (
    (("TRUE" | "true" | "⊤") ^^^ Bool(true))
    | (("FALSE" | "false" | "⊥") ^^^ Bool(false))
    | "(" ~> predicate <~ ")"
    | ("NOT" | "not" | "¬") ~ atomicPredicate ^^ {
      case _ ~ p => Not(p)
    } | expression ~ "=" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Eq(e1, e2)
    } | expression ~ "<>" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Neq(e1, e2)
    } | expression ~ "<" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Lt(e1, e2)
    } | expression ~ "<=" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Leq(e1, e2)
    } | expression ~ ">" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Gt(e1, e2)
    } | expression ~ ">=" ~ expression ^^ {
      case e1 ~ _ ~ e2 => Geq(e1, e2)
    }
  )

  /** Entry point for relational queries. */
  lazy val query: P[Query] =
    differenceQuery

  /** Left-associative set difference (`-`, `EXCEPT`, `except`); the loosest
    * binding query operator.
    */
  lazy val differenceQuery: P[Query] =
    differenceQuery ~ "-" ~ unionQuery ^^ {
      case q1 ~ _ ~ q2 => Difference(q1, q2)
    } | differenceQuery ~ ("EXCEPT" | "except") ~ unionQuery ^^ {
      case q1 ~ _ ~ q2 => Difference(q1, q2)
    } | unionQuery

  /** Left-associative union (`UNION`, `union`, `∪`) and discriminated union
    * (`DUNION[attr]`, `dunion[attr]`, `⊎[attr]`).
    */
  lazy val unionQuery: P[Query] =
    unionQuery ~ ("UNION" | "union" | "∪") ~ intersectionQuery ^^ {
      case q1 ~ _ ~ q2 => UnionAll(q1, q2)
    } | unionQuery ~ ("DUNION" | "dunion" | "⊎") ~ "[" ~ identifier ~ "]" ~ intersectionQuery ^^ {
      case q1 ~ _ ~ _ ~ att ~ _ ~ q2 => DUnion(q1, q2, att)
    } | intersectionQuery

  /** Left-associative intersection (`INTERSECT`, `intersect`, `∩`).
    *
    * The symbolic form must be `∩`, not `∪`: this rule sits below
    * [[unionQuery]], so accepting `∪` here would consume `a ∪ b` as an
    * intersection before the union rule ever saw the operator.
    */
  lazy val intersectionQuery: P[Query] =
    intersectionQuery ~ ("INTERSECT" | "intersect" | "∩") ~ joinQuery ^^ {
      case q1 ~ _ ~ q2 => Intersection(q1, q2)
    } | joinQuery

  /** Left-associative natural join (`JOIN`, `join`, `⨝`). */
  lazy val joinQuery: P[Query] =
    joinQuery ~ ("JOIN" | "join" | "⨝") ~ basicQuery ^^ {
      case q1 ~ _ ~ q2 => NaturalJoin(q1, q2)
    } | basicQuery

  /** An [[atomicQuery]] optionally followed by one postfix operator.
    *
    * Alternatives are tried in order:
    *  - `q(pred)`           selection
    *  - `q[pred]q2`         theta join
    *  - `q[a,b]`            projection
    *  - `q[a,b SUM c,d]`    aggregation
    *  - `q[COAL a,b]`       coalesce
    *  - `q[^a,b]`           projecting attributes away
    *  - `q{a := e, ...}`    derived attributes
    *  - `q{a -> b, ...}`    renaming
    */
  lazy val basicQuery: P[Query] =
    atomicQuery ~ "(" ~ predicate ~ ")" ^^ {
      case q ~ _ ~ p ~ _ => Select(q, p)
    } | atomicQuery ~ "[" ~ predicate ~ "]" ~ atomicQuery ^^ {
      case q1 ~ _ ~ p ~ _ ~ q2 => ThetaJoin(q1, p, q2)
    } | atomicQuery ~ "[" ~ repsep(identifier, ",") ~ "]" ^^ {
      case q ~ _ ~ attrs ~ _ => Project(q, attrs)
    } | atomicQuery ~ "[" ~ repsep(identifier, ",") ~ ("SUM" | "sum") ~ repsep(identifier, ",") ~ "]" ^^ {
      case q ~ _ ~ attrs ~ _ ~ sums ~ _ => Aggregation(q, attrs, sums)
    } | atomicQuery ~ "[" ~ ("COAL" | "coal") ~ repsep(identifier, ",") ~ "]" ^^ {
      case q ~ _ ~ _ ~ attrs ~ _ => Coalesce(q, attrs)
    } | atomicQuery ~ "[" ~ "^" ~ rep1sep(identifier, ",") ~ "]" ^^ {
      case q ~ _ ~ _ ~ attrs ~ _ => ProjectAway(q, attrs)
    } | atomicQuery ~ "{" ~ rep1sep(derivation, ",") ~ "}" ^^ {
      case q ~ _ ~ derivs ~ _ => Derivation(q, derivs)
    } | atomicQuery ~ "{" ~ rep1sep(renaming, ",") ~ "}" ^^ {
      case q ~ _ ~ renamings ~ _ => Rename(q, renamings)
    } | atomicQuery

  /** A single derived attribute: `name := expression`. */
  lazy val derivation: P[(String, Expr)] =
    identifier ~ ":=" ~ expression ^^ {
      case x ~ _ ~ e => (x, e)
    }

  /** A single attribute renaming: `old -> new`. */
  lazy val renaming: P[(String, String)] =
    identifier ~ "->" ~ identifier ^^ {
      case x ~ _ ~ y => (x, y)
    }

  /** A relation name or a parenthesised query. */
  lazy val atomicQuery: P[Query] = (
    identifier ^^ { r => Relation(r) }
    | "(" ~> query <~ ")"
  )

  /** A view definition: `name := query`. */
  lazy val viewDef: P[(String, Query)] =
    identifier ~ ":=" ~ query ^^ {
      case x ~ _ ~ q => (x, q)
    }

  /** Zero or more view definitions separated by `;`. */
  lazy val specification: P[List[(String, Query)]] = (
    repsep(viewDef, ";")
  )

  /** A record of the form `{ "k1" : [n, ...], "k2" : n }`.
    *
    * Both string keys are accepted but ignored; only the list of numbers and
    * the trailing number are returned.
    */
  lazy val result: P[(List[Double], Double)] = (
    "{" ~ stringLit ~ ":" ~ "[" ~ repsep(number, ",") ~ "]" ~ "," ~ stringLit ~ ":" ~ number ~ "}" ^^ {
      // One sub-pattern per parsed element (11 in total).
      case _ ~ _ ~ _ ~ _ ~ v ~ _ ~ _ ~ _ ~ _ ~ x ~ _ => (v, x)
    }
  )
}