-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
159 lines (150 loc) · 4.16 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from lark import Lark
import sys
import convert_code
# Lark grammar for Cmoji source *after* preprocessing: parse_file() first
# replaces every emoji with the "__keyword__" spelling found in emoji_table,
# so the keyword terminals below match those ASCII spellings, not raw emoji.
# Each alternative carries a numbered alias ("-> ruleN") identifying which
# production matched; a single space is the only ignored input (see %ignore).
# NOTE(review): TRUE and FALSE are declared as terminals but no rule below
# references them -- confirm whether boolean literals were meant to be a factor.
RULES = '''
start: procedure start -> start0
| main -> start1
procedure: type WORD CLAP params? CLAP LBRACE dcls? statements? RETURN expr SEMI RBRACE
main: INT HOME CLAP INT WORD CLAP CHAR DEREF DEREF WORD CLAP LBRACE dcls? statements? RETURN expr SEMI RBRACE
params: "" -> params0
| paramlist -> params1
paramlist: dcl -> paramlist0
| dcl CLAP paramlist -> paramlist1
solidtype: INT -> solidtype0
| LONG -> solidtype1
| CHAR -> solidtype2
| BOOL -> solidtype3
type: solidtype stars?
stars: "" -> stars0
| DEREF stars? -> stars1
dcls: "" -> dcls0
| dcls? BABY dcl BECOMES NUM SEMI -> dcls1
| dcls? BABY dcl BECOMES NULL SEMI -> dcls2
| dcls? BABY dcl BECOMES "'" CHARACTER "'" SEMI -> dcls3
dcl: type WORD
statements: "" -> statements0
| statements? statement -> statements1
statement: lvalue BECOMES expr SEMI -> statement0
| LPAREN test RPAREN HMMM LBRACE statements? RBRACE ELSE LBRACE statements? RBRACE -> statement1
| LPAREN test RPAREN HMMM WHILE LBRACE statements? RBRACE -> statement2
| PRINTLN CLAP expr CLAP SEMI -> statement3
| DELETE LBRACK RBRACK expr SEMI -> statement4
test: expr EQ expr -> test0
| expr NE expr -> test1
| expr LT expr -> test2
| expr LE expr -> test3
| expr GE expr -> test4
| expr GT expr -> test5
expr: term -> expr0
| expr PLUS term -> expr1
| expr MINUS term -> expr2
term: factor -> term0
| term STAR factor -> term1
| term SLASH factor -> term2
| term PCT factor -> term3
factor: WORD -> factor0
| NUM -> factor1
| NULL -> factor2
| LPAREN expr RPAREN -> factor3
| REF lvalue -> factor4
| DEREF factor -> factor5
| NEW type LBRACK expr RBRACK -> factor6
| CALL WORD CLAP CLAP -> factor7
| CALL WORD CLAP arglist CLAP -> factor8
| "'" CHARACTER "'" -> factor9
arglist: expr -> arglist0
| expr CLAP arglist -> arglist1
lvalue: WORD -> lvalue0
| STAR factor -> lvalue1
| LPAREN lvalue RPAREN -> lvalue2
NUM: /[0-9]+/
LONG: "__long__"
CHARACTER: /[a-zA-Z]/
CHAR: "__char__"
LPAREN: "("
RPAREN: ")"
LBRACE: "{"
RBRACE: "}"
RETURN: "__return__"
HMMM: "__hmmm__"
ELSE: "__else__"
WHILE: "__while__"
PRINTLN: "__println__"
HOME: "__home__"
BECOMES: "="
INT: "__int__"
TRUE: "__true__"
FALSE: "__false__"
BOOL: "__bool__"
EQ: "=="
NE: "!="
LT: "<"
GT: ">"
LE: "<="
GE: ">="
PLUS: "__plus__"
MINUS: "__minus__"
STAR: "__star__"
SLASH: "__slash__"
PCT: "%"
CALL: "__call__"
CLAP: "__clap__"
BABY: "__baby__"
SEMI: "__semi__"
NEW: "__new__"
DELETE: "__delete__"
REF: "__ref__"
DEREF: "__deref__"
LBRACK: "["
RBRACK: "]"
NULL: "NULL"
%import common.WORD
%ignore " "
'''
# Emoji code point -> ASCII "__keyword__" spelling.
# parse_file() substitutes each key with its value before parsing, so every
# value here is spelled exactly like the keyword text used in the grammar.
emoji_table = {
    u'\U0001f170': '__char__',
    u'\U0001F51A': '__return__',
    u'\U0001F914': '__hmmm__',
    u'\U0001F937': '__else__',
    u'\U000023F1': '__while__',
    u'\U0001F5A8': '__println__',
    u'\U0001F3E0': '__home__',
    u'\U0001F4AF': '__int__',
    u'\U0001f44d': '__true__',
    u'\U0001f44e': '__false__',
    u'\U0001f171': '__bool__',
    u'\U00002795': '__plus__',
    u'\U00002796': '__minus__',
    u'\U00002716': '__star__',
    u'\U00002797': '__slash__',
    u'\U0001f4f2': '__call__',
    u'\U0001f44f': '__clap__',
    u'\U0001f476': '__baby__',
    u'\U0001f44c': '__semi__',
    u'\U0001f195': '__new__',
    u'\U0001f5d1': '__delete__',
    u'\U0001f448': '__ref__',
    u'\U0001f449': '__deref__',
    u'\U0001f346': '__long__',
}
# Returns a parse-tree from a Cmoji file
def parse_file(file_name):
    """Parse a Cmoji source file and return its Lark parse tree.

    The file is read as UTF-8, every emoji listed in ``emoji_table`` is
    replaced by its ``__keyword__`` spelling, tabs and newlines are turned
    into spaces, and the result is parsed with the ``RULES`` grammar.

    Args:
        file_name: Path of the Cmoji source file to read.

    Returns:
        The tree produced by ``Lark(RULES).parse`` for the preprocessed text.

    Raises:
        Exception: If parsing fails. The preprocessed text and a hint are
            printed first, and the underlying parser error is attached as
            the cause.
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    larker = Lark(RULES)

    # Cmoji source is emoji text: decode explicitly as UTF-8 instead of
    # relying on the platform default encoding, which is not UTF-8 everywhere.
    with open(file_name, 'r', encoding='utf-8') as f:
        parse_string = f.read()

    # Pre-processes the cmoji file, replacing emojis with their parse equivalents
    for emoji, keyword in emoji_table.items():
        parse_string = parse_string.replace(emoji, keyword)

    # The grammar only ignores the space character, so tabs and line breaks
    # become spaces. Line breaks must turn into a separator (not vanish),
    # otherwise the last token of one line fuses with the first of the next.
    parse_string = parse_string.replace('\t', ' ').replace('\n', ' ')

    try:
        tree = larker.parse(parse_string)
    except Exception as err:
        # Show exactly what the parser saw to make the failure debuggable.
        print(parse_string)
        print('Needs more emoji\'s, mate')
        # Same exception type as before, but keep the message and the cause.
        raise Exception('Needs more emoji\'s, mate') from err
    return tree
# Script entry: when at least one command-line argument is present, parse the
# file named by the first argument and pass the tree to convert_code.convert.
if sys.argv[1:]:
    convert_code.convert(parse_file(sys.argv[1]))