-
Notifications
You must be signed in to change notification settings - Fork 1
/
token.go
270 lines (243 loc) · 5 KB
/
token.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
package thrifter
import (
"bytes"
"crypto/sha1"
"fmt"
"regexp"
"strings"
)
// token identifies the lexical category of a piece of input. Its values are
// the T_* constants declared in this file.
type token int
// baseTypeTokens holds the names that isBaseTypeToken accepts as base types.
var baseTypeTokens = []string{"bool", "byte", "i8", "i16", "i32", "i64", "double", "string", "binary", "slist"}

// isBaseTypeToken reports whether str is exactly equal (case-sensitive) to one
// of the entries in baseTypeTokens.
func isBaseTypeToken(str string) bool {
	for i := 0; i < len(baseTypeTokens); i++ {
		if baseTypeTokens[i] == str {
			return true
		}
	}
	return false
}
// Token kinds. The values are assigned sequentially by iota, so the order of
// the entries below is significant: inserting or reordering entries changes
// every following value, and IsKeyword relies on all keyword tokens sitting
// strictly between keywordStart and keywordEnd.
const (
// special tokens
// T_ILLEGAL is the zero value of token.
T_ILLEGAL token = iota
T_EOF
// T_IDENT is what toToken returns for any literal it does not recognize.
T_IDENT
T_STRING // string literal
T_NUMBER // integer or float
// white space
T_SPACE
T_LINEBREAK // \n
T_RETURN // \r
T_TAB // \t
// punctuator
T_SEMICOLON // ;
T_COLON // :
T_EQUALS // =
T_QUOTE // "
T_SINGLEQUOTE // '
T_LEFTPAREN // (
T_RIGHTPAREN // )
T_LEFTCURLY // {
T_RIGHTCURLY // }
T_LEFTSQUARE // [
T_RIGHTSQUARE // ]
T_COMMENT // /
T_LESS // <
T_GREATER // >
T_COMMA // ,
T_DOT // .
T_PLUS // +
T_MINUS // -
// declaration keywords
// keywordStart is an exclusive lower-bound marker for the keyword range; it
// is not itself a keyword (see IsKeyword).
keywordStart
T_NAMESPACE
T_ENUM
T_SENUM // currently not supported
T_CONST
T_SERVICE
T_STRUCT
T_INCLUDE
T_CPP_INCLUDE
T_TYPEDEF
T_UNION
T_EXCEPTION
// field keywords
T_OPTIONAL
T_REQUIRED
// type keywords
T_MAP
T_SET
T_LIST
// function keywords
T_ONEWAY
T_VOID
T_THROWS
// keywordEnd is an exclusive upper-bound marker for the keyword range; it is
// not itself a keyword (see IsKeyword).
keywordEnd
)
// GetToken returns the token corresponding to the given string literal. It is
// the exported entry point for toToken and is mostly used when generating
// tokens. Literals that toToken does not recognize yield T_IDENT.
func GetToken(literal string) token {
return toToken(literal)
}
// toToken maps a literal to its token kind. Single-character whitespace and
// punctuation literals and the reserved words listed below are matched
// exactly (case-sensitive); every other literal is classified as T_IDENT.
func toToken(literal string) token {
	// Start from the fallback and overwrite it only on an exact match.
	tok := T_IDENT

	switch literal {
	// white space
	case "\n":
		tok = T_LINEBREAK
	case "\r":
		tok = T_RETURN
	case " ":
		tok = T_SPACE
	case "\t":
		tok = T_TAB

	// punctuator
	case ";":
		tok = T_SEMICOLON
	case ":":
		tok = T_COLON
	case "=":
		tok = T_EQUALS
	case "\"":
		tok = T_QUOTE
	case "'":
		tok = T_SINGLEQUOTE
	case "(":
		tok = T_LEFTPAREN
	case ")":
		tok = T_RIGHTPAREN
	case "{":
		tok = T_LEFTCURLY
	case "}":
		tok = T_RIGHTCURLY
	case "[":
		tok = T_LEFTSQUARE
	case "]":
		tok = T_RIGHTSQUARE
	case "<":
		tok = T_LESS
	case ">":
		tok = T_GREATER
	case ",":
		tok = T_COMMA
	case ".":
		tok = T_DOT
	case "+":
		tok = T_PLUS
	case "-":
		tok = T_MINUS

	// declaration keywords
	case "namespace":
		tok = T_NAMESPACE
	case "enum":
		tok = T_ENUM
	case "senum":
		tok = T_SENUM
	case "const":
		tok = T_CONST
	case "service":
		tok = T_SERVICE
	case "struct":
		tok = T_STRUCT
	case "include":
		tok = T_INCLUDE
	case "cpp_include":
		tok = T_CPP_INCLUDE
	case "typedef":
		tok = T_TYPEDEF
	case "union":
		tok = T_UNION
	case "exception":
		tok = T_EXCEPTION

	// field keywords
	case "optional":
		tok = T_OPTIONAL
	case "required":
		tok = T_REQUIRED

	// type keywords
	case "map":
		tok = T_MAP
	case "set":
		tok = T_SET
	case "list":
		tok = T_LIST

	// function keywords
	case "oneway":
		tok = T_ONEWAY
	case "void":
		tok = T_VOID
	case "throws":
		tok = T_THROWS
	}

	return tok
}
// Comment types, as accepted by getCommentValue. Numbering starts at 1
// (iota + 1), so the int zero value matches none of them.
const (
SINGLE_LINE_COMMENT = iota + 1 // a "//" comment, like this
MULTI_LINE_COMMENT /* a block comment, like this */
BASH_LIKE_COMMENT // a "#" comment: # like this
)
// GenTokenHash returns the SHA-1 digest of the token's Type, Raw and Pos
// fields, so that nodes such as enum/struct/service can find their element
// nodes while iterating over tokens.
//
// The result is the raw 20-byte digest converted to a string. It is intended
// for use as an opaque key and is not printable text.
func GenTokenHash(t *Token) (res string) {
	val := fmt.Sprintf("%d_%s_%+v", t.Type, t.Raw, t.Pos)

	// sha1.Sum hashes val itself. Calling Sum(b) on a hash.Hash that has had
	// nothing written to it only appends the digest of empty input to b, which
	// would return val followed by a constant suffix rather than a hash of val.
	digest := sha1.Sum([]byte(val))
	return string(digest[:])
}
// IsDigit reports whether lit is one of the ASCII decimal digits '0' through
// '9'. Digits from other scripts are not accepted.
func IsDigit(lit rune) bool {
	return '0' <= lit && lit <= '9'
}
// Patterns used by IsNumber. They are compiled once at package initialization
// instead of on every call; MustCompile is safe here because the patterns are
// constant and known to be valid.
var (
	floatLiteralPattern = regexp.MustCompile("^\\d+\\.\\d+$")
	intLiteralPattern   = regexp.MustCompile("^\\d+$")
)

// IsNumber classifies str as an unsigned decimal number literal.
//
// isFloat is true when str consists of one or more ASCII digits, a single dot,
// and one or more ASCII digits (for example "3.14"). isInt is true when str
// consists solely of one or more ASCII digits (for example "42"). Both results
// are false for anything else, including the empty string, signed values,
// exponents, and forms such as "1." or ".5".
func IsNumber(str string) (isFloat bool, isInt bool) {
	return floatLiteralPattern.MatchString(str), intLiteralPattern.MatchString(str)
}
// getCommentValue strips the comment delimiters from raw according to
// commentType and returns what is left:
//
//   - SINGLE_LINE_COMMENT: the first "//" is removed.
//   - MULTI_LINE_COMMENT: every "/*" is removed, then every "*/".
//   - BASH_LIKE_COMMENT: the first "#" is removed.
//
// Any other commentType yields the empty string. Surrounding whitespace is
// left untouched.
func getCommentValue(raw string, commentType int) (res string) {
	if commentType == SINGLE_LINE_COMMENT {
		return strings.Replace(raw, "//", "", 1)
	}
	if commentType == MULTI_LINE_COMMENT {
		withoutOpeners := strings.ReplaceAll(raw, "/*", "")
		return strings.ReplaceAll(withoutOpeners, "*/", "")
	}
	if commentType == BASH_LIKE_COMMENT {
		return strings.Replace(raw, "#", "", 1)
	}
	return ""
}
// IsKeyword reports whether tok is a keyword token, i.e. whether its value
// lies strictly between the keywordStart and keywordEnd markers.
func IsKeyword(tok token) bool {
	return tok > keywordStart && tok < keywordEnd
}
// IsWhitespace reports whether tok is one of the whitespace tokens: T_SPACE,
// T_LINEBREAK, T_RETURN or T_TAB.
func IsWhitespace(tok token) bool {
	switch tok {
	case T_SPACE, T_LINEBREAK, T_RETURN, T_TAB:
		return true
	}
	return false
}
// toString concatenates the Raw text of every token from start through end,
// inclusive, following the Next links, and returns the result.
//
// end must be reachable from start via Next; the walk does not check for a
// nil link before following it.
func toString(start *Token, end *Token) string {
	var out bytes.Buffer
	for tok := start; tok != end; tok = tok.Next {
		out.WriteString(tok.Raw)
	}
	// The loop stops before end, so append its text explicitly.
	out.WriteString(end.Raw)
	return out.String()
}
// Quote characters recognized by unQuote, in both string and rune form.
const singleQuoteString = "'"
const singleQuoteRune = '\''
const quoteString = "\""
const quoteRune = '"'

// unQuote removes one matching pair of leading and trailing single or double
// quotes from lit and returns the inner text together with the quote rune
// that was removed.
//
// If lit is shorter than two bytes, or does not both start and end with the
// same quote character, lit is returned unchanged and the returned rune is
// quoteRune. strconv.Unquote cannot be used here because it would also
// unescape the contents.
func unQuote(lit string) (string, rune) {
	if len(lit) < 2 {
		return lit, quoteRune
	}

	// Both quote characters are single-byte ASCII, so the first and last bytes
	// can be inspected directly. This avoids a []rune conversion, which would
	// allocate and would rewrite any invalid UTF-8 bytes inside the literal to
	// U+FFFD.
	first, last := lit[0], lit[len(lit)-1]
	if first == last && (first == quoteRune || first == singleQuoteRune) {
		return lit[1 : len(lit)-1], rune(first)
	}
	return lit, quoteRune
}