-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.go
246 lines (220 loc) · 5.45 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
package ybase
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"log/slog"
)
// EOF is the sentinel returned in place of a rune or a token type when the
// input is exhausted or an error has been recorded.
const EOF = -1
// ErrYbase is joined into every error recorded through Errorf, so errors
// produced by this package can be matched with errors.Is(err, ErrYbase).
var ErrYbase = errors.New("Ybase")
// DebugFunc receives debug log records: a message followed by attributes.
// It has the same shape as slog.Debug, which can be passed directly.
type DebugFunc func(msg string, v ...any)
// NilDebugFunc is a DebugFunc that discards everything it is given.
func NilDebugFunc(msg string, v ...any) {
	// Intentionally empty: used when the caller does not want debug output.
}
// Reader is the rune source handed to scan functions. It tracks the current
// position, collects consumed runes into a buffer and records errors.
type Reader interface {
// ResetBuffer empties the buffer of runes collected by Next.
ResetBuffer()
// Buffer returns the runes collected by Next since the last ResetBuffer.
Buffer() string
// Next consumes the next rune, appends it to the buffer and advances the
// position. It returns EOF at end of input or when reading fails.
Next() rune
// Peek returns the next rune without consuming it.
// It returns EOF at end of input or when reading fails.
Peek() rune
// Discard consumes the next rune and advances the position without adding
// the rune to the buffer. It returns EOF at end of input or when reading fails.
Discard() rune
// Err returns the error recorded by the most recent Errorf call, or nil.
Err() error
// Debugf outputs a debug log made of msg and the attributes v.
Debugf(msg string, v ...any)
// Errorf records err, annotated with msg, as the reader's error and logs it.
Errorf(err error, msg string, v ...any)
// DiscardWhile calls Discard() while pred(Peek()) is true.
DiscardWhile(pred func(rune) bool)
// NextWhile calls Next() while pred(Peek()) is true.
NextWhile(pred func(rune) bool)
// Pos returns the current position.
Pos() Pos
}
// reader implements Reader on top of a bufio.Reader.
type reader struct {
// pos is the position after the last rune consumed by Next or Discard.
pos Pos
// rdr is the buffered input the runes are read from.
rdr *bufio.Reader
// buf collects the runes consumed by Next until ResetBuffer is called.
buf bytes.Buffer
// err is the error recorded by the last call to Errorf, if any.
err error
// debugFunc receives the debug logs; NewReaderWithInitPos never leaves it nil.
debugFunc DebugFunc
}
// NewReaderWithInitPos returns a Reader that reads runes from rdr through a
// bufio.Reader and starts counting positions from initPos.
//
// A nil debugFunc is replaced by NilDebugFunc, so logging is always safe to
// call on the returned reader.
func NewReaderWithInitPos(rdr io.Reader, debugFunc DebugFunc, initPos Pos) Reader {
	created := &reader{
		pos:       initPos,
		rdr:       bufio.NewReader(rdr),
		debugFunc: NilDebugFunc,
	}
	if debugFunc != nil {
		created.debugFunc = debugFunc
	}
	return created
}
// NewReader returns a Reader over rdr whose initial position is
// NewPos(1, 0, 0).
//
// debugFunc may be nil; see NewReaderWithInitPos.
func NewReader(rdr io.Reader, debugFunc DebugFunc) Reader {
	start := NewPos(1, 0, 0)
	return NewReaderWithInitPos(rdr, debugFunc, start)
}
// Pos returns the position reached by the runes consumed so far.
//
// It is declared on *reader like the mutating methods of the type, so a call
// does not copy the whole reader (including its buffer).
func (r *reader) Pos() Pos {
	return r.pos
}
// ResetBuffer drops every rune collected by Next so far.
func (r *reader) ResetBuffer() {
	r.buf.Reset()
}
// Buffer returns the runes collected by Next since the last ResetBuffer.
//
// It is declared on *reader like the mutating methods of the type, so a call
// does not copy the whole reader (including its buffer).
func (r *reader) Buffer() string {
	return r.buf.String()
}
// Err returns the error recorded by the most recent Errorf call, or nil when
// none has been recorded.
//
// It is declared on *reader like the mutating methods of the type, so a call
// does not copy the whole reader (including its buffer).
func (r *reader) Err() error {
	return r.err
}
// logAttrs returns the attributes attached to every log record: the current
// line, column and offset, and the contents of the buffer.
//
// It is declared on *reader like the mutating methods of the type, so a call
// does not copy the whole reader (including its buffer).
func (r *reader) logAttrs() []any {
	return []any{
		slog.Int("line", r.pos.Line()),
		slog.Int("column", r.pos.Column()),
		slog.Int("offset", r.pos.Offset()),
		slog.String("buf", r.buf.String()),
	}
}
// Debugf sends msg, prefixed with "ybase: ", to the debug function. The
// reader's own attributes (see logAttrs) come first, followed by v.
//
// It is declared on *reader like the mutating methods of the type: Debugf runs
// for every rune read, and a value receiver would copy the whole reader
// (including its buffer) on each call.
func (r *reader) Debugf(msg string, v ...any) {
	attrs := r.logAttrs()
	attrs = append(attrs, v...)
	r.debugFunc("ybase: "+msg, attrs...)
}
// Errorf records err as the reader's error and logs it.
//
// The stored error is ErrYbase joined with err wrapped as "<err>: <msg>"; it
// replaces any error recorded earlier. The log record carries the reader's own
// attributes, then v, then the stored error under the key "err".
func (r *reader) Errorf(err error, msg string, v ...any) {
	annotated := fmt.Errorf("%w: %s", err, msg)
	r.err = errors.Join(ErrYbase, annotated)

	fields := append(r.logAttrs(), v...)
	fields = append(fields, slog.Any("err", r.err))
	r.debugFunc("ybase: "+msg, fields...)
}
// DiscardWhile discards runes for as long as pred accepts the rune returned
// by Peek.
//
// The loop also stops as soon as Peek reports EOF, even when pred accepts
// EOF: nothing is left to consume at that point, and a predicate such as
// "anything but a newline" would otherwise spin forever at the end of input.
func (r *reader) DiscardWhile(pred func(rune) bool) {
	for {
		x := r.Peek()
		if !pred(x) || x == EOF {
			return
		}
		// The rune was already inspected through Peek; its value is not needed.
		_ = r.Discard()
	}
}
// NextWhile consumes runes into the buffer for as long as pred accepts the
// rune returned by Peek.
//
// The loop also stops as soon as Peek reports EOF, even when pred accepts
// EOF: nothing is left to consume at that point, and a predicate such as
// "anything but a closing quote" would otherwise spin forever at the end of
// input.
func (r *reader) NextWhile(pred func(rune) bool) {
	for {
		x := r.Peek()
		if !pred(x) || x == EOF {
			return
		}
		r.next()
	}
}
// Discard consumes the next rune without adding it to the buffer, advances
// the position past it and returns it.
//
// EOF is returned when no rune could be read. A read failure other than
// io.EOF is additionally recorded through Errorf.
func (r *reader) Discard() rune {
	ch, _, readErr := r.rdr.ReadRune()
	r.Debugf("Discard", slog.String("rune", string(ch)), slog.Any("err", readErr))

	switch {
	case readErr == nil:
		r.pos = r.pos.Add(ch)
		return ch
	case errors.Is(readErr, io.EOF):
		// Plain end of input is not an error.
		return EOF
	default:
		r.Errorf(readErr, "Discard from reader")
		return EOF
	}
}
// Peek returns the next rune without consuming it: the rune is read and then
// pushed back with UnreadRune, so neither the position nor the buffer change.
//
// EOF is returned when no rune could be read or when the rune could not be
// pushed back. Every failure except io.EOF is recorded through Errorf.
func (r *reader) Peek() rune {
	ch, _, readErr := r.rdr.ReadRune()
	r.Debugf("Peek", slog.String("rune", string(ch)), slog.Any("err", readErr))

	switch {
	case readErr == nil:
		// A rune was read; it is pushed back below.
	case errors.Is(readErr, io.EOF):
		// Plain end of input is not an error.
		return EOF
	default:
		r.Errorf(readErr, "Peek from reader")
		return EOF
	}

	if unreadErr := r.rdr.UnreadRune(); unreadErr != nil {
		r.Errorf(unreadErr, "Peek failed to unread")
		return EOF
	}
	return ch
}
// Next consumes the next rune, advances the position past it, appends it to
// the buffer and returns it.
//
// EOF is returned when no rune could be read or when the rune could not be
// written to the buffer (the position has already advanced in that case).
// Every failure except io.EOF is recorded through Errorf.
func (r *reader) Next() rune {
	ch, _, readErr := r.rdr.ReadRune()
	r.Debugf("Next", slog.String("rune", string(ch)), slog.Any("err", readErr))

	switch {
	case readErr == nil:
		// A rune was read; the bookkeeping follows below.
	case errors.Is(readErr, io.EOF):
		// Plain end of input is not an error.
		return EOF
	default:
		r.Errorf(readErr, "Next from reader")
		return EOF
	}

	r.pos = r.pos.Add(ch)
	if _, writeErr := r.buf.WriteRune(ch); writeErr != nil {
		r.Errorf(writeErr, "Next failed to write buffer")
		return EOF
	}
	return ch
}
// next consumes one rune through Next and drops the returned rune.
func (r *reader) next() {
	r.Next()
}
// ScanFunc reads runes from the Reader and returns the type of the token it
// recognized. DoLex treats a returned EOF as the end of the input.
type ScanFunc func(Reader) int
// Scanner couples a Reader with a ScanFunc and accepts error reports from the
// generated parser.
type Scanner interface {
Reader
// Scan runs the scan function on the reader and returns the token type it reports.
Scan() int
// Error consumes an error message reported through yyLexer and records it
// as the reader's error.
Error(msg string)
}
// scanner implements Scanner by delegating to an embedded Reader.
type scanner struct {
Reader
// scanFunc is invoked with the embedded Reader on every Scan call.
scanFunc ScanFunc
}
// NewScanner returns a Scanner that reads through rdr and recognizes tokens
// with scanFunc.
func NewScanner(rdr Reader, scanFunc ScanFunc) Scanner {
	s := new(scanner)
	s.Reader = rdr
	s.scanFunc = scanFunc
	return s
}
// Scan runs the scan function on the embedded Reader and returns the token
// type it reports.
func (s *scanner) Scan() int {
	tokenType := s.scanFunc(s.Reader)
	return tokenType
}
// Error records msg, an error message reported through yyLexer, as the
// reader's error.
//
// ErrYbase is passed as the cause and msg only as the annotation. Errorf
// already formats "<cause>: <msg>", so wrapping msg into the cause as well
// would repeat it in the recorded error text.
func (s *scanner) Error(msg string) {
	s.Errorf(ErrYbase, msg)
}
// Lexer is a utility for implementing yyLexer.
//
// Recommendation:
//   - Set level to yyDebug (YYDEBUG in yacc).
//   - Set yyErrorVerbose to true (YYERROR_VERBOSE in yacc).
//
// Implement yyLexer with Error(string) and Lex(*yySymType) int, e.g.
//
//	type ActualLexer struct {
//		Lexer
//	}
//
//	func (a *ActualLexer) Lex(lval *yySymType) int {
//		return a.DoLex(func(tok Token) {
//			lval.token = tok // declared in %union
//		})
//	}
type Lexer interface {
Scanner
// DoLex runs the lexical analysis for one token and passes it to callback.
// Returns the token type, or EOF if the input is exhausted or an error occurs.
DoLex(callback func(Token)) int
}
// lexer implements Lexer on top of a Scanner.
type lexer struct {
Scanner
// pos is the end position of the last token produced by DoLex (initially
// the scanner's position); DoLex uses it as the start of the next token.
pos Pos
}
// NewLexer returns a Lexer that produces tokens from scanner. The first token
// starts at the position the scanner reports at construction time.
func NewLexer(scanner Scanner) Lexer {
	l := &lexer{Scanner: scanner}
	l.pos = scanner.Pos()
	return l
}
// DoLex scans one token, hands it to callback and returns its type.
//
// The token spans from the end of the previous token (or the lexer's initial
// position) to the scanner's position after the scan, and carries the runes
// buffered during the scan. The buffer is reset once the token has been
// delivered and logged.
//
// EOF is returned, and callback is not invoked, when an error is already
// recorded, when the scan reports EOF, or when the scan records an error.
func (l *lexer) DoLex(callback func(Token)) int {
	// A recorded error is final: do not scan any further.
	if l.Err() != nil {
		return EOF
	}

	begin := l.pos
	typ := l.Scan()
	if typ == EOF {
		return EOF
	}
	if l.Err() != nil {
		return EOF
	}

	finish := l.Pos()
	l.pos = finish // the next token starts where this one ends

	tok := NewToken(typ, l.Buffer(), begin, finish)
	callback(tok)

	attrs := []any{
		slog.Int("type", tok.Type()),
		slog.String("value", tok.Value()),
		slog.Int("start.line", tok.Start().Line()),
		slog.Int("start.column", tok.Start().Column()),
		slog.Int("start.offset", tok.Start().Offset()),
		slog.Int("end.line", tok.End().Line()),
		slog.Int("end.column", tok.End().Column()),
		slog.Int("end.offset", tok.End().Offset()),
	}
	l.Debugf("Lex", attrs...)

	l.ResetBuffer()
	return tok.Type()
}