-
Notifications
You must be signed in to change notification settings - Fork 1
/
scanner.go
134 lines (125 loc) · 2.96 KB
/
scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
package iscdhcp
import (
"regexp"
)
// scanner analyzes a stream of bytes, one byte at a time, and emits codes
// denoting the beginning/end of lexical symbols.
//
// It is driven by a stack of scan states: step looks up the transitions for
// the state at the top of the stack in lexMap and pushes or pops states
// according to the transition that matches the current byte.
type scanner struct {
// stateStack holds the active scan states. Index 0 is the top of the stack,
// i.e. the state step consults; states are pushed and popped at the front.
stateStack []int
}
// init resets the scanner so that its state stack contains exactly one
// entry, scanStateFindAnyBegin. Any previous stack contents are discarded.
func (l *scanner) init() {
	fresh := make([]int, 0, 1)
	l.stateStack = append(fresh, scanStateFindAnyBegin)
}
// step feeds a single byte to the scanner and reports the lexical code for it.
//
// The patterns registered in lexMap for the state on top of the stack
// (index 0 of stateStack) are tried against b. When one matches, the
// transition's newStates are applied in order: scanSameState leaves the stack
// untouched, scanPopState removes the top state, and any other value is
// pushed as the new top. The transition's code is then returned. If no
// pattern matches, codeContinue is returned and the stack is unchanged.
//
// Map iteration order is unspecified, so the patterns registered for a single
// state must be mutually exclusive for the result to be deterministic.
//
// An error is returned, together with codeContinue, when the state stack is
// empty on entry or becomes empty after a pop.
func (l *scanner) step(b byte) (int, error) {
	// Guard against a scanner that was never initialised, or one whose stack
	// was drained by an earlier failed pop; indexing stateStack[0] would
	// otherwise panic with an index-out-of-range error.
	if len(l.stateStack) == 0 {
		return codeContinue, contextError("state stack is empty")
	}

	// Built once: every candidate pattern is matched against the same byte.
	input := []byte{b}
	for re, spec := range lexMap[l.stateStack[0]] {
		if !re.Match(input) {
			continue
		}
		for _, newState := range spec.newStates {
			switch newState {
			case scanSameState:
				// Stay in the current state; nothing to do.
			case scanPopState:
				l.stateStack = l.stateStack[1:]
				if len(l.stateStack) == 0 {
					return codeContinue, contextError("state stack is empty")
				}
			default:
				// Push newState as the new top of the stack (index 0).
				l.stateStack = append([]int{newState}, l.stateStack...)
			}
		}
		return spec.code, nil
	}
	return codeContinue, nil
}
// transitionSpec describes what the scanner does when a pattern in lexMap
// matches the current byte.
type transitionSpec struct {
// code is the value step returns for the matching byte.
code int
// newStates lists the state-stack operations step applies, in order:
// scanSameState leaves the stack alone, scanPopState pops the top state,
// and any other value is pushed as the new top state.
newStates []int
}
// lexMap is the scanner's transition table. The outer key is a scan state;
// the inner map associates a single-byte pattern with the transitionSpec that
// step applies when the current byte matches that pattern. Bytes that match
// no pattern of the current state produce no transition.
var lexMap = map[int]map[*regexp.Regexp]transitionSpec{
	// Looking for the start of any lexical symbol.
	scanStateFindAnyBegin: {
		regexp.MustCompile(`\{`):              {code: codeBlockBegin, newStates: []int{scanSameState}},
		regexp.MustCompile(`}`):               {code: codeBlockEnd, newStates: []int{scanSameState}},
		regexp.MustCompile("[0-9a-zA-Z!=~]"):  {code: codeIdentifierBegin, newStates: []int{scanStateFindIdentifierEnd}},
		regexp.MustCompile(`[\s\n]`):          {code: codeWhitespace, newStates: []int{scanSameState}},
		regexp.MustCompile(`"`):               {code: codeStringBegin, newStates: []int{scanStateFindStringEnd}},
		regexp.MustCompile(`#`):               {code: codeCommentBegin, newStates: []int{scanStateFindCommentEnd}},
		regexp.MustCompile(`;`):               {code: codeSemicolon, newStates: []int{scanSameState}},
	},

	// Inside an identifier: each of these bytes terminates it.
	scanStateFindIdentifierEnd: {
		regexp.MustCompile(`[\s]`): {code: codeIdentifierEnd, newStates: []int{scanPopState}},
		regexp.MustCompile(`{`):    {code: codeBlockBegin, newStates: []int{scanPopState}},
		// A quote ends the identifier and immediately opens a string.
		regexp.MustCompile(`"`): {code: codeStringBegin, newStates: []int{scanPopState, scanStateFindStringEnd}},
		regexp.MustCompile(`;`): {code: codeSemicolon, newStates: []int{scanPopState}},
		regexp.MustCompile(`,`): {code: codeComma, newStates: []int{scanPopState}},
	},

	// Inside a string: only the closing quote is significant.
	scanStateFindStringEnd: {
		regexp.MustCompile(`"`): {code: codeStringEnd, newStates: []int{scanPopState}},
	},

	// Inside a comment: it runs until the end of the line.
	scanStateFindCommentEnd: {
		regexp.MustCompile(`\n`): {code: codeCommentEnd, newStates: []int{scanPopState}},
	},
}
// Scan states and stack operations. These values appear in
// transitionSpec.newStates and as keys of lexMap. The first two are
// pseudo-states that step interprets as stack operations instead of pushing.
const (
// scanPopState tells step to remove the state at the top of the stack.
scanPopState = iota
// scanSameState tells step to leave the state stack unchanged.
scanSameState
// scanStateFindAnyBegin is the initial state installed by init; in it the
// scanner looks for the start of any lexical symbol.
scanStateFindAnyBegin
// scanStateFindIdentifierEnd is pushed after an identifier's first byte and
// popped on whitespace, '{', '"', ';' or ','.
scanStateFindIdentifierEnd
// scanStateFindStringEnd is pushed after an opening '"' and popped on the
// next '"'.
scanStateFindStringEnd
// scanStateFindCommentEnd is pushed after '#' and popped on the next newline.
scanStateFindCommentEnd
)
// Codes returned by scanner.step to classify the byte it was given.
const (
// codeContinue is returned when no pattern of the current state matches
// the byte, and alongside any error.
codeContinue = iota
// codeWhitespace marks a whitespace byte seen while looking for a symbol.
codeWhitespace
// codeBlockBegin marks a '{'.
codeBlockBegin
// codeBlockEnd marks a '}'.
codeBlockEnd
// codeStringBegin marks the opening '"' of a string.
codeStringBegin
// codeStringEnd marks the closing '"' of a string.
codeStringEnd
// codeIdentifierBegin marks the first byte of an identifier.
codeIdentifierBegin
// codeIdentifierEnd marks the whitespace byte that terminates an identifier.
codeIdentifierEnd
// codeCommentBegin marks the '#' that opens a comment.
codeCommentBegin
// codeCommentEnd marks the newline that closes a comment.
codeCommentEnd
// codeSemicolon marks a ';'.
codeSemicolon
// codeComma marks a ',' that terminates an identifier.
codeComma
)