This repository has been archived by the owner on Jan 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SymToJsonEvent.js
411 lines (381 loc) · 13 KB
/
SymToJsonEvent.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// note: perhaps {, }, [, ], :, , should go to buffer -- eatEmitValue?
// todo: make a good impl based on inner functions; after it's tested and benchmarked, inline all the functions and see if it improves things
// Character codes used for digit / hex-digit range checks.
const charCode0 = '0'.charCodeAt(0)
const charCode1 = '1'.charCodeAt(0)
const charCode9 = '9'.charCodeAt(0)
const charCodeLowerA = 'a'.charCodeAt(0)
const charCodeLowerF = 'f'.charCodeAt(0)
const charCodeUpperA = 'A'.charCodeAt(0)
const charCodeUpperF = 'F'.charCodeAt(0)
// Sentinel returned by push() / eatFork() to signal "keep feeding symbols".
const Continue = {id: 'continue'}
// Sentinel returned by value() when the symbol cannot start a value.
const Mismatch = {id: 'mismatch'}
// todo: max buffer size; max prefix size; option to not buffer prefixes
// might be useful to have open string and close string events
// where you can decide to ignore the string -- this will cause the parser to drop all syms until close instead of buffering them -- saving memory; same for key?
// this could make sense to ignore values of certain keys
// perhaps a generic buffer switch could be sufficient + events for commas -- on each event you can toggle buffering by returning appropriate feedback; could even set event granularity, e.g. events for substrings, where you could search strings for some needle
// and/or could have a buffer full event/feedback
// todo: JsonEventType.true, ...
// SymToJson.make
/**
 * Creates a push-style JSON tokenizer that is fed one symbol at a time and
 * forwards events to `next`.
 *
 * Each event passed to `next.push` is `{id, wsBuffer, buffer}` where `id` is one
 * of: 'open object', 'close object', 'open array', 'close array', 'key',
 * 'colon', 'comma', 'string', 'number', 'true', 'false', 'null'; `wsBuffer`
 * holds the whitespace symbols seen before the token and `buffer` holds the
 * token's own symbols (empty for structural tokens).
 *
 * Internal states (`state.choiceId`) relevant to structure:
 *   '*value' -- right after '[': a value or ']' may follow
 *   '+value' -- after ',' in an array or after ':': a value is required
 *   '*key'   -- right after '{': a key or '}' may follow
 *   '+key'   -- after ',' in an object: a key is required
 *   'key*'   -- after a key: ':' is required
 *   'value*' -- after a value inside a container: ',' or the matching close
 *   'final'  -- after the top-level value: only whitespace may follow
 *
 * @param {{push: Function, end: Function}} next - downstream sink; `push`
 *   receives token events, `end` receives `{id: 'end', wsBuffer}`.
 * @returns {{push: Function, end: Function}} `push(sym)` consumes one symbol
 *   and throws an Error on invalid input; `end()` finishes the stream, throws
 *   if the input is incomplete, and returns the result of `next.end`.
 */
export const SymToJsonEvent = (next) => {
  // todo: inline state vars
  const state = {
    choiceId: 'initial',
    isDone: false,
    wsBuffer: [],
    buffer: [],
  }
  // Stack of enclosing contexts: 'top', 'object', 'array', or 'key'
  // ('key' is pushed while a member name is expected/being read).
  const parents = ['top']
  // Number of hex digits already consumed in the current \uXXXX escape.
  let hexSeqIdx = 0
  const dumpState = () => JSON.stringify(state)
  const currentParent = () => parents[parents.length - 1]
  // Appends sym to the token buffer without changing state.
  const eat = (sym) => { state.buffer.push(sym) }
  // Appends sym to the token buffer and switches to state choiceId.
  // maybe this should return sth like {continue: true}
  const eatFork = (sym, choiceId) => {
    state.buffer.push(sym)
    state.choiceId = choiceId
    return Continue
  }
  // Appends sym to the whitespace prefix of the upcoming token.
  const eatPrefix = (sym) => { state.wsBuffer.push(sym) }
  // Forwards an event to next.push, resets both buffers, switches to state
  // choiceId, and returns whatever next.push returned.
  const emit = (id, choiceId) => {
    // todo: {type, prefix, buffer}
    const ret = next.push({
      id,
      wsBuffer: state.wsBuffer, // or join
      buffer: state.buffer, // or
    })
    state.buffer = []
    state.wsBuffer = []
    state.choiceId = choiceId
    return ret
  }
  // Emits a completed value; the follow-up state depends on whether we are at
  // the top level ('final') or inside a container ('value*').
  // todo? in most cases eat, emitvalue could be replaced with eatemitvalue
  const emitValue = (id) => {
    emit(id, currentParent() === 'top' ? 'final' : 'value*')
  }
  const isZeroNine = (sym) => {
    const code = sym.charCodeAt(0)
    return code >= charCode0 && code <= charCode9
  }
  const isOneNine = (sym) => {
    const code = sym.charCodeAt(0)
    return code >= charCode1 && code <= charCode9
  }
  const isWhitespace = (sym) => {
    return ' \r\n\t'.includes(sym)
  }
  // returning continue, next.push result, setting status ready for .end()
  // extracting repeated fragments
  // either make key a new kind of parent, entered on { and , (in object), and exited on :
  // or create a key flag
  const crash = (sym, msg) => {
    throw Error(`Expected ${msg}, got ${sym}`)
  }
  // Tries to start a value (or eat leading whitespace) with sym.
  // Returns Mismatch when sym can do neither.
  const value = (sym) => {
    if (sym === '{') {
      parents.push('object')
      parents.push('key')
      return emit('open object', '*key')
    }
    if (sym === '[') {
      parents.push('array')
      return emit('open array', '*value')
    }
    if (sym === '"') return eatFork(sym, '"*')
    if (sym === 't') return eatFork(sym, 't*rue')
    if (sym === 'f') return eatFork(sym, 'f*alse')
    if (sym === 'n') return eatFork(sym, 'n*ull')
    if (sym === '-') return eatFork(sym, '-*')
    if (sym === '0') return eatFork(sym, '0*')
    if (isOneNine(sym)) return eatFork(sym, '1-9*')
    if (isWhitespace(sym)) return eatPrefix(sym)
    // return {id: 'error', message: `Unexpected symbol in value ${sym}`}
    return Mismatch
  }
  // After the integer part: '.' starts a fraction, otherwise try exponent.
  const fraction = (sym) => {
    if (sym === '.') return eatFork(sym, '0-9.*')
    return exponent(sym)
  }
  // 'e'/'E' starts an exponent, otherwise the number is complete.
  const exponent = (sym) => {
    if ('eE'.includes(sym)) return eatFork(sym, 'exp*')
    return number(sym)
  }
  const number = (sym) => {
    // we assume here that sym is a non-numeric symbol that terminates the number
    // note: eatemitvalue is not suitable here
    emitValue('number')
    // the terminating symbol is part of what comes after the number -- essentially a space or a comma
    // let the standard flow handle that
    return self.push(sym)
  }
  // Closes the innermost container if sym is its matching bracket; crashes otherwise.
  const closeParent = (sym) => {
    const parent = currentParent()
    if (parent === 'object' && sym === '}') {
      parents.pop()
      // could eatEmitValue just as well
      emitValue('close object')
    }
    else if (parent === 'array' && sym === ']') {
      parents.pop()
      // could eatEmitValue just as well
      emitValue('close array')
    }
    else {
      crash(sym, `whitespace or comma or ${parent} close`)
    }
  }
  const self = {
    push: (sym) => {
      const {isDone, choiceId} = state
      if (isDone) {
        throw Error(`PUSH: Matcher already completed! ${dumpState()}`)
      }
      // todo: maybe replace the if-elses with a map
      // todo: set isDone before the end (on match or mismatch)
      // todo: prioritize? order by most often hit branches first
      // todo: ?initial -> *value or top
      if (choiceId === 'initial') {
        if (value(sym) === Mismatch) {
          throw Error(`Unexpected top-level symbol ${sym}`)
        }
      }
      else if (choiceId === 'final') {
        // todo: could tune the parser so it accepts infinite stream of space-separated JSON values
        if (isWhitespace(sym)) eatPrefix(sym)
        else {
          throw Error(`Unexpected non-whitespace after top-level value: ${sym}`)
        }
      }
      else if (choiceId === '"*') {
        if (sym === '"') {
          eat(sym)
          // a string read while a key is expected is a member name
          if (currentParent() === 'key') emit('key', 'key*')
          else emitValue('string')
        }
        else if (sym === '\\') eatFork(sym, '\\*')
        else {
          const code = sym.charCodeAt(0)
          if (code >= 0x0020 && code <= 0x10ffff) { eat(sym) }
          else {
            throw Error(`Unexpected control character: ${code}`)
          }
        }
      }
      else if (choiceId === '\\*') {
        if ('"\\/bfnrt'.includes(sym)) eatFork(sym, '"*')
        else if (sym === 'u') eatFork(sym, '\\u*')
        else {
          // todo: error: invalid escape
          throw Error(`Invalid escape character: ${sym}`)
        }
      }
      else if (choiceId === '\\u*') {
        // '0123456789abcdefABCDEF'.includes(sym)
        const code = sym.charCodeAt(0)
        if (
          (code >= charCode0 && code <= charCode9) ||
          (code >= charCodeLowerA && code <= charCodeLowerF) ||
          (code >= charCodeUpperA && code <= charCodeUpperF)
        ) {
          if (hexSeqIdx < 3) {
            hexSeqIdx += 1
            eat(sym)
          } else {
            // fourth hex digit: the escape is complete, back to string body
            hexSeqIdx = 0
            eatFork(sym, '"*')
          }
        } else {
          // todo: error: invalid hex escape
          throw Error(`Invalid hexadecimal escape character: ${sym}`)
        }
      }
      else if (choiceId === '-*') {
        if (sym === '0') eatFork(sym, '0*')
        else {
          // todo: extract ~ afterMinus
          if (isOneNine(sym)) eatFork(sym, '1-9*')
          else {
            // todo: throw: invalid after -
            throw Error(`Expected 0-9, got ${sym}`)
          }
        }
      }
      else if (choiceId === '0*') return fraction(sym)
      else if (choiceId === '1-9*') {
        // todo: extract code0-9, maybe code1-9
        if (isZeroNine(sym)) eatFork(sym, '1-90-9*')
        else return fraction(sym)
      }
      else if (choiceId === '0-9.*') {
        // todo: extract
        if (isZeroNine(sym)) eatFork(sym, '0-9.0-9*')
        else {
          throw Error(`expected 0-9, got ${sym}`)
        }
      }
      else if (choiceId === 'exp*') {
        if ('+-'.includes(sym)) eatFork(sym, 'exp+-*')
        else {
          // todo: extract
          if (isZeroNine(sym)) eatFork(sym, 'exp+-0-9')
          else throw Error(`Expected +-0..9, got ${sym}`)
        }
      }
      else if (choiceId === '1-90-9*') {
        // todo?: extract digit loop
        if (isZeroNine(sym)) eat(sym)
        else return fraction(sym)
      }
      else if (choiceId === '0-9.0-9*') {
        // todo?: extract digit loop
        if (isZeroNine(sym)) eat(sym)
        else return exponent(sym)
      }
      else if (choiceId === 'exp+-*') {
        // todo charcode ><
        if (isZeroNine(sym)) eatFork(sym, 'exp+-0-9')
        else {
          throw Error(`Expected digit, got ${sym}`)
        }
      }
      else if (choiceId === 'exp+-0-9') {
        // todo?: extract digit loop
        if (isZeroNine(sym)) eat(sym)
        else return number(sym)
      }
      else if (choiceId === 't*rue') {
        if (sym === 'r') eatFork(sym, 'tr*ue')
        else throw Error(`expected t[r]ue, got t[${sym}]...`)
      }
      else if (choiceId === 'tr*ue') {
        // todo: error reporting
        if (sym === 'u') eatFork(sym, 'tru*e')
        else throw Error(`expected tr[u]e, got tr[${sym}]...`)
      }
      else if (choiceId === 'tru*e') {
        // todo: error reporting
        if (sym === 'e') {
          eat(sym)
          emitValue('true')
        }
        else throw Error(`expected tru[e], got tru[${sym}]...`)
      }
      else if (choiceId === 'f*alse') {
        // todo: error reporting
        if (sym === 'a') eatFork(sym, 'fa*lse')
        else throw Error(`expected f[a]lse, got f[${sym}]...`)
      }
      else if (choiceId === 'fa*lse') {
        // todo: error reporting
        if (sym === 'l') eatFork(sym, 'fal*se')
        else throw Error(`expected fa[l]se, got fa[${sym}]...`)
      }
      else if (choiceId === 'fal*se') {
        // todo: error reporting
        if (sym === 's') eatFork(sym, 'fals*e')
        else throw Error(`expected fal[s]e, got fal[${sym}]...`)
      }
      else if (choiceId === 'fals*e') {
        // todo: error reporting
        if (sym === 'e') {
          eat(sym)
          emitValue('false')
        }
        else throw Error(`expected fals[e], got fals[${sym}]...`)
      }
      else if (choiceId === 'n*ull') {
        // todo: error reporting
        if (sym === 'u') eatFork(sym, 'nu*ll')
        else throw Error(`expected n[u]ll, got n[${sym}]...`)
      }
      else if (choiceId === 'nu*ll') {
        // todo: error reporting
        if (sym === 'l') eatFork(sym, 'nul*l')
        else throw Error(`expected nu[l]l, got nu[${sym}]...`)
      }
      else if (choiceId === 'nul*l') {
        // todo: error reporting
        if (sym === 'l') {
          eat(sym)
          emitValue('null')
        }
        else throw Error(`expected nul[l], got nul[${sym}]...`)
      }
      else if (choiceId === '*value') {
        // right after '[': an empty array may be closed immediately
        if (value(sym) === Mismatch) {
          if (sym === ']') closeParent(sym)
          else crash(sym, 'whitespace or value or array close')
        }
      }
      else if (choiceId === '+value') {
        // after ',' in an array or after ':': a value is mandatory, so a
        // closing bracket here (trailing comma / missing member value) is an error
        if (value(sym) === Mismatch) crash(sym, 'whitespace or value')
      }
      else if (choiceId === 'value*') {
        // todo: accept whitespace, comma, ] if current parent is array, } if current parent is object
        if (sym === ',') {
          const parent = currentParent()
          if (parent === 'object') {
            parents.push('key')
            emit('comma', '+key')
          }
          else if (parent === 'array') emit('comma', '+value')
          else throw Error(`Unexpected parent ${parent}`)
        }
        else if (isWhitespace(sym)) eatPrefix(sym)
        else closeParent(sym)
      }
      else if (choiceId === '*key') {
        // right after '{': an empty object may be closed immediately
        if (sym === '"') eatFork(sym, '"*')
        else if (sym === '}') {
          // const parent = parents[parents.length - 1]
          // console.assert(parent === 'key')
          // drop both the pending 'key' marker and the 'object' itself
          parents.pop()
          parents.pop()
          // eatemitvalue would work here too
          emitValue('close object')
        }
        else if (isWhitespace(sym)) eatPrefix(sym)
        else {
          crash(sym, `whitespace or " or object close`)
        }
      }
      else if (choiceId === '+key') {
        // after ',' in an object: a key is mandatory, '}' would be a trailing comma
        if (sym === '"') eatFork(sym, '"*')
        else if (isWhitespace(sym)) eatPrefix(sym)
        else {
          crash(sym, `whitespace or "`)
        }
      }
      else if (choiceId === 'key*') {
        // whitespace or : or crash
        // todo: emit key either on +close string or :
        // todo: transition from string close to this choiceId if choiceId before string open was *key
        if (sym === ':') {
          // console.assert(parents[parents.length - 1] === 'key')
          parents.pop()
          emit('colon', '+value')
        }
        else if (isWhitespace(sym)) eatPrefix(sym)
        else {
          throw Error(`Expected : or whitespace, got ${sym}`)
        }
      }
      return Continue
    },
    end: () => {
      // todo: include final wsBuffer in the end event
      const {isDone, choiceId} = state
      if (isDone) {
        throw Error(`END: Matcher already completed! ${dumpState()}`)
      }
      if (choiceId === 'final') {
        state.isDone = true
        // todo? or push event, then call next.end() w/o args
        return next.end({
          id: 'end',
          wsBuffer: state.wsBuffer,
        })
      }
      // A number has no terminator of its own, so end of input may complete it --
      // but only at the top level; inside an array/object the input is truncated.
      const endsInNumber = ['exp+-0-9', '1-9*', '1-90-9*', '0-9.0-9*', '0*'].includes(choiceId)
      if (endsInNumber && currentParent() === 'top') {
        state.isDone = true
        // eatemitvalue would not work here
        emitValue('number')
        return next.end({
          id: 'end',
          wsBuffer: state.wsBuffer,
        })
      }
      // todo: error
      throw Error(`todo: invalid end state ${dumpState()}`)
    },
  }
  return self
}