-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
SiViC_ASM_Lexer.pas
460 lines (413 loc) · 14.1 KB
/
SiViC_ASM_Lexer.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
unit SiViC_ASM_Lexer;
{$INCLUDE '.\SiViC_defs.inc'}
interface
const
// Character that opens and closes a string literal (a single quote); a doubled
// quote inside a string stands for one literal quote character.
SVC_ASM_LEXER_CHAR_STRINGQUOTE = '''';
// Characters skipped between tokens: every character code from #0 to #32.
SVC_ASM_LEXER_CHARS_WHITESPACE = [#0..#32];
// Characters that may begin a number token.
// NOTE(review): '$' is presumably a hexadecimal prefix - confirm against the
// code that converts number tokens to values.
SVC_ASM_LEXER_CHARS_NUMBERSTART = ['$','0'..'9'];
// Characters accepted inside a number token once it has been started.
SVC_ASM_LEXER_CHARS_NUMBER = ['0'..'9','a'..'f','A'..'F','x','X'];
// Sign characters; a sign directly followed by a number is merged into that number.
SVC_ASM_LEXER_CHARS_UNARYOPERATORS = ['+','-'];
// Characters allowed in an identifier token.
SVC_ASM_LEXER_CHARS_IDENTIFIER = ['a'..'z','A'..'Z','0'..'9','_','@'];
// Identifier characters that are not treated as identifiers when they stand
// alone; a one-character token made of any of these is reported as general.
SVC_ASM_LEXER_CHARS_INVAL_1_IDENT = ['_','@'];
type
// Comment styles recognized by the lexer:
//   lcmtNone  - no comment
//   lcmtType1 - line comment started by two slashes
//   lcmtType2 - block comment in curly braces
//   lcmtType3 - block comment in parenthesis-asterisk pairs
//   lcmtType4 - block comment in slash-asterisk pairs
TSVCLexerCommentType = (lcmtNone,lcmtType1,lcmtType2,lcmtType3,lcmtType4);
// Kind of a produced token. lttUnaryOp is used only while tokenizing; Tokenize
// rewrites it to lttGeneral before returning.
TSVCLexerTokenType = (lttNumber,lttUnaryOp,lttIdentifier,lttGeneral,
lttComment,lttString,lttInvalid);
// Classification of a single input character (see GetCurrCharType).
TSVCLexerCharType = (lctWhiteSpace,lctNumber,lctUnaryOp,lctIdentifier,
lctStringQuote,lctOthers,lctInvalid);
// State of the tokenizing engine: scanning between tokens, or inside an
// identifier, number, comment or string.
TSVCLexerStage = (lsTraverse,lsIdentifier,lsNumber,lsComment,lsString);
// One token: its text, the 1-based position in the line where it starts, and its kind.
TSVCLexerToken = record
Str: String;
Start: Integer;
TokenType: TSVCLexerTokenType;
end;
// Token storage. Arr may be longer than Count; only the first Count items are valid.
TSVCLexerTokens = record
Arr: array of TSVCLexerToken;
Count: Integer;
end;
// Line-oriented lexer. Tokenize splits one line of text into tokens that are
// then accessible through the Tokens property. An unfinished block comment is
// remembered in ContinuousCommentType so that it continues on the next line.
TSVCLexer = class(TObject)
private
// line currently being tokenized
fLine: String;
// when true, comments are stored as lttComment tokens, otherwise they are dropped
fIncludeComments: Boolean;
// tokens produced by the last call to Tokenize
fTokens: TSVCLexerTokens;
// tokenizing engine variables
// current engine state
fStage: TSVCLexerStage;
// 1-based index of the character being processed
fPosition: Integer;
// 1-based index where the token under construction starts
fTokenStart: Integer;
// number of characters collected so far for the token under construction
fTokenLength: Integer;
// type of the comment being processed; lcmtNone once a block comment is closed
fCommentType: TSVCLexerCommentType;
// getter for Tokens; raises Exception when Index is out of bounds
Function GetToken(Index: Integer): TSVCLexerToken;
protected
// appends a token to fTokens, growing the storage as needed
procedure AddToken(const Str: String; Start: Integer; TokenType: TSVCLexerTokenType); virtual;
// tokenizing engine
// classifies the character at fPosition
Function GetCurrCharType: TSVCLexerCharType; virtual;
// returns the type of comment starting at fPosition, lcmtNone if there is none
Function CommentStart: TSVCLexerCommentType; virtual;
// returns true when the current comment ends at fPosition
Function CommentEnd: Boolean; virtual;
// per-stage handlers, each processes the single character at fPosition
procedure Process_Traverse; virtual;
procedure Process_Identifier; virtual;
procedure Process_Number; virtual;
procedure Process_Comment; virtual;
procedure Process_String; virtual;
public
// removes matching comment markers from both ends of Str
class Function TrimComment(const Str: String): String; virtual;
// removes enclosing quotes from Str and collapses doubled quotes
class Function UnquoteString(const Str: String): String; virtual;
constructor Create;
destructor Destroy; override;
// resets the engine stage, the token list and the remembered comment type
procedure Initialize; virtual;
// tokenizes Line; returns false when an invalid token was found
Function Tokenize(const Line: String): Boolean; virtual;
// empties the token list
procedure Clear; virtual;
// tokens of the last tokenized line, Index is 0-based
property Tokens[Index: Integer]: TSVCLexerToken read GetToken; default;
published
// whether comments are emitted as tokens
property IncludeComments: Boolean read fIncludeComments write fIncludeComments;
// comment type carried over between lines; Tokenize starts inside a comment
// when this holds one of the block comment types
property ContinuousCommentType: TSVCLexerCommentType read fCommentType write fCommentType;
// number of valid tokens
property Count: Integer read fTokens.Count;
end;
implementation
uses
SysUtils,
SiViC_Common;
// Returns the token stored at the given 0-based Index.
// Raises Exception when Index does not refer to a stored token.
Function TSVCLexer.GetToken(Index: Integer): TSVCLexerToken;
begin
// guard: reject everything outside of the valid token range first
If (Index < Low(fTokens.Arr)) or (Index >= fTokens.Count) then
  raise Exception.CreateFmt('TSVCLexer.GetToken: Index (%d) out of bounds.',[Index]);
Result := fTokens.Arr[Index];
end;
//==============================================================================
// Appends a new token (text, start position, type) to the end of the token
// list. The backing array grows in steps of eight items when it is full.
procedure TSVCLexer.AddToken(const Str: String; Start: Integer; TokenType: TSVCLexerTokenType);
const
  GROW_STEP = 8;
var
  NewToken: TSVCLexerToken;
begin
// prepare the complete record first...
NewToken.Str := Str;
NewToken.Start := Start;
NewToken.TokenType := TokenType;
// ...make sure there is a free slot for it...
If fTokens.Count >= Length(fTokens.Arr) then
  SetLength(fTokens.Arr,Length(fTokens.Arr) + GROW_STEP);
// ...and store it
fTokens.Arr[fTokens.Count] := NewToken;
Inc(fTokens.Count);
end;
//------------------------------------------------------------------------------
// Classifies the character at the current position. The checks are ordered,
// the first matching class wins (so digits are reported as number characters,
// not as identifier characters). Characters above code 127 that match no
// class are invalid, remaining characters are reported as lctOthers.
Function TSVCLexer.GetCurrCharType: TSVCLexerCharType;
var
  CurrChar: Char;
begin
CurrChar := fLine[fPosition];
If SVC_CharInSet(CurrChar,SVC_ASM_LEXER_CHARS_WHITESPACE) then
  Result := lctWhiteSpace
else If SVC_CharInSet(CurrChar,SVC_ASM_LEXER_CHARS_NUMBERSTART) then
  Result := lctNumber
else If SVC_CharInSet(CurrChar,SVC_ASM_LEXER_CHARS_UNARYOPERATORS) then
  Result := lctUnaryOp
else If SVC_CharInSet(CurrChar,SVC_ASM_LEXER_CHARS_IDENTIFIER) then
  Result := lctIdentifier
else If CurrChar = SVC_ASM_LEXER_CHAR_STRINGQUOTE then
  Result := lctStringQuote
else If Ord(CurrChar) > 127 then
  Result := lctInvalid
else
  Result := lctOthers;
end;
//------------------------------------------------------------------------------
// Checks whether a comment opens at the current position and returns its
// type, or lcmtNone when there is no comment opener. Does not move the
// position; the caller is responsible for skipping two-character openers.
Function TSVCLexer.CommentStart: TSVCLexerCommentType;
var
  NextChar: Char;
begin
Result := lcmtNone;
If fLine[fPosition] = '{' then
  // single-character opener, no look-ahead needed
  Result := lcmtType2
else If fPosition < Length(fLine) then
  begin
    // two-character openers, inspect the following character
    NextChar := fLine[fPosition + 1];
    case fLine[fPosition] of
      '/':  If NextChar = '/' then
              Result := lcmtType1
            else If NextChar = '*' then
              Result := lcmtType4;
      '(':  If NextChar = '*' then
              Result := lcmtType3;
    end;
  end;
end;
//------------------------------------------------------------------------------
// Checks whether the comment of type fCommentType is closed at the current
// position.
//
// When it is, the closing marker is added to fTokenLength (one character for
// a closing curly brace, two for the asterisk-based closers, in which case
// fPosition is also moved to the second character of the closer),
// fCommentType is reset to lcmtNone and true is returned.
//
// When it is not, nothing is changed and false is returned - the caller
// (Process_Comment) accounts for the current character itself. This matters
// for a closing curly brace met inside a comment of another type: it must be
// counted only once, otherwise the resulting comment token would be longer
// than the comment and would include text that follows it.
Function TSVCLexer.CommentEnd: Boolean;
begin
Result := False;
case fLine[fPosition] of
  '}':  If fCommentType = lcmtType2 then
          begin
            Result := True;
            Inc(fTokenLength);
          end;
  '*':  If fPosition < Length(fLine) then
          begin
            Result := ((fLine[fPosition + 1] = ')') and (fCommentType = lcmtType3)) or
                      ((fLine[fPosition + 1] = '/') and (fCommentType = lcmtType4));
            If Result then
              begin
                // consume the second character of the closing marker
                Inc(fPosition);
                Inc(fTokenLength,2);
              end;
          end;
end;
If Result then
  fCommentType := lcmtNone;
end;
//------------------------------------------------------------------------------
// Handles one character while the engine is between tokens. Depending on the
// class of the character it either emits a single-character token right away
// or opens a longer token and switches the engine to the matching stage.
procedure TSVCLexer.Process_Traverse;

  // Opens a token of InitLen characters at StartPos and switches to NewStage.
  procedure BeginToken(NewStage: TSVCLexerStage; StartPos: Integer; InitLen: Integer = 1);
  begin
    fTokenLength := InitLen;
    fTokenStart := StartPos;
    fStage := NewStage;
  end;

  // True when the last stored token is a unary operator placed immediately
  // before the current position.
  Function PrecededBySign: Boolean;
  var
    LastIdx: Integer;
  begin
    Result := False;
    If fTokens.Count > 0 then
      begin
        LastIdx := Pred(fTokens.Count);
        Result := (fTokens.Arr[LastIdx].TokenType = lttUnaryOp) and
                  (fTokens.Arr[LastIdx].Start = Pred(fPosition));
      end;
  end;

begin
case GetCurrCharType of
  lctWhiteSpace:; // nothing to do, keep scanning
  lctNumber:
    If PrecededBySign then
      begin
        // absorb the sign into the number - drop the operator token and
        // start the number one character earlier
        Dec(fTokens.Count);
        BeginToken(lsNumber,Pred(fPosition),2);
      end
    else BeginToken(lsNumber,fPosition);
  lctUnaryOp:
    AddToken(fLine[fPosition],fPosition,lttUnaryOp);
  lctIdentifier:
    BeginToken(lsIdentifier,fPosition);
  lctStringQuote:
    // a quote that is the very last character cannot open a string
    If fPosition >= Length(fLine) then
      AddToken(fLine[fPosition],fPosition,lttGeneral)
    else
      BeginToken(lsString,fPosition);
  lctOthers:
    begin
      fCommentType := CommentStart;
      case fCommentType of
        lcmtNone:
          AddToken(fLine[fPosition],fPosition,lttGeneral);
        lcmtType2:
          // one-character opener
          BeginToken(lsComment,fPosition);
      else
        // two-character opener - include and skip its second character
        BeginToken(lsComment,fPosition,2);
        Inc(fPosition);
      end;
    end;
  lctInvalid:
    AddToken(fLine[fPosition],fPosition,lttInvalid);
end;
end;
//------------------------------------------------------------------------------
// Handles one character while inside an identifier. Identifier characters
// extend the token; anything else closes it, and the character is handed
// back to the traverse stage by stepping the position one character back.
procedure TSVCLexer.Process_Identifier;
begin
If SVC_CharInSet(fLine[fPosition],SVC_ASM_LEXER_CHARS_IDENTIFIER) then
  Inc(fTokenLength)
else
  begin
    AddToken(Trim(Copy(fLine,fTokenStart,fTokenLength)),fTokenStart,lttIdentifier);
    fStage := lsTraverse;
    // the current character was not consumed, process it again
    Dec(fPosition);
  end;
end;
//------------------------------------------------------------------------------
// Handles one character while inside a number. Number characters extend the
// token. An identifier character that is not a number character turns the
// whole token into an identifier. Any other character closes the number and
// is handed back to the traverse stage.
procedure TSVCLexer.Process_Number;
begin
If SVC_CharInSet(fLine[fPosition],SVC_ASM_LEXER_CHARS_NUMBER) then
  Inc(fTokenLength)
else If SVC_CharInSet(fLine[fPosition],SVC_ASM_LEXER_CHARS_IDENTIFIER) then
  begin
    // not a number after all, continue as an identifier
    fStage := lsIdentifier;
    Inc(fTokenLength);
  end
else
  begin
    AddToken(Trim(Copy(fLine,fTokenStart,fTokenLength)),fTokenStart,lttNumber);
    fStage := lsTraverse;
    // the current character was not consumed, process it again
    Dec(fPosition);
  end;
end;
//------------------------------------------------------------------------------
// Handles one character while inside a comment. Until the comment is closed
// the token just grows; once closed, the comment is stored (only when
// comments are to be included) and the engine returns to the traverse stage.
procedure TSVCLexer.Process_Comment;
begin
If not CommentEnd then
  begin
    Inc(fTokenLength);
    Exit;
  end;
If fIncludeComments then
  AddToken(Copy(fLine,fTokenStart,fTokenLength),fTokenStart,lttComment);
fStage := lsTraverse;
end;
//------------------------------------------------------------------------------
// Handles one character while inside a string literal. A doubled quote is an
// escaped quote and stays part of the string; a single quote closes the
// string, which is then stored including both enclosing quotes.
procedure TSVCLexer.Process_String;
var
  QuoteIsDoubled: Boolean;
begin
If fLine[fPosition] <> SVC_ASM_LEXER_CHAR_STRINGQUOTE then
  begin
    // ordinary character
    Inc(fTokenLength);
    Exit;
  end;
// look ahead (when possible) to tell an escaped quote from a closing one
QuoteIsDoubled := False;
If fPosition < Length(fLine) then
  QuoteIsDoubled := fLine[fPosition + 1] = SVC_ASM_LEXER_CHAR_STRINGQUOTE;
If QuoteIsDoubled then
  begin
    // consume both quotes and stay in the string
    Inc(fPosition);
    Inc(fTokenLength,2);
    fStage := lsString;
  end
else
  begin
    fStage := lsTraverse;
    // + 1 accounts for the closing quote
    AddToken(Copy(fLine,fTokenStart,fTokenLength + 1),fTokenStart,lttString);
  end;
end;
//==============================================================================
// Strips comment markers from Str. The markers are removed only when Str
// starts with a comment opener and (for block comments) ends with the closer
// of the same comment type; in all other cases Str is returned unchanged.
// An empty Str gives an empty result.
class Function TSVCLexer.TrimComment(const Str: String): String;
var
  Len:      Integer;
  Opening:  TSVCLexerCommentType;
  Closing:  TSVCLexerCommentType;
begin
Result := Str;
Len := Length(Str);
If Len <= 0 then
  Exit;
// what does the string open with?
Opening := lcmtNone;
If Str[1] = '{' then
  Opening := lcmtType2
else If Len >= 2 then
  begin
    If Str[1] = '/' then
      begin
        If Str[2] = '/' then
          Opening := lcmtType1
        else If Str[2] = '*' then
          Opening := lcmtType4;
      end
    else If Str[1] = '(' then
      begin
        If Str[2] = '*' then
          Opening := lcmtType3;
      end;
  end;
// what does the string close with? (line comment has no closing marker)
Closing := lcmtNone;
If Opening = lcmtType1 then
  Closing := lcmtType1
else If Str[Len] = '}' then
  begin
    If Len >= 2 then
      Closing := lcmtType2;
  end
else If Len >= 4 then
  begin
    If Str[Len - 1] = '*' then
      case Str[Len] of
        '/':  Closing := lcmtType4;
        ')':  Closing := lcmtType3;
      end;
  end;
// remove the markers only when both ends agree
If Opening = Closing then
  case Opening of
    lcmtType1:  Result := Copy(Str,3,Len - 2);
    lcmtType2:  Result := Copy(Str,2,Len - 2);
    lcmtType3,
    lcmtType4:  Result := Copy(Str,3,Len - 4);
  end;
end;
//------------------------------------------------------------------------------
// Converts a quoted string literal to its content. Quotes at the very first
// and very last position are dropped, a doubled quote inside is replaced by
// one quote, and a lone quote inside ends the processing (the rest of Str is
// ignored).
class Function TSVCLexer.UnquoteString(const Str: String): String;
var
  SrcLen: Integer;
  SrcPos: Integer;
  OutLen: Integer;
begin
SrcLen := Length(Str);
// the result can never be longer than the source, truncate at the end
SetLength(Result,SrcLen);
OutLen := 0;
SrcPos := 1;
while SrcPos <= SrcLen do
  begin
    If Str[SrcPos] <> SVC_ASM_LEXER_CHAR_STRINGQUOTE then
      begin
        // ordinary character, copy as is
        Inc(OutLen);
        Result[OutLen] := Str[SrcPos];
      end
    else If (SrcPos > 1) and (SrcPos < SrcLen) then
      begin
        // quote inside the string - must be doubled, otherwise stop
        If Str[SrcPos + 1] <> SVC_ASM_LEXER_CHAR_STRINGQUOTE then
          Break{while SrcPos};
        Inc(OutLen);
        Result[OutLen] := Str[SrcPos];
        // skip the second quote of the pair
        Inc(SrcPos);
      end;
    Inc(SrcPos);
  end;
SetLength(Result,OutLen);
end;
//------------------------------------------------------------------------------
// Creates the lexer in its initial state, with comments excluded from the output.
constructor TSVCLexer.Create;
begin
inherited Create;
Initialize;
// set after Initialize, so this default is in effect once construction finishes
fIncludeComments := False;
end;
//------------------------------------------------------------------------------
// Releases the token storage and destroys the object.
destructor TSVCLexer.Destroy;
begin
fTokens.Arr := nil;
inherited Destroy;
end;
//------------------------------------------------------------------------------
// Brings the lexer to its initial state: the engine is set to the traverse
// stage, all tokens are removed and any remembered (continuing) comment type
// is forgotten. The IncludeComments setting is not touched.
procedure TSVCLexer.Initialize;
begin
fStage := lsTraverse;
Clear;
fCommentType := lcmtNone;
end;
//------------------------------------------------------------------------------
// Splits Line into tokens, replacing tokens of any previously processed line.
//
// When ContinuousCommentType holds a block comment type on entry (a block
// comment was left open, typically by the previous line), the line is
// processed as a continuation of that comment. Comments are stored as tokens
// only when IncludeComments is true.
//
// Returns true when no invalid token was produced, false otherwise. The
// tokens are fully post-processed in both cases, so lttUnaryOp never appears
// in the output even when the result is false.
Function TSVCLexer.Tokenize(const Line: String): Boolean;
var
  i:  Integer;
begin
Clear;
fLine := Line;
fPosition := 1;
fTokenStart := 1;
fTokenLength := 0;
If Length(fLine) > 0 then
  begin
    // continue in an unfinished block comment, if there is any
    If fCommentType in [lcmtType2,lcmtType3,lcmtType4] then
      fStage := lsComment
    else
      fStage := lsTraverse;
    // the handlers may move fPosition in both directions
    while (fPosition >= 1) and (fPosition <= Length(fLine)) do
      begin
        case fStage of
          lsTraverse:   Process_Traverse;
          lsIdentifier: Process_Identifier;
          lsNumber:     Process_Number;
          lsComment:    Process_Comment;
          lsString:     Process_String;
        end;
        Inc(fPosition);
      end;
    // store the token that was still open when the line ended
    case fStage of
      lsIdentifier:
        AddToken(Trim(Copy(fLine,fTokenStart,fTokenLength)),fTokenStart,lttIdentifier);
      lsNumber:
        AddToken(Trim(Copy(fLine,fTokenStart,fTokenLength)),fTokenStart,lttNumber);
      lsComment:
        If fIncludeComments then
          AddToken(Copy(fLine,fTokenStart,fTokenLength),fTokenStart,lttComment);
      lsString:
        AddToken(Copy(fLine,fTokenStart,fTokenLength),fTokenStart,lttString);
    end;
  end;
// Post-processing: check whether there are invalid tokens, change unary
// operators that were not combined with numbers to general tokens, and
// change one-character identifiers "@" and "_" to general tokens.
// The loop deliberately does not stop at the first invalid token, so that
// the tokens following it are normalized too.
Result := True;
For i := 0 to Pred(fTokens.Count) do
  case fTokens.Arr[i].TokenType of
    lttUnaryOp:     fTokens.Arr[i].TokenType := lttGeneral;
    lttIdentifier:  If Length(fTokens.Arr[i].Str) = 1 then
                      If SVC_CharInSet(fTokens.Arr[i].Str[1],SVC_ASM_LEXER_CHARS_INVAL_1_IDENT) then
                        fTokens.Arr[i].TokenType := lttGeneral;
    lttInvalid:     Result := False;
  end;
end;
//------------------------------------------------------------------------------
// Removes all tokens. Only the count is reset; the storage array keeps its
// length and is reused by subsequent AddToken calls.
procedure TSVCLexer.Clear;
begin
fTokens.Count := 0;
end;
end.