forked from wmorgan/whistlepig
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizer.lex
51 lines (41 loc) · 914 Bytes
/
tokenizer.lex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
%top{
/*
 * Token codes produced by the scanner. The rules in this file return
 * TOK_NUMBER and TOK_WORD; TOK_DONE has the value 0, which is what a
 * flex-generated scanner returns at end of input by default.
 */
#define TOK_DONE 0
#define TOK_NUMBER 1
#define TOK_WORD 2

/* pos_t is expected to be declared by this project header. */
#include "segment.h"

/*
 * Per-scanner bookkeeping, reached through yyextra inside the actions.
 * When a token is returned, start holds the offset of its first character
 * and end the offset just past its last one. Offsets advance by yyleng
 * for tokens and by one for every skipped character, counting from
 * whatever value the caller stored in end before scanning.
 */
struct lexinfo {
  pos_t start;
  pos_t end;
};
typedef struct lexinfo lexinfo;
}
/* Scanner configuration: 8-bit input, reentrant API, the "fast" table
 * representation, and no yywrap() call at end of input. */
%option 8bit reentrant fast noyywrap
/* File names for the generated scanner source and its companion header. */
%option outfile="tokenizer.lex.c" header-file="tokenizer.lex.h"
/* Type of the user data that the actions access through yyextra. */
%option extra-type="struct lexinfo*"
/* Character classes used by the word patterns in the rules section.
 * First and last characters of a word exclude blanks, punctuation,
 * angle brackets, CR and LF. Inner characters exclude only blanks,
 * angle brackets, CR and LF, so other punctuation may appear inside
 * a word. */
FIRSTWORDCHAR [^[:blank:][:punct:]<>\r\n]
INNERWORDCHAR [^[:blank:]<>\r\n]
LASTWORDCHAR [^[:blank:][:punct:]<>\r\n]
%%
\-?[[:digit:]]+(\.([[:digit:]]+)?)? {
  /* Number: an optional leading minus, one or more digits, then
   * optionally a dot followed by zero or more digits, so "12." is
   * matched in full. The token span starts where the previous position
   * ended and is yyleng characters long. */
  struct lexinfo *span = yyextra;
  span->start = span->end;
  span->end = span->start + (pos_t)yyleng;
  return TOK_NUMBER;
}
{FIRSTWORDCHAR}{INNERWORDCHAR}*{LASTWORDCHAR} |
{FIRSTWORDCHAR}{LASTWORDCHAR}? {
  /* Word: either a first character, any run of inner characters and a
   * last character, or a short word of one or two characters. Both
   * patterns share this action. The token span starts where the
   * previous position ended and is yyleng characters long. */
  struct lexinfo *span = yyextra;
  span->start = span->end;
  span->end = span->start + (pos_t)yyleng;
  return TOK_WORD;
}
[\r\n] |
. {
  /* Anything that does not begin a number or a word, line breaks
   * included, is consumed without returning a token. Both offsets move
   * forward by one so that the next token's span stays aligned with
   * the input. */
  yyextra->start += 1;
  yyextra->end += 1;
}
%%