Skip to content

Commit

Permalink
Merge pull request #97 from skx/lexer-fix
Browse files Browse the repository at this point in the history
Increase test-coverage of lexer
  • Loading branch information
skx authored Nov 21, 2023
2 parents 77f5afc + 9d572c7 commit e549712
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 28 deletions.
67 changes: 41 additions & 26 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package lexer

import (
"errors"
"fmt"
"strings"
"unicode"
Expand Down Expand Up @@ -237,11 +238,23 @@ func (l *Lexer) NextToken() token.Token {
}
}
case rune('"'):
tok.Type = token.STRING
tok.Literal = l.readString()
str, err := l.readString('"')
if err == nil {
tok.Literal = str
tok.Type = token.STRING
} else {
tok.Literal = err.Error()
tok.Type = token.ILLEGAL
}
case rune('`'):
tok.Type = token.BACKTICK
tok.Literal = l.readBacktick()
str, err := l.readString('`')
if err == nil {
tok.Literal = str
tok.Type = token.BACKTICK
} else {
tok.Literal = err.Error()
tok.Type = token.ILLEGAL
}
case rune('['):
tok = newToken(token.LBRACKET, l.ch)
case rune(']'):
Expand Down Expand Up @@ -284,16 +297,16 @@ func newToken(tokenType token.Type, ch rune) token.Token {
//
// So with input like this:
//
// a.blah();
// a.blah();
//
// Our identifier should be "a" (then we have a period, then a second
// identifier "blah", followed by opening & closing parenthesis).
//
// However we also have to cover the case of:
//
// string.toupper( "blah" );
// os.getenv( "PATH" );
// ..
// string.toupper( "blah" );
// os.getenv( "PATH" );
// ..
//
// So we have a horrid implementation..
func (l *Lexer) readIdentifier() string {
Expand Down Expand Up @@ -488,22 +501,36 @@ func (l *Lexer) readDecimal() token.Token {
return token.Token{Type: token.INT, Literal: integer}
}

// read string
func (l *Lexer) readString() string {
// read a string, delimited by the given character.
func (l *Lexer) readString(delim rune) (string, error) {
out := ""

for {
l.readChar()
if l.ch == '"' {

if l.ch == rune(0) {
return "", fmt.Errorf("unterminated string")
}
if l.ch == delim {
break
}

//
// Handle \n, \r, \t, \", etc.
//
if l.ch == '\\' {

// Line ending with "\" + newline
if l.peekChar() == '\n' {
// consume the newline.
l.readChar()
continue
}

l.readChar()

if l.ch == rune(0) {
return "", errors.New("unterminated string")
}
if l.ch == rune('n') {
l.ch = '\n'
}
Expand All @@ -521,9 +548,10 @@ func (l *Lexer) readString() string {
}
}
out = out + string(l.ch)

}

return out
return out, nil
}

// read a regexp, including flags.
Expand Down Expand Up @@ -576,19 +604,6 @@ func (l *Lexer) readRegexp() (string, error) {
return out, nil
}

// readBacktick reads the remainder of a backtick-quoted string.
//
// The text is taken verbatim from the character one past the lexer's
// position at the time of the call (presumably the opening backtick) up to,
// but not including, the closing backtick. No escape-processing is done.
//
// If the input ends before a closing backtick is seen, the text read so far
// is returned instead of looping forever.
func (l *Lexer) readBacktick() string {
	start := l.position + 1

	for {
		l.readChar()

		// rune(0) is how the rest of this lexer recognises the end of the
		// input (see the "unterminated string" checks in readString).
		// Without this test an unclosed backtick would never leave the loop.
		if l.ch == '`' || l.ch == rune(0) {
			break
		}
	}

	// Clamp the bounds so the slice expression stays valid even when the
	// loop stopped because the input ran out.
	end := l.position
	if end > len(l.characters) {
		end = len(l.characters)
	}
	if start > end {
		start = end
	}

	return string(l.characters[start:end])
}

// peek character
func (l *Lexer) peekChar() rune {
if l.readPosition >= len(l.characters) {
Expand Down
79 changes: 77 additions & 2 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,17 @@ func TestString(t *testing.T) {
t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
}
}

}

func TestSimpleComment(t *testing.T) {
input := `=+// This is a comment
// This is still a comment
# I like comments
let a = 1; # This is a comment too.
// This is a final
// comment on two-lines`
// comment on two-lines
/*
`

tests := []struct {
expectedType token.Type
Expand Down Expand Up @@ -673,6 +675,11 @@ a = 3/4;
t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal)
}
}

x := l.GetLine()
if x != 2 {
t.Fatalf("unexpected line. %d", x)
}
}

// TestDotDot is designed to ensure we get a ".." not an integer value.
Expand Down Expand Up @@ -702,3 +709,71 @@ func TestDotDot(t *testing.T) {
}
}
}

// TestIllegalString checks that a double-quoted string which is never
// closed is reported as an ILLEGAL token.
//
// Both inputs leave a string open with a trailing backslash: in the first
// the backslash is followed by a newline and further text, in the second it
// is the very last character of the input. Each must yield the same token
// stream, ending in ILLEGAL("unterminated string") and then EOF.
func TestIllegalString(t *testing.T) {
	type expectation struct {
		expectedType    token.Type
		expectedLiteral string
	}

	// The expected tokens are identical for every malformed input.
	want := []expectation{
		{token.IF, "if"},
		{token.LPAREN, "("},
		{token.IDENT, "f"},
		{token.CONTAINS, "~="},
		{token.ILLEGAL, "unterminated string"},
		{token.EOF, ""},
	}

	// Malformed inputs.
	inputs := []string{
		`if ( f ~= "steve\
)`,
		`if ( f ~= "steve\`,
	}

	for n := 0; n < len(inputs); n++ {
		lex := New(inputs[n])

		for i := range want {
			got := lex.NextToken()

			if got.Type != want[i].expectedType {
				t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, want[i].expectedType, got.Type)
			}
			if got.Literal != want[i].expectedLiteral {
				t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, want[i].expectedLiteral, got.Literal)
			}
		}
	}
}

// TestIllegalBacktick checks that a backtick-quoted string which is never
// closed is reported as an ILLEGAL token, and that the lexer still reports
// line zero afterwards, as the input contains no newline.
func TestIllegalBacktick(t *testing.T) {
	type expectation struct {
		expectedType    token.Type
		expectedLiteral string
	}

	want := []expectation{
		{token.IF, "if"},
		{token.LPAREN, "("},
		{token.IDENT, "f"},
		{token.CONTAINS, "~="},
		{token.ILLEGAL, "unterminated string"},
		{token.EOF, ""},
	}

	lex := New("if ( f ~= `steve )")

	for i := range want {
		got := lex.NextToken()

		if got.Type != want[i].expectedType {
			t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, want[i].expectedType, got.Type)
		}
		if got.Literal != want[i].expectedLiteral {
			t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, want[i].expectedLiteral, got.Literal)
		}
	}

	// Everything was on a single line, so the line counter must not move.
	if line := lex.GetLine(); line != 0 {
		t.Fatalf("unexpected line. %d", line)
	}
}

0 comments on commit e549712

Please sign in to comment.