Skip to content

Commit

Permalink
Change LexBuffer type from char to uint16
Browse files Browse the repository at this point in the history
  • Loading branch information
ncave committed Oct 27, 2020
1 parent e292e2c commit bd3f38e
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 26 deletions.
10 changes: 7 additions & 3 deletions src/fsharp/UnicodeLexing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@ module internal FSharp.Compiler.UnicodeLexing
open System.IO
open Internal.Utilities.Text.Lexing

type Lexbuf = LexBuffer<char>
type Lexbuf = LexBuffer<LexBufferChar>

/// Creates a lex buffer over the characters of the string `s`.
///
/// Delegates to `LexBuffer.FromString`, which already picks the buffer element
/// representation for the active build (a uint16 array when FABLE_COMPILER is
/// defined, `s.ToCharArray()` otherwise), so the conditional compilation does
/// not need to be repeated here.
let StringAsLexbuf (supportsFeature, s: string) =
    LexBuffer<LexBufferChar>.FromString (supportsFeature, s)

let FunctionAsLexbuf (supportsFeature, bufferFiller) =
LexBuffer<char>.FromFunction(supportsFeature, bufferFiller)
LexBuffer<LexBufferChar>.FromFunction(supportsFeature, bufferFiller)

let SourceTextAsLexbuf (supportsFeature, sourceText) =
LexBuffer<char>.FromSourceText(supportsFeature, sourceText)
LexBuffer<LexBufferChar>.FromSourceText(supportsFeature, sourceText)

let StreamReaderAsLexbuf (supportsFeature, reader: StreamReader) =
let mutable isFinished = false
Expand Down
4 changes: 2 additions & 2 deletions src/fsharp/UnicodeLexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ open FSharp.Compiler.Features
open FSharp.Compiler.Text
open Internal.Utilities.Text.Lexing

type Lexbuf = LexBuffer<char>
type Lexbuf = LexBuffer<LexBufferChar>

val internal StringAsLexbuf: (LanguageFeature -> bool) * string -> Lexbuf

val public FunctionAsLexbuf: (LanguageFeature -> bool) * (char [] * int * int -> int) -> Lexbuf
val public FunctionAsLexbuf: (LanguageFeature -> bool) * (LexBufferChar[] * int * int -> int) -> Lexbuf

val public SourceTextAsLexbuf: (LanguageFeature -> bool) * ISourceText -> Lexbuf

Expand Down
2 changes: 1 addition & 1 deletion src/fsharp/absil/illex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ open FSharp.Compiler.AbstractIL.Internal.AsciiParser
open FSharp.Compiler.AbstractIL.Internal.AsciiConstants


let lexeme (lexbuf : LexBuffer<char>) = LexBuffer<char>.LexemeString lexbuf
let lexeme (lexbuf : LexBuffer<_>) = LexBuffer<_>.LexemeString lexbuf
// Forwards to `lexbuf.LexemeChar n`.
// NOTE(review): this helper is still annotated with LexBuffer<char>, whereas `lexeme`
// above accepts LexBuffer<_>; confirm it is never applied to a uint16-backed buffer.
let lexemeChar (lexbuf : LexBuffer<char>) n = lexbuf.LexemeChar n

let unexpectedChar _lexbuf =
Expand Down
3 changes: 3 additions & 0 deletions src/fsharp/absil/illib.fs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ module String =
else None

let getLines (str: string) =
#if FABLE_COMPILER
System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n");
#else
use reader = new StringReader(str)
[|
let mutable line = reader.ReadLine()
Expand Down
6 changes: 5 additions & 1 deletion src/fsharp/fsi/fsi.fs
Original file line number Diff line number Diff line change
Expand Up @@ -1988,7 +1988,7 @@ type internal FsiStdinLexerProvider

let LexbufFromLineReader (fsiStdinSyphon: FsiStdinSyphon) readF =
UnicodeLexing.FunctionAsLexbuf
(isFeatureSupported, (fun (buf: char[], start, len) ->
(isFeatureSupported, (fun (buf, start, len) ->
//fprintf fsiConsoleOutput.Out "Calling ReadLine\n"
let inputOption = try Some(readF()) with :? EndOfStreamException -> None
inputOption |> Option.iter (fun t -> fsiStdinSyphon.Add (t + "\n"))
Expand All @@ -2002,7 +2002,11 @@ type internal FsiStdinLexerProvider
if ninput > len then fprintf fsiConsoleOutput.Error "%s" (FSIstrings.SR.fsiLineTooLong())
let ntrimmed = min len ninput
for i = 0 to ntrimmed-1 do
#if FABLE_COMPILER
buf.[i+start] <- uint16 input.[i]
#else
buf.[i+start] <- input.[i]
#endif
ntrimmed
))

Expand Down
4 changes: 4 additions & 0 deletions src/fsharp/lex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ let shouldStartFile args lexbuf (m:range) err tok =
else tok

let evalIfDefExpression startPos isFeatureSupported args (lookup:string->bool) (lexed:string) =
#if FABLE_COMPILER
let lexbuf = LexBuffer<_>.FromString (isFeatureSupported, lexed)
#else
let lexbuf = LexBuffer<char>.FromChars (isFeatureSupported, lexed.ToCharArray ())
#endif
lexbuf.StartPos <- startPos
lexbuf.EndPos <- startPos
let tokenStream = FSharp.Compiler.PPLexer.tokenstream args
Expand Down
3 changes: 2 additions & 1 deletion src/fsharp/service/ServiceLexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace FSharp.Compiler.SourceCodeServices

open FSharp.Compiler
open Internal.Utilities.Text.Lexing

// Prevents warnings of experimental APIs within the signature file itself.
#nowarn "57"
Expand Down Expand Up @@ -267,7 +268,7 @@ type FSharpSourceTokenizer =
member CreateLineTokenizer: lineText:string -> FSharpLineTokenizer

/// Create a tokenizer for a line of this source file using a buffer filler
member CreateBufferTokenizer : bufferFiller:(char[] * int * int -> int) -> FSharpLineTokenizer
member CreateBufferTokenizer : bufferFiller:(LexBufferChar[] * int * int -> int) -> FSharpLineTokenizer

module internal TestExpose =
val TokenInfo : Parser.token -> (FSharpTokenColorKind * FSharpTokenCharKind * FSharpTokenTriggerClass)
Expand Down
55 changes: 44 additions & 11 deletions src/fsharp/utils/prim-lexing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ type ISourceText =
type StringText(str: string) =

let getLines (str: string) =
#if FABLE_COMPILER
System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n");
#else
use reader = new StringReader(str)
[|
let mutable line = reader.ReadLine()
Expand All @@ -42,6 +45,7 @@ type StringText(str: string) =
// http://stackoverflow.com/questions/19365404/stringreader-omits-trailing-linebreak
yield String.Empty
|]
#endif

let getLines =
// This requires allocating and getting all the lines.
Expand Down Expand Up @@ -174,6 +178,12 @@ namespace Internal.Utilities.Text.Lexing
0,
0)

// Element type used for LexBuffer contents: an abbreviation for uint16 when
// FABLE_COMPILER is defined, and for char in every other build.
#if FABLE_COMPILER
type internal LexBufferChar = uint16
#else
type internal LexBufferChar = char
#endif

type internal LexBufferFiller<'Char> = (LexBuffer<'Char> -> unit)

and [<Sealed>]
Expand Down Expand Up @@ -230,8 +240,14 @@ namespace Internal.Utilities.Text.Lexing
member lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v
member lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v
member lexbuf.RefillBuffer () = filler lexbuf
static member LexemeString(lexbuf:LexBuffer<char>) =
new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength)

/// Builds a string from the characters of the lexeme currently matched in `lexbuf`.
static member LexemeString (lexbuf: LexBuffer<LexBufferChar>) =
#if FABLE_COMPILER
    // The buffer holds uint16 code units here, so each one is converted back to a char first.
    let lexemeChars = Array.init lexbuf.LexemeLength (fun i -> char (lexbuf.LexemeChar i))
    System.String(lexemeChars)
#else
    // Construct the string directly from the buffer slice, without an intermediate array.
    System.String(lexbuf.Buffer, lexbuf.BufferScanStart, lexbuf.LexemeLength)
#endif

member lexbuf.IsPastEndOfStream
with get() = eof
Expand Down Expand Up @@ -271,9 +287,13 @@ namespace Internal.Utilities.Text.Lexing
LexBuffer<'Char>.FromArrayNoCopy(supportsFeature, buffer)

// Important: This method takes ownership of the array
static member FromChars (supportsFeature:LanguageFeature -> bool, arr:char[]) = LexBuffer.FromArrayNoCopy (supportsFeature, arr)
static member FromChars (supportsFeature:LanguageFeature -> bool, arr:LexBufferChar[]) = LexBuffer.FromArrayNoCopy (supportsFeature, arr)

static member FromSourceText (supportsFeature: LanguageFeature -> bool, sourceText: ISourceText) =
#if FABLE_COMPILER
let arr = Array.init sourceText.Length (fun i -> uint16 (sourceText.Item i))
LexBuffer.FromArrayNoCopy (supportsFeature, arr)
#else
let mutable currentSourceIndex = 0
LexBuffer<char>.FromFunction(supportsFeature, fun (chars, start, length) ->
let lengthToCopy =
Expand All @@ -288,16 +308,25 @@ namespace Internal.Utilities.Text.Lexing
currentSourceIndex <- currentSourceIndex + lengthToCopy
lengthToCopy
)
#endif

/// Creates a lex buffer whose contents are the characters of `s`.
static member FromString (supportsFeature: LanguageFeature -> bool, s: string) =
    // Only the construction of the backing array differs between builds;
    // the resulting array is handed to FromArrayNoCopy in both cases.
    let contents: LexBufferChar[] =
#if FABLE_COMPILER
        Array.init s.Length (fun index -> uint16 s.[index])
#else
        s.ToCharArray()
#endif
    LexBuffer.FromArrayNoCopy (supportsFeature, contents)

module GenericImplFragments =
let startInterpret(lexBuffer:LexBuffer<char>) =
let startInterpret(lexBuffer:LexBuffer<LexBufferChar>) =
lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength;
lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength;
lexBuffer.BufferScanLength <- 0;
lexBuffer.LexemeLength <- 0;
lexBuffer.BufferAcceptAction <- -1;

let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer<char>,scanUntilSentinel,endOfScan,state,eofPos) =
let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer<LexBufferChar>,scanUntilSentinel,endOfScan,state,eofPos) =
// end of file occurs if we couldn't extend the buffer
if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
let snew = int trans.[state].[eofPos] // == EOF
Expand All @@ -311,9 +340,9 @@ namespace Internal.Utilities.Text.Lexing
else
scanUntilSentinel lexBuffer state

let onAccept (lexBuffer:LexBuffer<char>,a) =
lexBuffer.LexemeLength <- lexBuffer.BufferScanLength;
lexBuffer.BufferAcceptAction <- a;
let onAccept (lexBuffer:LexBuffer<LexBufferChar>, a) =
lexBuffer.LexemeLength <- lexBuffer.BufferScanLength
lexBuffer.BufferAcceptAction <- a

open GenericImplFragments

Expand All @@ -338,12 +367,16 @@ namespace Internal.Utilities.Text.Lexing
// ways
let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2
let unicodeCategory =
#if FABLE_COMPILER
System.Char.GetUnicodeCategory(char inp)
#else
System.Char.GetUnicodeCategory(inp)
#endif
//System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]);
int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory]
else
// This is the specific unicode character
let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2])
let c = trans.[state].[baseForSpecificUnicodeChars+i*2]
//System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]);
// OK, have we found the entry for a specific unicode character?
if c = inp
Expand All @@ -365,7 +398,7 @@ namespace Internal.Utilities.Text.Lexing
afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos)
else
// read a character - end the scan if there are no further transitions
let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos]
let inp = uint16 lexBuffer.Buffer.[lexBuffer.BufferScanPos]

// Find the new state
let snew = lookupUnicodeCharacters state inp
Expand All @@ -383,7 +416,7 @@ namespace Internal.Utilities.Text.Lexing
// 30 entries, one for each UnicodeCategory
// 1 entry for EOF

member tables.Interpret(initialState,lexBuffer : LexBuffer<char>) =
member tables.Interpret(initialState, lexBuffer: LexBuffer<LexBufferChar>) =
startInterpret(lexBuffer)
scanUntilSentinel lexBuffer initialState

Expand Down
17 changes: 13 additions & 4 deletions src/fsharp/utils/prim-lexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ type internal Position =

static member FirstLine : fileIdx:int -> Position

// Element type used for LexBuffer contents: an abbreviation for uint16 when
// FABLE_COMPILER is defined, and for char in every other build.
// Keep this in step with the matching abbreviation in the implementation file.
#if FABLE_COMPILER
type internal LexBufferChar = uint16
#else
type internal LexBufferChar = char
#endif

[<Sealed>]
/// Input buffers consumed by lexers generated by <c>fslex.exe</c>.
/// The type must be generic to match the code generated by FsLex and FsYacc (if you would like to
Expand All @@ -106,7 +112,7 @@ type internal LexBuffer<'Char> =
member LexemeContains: 'Char -> bool

/// Fast helper to turn the matched characters into a string, avoiding an intermediate array.
static member LexemeString : LexBuffer<char> -> string
static member LexemeString : LexBuffer<LexBufferChar> -> string

/// Dynamically typed, non-lexically scoped parameter table.
member BufferLocalStore : IDictionary<string,obj>
Expand All @@ -119,13 +125,16 @@ type internal LexBuffer<'Char> =

/// Create a lex buffer suitable for Unicode lexing that reads characters from the given array.
/// Important: this takes ownership of the array.
static member FromChars: (LanguageFeature -> bool) * char[] -> LexBuffer<char>
static member FromChars: (LanguageFeature -> bool) * LexBufferChar[] -> LexBuffer<LexBufferChar>

/// Create a lex buffer suitable for Unicode lexing that reads characters from the given string.
static member FromString: (LanguageFeature -> bool) * string -> LexBuffer<LexBufferChar>

/// Create a lex buffer that reads character or byte inputs by using the given function.
static member FromFunction: (LanguageFeature -> bool) * ('Char[] * int * int -> int) -> LexBuffer<'Char>

/// Create a lex buffer backed by source text.
static member FromSourceText : (LanguageFeature -> bool) * ISourceText -> LexBuffer<char>
static member FromSourceText : (LanguageFeature -> bool) * ISourceText -> LexBuffer<LexBufferChar>

/// The type of tables for a Unicode lexer generated by <c>fslex.exe</c>.
[<Sealed>]
Expand All @@ -135,5 +144,5 @@ type internal UnicodeTables =
static member Create : uint16[][] * uint16[] -> UnicodeTables

/// Interpret tables for a unicode lexer generated by <c>fslex.exe</c>.
member Interpret: initialState:int * LexBuffer<char> -> int
member Interpret: initialState:int * LexBuffer<LexBufferChar> -> int

2 changes: 1 addition & 1 deletion src/fsharp/utils/prim-parsing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ exception RecoverableParseError
exception Accept of obj

[<Sealed>]
type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer<char>) =
type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer<LexBufferChar>) =
member p.LexBuffer = lexbuf

member p.InputRange index = ruleStartPoss.[index-1], ruleEndPoss.[index-1]
Expand Down
4 changes: 2 additions & 2 deletions src/fsharp/utils/prim-parsing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type internal IParseState =
member RaiseError<'b> : unit -> 'b

/// Return the LexBuffer for this parser instance.
member LexBuffer : LexBuffer<char>
member LexBuffer : LexBuffer<LexBufferChar>


[<Sealed>]
Expand Down Expand Up @@ -118,7 +118,7 @@ type internal Tables<'tok> =

/// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state.
/// Returns an object indicating the final synthesized value for the parse.
member Interpret : lexer:(LexBuffer<char> -> 'tok) * lexbuf:LexBuffer<char> * initialState:int -> obj
member Interpret : lexer:(LexBuffer<LexBufferChar> -> 'tok) * lexbuf:LexBuffer<LexBufferChar> * initialState:int -> obj

/// Indicates an accept action has occurred.
exception internal Accept of obj
Expand Down

0 comments on commit bd3f38e

Please sign in to comment.