Skip to content

Commit

Permalink
Change LexBuffer type from char to uint16
Browse files Browse the repository at this point in the history
  • Loading branch information
ncave committed Sep 17, 2019
1 parent 2e41de6 commit 2fda58f
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/absil/illex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ open FSharp.Compiler.AbstractIL.Internal.AsciiParser
open FSharp.Compiler.AbstractIL.Internal.AsciiConstants


let lexeme (lexbuf : LexBuffer<char>) = new System.String(lexbuf.Lexeme)
let lexeme (lexbuf : LexBuffer<_>) = LexBuffer<_>.LexemeString (lexbuf)

let unexpectedChar _lexbuf =
raise Parsing.RecoverableParseError ;;
Expand Down
3 changes: 3 additions & 0 deletions src/absil/illib.fs
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,9 @@ module String =
else None

let getLines (str: string) =
#if FABLE_COMPILER
System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n");
#else
use reader = new StringReader(str)
[|
let line = ref (reader.ReadLine())
Expand Down
14 changes: 7 additions & 7 deletions src/fsharp/UnicodeLexing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ open System.IO

open Internal.Utilities.Text.Lexing

type Lexbuf = LexBuffer<char>
type Lexbuf = LexBuffer<LexBufferChar>

let StringAsLexbuf (s:string) : Lexbuf =
LexBuffer<_>.FromChars (s.ToCharArray())
// Builds a Lexbuf from a string; this is a direct alias for Lexbuf.FromString.
let StringAsLexbuf =
Lexbuf.FromString

let FunctionAsLexbuf (bufferFiller: char[] * int * int -> int) : Lexbuf =
LexBuffer<_>.FromFunction bufferFiller
// Wraps a buffer-filling function as a Lexbuf by handing it to LexBuffer.FromFunction.
// NOTE(review): the filler presumably receives (buffer, start, length) and returns the
// number of elements it wrote — confirm against FromFunction.
let FunctionAsLexbuf (bufferFiller: LexBufferChar[] * int * int -> int) : Lexbuf =
LexBuffer<LexBufferChar>.FromFunction bufferFiller

let SourceTextAsLexbuf sourceText =
LexBuffer<char>.FromSourceText(sourceText)
LexBuffer<char>.FromSourceText sourceText

// The choice of 60 retries times 50 ms is not arbitrary. The NTFS FILETIME structure
// uses 2 second resolution for LastWriteTime. We retry long enough to surpass this threshold
Expand Down Expand Up @@ -68,5 +68,5 @@ let UnicodeFileAsLexbuf (filename,codePage : int option, retryLocked:bool) : Le
else
reraise()
let source = getSource 0
let lexbuf = LexBuffer<_>.FromChars(source.ToCharArray())
let lexbuf = LexBuffer<_>.FromString (source)
lexbuf
4 changes: 2 additions & 2 deletions src/fsharp/UnicodeLexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ open FSharp.Compiler.Text
open Microsoft.FSharp.Text
open Internal.Utilities.Text.Lexing

type Lexbuf = LexBuffer<char>
type Lexbuf = LexBuffer<LexBufferChar>
val internal StringAsLexbuf : string -> Lexbuf
val public FunctionAsLexbuf : (char [] * int * int -> int) -> Lexbuf
val public FunctionAsLexbuf : (LexBufferChar[] * int * int -> int) -> Lexbuf
val public UnicodeFileAsLexbuf :string * int option * (*retryLocked*) bool -> Lexbuf
val public SourceTextAsLexbuf : ISourceText -> Lexbuf
6 changes: 5 additions & 1 deletion src/fsharp/fsi/fsi.fs
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,7 @@ type internal FsiStdinLexerProvider

let LexbufFromLineReader (fsiStdinSyphon: FsiStdinSyphon) readf =
UnicodeLexing.FunctionAsLexbuf
(fun (buf: char[], start, len) ->
(fun (buf, start, len) ->
//fprintf fsiConsoleOutput.Out "Calling ReadLine\n"
let inputOption = try Some(readf()) with :? EndOfStreamException -> None
inputOption |> Option.iter (fun t -> fsiStdinSyphon.Add (t + "\n"))
Expand All @@ -1643,7 +1643,11 @@ type internal FsiStdinLexerProvider
if ninput > len then fprintf fsiConsoleOutput.Error "%s" (FSIstrings.SR.fsiLineTooLong())
let ntrimmed = min len ninput
for i = 0 to ntrimmed-1 do
#if FABLE_COMPILER
buf.[i+start] <- uint16 input.[i]
#else
buf.[i+start] <- input.[i]
#endif
ntrimmed
)

Expand Down
2 changes: 1 addition & 1 deletion src/fsharp/lex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ let shouldStartFile args lexbuf (m:range) err tok =
else tok

let evalIfDefExpression startPos args (lookup:string->bool) (lexed:string) =
let lexbuf = LexBuffer<char>.FromChars (lexed.ToCharArray ())
let lexbuf = LexBuffer<_>.FromString (lexed)
lexbuf.StartPos <- startPos
lexbuf.EndPos <- startPos
let tokenStream = FSharp.Compiler.PPLexer.tokenstream args
Expand Down
3 changes: 2 additions & 1 deletion src/fsharp/service/ServiceLexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace FSharp.Compiler.SourceCodeServices

open FSharp.Compiler
open Internal.Utilities.Text.Lexing

type Position = int * int
type Range = Position * Position
Expand Down Expand Up @@ -237,7 +238,7 @@ type FSharpLineTokenizer =
type FSharpSourceTokenizer =
new : conditionalDefines:string list * fileName:string option -> FSharpSourceTokenizer
member CreateLineTokenizer : lineText:string -> FSharpLineTokenizer
member CreateBufferTokenizer : bufferFiller:(char[] * int * int -> int) -> FSharpLineTokenizer
member CreateBufferTokenizer : bufferFiller:(LexBufferChar[] * int * int -> int) -> FSharpLineTokenizer

module internal TestExpose =
val TokenInfo : Parser.token -> (FSharpTokenColorKind * FSharpTokenCharKind * FSharpTokenTriggerClass)
Expand Down
56 changes: 43 additions & 13 deletions src/utils/prim-lexing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ type ISourceText =
type StringText(str: string) =

let getLines (str: string) =
#if FABLE_COMPILER
System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n");
#else
use reader = new StringReader(str)
[|
let mutable line = reader.ReadLine()
Expand All @@ -42,6 +45,7 @@ type StringText(str: string) =
// http://stackoverflow.com/questions/19365404/stringreader-omits-trailing-linebreak
yield String.Empty
|]
#endif

let getLines =
// This requires allocating and getting all the lines.
Expand Down Expand Up @@ -168,6 +172,12 @@ namespace Internal.Utilities.Text.Lexing
0,
0)

// Element type of the lexer's buffers: uint16 when FABLE_COMPILER is defined, char otherwise.
#if FABLE_COMPILER
type internal LexBufferChar = uint16
#else
type internal LexBufferChar = char
#endif

type internal LexBufferFiller<'Char> = (LexBuffer<'Char> -> unit)

and [<Sealed>]
Expand Down Expand Up @@ -217,6 +227,7 @@ namespace Internal.Utilities.Text.Lexing
and set b = endPos <- b

// The elements of the current lexeme, copied out of the buffer into a new array.
member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength
// The single buffer element at offset n from bufferScanStart (no array is allocated).
member lexbuf.LexemeChar(n) = buffer.[n+bufferScanStart]

member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>)
member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v
Expand All @@ -226,8 +237,14 @@ namespace Internal.Utilities.Text.Lexing
member lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v
member lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v
member lexbuf.RefillBuffer () = filler lexbuf
static member LexemeString(lexbuf:LexBuffer<char>) =
new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength)

// Builds the text of the current lexeme as a string.
static member LexemeString (lexbuf: LexBuffer<LexBufferChar>) =
#if FABLE_COMPILER
// LexBufferChar is uint16 in this configuration, so each element is converted to char
// before the string is constructed.
let chars = Array.init lexbuf.LexemeLength (lexbuf.LexemeChar >> char)
new System.String(chars)
#else
// Construct the string straight from the buffer slice that starts at BufferScanStart
// and spans LexemeLength elements.
new System.String(lexbuf.Buffer, lexbuf.BufferScanStart, lexbuf.LexemeLength)
#endif

member lexbuf.IsPastEndOfStream
with get() = eof
Expand Down Expand Up @@ -266,9 +283,13 @@ namespace Internal.Utilities.Text.Lexing
LexBuffer<'Char>.FromArrayNoCopy buffer

// Important: This method takes ownership of the array
static member FromChars (arr:char[]) = LexBuffer.FromArrayNoCopy arr
static member FromChars (arr:LexBufferChar[]) = LexBuffer.FromArrayNoCopy arr

static member FromSourceText (sourceText: ISourceText) =
#if FABLE_COMPILER
let arr = Array.init sourceText.Length (fun i -> uint16 (sourceText.Item i))
LexBuffer.FromArrayNoCopy arr
#else
let mutable currentSourceIndex = 0
LexBuffer<char>.FromFunction(fun (chars, start, length) ->
let lengthToCopy =
Expand All @@ -283,16 +304,25 @@ namespace Internal.Utilities.Text.Lexing
currentSourceIndex <- currentSourceIndex + lengthToCopy
lengthToCopy
)
#endif

// Creates a lex buffer over the characters of the string s.
static member FromString (s: string) =
#if FABLE_COMPILER
// Convert each character to uint16 so the array matches the buffer element type
// used in this configuration.
let arr = Array.init s.Length (fun i -> uint16 s.[i])
LexBuffer.FromArrayNoCopy arr
#else
// The freshly created char array is passed to FromArrayNoCopy as-is.
LexBuffer.FromArrayNoCopy (s.ToCharArray())
#endif

module GenericImplFragments =
let startInterpret(lexBuffer:LexBuffer<char>) =
// Prepares the buffer for scanning the next token: moves the scan start past the
// previous lexeme, reduces the maximum scan length by the same amount, and resets the
// scan length and lexeme length to 0. The accept action is set to -1
// (presumably meaning "no accept action recorded yet" — confirm with the scanner loop).
let startInterpret(lexBuffer:LexBuffer<LexBufferChar>) =
lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength;
lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength;
lexBuffer.BufferScanLength <- 0;
lexBuffer.LexemeLength <- 0;
lexBuffer.BufferAcceptAction <- -1;

let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer<char>,scanUntilSentinel,endOfScan,state,eofPos) =
let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer<LexBufferChar>,scanUntilSentinel,endOfScan,state,eofPos) =
// end of file occurs if we couldn't extend the buffer
if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
let snew = int trans.[state].[eofPos] // == EOF
Expand All @@ -306,9 +336,9 @@ namespace Internal.Utilities.Text.Lexing
else
scanUntilSentinel lexBuffer state

let onAccept (lexBuffer:LexBuffer<char>,a) =
lexBuffer.LexemeLength <- lexBuffer.BufferScanLength;
lexBuffer.BufferAcceptAction <- a;
// Records an accept: the lexeme length is set to the current scan length and
// a is stored as the buffer's accept action.
let onAccept (lexBuffer:LexBuffer<LexBufferChar>, a) =
lexBuffer.LexemeLength <- lexBuffer.BufferScanLength
lexBuffer.BufferAcceptAction <- a

open GenericImplFragments

Expand All @@ -335,15 +365,15 @@ namespace Internal.Utilities.Text.Lexing
let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2
let unicodeCategory =
#if FX_RESHAPED_GLOBALIZATION
System.Globalization.CharUnicodeInfo.GetUnicodeCategory(inp)
System.Globalization.CharUnicodeInfo.GetUnicodeCategory(char inp)
#else
System.Char.GetUnicodeCategory(inp)
System.Char.GetUnicodeCategory(char inp)
#endif
//System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]);
int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory]
else
// This is the specific unicode character
let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2])
let c = trans.[state].[baseForSpecificUnicodeChars+i*2]
//System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]);
// OK, have we found the entry for a specific unicode character?
if c = inp
Expand All @@ -366,7 +396,7 @@ namespace Internal.Utilities.Text.Lexing
afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos)
else
// read a character - end the scan if there are no further transitions
let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos]
let inp = uint16 lexBuffer.Buffer.[lexBuffer.BufferScanPos]

// Find the new state
let snew = lookupUnicodeCharacters state inp
Expand All @@ -384,7 +414,7 @@ namespace Internal.Utilities.Text.Lexing
// 30 entries, one for each UnicodeCategory
// 1 entry for EOF

member tables.Interpret(initialState,lexBuffer : LexBuffer<char>) =
member tables.Interpret(initialState, lexBuffer: LexBuffer<LexBufferChar>) =
startInterpret(lexBuffer)
scanUntilSentinel lexBuffer initialState

Expand Down
18 changes: 14 additions & 4 deletions src/utils/prim-lexing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ type internal Position =
static member FirstLine : fileIdx:int -> Position

// Element type of lexer buffers: uint16 when FABLE_COMPILER is defined, char otherwise.
#if FABLE_COMPILER
type internal LexBufferChar = uint16
#else
type internal LexBufferChar = char
#endif

[<Sealed>]
/// Input buffers consumed by lexers generated by <c>fslex.exe</c>.
/// The type must be generic to match the code generated by FsLex and FsYacc (if you would like to
Expand All @@ -99,7 +106,7 @@ type internal LexBuffer<'Char> =
member Lexeme: 'Char []

/// Fast helper to turn the matched characters into a string, avoiding an intermediate array.
static member LexemeString : LexBuffer<char> -> string
static member LexemeString : LexBuffer<LexBufferChar> -> string

/// Dynamically typed, non-lexically scoped parameter table.
member BufferLocalStore : IDictionary<string,obj>
Expand All @@ -109,12 +116,15 @@ type internal LexBuffer<'Char> =

/// Create a lex buffer suitable for Unicode lexing that reads characters from the given array.
/// Important: does take ownership of the array.
static member FromChars: char[] -> LexBuffer<char>
static member FromChars: LexBufferChar[] -> LexBuffer<LexBufferChar>

/// Create a lex buffer suitable for Unicode lexing that reads characters from the given string.
static member FromString: string -> LexBuffer<LexBufferChar>

/// Create a lex buffer that reads character or byte inputs by using the given function.
static member FromFunction: ('Char[] * int * int -> int) -> LexBuffer<'Char>
/// Create a lex buffer backed by source text.
static member FromSourceText : ISourceText -> LexBuffer<char>
static member FromSourceText : ISourceText -> LexBuffer<LexBufferChar>

/// The type of tables for a Unicode lexer generated by <c>fslex.exe</c>.
[<Sealed>]
Expand All @@ -124,5 +134,5 @@ type internal UnicodeTables =
static member Create : uint16[][] * uint16[] -> UnicodeTables

/// Interpret tables for a unicode lexer generated by <c>fslex.exe</c>.
member Interpret: initialState:int * LexBuffer<char> -> int
member Interpret: initialState:int * LexBuffer<LexBufferChar> -> int

2 changes: 1 addition & 1 deletion src/utils/prim-parsing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ exception RecoverableParseError
exception Accept of obj

[<Sealed>]
type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer<char>) =
type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer<LexBufferChar>) =
member p.LexBuffer = lexbuf

member p.InputRange n = ruleStartPoss.[n-1], ruleEndPoss.[n-1]
Expand Down
4 changes: 2 additions & 2 deletions src/utils/prim-parsing.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type internal IParseState =
member RaiseError<'b> : unit -> 'b

/// Return the LexBuffer for this parser instance.
member LexBuffer : LexBuffer<char>
member LexBuffer : LexBuffer<LexBufferChar>


[<Sealed>]
Expand Down Expand Up @@ -118,7 +118,7 @@ type internal Tables<'tok> =

/// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state.
/// Returns an object indicating the final synthesized value for the parse.
member Interpret : lexer:(LexBuffer<char> -> 'tok) * lexbuf:LexBuffer<char> * startState:int -> obj
member Interpret : lexer:(LexBuffer<LexBufferChar> -> 'tok) * lexbuf:LexBuffer<LexBufferChar> * startState:int -> obj

/// Indicates an accept action has occurred.
exception internal Accept of obj
Expand Down

0 comments on commit 2fda58f

Please sign in to comment.