From 0e742c80a589a3ad755fa14381b05dba3ee79e94 Mon Sep 17 00:00:00 2001 From: ncave <777696+ncave@users.noreply.github.com> Date: Tue, 25 Sep 2018 09:28:12 -0700 Subject: [PATCH] Change LexBuffer type from char to uint16 --- src/absil/illex.fsl | 2 +- src/absil/illib.fs | 3 ++ src/fsharp/UnicodeLexing.fs | 10 +++-- src/fsharp/UnicodeLexing.fsi | 4 +- src/fsharp/fsi/fsi.fs | 6 ++- src/fsharp/lex.fsl | 4 ++ src/fsharp/service/ServiceLexing.fsi | 3 +- src/utils/prim-lexing.fs | 57 ++++++++++++++++++++++------ src/utils/prim-lexing.fsi | 17 +++++++-- src/utils/prim-parsing.fs | 2 +- src/utils/prim-parsing.fsi | 4 +- 11 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/absil/illex.fsl b/src/absil/illex.fsl index 149cd087b91..fb600548e8a 100644 --- a/src/absil/illex.fsl +++ b/src/absil/illex.fsl @@ -16,7 +16,7 @@ open FSharp.Compiler.AbstractIL.Internal.AsciiParser open FSharp.Compiler.AbstractIL.Internal.AsciiConstants -let lexeme (lexbuf : LexBuffer) = new System.String(lexbuf.Lexeme) +let lexeme (lexbuf : LexBuffer<_>) = LexBuffer<_>.LexemeString (lexbuf) let unexpectedChar _lexbuf = raise Parsing.RecoverableParseError ;; diff --git a/src/absil/illib.fs b/src/absil/illib.fs index 33e0566a6c2..fce097edb81 100644 --- a/src/absil/illib.fs +++ b/src/absil/illib.fs @@ -571,6 +571,9 @@ module String = else None let getLines (str: string) = +#if FABLE_COMPILER + System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n"); +#else use reader = new StringReader(str) [| let mutable line = reader.ReadLine() diff --git a/src/fsharp/UnicodeLexing.fs b/src/fsharp/UnicodeLexing.fs index e2c9ed7df65..822ddf89f18 100644 --- a/src/fsharp/UnicodeLexing.fs +++ b/src/fsharp/UnicodeLexing.fs @@ -9,16 +9,20 @@ module internal FSharp.Compiler.UnicodeLexing open System.IO open Internal.Utilities.Text.Lexing -type Lexbuf = LexBuffer +type Lexbuf = LexBuffer let StringAsLexbuf (supportsFeature, s: string) = +#if FABLE_COMPILER + LexBuffer.FromString (supportsFeature, s) +#else LexBuffer.FromChars (supportsFeature, s.ToCharArray()) +#endif let FunctionAsLexbuf (supportsFeature, bufferFiller) = - LexBuffer.FromFunction(supportsFeature, bufferFiller) + LexBuffer.FromFunction(supportsFeature, bufferFiller) let SourceTextAsLexbuf (supportsFeature, sourceText) = - LexBuffer.FromSourceText(supportsFeature, sourceText) + LexBuffer.FromSourceText(supportsFeature, sourceText) let StreamReaderAsLexbuf (supportsFeature, reader: StreamReader) = let mutable isFinished = false diff --git a/src/fsharp/UnicodeLexing.fsi b/src/fsharp/UnicodeLexing.fsi index 1885d246b78..15c0c2acdda 100644 --- a/src/fsharp/UnicodeLexing.fsi +++ b/src/fsharp/UnicodeLexing.fsi @@ -7,11 +7,11 @@ open FSharp.Compiler.Features open FSharp.Compiler.Text open Internal.Utilities.Text.Lexing -type Lexbuf = LexBuffer +type Lexbuf = LexBuffer val internal StringAsLexbuf: (LanguageFeature -> bool) * string -> Lexbuf -val public FunctionAsLexbuf: (LanguageFeature -> bool) * (char [] * int * int -> int) -> Lexbuf +val public FunctionAsLexbuf: (LanguageFeature -> bool) * (LexBufferChar[] * int * int -> int) -> Lexbuf val public SourceTextAsLexbuf: (LanguageFeature -> bool) * ISourceText -> Lexbuf diff --git a/src/fsharp/fsi/fsi.fs b/src/fsharp/fsi/fsi.fs index bf5f650514f..dd5fc8090ed 100644 --- a/src/fsharp/fsi/fsi.fs +++ b/src/fsharp/fsi/fsi.fs @@ -1699,7 +1699,7 @@ type internal FsiStdinLexerProvider let LexbufFromLineReader (fsiStdinSyphon: FsiStdinSyphon) readF = UnicodeLexing.FunctionAsLexbuf - (isFeatureSupported, (fun (buf: char[], start, len) -> + (isFeatureSupported, (fun (buf, start, len) -> //fprintf fsiConsoleOutput.Out "Calling ReadLine\n" let inputOption = try Some(readF()) with :? EndOfStreamException -> None inputOption |> Option.iter (fun t -> fsiStdinSyphon.Add (t + "\n")) @@ -1713,7 +1713,11 @@ type internal FsiStdinLexerProvider if ninput > len then fprintf fsiConsoleOutput.Error "%s" (FSIstrings.SR.fsiLineTooLong()) let ntrimmed = min len ninput for i = 0 to ntrimmed-1 do +#if FABLE_COMPILER + buf.[i+start] <- uint16 input.[i] +#else buf.[i+start] <- input.[i] +#endif ntrimmed )) diff --git a/src/fsharp/lex.fsl b/src/fsharp/lex.fsl index 527f6ebd084..6c2fc952577 100644 --- a/src/fsharp/lex.fsl +++ b/src/fsharp/lex.fsl @@ -152,7 +152,11 @@ let shouldStartFile args lexbuf (m:range) err tok = else tok let evalIfDefExpression startPos isFeatureSupported args (lookup:string->bool) (lexed:string) = +#if FABLE_COMPILER + let lexbuf = LexBuffer<_>.FromString (isFeatureSupported, lexed) +#else let lexbuf = LexBuffer.FromChars (isFeatureSupported, lexed.ToCharArray ()) +#endif lexbuf.StartPos <- startPos lexbuf.EndPos <- startPos let tokenStream = FSharp.Compiler.PPLexer.tokenstream args diff --git a/src/fsharp/service/ServiceLexing.fsi b/src/fsharp/service/ServiceLexing.fsi index 5bd784e9e42..db8cf2e8271 100755 --- a/src/fsharp/service/ServiceLexing.fsi +++ b/src/fsharp/service/ServiceLexing.fsi @@ -3,6 +3,7 @@ namespace FSharp.Compiler.SourceCodeServices open FSharp.Compiler +open Internal.Utilities.Text.Lexing // Prevents warnings of experimental APIs within the signature file itself. #nowarn "57" @@ -245,7 +246,7 @@ type FSharpLineTokenizer = type FSharpSourceTokenizer = new : conditionalDefines:string list * fileName:string option -> FSharpSourceTokenizer member CreateLineTokenizer : lineText:string -> FSharpLineTokenizer - member CreateBufferTokenizer : bufferFiller:(char[] * int * int -> int) -> FSharpLineTokenizer + member CreateBufferTokenizer : bufferFiller:(LexBufferChar[] * int * int -> int) -> FSharpLineTokenizer module internal TestExpose = val TokenInfo : Parser.token -> (FSharpTokenColorKind * FSharpTokenCharKind * FSharpTokenTriggerClass) diff --git a/src/utils/prim-lexing.fs b/src/utils/prim-lexing.fs index 1f772d6e87e..8d30b3cd793 100644 --- a/src/utils/prim-lexing.fs +++ b/src/utils/prim-lexing.fs @@ -31,6 +31,9 @@ type ISourceText = type StringText(str: string) = let getLines (str: string) = +#if FABLE_COMPILER + System.Text.RegularExpressions.Regex.Split(str, "\r\n|\r|\n"); +#else use reader = new StringReader(str) [| let mutable line = reader.ReadLine() @@ -42,6 +45,7 @@ type StringText(str: string) = // http://stackoverflow.com/questions/19365404/stringreader-omits-trailing-linebreak yield String.Empty |] +#endif let getLines = // This requires allocating and getting all the lines. @@ -169,6 +173,12 @@ namespace Internal.Utilities.Text.Lexing 0, 0) +#if FABLE_COMPILER + type internal LexBufferChar = uint16 +#else + type internal LexBufferChar = char +#endif + type internal LexBufferFiller<'Char> = (LexBuffer<'Char> -> unit) and [] @@ -217,6 +227,8 @@ namespace Internal.Utilities.Text.Lexing and set b = endPos <- b member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength + member lexbuf.LexemeChar(n) = buffer.[n+bufferScanStart] + member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>) member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v member lexbuf.Buffer with get() : 'Char[] = buffer and set v = buffer <- v @@ -225,8 +237,14 @@ namespace Internal.Utilities.Text.Lexing member lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v member lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v member lexbuf.RefillBuffer () = filler lexbuf - static member LexemeString(lexbuf:LexBuffer) = - new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength) + + static member LexemeString (lexbuf: LexBuffer) = +#if FABLE_COMPILER + let chars = Array.init lexbuf.LexemeLength (lexbuf.LexemeChar >> char) + new System.String(chars) +#else + new System.String(lexbuf.Buffer, lexbuf.BufferScanStart, lexbuf.LexemeLength) +#endif member lexbuf.IsPastEndOfStream with get() = eof @@ -266,9 +284,13 @@ namespace Internal.Utilities.Text.Lexing LexBuffer<'Char>.FromArrayNoCopy(supportsFeature, buffer) // Important: This method takes ownership of the array - static member FromChars (supportsFeature:LanguageFeature -> bool, arr:char[]) = LexBuffer.FromArrayNoCopy (supportsFeature, arr) + static member FromChars (supportsFeature:LanguageFeature -> bool, arr:LexBufferChar[]) = LexBuffer.FromArrayNoCopy (supportsFeature, arr) static member FromSourceText (supportsFeature: LanguageFeature -> bool, sourceText: ISourceText) = +#if FABLE_COMPILER + let arr = Array.init sourceText.Length (fun i -> uint16 (sourceText.Item i)) + LexBuffer.FromArrayNoCopy (supportsFeature, arr) +#else let mutable currentSourceIndex = 0 LexBuffer.FromFunction(supportsFeature, fun (chars, start, length) -> let lengthToCopy = @@ -283,16 +305,25 @@ namespace Internal.Utilities.Text.Lexing currentSourceIndex <- currentSourceIndex + lengthToCopy lengthToCopy ) +#endif + + static member FromString (supportsFeature: LanguageFeature -> bool, s: string) = +#if FABLE_COMPILER + let arr = Array.init s.Length (fun i -> uint16 s.[i]) + LexBuffer.FromArrayNoCopy (supportsFeature, arr) +#else + LexBuffer.FromArrayNoCopy (supportsFeature, s.ToCharArray()) +#endif module GenericImplFragments = - let startInterpret(lexBuffer:LexBuffer) = + let startInterpret(lexBuffer:LexBuffer) = lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength; lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength; lexBuffer.BufferScanLength <- 0; lexBuffer.LexemeLength <- 0; lexBuffer.BufferAcceptAction <- -1; - let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer,scanUntilSentinel,endOfScan,state,eofPos) = + let afterRefill (trans: uint16[][],sentinel,lexBuffer:LexBuffer,scanUntilSentinel,endOfScan,state,eofPos) = // end of file occurs if we couldn't extend the buffer if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then let snew = int trans.[state].[eofPos] // == EOF @@ -306,9 +337,9 @@ namespace Internal.Utilities.Text.Lexing else scanUntilSentinel lexBuffer state - let onAccept (lexBuffer:LexBuffer,a) = - lexBuffer.LexemeLength <- lexBuffer.BufferScanLength; - lexBuffer.BufferAcceptAction <- a; + let onAccept (lexBuffer:LexBuffer, a) = + lexBuffer.LexemeLength <- lexBuffer.BufferScanLength + lexBuffer.BufferAcceptAction <- a open GenericImplFragments @@ -333,12 +364,16 @@ namespace Internal.Utilities.Text.Lexing // ways let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2 let unicodeCategory = +#if FABLE_COMPILER + System.Char.GetUnicodeCategory(char inp) +#else System.Char.GetUnicodeCategory(inp) +#endif //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]); int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory] else // This is the specific unicode character - let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2]) + let c = trans.[state].[baseForSpecificUnicodeChars+i*2] //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]); // OK, have we found the entry for a specific unicode character? if c = inp @@ -360,7 +395,7 @@ namespace Internal.Utilities.Text.Lexing afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos) else // read a character - end the scan if there are no further transitions - let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] + let inp = uint16 lexBuffer.Buffer.[lexBuffer.BufferScanPos] // Find the new state let snew = lookupUnicodeCharacters state inp @@ -378,7 +413,7 @@ namespace Internal.Utilities.Text.Lexing // 30 entries, one for each UnicodeCategory // 1 entry for EOF - member tables.Interpret(initialState,lexBuffer : LexBuffer) = + member tables.Interpret(initialState, lexBuffer: LexBuffer) = startInterpret(lexBuffer) scanUntilSentinel lexBuffer initialState diff --git a/src/utils/prim-lexing.fsi b/src/utils/prim-lexing.fsi index b0579d71e2f..48d6e2f59a2 100644 --- a/src/utils/prim-lexing.fsi +++ b/src/utils/prim-lexing.fsi @@ -85,6 +85,12 @@ type internal Position = static member FirstLine : fileIdx:int -> Position +#if FABLE_COMPILER +type internal LexBufferChar = uint16 +#else +type internal LexBufferChar = char +#endif + [] /// Input buffers consumed by lexers generated by fslex.exe. /// The type must be generic to match the code generated by FsLex and FsYacc (if you would like to @@ -100,7 +106,7 @@ type internal LexBuffer<'Char> = member Lexeme: 'Char [] /// Fast helper to turn the matched characters into a string, avoiding an intermediate array. - static member LexemeString : LexBuffer -> string + static member LexemeString : LexBuffer -> string /// Dynamically typed, non-lexically scoped parameter table. member BufferLocalStore : IDictionary @@ -113,13 +119,16 @@ type internal LexBuffer<'Char> = /// Create a lex buffer suitable for Unicode lexing that reads characters from the given array. /// Important: does take ownership of the array. - static member FromChars: (LanguageFeature -> bool) * char[] -> LexBuffer + static member FromChars: (LanguageFeature -> bool) * LexBufferChar[] -> LexBuffer + + /// Create a lex buffer suitable for Unicode lexing that reads characters from the given string. + static member FromString: (LanguageFeature -> bool) * string -> LexBuffer /// Create a lex buffer that reads character or byte inputs by using the given function. static member FromFunction: (LanguageFeature -> bool) * ('Char[] * int * int -> int) -> LexBuffer<'Char> /// Create a lex buffer backed by source text. - static member FromSourceText : (LanguageFeature -> bool) * ISourceText -> LexBuffer + static member FromSourceText : (LanguageFeature -> bool) * ISourceText -> LexBuffer /// The type of tables for an unicode lexer generated by fslex.exe. [] @@ -129,5 +138,5 @@ type internal UnicodeTables = static member Create : uint16[][] * uint16[] -> UnicodeTables /// Interpret tables for a unicode lexer generated by fslex.exe. - member Interpret: initialState:int * LexBuffer -> int + member Interpret: initialState:int * LexBuffer -> int diff --git a/src/utils/prim-parsing.fs b/src/utils/prim-parsing.fs index 996cdc67d59..ae95ea5f4dd 100644 --- a/src/utils/prim-parsing.fs +++ b/src/utils/prim-parsing.fs @@ -13,7 +13,7 @@ exception RecoverableParseError exception Accept of obj [] -type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer) = +type internal IParseState(ruleStartPoss:Position[], ruleEndPoss:Position[], lhsPos:Position[], ruleValues:obj[], lexbuf:LexBuffer) = member p.LexBuffer = lexbuf member p.InputRange n = ruleStartPoss.[n-1], ruleEndPoss.[n-1] diff --git a/src/utils/prim-parsing.fsi b/src/utils/prim-parsing.fsi index 7ec3e7a0a80..762c90796ab 100644 --- a/src/utils/prim-parsing.fsi +++ b/src/utils/prim-parsing.fsi @@ -37,7 +37,7 @@ type internal IParseState = member RaiseError<'b> : unit -> 'b /// Return the LexBuffer for this parser instance. - member LexBuffer : LexBuffer + member LexBuffer : LexBuffer [] @@ -118,7 +118,7 @@ type internal Tables<'tok> = /// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state. /// Returns an object indicating the final synthesized value for the parse. - member Interpret : lexer:(LexBuffer -> 'tok) * lexbuf:LexBuffer * startState:int -> obj + member Interpret : lexer:(LexBuffer -> 'tok) * lexbuf:LexBuffer * startState:int -> obj /// Indicates an accept action has occurred. exception internal Accept of obj