From c931480e876f5e4cce15e3354d0bc8d1a0529794 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Sun, 27 Feb 2022 11:33:54 -0800 Subject: [PATCH] Performance improvements with new parser internals. (#32) * Performance improvements with new parser internals. * Bump PS version --- .github/workflows/ci.yml | 9 +- bench/ParseFile.purs | 6 +- bench/bench.dhall | 3 +- parse-package-set/parse-package-set.dhall | 3 +- spago.dhall | 5 +- src/PureScript/CST.purs | 4 +- src/PureScript/CST/Lexer.purs | 66 ++- src/PureScript/CST/Parser.purs | 37 +- src/PureScript/CST/Parser/Monad.purs | 501 +++++++++------------- test/Main.purs | 107 ++++- 10 files changed, 404 insertions(+), 337 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61e6590..d4ca194 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,8 +14,8 @@ jobs: - uses: purescript-contrib/setup-purescript@main with: - purescript: "0.14.0" - spago: "0.19.0" + purescript: "0.14.5" + spago: "0.20.3" psa: "0.8.2" purs-tidy: "latest" @@ -30,6 +30,9 @@ jobs: - name: Build source run: spago build --purs-args '--censor-lib --strict' + - name: Run tests + run: spago test + - name: Check formatting run: npm run check @@ -37,4 +40,4 @@ jobs: run: npm run parse-package-set - name: Run file benchmark - run: npm run bench-file test/Main.purs + run: npm run bench-file src/PureScript/CST/Parser.purs diff --git a/bench/ParseFile.purs b/bench/ParseFile.purs index 889b3ed..ca984da 100644 --- a/bench/ParseFile.purs +++ b/bench/ParseFile.purs @@ -44,9 +44,11 @@ main = launchAff_ do case parseModule contents of ParseSucceeded _ -> do Console.log "Parse succeeded." - ParseSucceededWithErrors _ errs -> + ParseSucceededWithErrors _ errs -> do + Console.log "Parse succeeded with errors." for_ errs $ Console.error <<< printPositionedError - ParseFailed err -> + ParseFailed err -> do + Console.log "Parse failed." Console.error $ printPositionedError err Nothing -> Console.log "File path required" diff --git a/bench/bench.dhall b/bench/bench.dhall index 4cb5a1a..82dac1a 100644 --- a/bench/bench.dhall +++ b/bench/bench.dhall @@ -1,7 +1,7 @@ let conf = ../spago.dhall in conf // { - sources = conf.sources # [ "bench/**/*.purs" ], + sources = [ "src/**/*.purs", "bench/**/*.purs" ], dependencies = [ "aff" , "arrays" @@ -12,6 +12,7 @@ in conf // { , "either" , "foldable-traversable" , "free" + , "functions" , "functors" , "identity" , "integers" diff --git a/parse-package-set/parse-package-set.dhall b/parse-package-set/parse-package-set.dhall index 9d4eb34..096a9f7 100644 --- a/parse-package-set/parse-package-set.dhall +++ b/parse-package-set/parse-package-set.dhall @@ -1,7 +1,7 @@ let conf = ../spago.dhall in conf // { - sources = conf.sources # [ "parse-package-set/**/*.purs" ], + sources = [ "src/**/*.purs", "parse-package-set/**/*.purs" ], dependencies = [ "aff" , "arrays" @@ -16,6 +16,7 @@ in conf // { , "filterable" , "foldable-traversable" , "free" + , "functions" , "functors" , "identity" , "integers" diff --git a/spago.dhall b/spago.dhall index 0ebdb7b..59be48c 100644 --- a/spago.dhall +++ b/spago.dhall @@ -1,12 +1,14 @@ { name = "language-cst-parser" , dependencies = [ "arrays" + , "console" , "const" , "control" , "effect" , "either" , "foldable-traversable" , "free" + , "functions" , "functors" , "identity" , "integers" @@ -14,6 +16,7 @@ , "lists" , "maybe" , "newtype" + , "node-process" , "numbers" , "ordered-collections" , "partial" @@ -28,5 +31,5 @@ , "unsafe-coerce" ] , packages = ./packages.dhall -, sources = [ "src/**/*.purs" ] +, sources = [ "src/**/*.purs", "test/**/Main.purs" ] } diff --git a/src/PureScript/CST.purs b/src/PureScript/CST.purs index 17a2c39..d2b4fbd 100644 --- a/src/PureScript/CST.purs +++ b/src/PureScript/CST.purs @@ -93,8 +93,8 @@ parsePartialModule src = pure $ Module { header, body } } Right $ Tuple res state.errors - ParseFail error position _ _ -> - Left { error, position } + ParseFail error _ -> + Left error printModule :: forall e. TokensOf e => Module e -> String printModule mod = diff --git a/src/PureScript/CST/Lexer.purs b/src/PureScript/CST/Lexer.purs index 1c901cb..aa63638 100644 --- a/src/PureScript/CST/Lexer.purs +++ b/src/PureScript/CST/Lexer.purs @@ -6,9 +6,11 @@ module PureScript.CST.Lexer import Prelude -import Control.Alt (class Alt, (<|>)) -import Data.Array as Array +import Control.Alt (class Alt, alt) +import Control.Monad.ST as ST +import Control.Monad.ST.Ref as STRef import Data.Array.NonEmpty as NonEmptyArray +import Data.Array.ST as STArray import Data.Char as Char import Data.Either (Either(..)) import Data.Foldable (fold, foldl, foldMap) @@ -32,6 +34,8 @@ import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, insertLayout) import PureScript.CST.TokenStream (TokenStep(..), TokenStream(..), consTokens, step, unwindLayout) import PureScript.CST.Types (Comment(..), IntValue(..), LineFeed(..), ModuleName(..), SourcePos, SourceStyle(..), Token(..)) +infixr 3 alt as <|> + data LexResult e a = LexFail e String | LexSucc a String @@ -147,18 +151,29 @@ satisfy mkErr p = Lex \str -> LexFail (\_ -> mkErr (mkUnexpected str)) str many :: forall e a. Lex e a -> Lex e (Array a) -many (Lex k) = Lex \str -> do - let - go acc str' = - case k str' of - LexFail err str'' - | SCU.length str' == SCU.length str'' -> - LexSucc acc str' - | otherwise -> - LexFail err str'' - LexSucc a str'' -> - go (Array.snoc acc a) str'' - go [] str +many (Lex k) = Lex \str -> ST.run do + valuesRef <- STArray.new + strRef <- STRef.new str + contRef <- STRef.new true + resRef <- STRef.new (LexSucc [] str) + ST.while (STRef.read contRef) do + str' <- STRef.read strRef + case k str' of + LexFail error str'' + | SCU.length str' == SCU.length str'' -> do + values <- STArray.unsafeFreeze valuesRef + _ <- STRef.write (LexSucc values str'') resRef + _ <- STRef.write false contRef + pure unit + | otherwise -> do + _ <- STRef.write (LexFail error str'') resRef + _ <- STRef.write false contRef + pure unit + LexSucc a str'' -> do + _ <- STArray.push a valuesRef + _ <- STRef.write str'' strRef + pure unit + STRef.read resRef fail :: forall a. ParseError -> Lex LexError a fail = Lex <<< LexFail <<< const @@ -369,9 +384,9 @@ token = <|> tokenComma where parseModuleName = ado - parts <- many (try (parseProper <* charDot)) + prefix <- parseModuleNamePrefix name <- parseName - in name (toModuleName parts) + in name (toModuleName prefix) parseName :: Lex _ (Maybe ModuleName -> Token) parseName = @@ -461,6 +476,9 @@ token = ident <- try $ charQuestionMark *> (parseIdent <|> parseProper) in TokHole ident + parseModuleNamePrefix = + regex (LexExpected "module name") "(?:(?:\\p{Lu}[\\p{L}0-9_']*)\\.)*" + parseProper = regex (LexExpected "proper name") "\\p{Lu}[\\p{L}0-9_']*" @@ -566,8 +584,8 @@ token = parseNumber = do intPart <- intPartRegex - fractionPart <- optional (try (charDot *> fractionPartRegex)) - exponentPart <- optional (charExponent *> parseExponentPart) + fractionPart <- parseNumberFractionPart + exponentPart <- parseNumberExponentPart if isNothing fractionPart && isNothing exponentPart then do let intVal = stripUnderscores intPart case Int.fromString intVal of @@ -587,6 +605,12 @@ token = Nothing -> fail $ LexNumberOutOfRange raw + parseNumberFractionPart = + optional (try (charDot *> fractionPartRegex)) + + parseNumberExponentPart = + optional (charExponent *> parseExponentPart) + parseExponentPart = ado sign <- optional parseExponentSign exponent <- intPartRegex @@ -656,7 +680,7 @@ token = charAny = satisfy (LexExpected "char") (const true) -toModuleName :: Array String -> Maybe ModuleName +toModuleName :: String -> Maybe ModuleName toModuleName = case _ of - [] -> Nothing - mn -> Just $ ModuleName $ String.joinWith "." mn + "" -> Nothing + mn -> Just $ ModuleName $ SCU.dropRight 1 mn diff --git a/src/PureScript/CST/Parser.purs b/src/PureScript/CST/Parser.purs index 5741635..54afe3d 100644 --- a/src/PureScript/CST/Parser.purs +++ b/src/PureScript/CST/Parser.purs @@ -26,7 +26,7 @@ import Data.Tuple (Tuple(..), uncurry) import Prim as P import PureScript.CST.Errors (ParseError(..), RecoveredError(..)) import PureScript.CST.Layout (currentIndent) -import PureScript.CST.Parser.Monad (Parser, Recovery(..), eof, lookAhead, many, optional, recover, take, try) +import PureScript.CST.Parser.Monad (Parser, eof, lookAhead, many, optional, recover, take, try) import PureScript.CST.TokenStream (TokenStep(..), TokenStream, layoutStack) import PureScript.CST.TokenStream as TokenStream import PureScript.CST.Types (Binder(..), ClassFundep(..), DataCtor(..), DataMembers(..), Declaration(..), Delimited, DoStatement(..), Export(..), Expr(..), Fixity(..), FixityOp(..), Foreign(..), Guarded(..), GuardedExpr(..), Ident(..), Import(..), ImportDecl(..), Instance(..), InstanceBinding(..), IntValue(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), ModuleHeader(..), ModuleName(..), Name(..), OneOrDelimited(..), Operator(..), PatternGuard(..), Proper(..), QualifiedName(..), RecordLabeled(..), RecordUpdate(..), Role(..), Row(..), Separated(..), SourceToken, Token(..), Type(..), TypeVarBinding(..), Where(..), Wrapped(..)) @@ -1146,30 +1146,37 @@ reservedKeywords = Set.fromFoldable ] recoverIndent :: forall a. (RecoveredError -> a) -> Parser a -> Parser a -recoverIndent mkNode = recover \{ position, error } -> - map (\tokens -> mkNode (RecoveredError { position, error, tokens })) <<< recoverTokensWhile \tok indent -> - case tok.value of - TokLayoutEnd col -> col > indent - TokLayoutSep col -> col > indent - _ -> true - -recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Recovery (Array SourceToken) +recoverIndent mkNode = recover \{ position, error } stream -> do + let + Tuple tokens newStream = recoverTokensWhile + ( \tok indent -> case tok.value of + TokLayoutEnd col -> col > indent + TokLayoutSep col -> col > indent + _ -> true + ) + stream + if Array.null tokens then + Nothing + else + Just (Tuple (mkNode (RecoveredError { position, error, tokens })) newStream) + +recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Tuple (Array SourceToken) TokenStream recoverTokensWhile p initStream = go [] initStream where indent :: Int indent = maybe 0 _.column $ currentIndent $ layoutStack initStream - go :: Array SourceToken -> TokenStream -> Recovery (Array SourceToken) + go :: Array SourceToken -> TokenStream -> Tuple (Array SourceToken) TokenStream go acc stream = case TokenStream.step stream of - TokenError errPos _ _ _ -> - Recovery acc errPos stream - TokenEOF eofPos _ -> - Recovery acc eofPos stream + TokenError _ _ _ _ -> + Tuple acc stream + TokenEOF _ _ -> + Tuple acc stream TokenCons tok _ nextStream _ -> if p tok indent then go (Array.snoc acc tok) nextStream else - Recovery acc tok.range.start stream + Tuple acc stream recoverDecl :: RecoveryStrategy Declaration recoverDecl = recoverIndent DeclError diff --git a/src/PureScript/CST/Parser/Monad.purs b/src/PureScript/CST/Parser/Monad.purs index 92b10fc..d1f5f8f 100644 --- a/src/PureScript/CST/Parser/Monad.purs +++ b/src/PureScript/CST/Parser/Monad.purs @@ -3,7 +3,6 @@ module PureScript.CST.Parser.Monad , ParserState , ParserResult(..) , PositionedError - , Recovery(..) , initialParserState , fromParserResult , runParser @@ -22,329 +21,255 @@ import Prelude import Control.Alt (class Alt, (<|>)) import Control.Lazy (class Lazy) -import Control.Monad.ST.Class (liftST) +import Control.Monad.ST as ST +import Control.Monad.ST.Internal as STRef import Data.Array as Array import Data.Array.ST as STArray import Data.Either (Either(..)) -import Data.Lazy as Z +import Data.Function.Uncurried (Fn2, Fn4, mkFn2, mkFn4, runFn2, runFn4) +import Data.Lazy as Lazy import Data.Maybe (Maybe(..)) import Data.Tuple (Tuple(..)) -import Effect.Unsafe (unsafePerformEffect) import PureScript.CST.Errors (ParseError(..)) import PureScript.CST.TokenStream (TokenStep(..), TokenStream) import PureScript.CST.TokenStream as TokenStream -import PureScript.CST.Types (Comment, LineFeed, SourceToken, SourcePos) -import Unsafe.Coerce (unsafeCoerce) - -foreign import data UnsafeBoundValue :: Type - -data Queue c a b - = Leaf (c a b) - | Node (Queue c a UnsafeBoundValue) (Queue c UnsafeBoundValue b) - -qappend :: forall c a x b. Queue c a x -> Queue c x b -> Queue c a b -qappend = unsafeCoerce Node - -qsingleton :: forall c a b. c a b -> Queue c a b -qsingleton = Leaf - -data UnconsView c a b x - = UnconsDone (c a b) - | UnconsMore (c a x) (Queue c x b) - -unconsView :: forall c a b. Queue c a b -> UnconsView c a b UnsafeBoundValue -unconsView = uncons (unsafeCoerce UnconsDone) (unsafeCoerce UnconsMore) - -uncons - :: forall c a b r - . (c a b -> r) - -> (forall x. c a x -> Queue c x b -> r) - -> Queue c a b - -> r -uncons done more = case _ of - Leaf a -> done a - Node a b -> uncons' more a b - -uncons' - :: forall c a x b r - . (forall z. c a z -> Queue c z b -> r) - -> Queue c a x - -> Queue c x b - -> r -uncons' cons l r = case l of - Leaf k -> cons (unsafeCoerce k) (unsafeCoerce r) - Node l' r' -> uncons' cons l' (Node (unsafeCoerce r') (unsafeCoerce r)) +import PureScript.CST.Types (Comment, LineFeed, SourcePos, SourceToken) type PositionedError = { position :: SourcePos , error :: ParseError } -newtype ParserK a b = ParserK (a -> Parser b) - -data Recovery a = Recovery a SourcePos TokenStream - -derive instance functorRecovery :: Functor Recovery - -type FoldBox a b s = - { init :: Unit -> s - , step :: s -> a -> s - , done :: s -> b +type ParserState = + { consumed :: Boolean + , errors :: Array PositionedError + , stream :: TokenStream } -foreign import data Fold :: Type -> Type -> Type - -mkFold :: forall a b s. FoldBox a b s -> Fold a b -mkFold = unsafeCoerce - -unFold :: forall r a b. (forall s. FoldBox a b s -> r) -> Fold a b -> r -unFold = unsafeCoerce - -foldArray :: forall a. Fold a (Array a) -foldArray = mkFold - { init: \_ -> - unsafePerformEffect $ liftST STArray.new - , step: \arr a -> - unsafePerformEffect $ liftST do - _ <- STArray.push a arr - pure arr - , done: - unsafePerformEffect <<< liftST <<< STArray.unsafeFreeze +initialParserState :: TokenStream -> ParserState +initialParserState stream = + { consumed: false + , errors: [] + , stream } -data Parser a - = Take (SourceToken -> Either ParseError a) - | Eof (Tuple SourcePos (Array (Comment LineFeed)) -> a) - | Fail SourcePos ParseError - | Alt (Parser a) (Parser a) - | Try (Parser a) - | LookAhead (Parser a) - | Defer (Z.Lazy (Parser a)) - | Recover (PositionedError -> TokenStream -> Recovery a) (Parser a) - | Iter (Fold UnsafeBoundValue a) (Parser UnsafeBoundValue) - | Pure a - | Bind (Parser UnsafeBoundValue) (Queue ParserK UnsafeBoundValue a) - -instance functorParser :: Functor Parser where - map f = case _ of - Bind p queue -> - Bind p (qappend queue (qsingleton (ParserK (Pure <<< f)))) - p -> - Bind (unsafeCoerce p) (qsingleton (ParserK (Pure <<< unsafeCoerce f))) - -instance applyParser :: Apply Parser where - apply p1 p2 = do - f <- p1 - a <- p2 - pure (f a) +newtype Parser a = Parser + ( forall r + . Fn4 + ParserState + ((Unit -> r) -> r) + (Fn2 ParserState PositionedError r) + (Fn2 ParserState a r) + r + ) + +instance Functor Parser where + map f (Parser p) = Parser + ( mkFn4 \state1 more resume done -> + runFn4 p state1 more resume + ( mkFn2 \state2 a -> + runFn2 done state2 (f a) + ) + ) + +instance Apply Parser where + apply (Parser p1) (Parser p2) = Parser + ( mkFn4 \state1 more resume done -> + runFn4 p1 state1 more resume + ( mkFn2 \state2 f -> + more \_ -> + runFn4 p2 state2 more resume + ( mkFn2 \state3 a -> + runFn2 done state3 (f a) + ) + ) + ) + +instance Applicative Parser where + pure a = Parser + ( mkFn4 \state1 _ _ done -> + runFn2 done state1 a + ) + +instance Bind Parser where + bind (Parser p1) k = Parser + ( mkFn4 \state1 more resume done -> + runFn4 p1 state1 more resume + ( mkFn2 \state2 a -> + more \_ -> do + let (Parser p2) = k a + runFn4 p2 state2 more resume done + ) + ) + +instance Monad Parser + +instance Alt Parser where + alt (Parser k1) (Parser k2) = Parser + ( mkFn4 \state1 more resume done -> do + let + state2 = + if state1.consumed then state1 { consumed = false } + else state1 + runFn4 k1 state2 more + ( mkFn2 \state3 error -> + if state3.consumed then + runFn2 resume state3 error + else + runFn4 k2 state1 more resume done + ) + done + ) + +instance Lazy (Parser a) where + defer k = Parser + ( mkFn4 \state more resume done -> do + let (Parser k) = Lazy.force parser + runFn4 k state more resume done + ) + where + parser = Lazy.defer k + +fail :: forall a. PositionedError -> Parser a +fail error = Parser (mkFn4 \state _ resume _ -> runFn2 resume state error) -instance applicativeParser :: Applicative Parser where - pure = Pure - -instance bindParser :: Bind Parser where - bind p k = case p of - Bind p' queue -> - Bind p' (qappend queue (qsingleton (ParserK k))) - _ -> - Bind (unsafeCoerce p) (qsingleton (ParserK (unsafeCoerce k))) - -instance monadParser :: Monad Parser - -instance altParser :: Alt Parser where - alt = Alt - -instance lazyParser :: Lazy (Parser a) where - defer = Defer <<< Z.defer +try :: forall a. Parser a -> Parser a +try (Parser p) = Parser + ( mkFn4 \state1 more resume done -> + runFn4 p state1 more + ( mkFn2 \state2 error -> + runFn2 resume (state2 { consumed = state1.consumed }) error + ) + done + ) + +recover :: forall a. (PositionedError -> TokenStream -> Maybe (Tuple a TokenStream)) -> Parser a -> Parser a +recover k (Parser p) = Parser + ( mkFn4 \state1 more resume done -> do + runFn4 p (state1 { consumed = false }) more + ( mkFn2 \state2 error -> + case k error state1.stream of + Nothing -> + runFn2 resume (state2 { consumed = state1.consumed }) error + Just (Tuple a stream) -> + runFn2 done + { consumed: true + , errors: Array.snoc state2.errors error + , stream + } + a + ) + done + ) take :: forall a. (SourceToken -> Either ParseError a) -> Parser a -take = Take - -fail :: forall a. SourcePos -> ParseError -> Parser a -fail = Fail - -try :: forall a. Parser a -> Parser a -try = Try +take k = Parser + ( mkFn4 \state _ resume done -> + case TokenStream.step state.stream of + TokenError position error _ _ -> + runFn2 resume state { error, position } + TokenEOF position _ -> + runFn2 resume state { error: UnexpectedEof, position } + TokenCons tok _ nextStream _ -> + case k tok of + Left error -> + runFn2 resume state { error, position: tok.range.start } + Right a -> + runFn2 done + ( state + { consumed = true + , stream = nextStream + } + ) + a + ) -iter :: forall a b. Fold a b -> Parser a -> Parser b -iter a b = Iter (unsafeCoerce a) (unsafeCoerce b) +eof :: Parser (Tuple SourcePos (Array (Comment LineFeed))) +eof = Parser + ( mkFn4 \state _ resume done -> + case TokenStream.step state.stream of + TokenError position error _ _ -> + runFn2 resume state { error, position } + TokenEOF position comments -> + runFn2 done (state { consumed = true }) (Tuple position comments) + TokenCons tok _ _ _ -> + runFn2 resume state + { error: ExpectedEof tok.value + , position: tok.range.start + } + ) lookAhead :: forall a. Parser a -> Parser a -lookAhead = LookAhead +lookAhead (Parser p) = Parser + ( mkFn4 \state1 more resume done -> + runFn4 p state1 more + (mkFn2 \_ error -> runFn2 resume state1 error) + (mkFn2 \_ value -> runFn2 done state1 value) + ) many :: forall a. Parser a -> Parser (Array a) -many = iter foldArray +many parser = Parser + ( mkFn4 \state1 _ resume done -> do + let + result = ST.run do + valuesRef <- STArray.new + stateRef <- STRef.new state1 + contRef <- STRef.new true + resRef <- STRef.new (ParseSucc [] state1) + ST.while (STRef.read contRef) do + state2 <- STRef.read stateRef + let + state2' = + if state2.consumed then state2 { consumed = false } + else state2 + case runParser' state2' parser of + ParseFail error state3 -> + if state3.consumed then do + _ <- STRef.write (ParseFail error state3) resRef + _ <- STRef.write false contRef + pure unit + else do + values <- STArray.unsafeFreeze valuesRef + _ <- STRef.write (ParseSucc values state2) resRef + _ <- STRef.write false contRef + pure unit + ParseSucc value state3 -> do + _ <- STArray.push value valuesRef + _ <- STRef.write state3 stateRef + pure unit + STRef.read resRef + case result of + ParseFail error state2 -> + runFn2 resume state2 error + ParseSucc values state2 -> + runFn2 done state2 values + ) optional :: forall a. Parser a -> Parser (Maybe a) optional p = Just <$> p <|> pure Nothing -eof :: Parser (Tuple SourcePos (Array (Comment LineFeed))) -eof = Eof identity +data Trampoline a = More (Unit -> Trampoline a) | Done a -recover :: forall a. (PositionedError -> TokenStream -> Recovery a) -> Parser a -> Parser a -recover = Recover +runParser' :: forall a. ParserState -> Parser a -> ParserResult a +runParser' state1 (Parser p) = + run $ runFn4 p state1 More + (mkFn2 \state2 error -> Done (ParseFail error state2)) + (mkFn2 \state2 value -> Done (ParseSucc value state2)) + where + run = case _ of + More k -> run (k unit) + Done a -> a runParser :: forall a. TokenStream -> Parser a -> Either PositionedError (Tuple a (Array PositionedError)) runParser stream = fromParserResult <<< runParser' (initialParserState stream) +data ParserResult a + = ParseFail PositionedError ParserState + | ParseSucc a ParserState + fromParserResult :: forall a. ParserResult a -> Either PositionedError (Tuple a (Array PositionedError)) fromParserResult = case _ of - ParseFail error position _ _ -> - Left { position, error } + ParseFail error _ -> + Left error ParseSucc res { errors } -> Right (Tuple res errors) - -data ParserResult a - = ParseFail ParseError SourcePos ParserState (Maybe TokenStream) - | ParseSucc a ParserState - -data ParserStack a - = StkNil - | StkAlt (ParserStack a) ParserState (Parser a) - | StkTry (ParserStack a) ParserState - | StkLookAhead (ParserStack a) ParserState - | StkBinds (ParserStack a) (ParserBinds a) - | StkRecover (ParserStack a) ParserState (PositionedError -> TokenStream -> Recovery a) - -type ParserBinds = - Queue ParserK UnsafeBoundValue - -type ParserState = - { consumed :: Boolean - , errors :: Array PositionedError - , position :: SourcePos - , stream :: TokenStream - } - -initialParserState :: TokenStream -> ParserState -initialParserState stream = - { consumed: false - , errors: [] - , position: { line: 0, column: 0 } - , stream - } - -data FailUnwind a - = FailStop (ParserResult a) - | FailAlt (ParserStack a) ParserState (Parser a) - | FailRecover (ParserStack a) ParserState a - -data SuccUnwind a - = SuccStop (ParserResult a) - | SuccBinds (ParserStack a) ParserState (ParserBinds a) - -runParser' :: forall a. ParserState -> Parser a -> ParserResult a -runParser' = \state parser -> - (unsafeCoerce :: ParserResult UnsafeBoundValue -> ParserResult a) $ - go StkNil state (unsafeCoerce parser) - where - go :: ParserStack UnsafeBoundValue -> ParserState -> Parser UnsafeBoundValue -> ParserResult UnsafeBoundValue - go stack state@{ errors } = case _ of - Alt a b -> - go (StkAlt stack state b) (state { consumed = false }) a - Try a -> - go (StkTry stack state) state a - LookAhead a -> - go (StkLookAhead stack state) state a - Bind p binds -> - go (StkBinds stack binds) state p - Pure a -> - case unwindSucc a state stack of - SuccBinds prevStack prevState queue -> - case unconsView queue of - UnconsDone (ParserK k) -> - go prevStack prevState (k a) - UnconsMore (ParserK k) nextQueue -> - go (StkBinds prevStack nextQueue) prevState (k a) - SuccStop res -> - res - Fail errPos err -> - case unwindFail err errPos state stack of - FailAlt prevStack prevState prev -> - go prevStack prevState prev - FailRecover prevStack prevState a -> - go prevStack prevState (Pure a) - FailStop res -> - res - Take k -> - case TokenStream.step state.stream of - TokenError errPos err errStream _ -> - ParseFail err errPos state errStream - TokenEOF errPos _ -> - go stack state (Fail errPos UnexpectedEof) - TokenCons tok nextPos nextStream _ -> - case k tok of - Left err -> - go stack state (Fail tok.range.start err) - Right a -> - go stack { consumed: true, errors, position: nextPos, stream: nextStream } (Pure a) - Eof k -> - case TokenStream.step state.stream of - TokenError errPos err errStream _ -> - ParseFail err errPos state errStream - TokenEOF eofPos comments -> - go stack (state { consumed = true, position = eofPos }) (Pure (k (Tuple eofPos comments))) - TokenCons tok _ _ _ -> - go stack state (Fail tok.range.start (ExpectedEof tok.value)) - Iter f p -> do - let - Tuple state' a = f # unFold \{ init, step, done } -> do - let - iter acc state' = case runParser' (state' { consumed = false }) p of - ParseSucc a state'' -> - iter (step acc a) state'' - ParseFail err errPos state'' _ - | state''.consumed -> - Tuple state'' (Fail errPos err) - | otherwise -> - Tuple state' (Pure (done acc)) - iter (init unit) state - go stack state' a - Defer z -> - go stack state (Z.force z) - Recover k p -> - go (StkRecover stack state k) (state { consumed = false }) p - - unwindFail :: ParseError -> SourcePos -> ParserState -> ParserStack UnsafeBoundValue -> FailUnwind UnsafeBoundValue - unwindFail error position state@{ errors } = case _ of - StkNil -> - FailStop (ParseFail error position state (Just state.stream)) - StkAlt prevStack prevState prev -> - if state.consumed then - unwindFail error position state prevStack - else - FailAlt prevStack prevState prev - StkTry prevStack prevState -> - unwindFail error position (state { consumed = prevState.consumed }) prevStack - StkRecover prevStack prevState k -> - if state.consumed then do - let posError = { error, position } - let nextErrors = Array.snoc errors posError - let (Recovery a nextPos nextStream) = k posError prevState.stream - FailRecover prevStack - { consumed: true - , errors: nextErrors - , position: nextPos - , stream: nextStream - } - a - else - unwindFail error position (state { consumed = state.consumed || prevState.consumed }) prevStack - StkLookAhead prevStack prevState -> - unwindFail error position prevState prevStack - StkBinds prevStack _ -> - unwindFail error position state prevStack - - unwindSucc :: UnsafeBoundValue -> ParserState -> ParserStack UnsafeBoundValue -> SuccUnwind UnsafeBoundValue - unwindSucc a state = case _ of - StkNil -> - SuccStop (ParseSucc a state) - StkAlt prevStack _ _ -> - unwindSucc a state prevStack - StkTry prevStack _ -> - unwindSucc a state prevStack - StkRecover prevStack prevState _ -> - unwindSucc a (state { consumed = state.consumed || prevState.consumed }) prevStack - StkLookAhead prevStack prevState -> - unwindSucc a prevState prevStack - StkBinds prevStack queue -> - SuccBinds prevStack state queue diff --git a/test/Main.purs b/test/Main.purs index f91f98c..e46b32f 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -1,11 +1,112 @@ module Test.Main where import Prelude +import Prim hiding (Type) +import Data.Array as Array +import Data.Array.NonEmpty as NonEmptyArray +import Data.Maybe (maybe) +import Data.String (Pattern(..)) +import Data.String as String +import Data.String.CodeUnits as SCU import Effect (Effect) -import Effect.Class.Console (log) +import Effect.Class.Console as Console +import Node.Process as Process +import PureScript.CST (RecoveredParserResult(..), parseBinder, parseDecl, parseExpr, parseModule, parseType) +import PureScript.CST.Types (Binder, Declaration(..), DoStatement(..), Expr(..), LetBinding(..), Module(..), ModuleBody(..), Type) + +class ParseFor f where + parseFor :: String -> RecoveredParserResult f + +instance ParseFor Module where + parseFor = parseModule + +instance ParseFor Declaration where + parseFor = parseDecl + +instance ParseFor Expr where + parseFor = parseExpr + +instance ParseFor Type where + parseFor = parseType + +instance ParseFor Binder where + parseFor = parseBinder + +assertParse + :: forall f + . ParseFor f + => String + -> String + -> (RecoveredParserResult f -> Boolean) + -> Effect Unit +assertParse name src k = do + let res = parseFor (trim src) + unless (k res) do + Console.error $ "Assertion failed: " <> name + Process.exit 1 + where + trim = + String.split (Pattern "\n") + >>> Array.dropWhile String.null + >>> Array.uncons + >>> maybe [] + ( \{ head, tail } -> do + let leadingSpaces = SCU.takeWhile (eq ' ') head + let trimLine = SCU.drop (SCU.length leadingSpaces) + Array.cons (trimLine head) (trimLine <$> tail) + ) + >>> String.joinWith "\n" main :: Effect Unit main = do - log "🍝" - log "You should add some tests." + assertParse "Recovered do statements" + """ + do + foo <- bar + a b c + + foo + """ + case _ of + ParseSucceededWithErrors (ExprDo { statements }) _ + | [ DoBind _ _ _ + , DoError _ + , DoDiscard _ + ] <- NonEmptyArray.toArray statements -> + true + _ -> + false + + assertParse "Recovered let bindings" + """ + let + a = b c + + b = 42 + in + a + b + """ + case _ of + ParseSucceededWithErrors (ExprLet { bindings }) _ + | [ LetBindingError _ + , LetBindingName _ + ] <- NonEmptyArray.toArray bindings -> + true + _ -> + false + + assertParse "Recovered declarations" + """ + module Foo where + a = 42 + {} + b = 12 + """ + case _ of + ParseSucceededWithErrors (Module { body: ModuleBody { decls } }) _ + | [ DeclValue _ + , DeclError _ + , DeclValue _ + ] <- decls -> + true + _ -> + false