Skip to content

Commit

Permalink
Performance improvements with new parser internals. (#32)
Browse files Browse the repository at this point in the history
* Performance improvements with new parser internals.

* Bump PS version
  • Loading branch information
natefaubion authored Feb 27, 2022
1 parent ab6ea97 commit c931480
Show file tree
Hide file tree
Showing 10 changed files with 404 additions and 337 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ jobs:

- uses: purescript-contrib/setup-purescript@main
with:
purescript: "0.14.0"
spago: "0.19.0"
purescript: "0.14.5"
spago: "0.20.3"
psa: "0.8.2"
purs-tidy: "latest"

Expand All @@ -30,11 +30,14 @@ jobs:
- name: Build source
run: spago build --purs-args '--censor-lib --strict'

- name: Run tests
run: spago test

- name: Check formatting
run: npm run check

- name: Parse package sets
run: npm run parse-package-set

- name: Run file benchmark
run: npm run bench-file test/Main.purs
run: npm run bench-file src/PureScript/CST/Parser.purs
6 changes: 4 additions & 2 deletions bench/ParseFile.purs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ main = launchAff_ do
case parseModule contents of
ParseSucceeded _ -> do
Console.log "Parse succeeded."
ParseSucceededWithErrors _ errs ->
ParseSucceededWithErrors _ errs -> do
Console.log "Parse succeeded with errors."
for_ errs $ Console.error <<< printPositionedError
ParseFailed err ->
ParseFailed err -> do
Console.log "Parse failed."
Console.error $ printPositionedError err
Nothing ->
Console.log "File path required"
Expand Down
3 changes: 2 additions & 1 deletion bench/bench.dhall
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
let conf = ../spago.dhall

in conf // {
sources = conf.sources # [ "bench/**/*.purs" ],
sources = [ "src/**/*.purs", "bench/**/*.purs" ],
dependencies =
[ "aff"
, "arrays"
Expand All @@ -12,6 +12,7 @@ in conf // {
, "either"
, "foldable-traversable"
, "free"
, "functions"
, "functors"
, "identity"
, "integers"
Expand Down
3 changes: 2 additions & 1 deletion parse-package-set/parse-package-set.dhall
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
let conf = ../spago.dhall

in conf // {
sources = conf.sources # [ "parse-package-set/**/*.purs" ],
sources = [ "src/**/*.purs", "parse-package-set/**/*.purs" ],
dependencies =
[ "aff"
, "arrays"
Expand All @@ -16,6 +16,7 @@ in conf // {
, "filterable"
, "foldable-traversable"
, "free"
, "functions"
, "functors"
, "identity"
, "integers"
Expand Down
5 changes: 4 additions & 1 deletion spago.dhall
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
{ name = "language-cst-parser"
, dependencies =
[ "arrays"
, "console"
, "const"
, "control"
, "effect"
, "either"
, "foldable-traversable"
, "free"
, "functions"
, "functors"
, "identity"
, "integers"
, "lazy"
, "lists"
, "maybe"
, "newtype"
, "node-process"
, "numbers"
, "ordered-collections"
, "partial"
Expand All @@ -28,5 +31,5 @@
, "unsafe-coerce"
]
, packages = ./packages.dhall
, sources = [ "src/**/*.purs" ]
, sources = [ "src/**/*.purs", "test/**/Main.purs" ]
}
4 changes: 2 additions & 2 deletions src/PureScript/CST.purs
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ parsePartialModule src =
pure $ Module { header, body }
}
Right $ Tuple res state.errors
ParseFail error position _ _ ->
Left { error, position }
ParseFail error _ ->
Left error

printModule :: forall e. TokensOf e => Module e -> String
printModule mod =
Expand Down
66 changes: 45 additions & 21 deletions src/PureScript/CST/Lexer.purs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ module PureScript.CST.Lexer

import Prelude

import Control.Alt (class Alt, (<|>))
import Data.Array as Array
import Control.Alt (class Alt, alt)
import Control.Monad.ST as ST
import Control.Monad.ST.Ref as STRef
import Data.Array.NonEmpty as NonEmptyArray
import Data.Array.ST as STArray
import Data.Char as Char
import Data.Either (Either(..))
import Data.Foldable (fold, foldl, foldMap)
Expand All @@ -32,6 +34,8 @@ import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, insertLayout)
import PureScript.CST.TokenStream (TokenStep(..), TokenStream(..), consTokens, step, unwindLayout)
import PureScript.CST.Types (Comment(..), IntValue(..), LineFeed(..), ModuleName(..), SourcePos, SourceStyle(..), Token(..))

infixr 3 alt as <|>

data LexResult e a
= LexFail e String
| LexSucc a String
Expand Down Expand Up @@ -147,18 +151,29 @@ satisfy mkErr p = Lex \str ->
LexFail (\_ -> mkErr (mkUnexpected str)) str

many :: forall e a. Lex e a -> Lex e (Array a)
many (Lex k) = Lex \str -> do
let
go acc str' =
case k str' of
LexFail err str''
| SCU.length str' == SCU.length str'' ->
LexSucc acc str'
| otherwise ->
LexFail err str''
LexSucc a str'' ->
go (Array.snoc acc a) str''
go [] str
-- Runs the wrapped lexer `k` over and over inside `ST.run`, pushing each
-- successful value onto a mutable array instead of building the result with
-- repeated immutable appends.
many (Lex k) = Lex \str -> ST.run do
-- Mutable accumulator for the values produced by successful runs of `k`.
valuesRef <- STArray.new
-- Input still to be lexed; replaced after every successful run.
strRef <- STRef.new str
-- Loop flag: set to false by either failure branch below.
contRef <- STRef.new true
-- Final result. The initial value is only a placeholder: the loop can only
-- stop via a failure branch, and both of them overwrite this ref first.
resRef <- STRef.new (LexSucc [] str)
ST.while (STRef.read contRef) do
str' <- STRef.read strRef
case k str' of
LexFail error str''
-- The remaining input has the same length as before this attempt: treat
-- the failure as the normal end of the repetition and succeed with
-- everything collected so far.
| SCU.length str' == SCU.length str'' -> do
-- The loop flag is cleared right after, so nothing is pushed onto the
-- array again in this block once it has been frozen.
values <- STArray.unsafeFreeze valuesRef
_ <- STRef.write (LexSucc values str'') resRef
_ <- STRef.write false contRef
pure unit
-- The remaining input length changed before the failure: report the
-- failure itself as the result of `many`.
| otherwise -> do
_ <- STRef.write (LexFail error str'') resRef
_ <- STRef.write false contRef
pure unit
-- Success: record the value and continue from the new remaining input.
LexSucc a str'' -> do
_ <- STArray.push a valuesRef
_ <- STRef.write str'' strRef
pure unit
STRef.read resRef

fail :: forall a. ParseError -> Lex LexError a
fail = Lex <<< LexFail <<< const
Expand Down Expand Up @@ -369,9 +384,9 @@ token =
<|> tokenComma
where
parseModuleName = ado
parts <- many (try (parseProper <* charDot))
prefix <- parseModuleNamePrefix
name <- parseName
in name (toModuleName parts)
in name (toModuleName prefix)

parseName :: Lex _ (Maybe ModuleName -> Token)
parseName =
Expand Down Expand Up @@ -461,6 +476,9 @@ token =
ident <- try $ charQuestionMark *> (parseIdent <|> parseProper)
in TokHole ident

-- Matches a whole qualification prefix with a single regex: zero or more
-- segments, each one the same shape as the `parseProper` pattern followed by a
-- literal dot. Because the group is starred, the pattern can match the empty
-- string; when non-empty, the matched text ends with a dot.
-- NOTE(review): the `LexExpected "module name"` argument is presumably the
-- error reported when the regex does not match -- confirm against `regex`.
parseModuleNamePrefix =
regex (LexExpected "module name") "(?:(?:\\p{Lu}[\\p{L}0-9_']*)\\.)*"

parseProper =
regex (LexExpected "proper name") "\\p{Lu}[\\p{L}0-9_']*"

Expand Down Expand Up @@ -566,8 +584,8 @@ token =

parseNumber = do
intPart <- intPartRegex
fractionPart <- optional (try (charDot *> fractionPartRegex))
exponentPart <- optional (charExponent *> parseExponentPart)
fractionPart <- parseNumberFractionPart
exponentPart <- parseNumberExponentPart
if isNothing fractionPart && isNothing exponentPart then do
let intVal = stripUnderscores intPart
case Int.fromString intVal of
Expand All @@ -587,6 +605,12 @@ token =
Nothing ->
fail $ LexNumberOutOfRange raw

-- Optional fraction part of a number literal: a dot followed by whatever
-- `fractionPartRegex` matches, keeping only the latter (`*>`).
-- NOTE(review): the dot-plus-digits attempt is wrapped in `try`, presumably so
-- that a dot not followed by a fraction is left unconsumed -- confirm against
-- the definition of `try`.
parseNumberFractionPart =
optional (try (charDot *> fractionPartRegex))

-- Optional exponent part of a number literal: the exponent marker
-- (`charExponent`) followed by `parseExponentPart`, keeping only the latter.
-- Unlike the fraction part, this attempt is not wrapped in `try`.
parseNumberExponentPart =
optional (charExponent *> parseExponentPart)

parseExponentPart = ado
sign <- optional parseExponentSign
exponent <- intPartRegex
Expand Down Expand Up @@ -656,7 +680,7 @@ token =
charAny =
satisfy (LexExpected "char") (const true)

toModuleName :: Array String -> Maybe ModuleName
toModuleName :: String -> Maybe ModuleName
toModuleName = case _ of
[] -> Nothing
mn -> Just $ ModuleName $ String.joinWith "." mn
"" -> Nothing
mn -> Just $ ModuleName $ SCU.dropRight 1 mn
37 changes: 22 additions & 15 deletions src/PureScript/CST/Parser.purs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import Data.Tuple (Tuple(..), uncurry)
import Prim as P
import PureScript.CST.Errors (ParseError(..), RecoveredError(..))
import PureScript.CST.Layout (currentIndent)
import PureScript.CST.Parser.Monad (Parser, Recovery(..), eof, lookAhead, many, optional, recover, take, try)
import PureScript.CST.Parser.Monad (Parser, eof, lookAhead, many, optional, recover, take, try)
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, layoutStack)
import PureScript.CST.TokenStream as TokenStream
import PureScript.CST.Types (Binder(..), ClassFundep(..), DataCtor(..), DataMembers(..), Declaration(..), Delimited, DoStatement(..), Export(..), Expr(..), Fixity(..), FixityOp(..), Foreign(..), Guarded(..), GuardedExpr(..), Ident(..), Import(..), ImportDecl(..), Instance(..), InstanceBinding(..), IntValue(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), ModuleHeader(..), ModuleName(..), Name(..), OneOrDelimited(..), Operator(..), PatternGuard(..), Proper(..), QualifiedName(..), RecordLabeled(..), RecordUpdate(..), Role(..), Row(..), Separated(..), SourceToken, Token(..), Type(..), TypeVarBinding(..), Where(..), Wrapped(..))
Expand Down Expand Up @@ -1146,30 +1146,37 @@ reservedKeywords = Set.fromFoldable
]

recoverIndent :: forall a. (RecoveredError -> a) -> Parser a -> Parser a
recoverIndent mkNode = recover \{ position, error } ->
map (\tokens -> mkNode (RecoveredError { position, error, tokens })) <<< recoverTokensWhile \tok indent ->
case tok.value of
TokLayoutEnd col -> col > indent
TokLayoutSep col -> col > indent
_ -> true

recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Recovery (Array SourceToken)
-- Recovery handler passed to `recover`: given the failure's position and error
-- plus the token stream, it gathers tokens with `recoverTokensWhile` and wraps
-- them in a `RecoveredError` node built by `mkNode`.
recoverIndent mkNode = recover \{ position, error } stream -> do
let
-- `tokens` are the tokens accepted by the predicate; `newStream` is the
-- stream returned alongside them by `recoverTokensWhile`.
Tuple tokens newStream = recoverTokensWhile
-- Layout end/separator tokens are accepted only while their column is
-- greater than `indent`; every other token is always accepted.
( \tok indent -> case tok.value of
TokLayoutEnd col -> col > indent
TokLayoutSep col -> col > indent
_ -> true
)
stream
-- No tokens were gathered: return Nothing rather than an empty error node.
if Array.null tokens then
Nothing
else
Just (Tuple (mkNode (RecoveredError { position, error, tokens })) newStream)

recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Tuple (Array SourceToken) TokenStream
recoverTokensWhile p initStream = go [] initStream
where
indent :: Int
indent = maybe 0 _.column $ currentIndent $ layoutStack initStream

go :: Array SourceToken -> TokenStream -> Recovery (Array SourceToken)
go :: Array SourceToken -> TokenStream -> Tuple (Array SourceToken) TokenStream
go acc stream = case TokenStream.step stream of
TokenError errPos _ _ _ ->
Recovery acc errPos stream
TokenEOF eofPos _ ->
Recovery acc eofPos stream
TokenError _ _ _ _ ->
Tuple acc stream
TokenEOF _ _ ->
Tuple acc stream
TokenCons tok _ nextStream _ ->
if p tok indent then
go (Array.snoc acc tok) nextStream
else
Recovery acc tok.range.start stream
Tuple acc stream

recoverDecl :: RecoveryStrategy Declaration
recoverDecl = recoverIndent DeclError
Expand Down
Loading

0 comments on commit c931480

Please sign in to comment.