From 67323ef7038ee6514c1912684398e3da5a3fd207 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Tue, 27 Feb 2024 17:09:00 -0800 Subject: [PATCH] Add shebang support (#55) Fixes #54 --- package.json | 4 +- src/PureScript/CST.purs | 21 ++++----- src/PureScript/CST/Lexer.purs | 33 +++++++++++++- test/Main.purs | 81 ++++++++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 15 deletions(-) diff --git a/package.json b/package.json index 40df845..eb2a5e5 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ }, "devDependencies": { "purescript": "^0.15.0", - "purs-tidy": "^0.8.0", - "spago": "^0.20.9" + "purs-tidy": "^0.10.0", + "spago": "^0.21.0" } } diff --git a/src/PureScript/CST.purs b/src/PureScript/CST.purs index d2b4fbd..1c9ca1a 100644 --- a/src/PureScript/CST.purs +++ b/src/PureScript/CST.purs @@ -23,13 +23,14 @@ import Data.Lazy as Z import Data.Maybe (Maybe(..)) import Data.Newtype (unwrap) import Data.Tuple (Tuple(..)) -import PureScript.CST.Lexer (lex) +import PureScript.CST.Lexer (lex, lexModule) import PureScript.CST.Parser (Recovered, parseModuleBody, parseModuleHeader) import PureScript.CST.Parser as Parser import PureScript.CST.Parser.Monad (Parser, ParserResult(..), PositionedError, fromParserResult, initialParserState, runParser, runParser') import PureScript.CST.Print as Print import PureScript.CST.Range (class TokensOf, tokensOf) import PureScript.CST.Range.TokenList as TokenList +import PureScript.CST.TokenStream (TokenStream) import PureScript.CST.Types (Binder, Declaration, Expr, ImportDecl, Module(..), ModuleHeader, Type) import Unsafe.Coerce (unsafeCoerce) @@ -54,26 +55,26 @@ toRecoveredParserResult = case _ of toRecovered :: forall f. f Void -> Recovered f toRecovered = unsafeCoerce -runRecoveredParser :: forall a. Parser (Recovered a) -> String -> RecoveredParserResult a -runRecoveredParser p = toRecoveredParserResult <<< flip runParser p <<< lex +runRecoveredParser :: forall a. Parser (Recovered a) -> TokenStream -> RecoveredParserResult a +runRecoveredParser p = toRecoveredParserResult <<< flip runParser p parseModule :: String -> RecoveredParserResult Module -parseModule = runRecoveredParser Parser.parseModule +parseModule = runRecoveredParser Parser.parseModule <<< lexModule parseImportDecl :: String -> RecoveredParserResult ImportDecl -parseImportDecl = runRecoveredParser Parser.parseImportDecl +parseImportDecl = runRecoveredParser Parser.parseImportDecl <<< lex parseDecl :: String -> RecoveredParserResult Declaration -parseDecl = runRecoveredParser Parser.parseDecl +parseDecl = runRecoveredParser Parser.parseDecl <<< lex parseExpr :: String -> RecoveredParserResult Expr -parseExpr = runRecoveredParser Parser.parseExpr +parseExpr = runRecoveredParser Parser.parseExpr <<< lex parseType :: String -> RecoveredParserResult Type -parseType = runRecoveredParser Parser.parseType +parseType = runRecoveredParser Parser.parseType <<< lex parseBinder :: String -> RecoveredParserResult Binder -parseBinder = runRecoveredParser Parser.parseBinder +parseBinder = runRecoveredParser Parser.parseBinder <<< lex newtype PartialModule e = PartialModule { header :: ModuleHeader e @@ -82,7 +83,7 @@ newtype PartialModule e = PartialModule parsePartialModule :: String -> RecoveredParserResult PartialModule parsePartialModule src = - toRecoveredParserResult $ case runParser' (initialParserState (lex src)) parseModuleHeader of + toRecoveredParserResult $ case runParser' (initialParserState (lexModule src)) parseModuleHeader of ParseSucc header state -> do let res = PartialModule diff --git a/src/PureScript/CST/Lexer.purs b/src/PureScript/CST/Lexer.purs index c0ebbe2..2567837 100644 --- a/src/PureScript/CST/Lexer.purs +++ b/src/PureScript/CST/Lexer.purs @@ -1,5 +1,6 @@ module PureScript.CST.Lexer ( lex + , lexModule , lexWithState , lexToken ) where @@ -9,6 +10,7 @@ import Prelude import Control.Alt (class Alt, alt) import Control.Monad.ST as ST import Control.Monad.ST.Ref as STRef +import Data.Array as Array import Data.Array.NonEmpty as NonEmptyArray import Data.Array.ST as STArray import Data.Char as Char @@ -193,15 +195,23 @@ many (Lex k) = Lex \str -> ST.run do fail :: forall a. ParseError -> Lex LexError a fail = Lex <<< LexFail <<< const +-- | Lexes according to root layout rules and standard language comments. lex :: String -> TokenStream lex = lexWithState (Tuple { line: 0, column: 0 } LytRoot : Nil) { line: 0, column: 0 } +-- | Lexes according to root layout rules as well as supporting leading shebang comments. +lexModule :: String -> TokenStream +lexModule = lexWithState' leadingModuleComments (Tuple { line: 0, column: 0 } LytRoot : Nil) { line: 0, column: 0 } + lexWithState :: LayoutStack -> SourcePos -> String -> TokenStream -lexWithState = init +lexWithState = lexWithState' leadingComments + +lexWithState' :: Lex LexError (Array (Comment LineFeed)) -> LayoutStack -> SourcePos -> String -> TokenStream +lexWithState' lexLeadingComments = init where init :: LayoutStack -> SourcePos -> String -> TokenStream init initStack initPos str = TokenStream $ Lazy.defer \_ -> do - let (Lex k) = leadingComments + let (Lex k) = lexLeadingComments case k str of LexFail _ _ -> unsafeCrashWith "Leading comments can't fail." @@ -358,6 +368,15 @@ bumpComment pos@{ line, column } = case _ of qualLength :: Maybe ModuleName -> Int qualLength = maybe 0 (add 1 <<< String.length <<< unwrap) +leadingModuleComments :: Lex LexError (Array (Comment LineFeed)) +leadingModuleComments = append <$> (leadingShebangs <|> pure []) <*> leadingComments + +leadingShebangs :: Lex LexError (Array (Comment LineFeed)) +leadingShebangs = ado + head <- shebangComment + tail <- many (try (Tuple <$> oneLineComment <*> shebangComment)) + in Array.cons (Comment head) (foldMap (\(Tuple a b) -> [ a, Comment b ]) tail) + leadingComments :: Lex LexError (Array (Comment LineFeed)) leadingComments = many do Comment <$> comment @@ -374,6 +393,9 @@ comment = regex (LexExpected "block comment") """\{-(-(?!\})|[^-]+)*(-\}|$)""" <|> regex (LexExpected "line comment") """--[^\r\n]*""" +shebangComment :: Lex LexError String +shebangComment = regex (LexExpected "shebang") """#![^\r\n]*""" + spaceComment :: Lex LexError Int spaceComment = SCU.length <$> regex (LexExpected "spaces") " +" @@ -382,6 +404,13 @@ lineComment = (Line LF <<< String.length) <$> regex (LexExpected "newline") "\n+" <|> (Line CRLF <<< (_ / 2) <<< String.length) <$> regex (LexExpected "newline") "(?:\r\n)+" +oneLineComment :: Lex LexError (Comment LineFeed) +oneLineComment = do + line <- lineComment + case line of + Line _ 1 -> pure line + _ -> fail $ LexExpected "one newline" "multiple newlines" + token :: Lex LexError Token token = parseHole diff --git a/test/Main.purs b/test/Main.purs index bacb8b9..aecf7e8 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -13,7 +13,7 @@ import Effect (Effect) import Effect.Class.Console as Console import Node.Process as Process import PureScript.CST (RecoveredParserResult(..), parseBinder, parseDecl, parseExpr, parseModule, parseType) -import PureScript.CST.Types (AppSpine(..), Binder, Declaration(..), DoStatement(..), Expr(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), Name(..), Prefixed(..), RecordLabeled(..), Separated(..), Token(..), Type(..), TypeVarBinding(..), Wrapped(..)) +import PureScript.CST.Types (AppSpine(..), Binder, Comment(..), Declaration(..), DoStatement(..), Expr(..), Label(..), Labeled(..), LetBinding(..), LineFeed(..), Module(..), ModuleBody(..), ModuleHeader(..), Name(..), Prefixed(..), RecordLabeled(..), Separated(..), Token(..), Type(..), TypeVarBinding(..), Wrapped(..)) class ParseFor f where parseFor :: String -> RecoveredParserResult f @@ -291,3 +291,82 @@ main = do true _ -> false + + assertParse "No module shebang" + """ + -- no shebang + module Test where + """ + case _ of + ParseSucceeded (Module { header: ModuleHeader { keyword } }) + | [ Comment "-- no shebang" + , Line LF 1 + ] <- keyword.leadingComments -> + true + _ -> + false + + assertParse "Module shebang" + """ + #! shebang + module Test where + """ + case _ of + ParseSucceeded (Module { header: ModuleHeader { keyword } }) + | [ Comment "#! shebang" + , Line LF 1 + ] <- keyword.leadingComments -> + true + _ -> + false + + assertParse "Multiple module shebangs" + """ + #! shebang 1 + #! shebang 2 + #! shebang 3 + -- no shebang + module Test where + """ + case _ of + ParseSucceeded (Module { header: ModuleHeader { keyword } }) + | [ Comment "#! shebang 1" + , Line LF 1 + , Comment "#! shebang 2" + , Line LF 1 + , Comment "#! shebang 3" + , Line LF 1 + , Comment "-- no shebang" + , Line LF 1 + ] <- keyword.leadingComments -> + true + _ -> + false + + assertParse "Multiple lines between shebangs should fail" + """ + #! shebang 1 + + #! shebang 2 + #! shebang 3 + module Test where + """ + case _ of + (ParseFailed _ :: RecoveredParserResult Module) -> + true + _ -> + false + + assertParse "Comments between shebangs should fail" + """ + #! shebang 1 + -- no shebang + #! shebang 2 + #! shebang 3 + module Test where + """ + case _ of + (ParseFailed _ :: RecoveredParserResult Module) -> + true + _ -> + false