From a0c1bdeb8b93e2661132a714668dff5f32ffb167 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Apr 2024 17:18:05 -0700 Subject: [PATCH] Markdown reader: auto-close unclosed divs. This applies to both fenced and HTML-ish varieties. Otherwise we face an exponential performance problem with backtracking. This also accords with the behavior of the `fenced_divs` extension in commonmark. A warning is issued when a div is implicitly closed. Closes #9635. --- src/Text/Pandoc/Readers/Markdown.hs | 26 +++++++++++++------------- test/Tests/Readers/Markdown.hs | 2 +- test/command/9635.md | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 test/command/9635.md diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 206e5bc3cc12..a78b1062ad3c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2102,30 +2102,29 @@ divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = do guardEnabled Ext_native_divs try $ do - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) + openpos <- getPosition + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block -- or inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } - bls <- option "" (blankline >> option "" blanklines) + optional blanklines contents <- mconcat <$> many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) - closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) - if closed - then do - updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] T.words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> 
contents - else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + void (htmlTag (~== TagClose ("div" :: Text))) <|> + (getPosition >>= report . UnclosedDiv openpos) + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] T.words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } + return $ B.divWith (ident, classes, keyvals) <$> contents divFenced :: PandocMonad m => MarkdownParser m (F Blocks) divFenced = do guardEnabled Ext_fenced_divs try $ do + openpos <- getPosition string ":::" skipMany (char ':') skipMany spaceChar @@ -2135,7 +2134,8 @@ divFenced = do blankline updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } - bs <- mconcat <$> manyTill block divFenceEnd + bs <- mconcat <$> many (notFollowedBy divFenceEnd >> block) + divFenceEnd <|> (getPosition >>= report . UnclosedDiv openpos) updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } return $ B.divWith attribs <$> bs diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 001ca02077c7..5f67e111b416 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -449,7 +449,7 @@ tests = [ testGroup "inline code" <> codeBlockWith ("",["haskell"],[]) "b" <> - rawBlock "html" "
<div>\n\n" + divWith ("",[],[]) mempty ] -- the round-trip properties frequently fail -- , testGroup "round trip" diff --git a/test/command/9635.md b/test/command/9635.md new file mode 100644 index 000000000000..088a5b6d294e --- /dev/null +++ b/test/command/9635.md @@ -0,0 +1,16 @@ +``` +% pandoc +> ::: {.fence} +> that is +> not closed + +okay +^D +2> [WARNING] Div at _chunk line 1 column 1 unclosed at _chunk line 5 column 1, closing implicitly. +<blockquote>
+<div class="fence">
+<p>that is not closed</p>
+</div>
+</blockquote>
+<p>okay</p>
+```