Skip to content

Commit

Permalink
Markdown reader: auto-close unclosed divs.
Browse files Browse the repository at this point in the history
This applies to both fenced and HTML-ish varieties.
Otherwise we face an exponential performance problem with
backtracking.

This also accords with the behavior of the `fenced_divs`
extension in commonmark.

A warning is issued when a div is implicitly closed.

Closes #9635.
  • Loading branch information
jgm committed Apr 11, 2024
1 parent 820e371 commit a0c1bde
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
26 changes: 13 additions & 13 deletions src/Text/Pandoc/Readers/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2102,30 +2102,29 @@ divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = do
guardEnabled Ext_native_divs
try $ do
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
openpos <- getPosition
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("div" :: Text) [])
-- we set stateInHtmlBlock so that closing tags that can be either block
-- or inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
optional blanklines
contents <- mconcat <$>
many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text)))
>> block)
closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
if closed
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
else -- avoid backtracking
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
void (htmlTag (~== TagClose ("div" :: Text))) <|>
(getPosition >>= report . UnclosedDiv openpos)
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
return $ B.divWith (ident, classes, keyvals) <$> contents

divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
divFenced = do
guardEnabled Ext_fenced_divs
try $ do
openpos <- getPosition
string ":::"
skipMany (char ':')
skipMany spaceChar
Expand All @@ -2135,7 +2134,8 @@ divFenced = do
blankline
updateState $ \st ->
st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
bs <- mconcat <$> manyTill block divFenceEnd
bs <- mconcat <$> many (notFollowedBy divFenceEnd >> block)
divFenceEnd <|> (getPosition >>= report . UnclosedDiv openpos)
updateState $ \st ->
st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
return $ B.divWith attribs <$> bs
Expand Down
2 changes: 1 addition & 1 deletion test/Tests/Readers/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ tests = [ testGroup "inline code"
<>
codeBlockWith ("",["haskell"],[]) "b"
<>
rawBlock "html" "<div>\n\n"
divWith ("",[],[]) mempty
]
-- the round-trip properties frequently fail
-- , testGroup "round trip"
Expand Down
16 changes: 16 additions & 0 deletions test/command/9635.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
```
% pandoc
> ::: {.fence}
> that is
> not closed
okay
^D
2> [WARNING] Div at _chunk line 1 column 1 unclosed at _chunk line 5 column 1, closing implicitly.
<blockquote>
<div class="fence">
<p>that is not closed</p>
</div>
</blockquote>
<p>okay</p>
```

0 comments on commit a0c1bde

Please sign in to comment.