From ca4ad3b4d6d39f6331996ca498b55c5fa707cd20 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 18 Dec 2024 12:23:35 -0800
Subject: [PATCH] HTML reader: don't canonicalize data: URIs.

It can be very expensive to call network-uri's URI parser on these.

See #10075.
---
 src/Text/Pandoc/Readers/HTML.hs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b1e23ef9e00b..29eef27900cc 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1222,8 +1222,10 @@ htmlTag f = try $ do
 
 -- | Adjusts a url according to the document's base URL.
 canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text
-canonicalizeUrl url = do
-  mbBaseHref <- baseHref <$> getState
-  return $ case (parseURIReference (T.unpack url), mbBaseHref) of
-                (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
-                _ -> url
+canonicalizeUrl url
+  | "data:" `T.isPrefixOf` url = return url
+  | otherwise = do
+     mbBaseHref <- baseHref <$> getState
+     return $ case (parseURIReference (T.unpack url), mbBaseHref) of
+                   (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
+                   _ -> url