Skip to content

Commit

Permalink
MediaWiki reader: Fix parsing of col/rowspan.
Browse files Browse the repository at this point in the history
Closes #6992.
  • Loading branch information
jgm committed Oct 9, 2024
1 parent 8884214 commit 0b51580
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 33 deletions.
59 changes: 27 additions & 32 deletions src/Text/Pandoc/Readers/MediaWiki.hs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ table = do
caption <- option mempty tableCaption
optional rowsep
hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!')
(cellspecs',hdr) <- unzip <$> tableRow
let widths = map (tableWidth *) cellspecs'
(cellwidths,hdr) <- unzip <$> tableRow
let widths = map (tableWidth *) (concat cellwidths)
let restwidth = tableWidth - sum widths
let zerocols = length $ filter (==0.0) widths
let defaultwidth = if zerocols == 0 || zerocols == length widths
Expand All @@ -235,10 +235,10 @@ table = do
(TableFoot nullAttr [])

calculateAlignments :: [Cell] -> [Alignment]
calculateAlignments = map cellAligns
calculateAlignments = concatMap cellAligns
where
cellAligns :: Cell -> Alignment
cellAligns (Cell _ align _ _ _) = align
cellAligns :: Cell -> [Alignment]
cellAligns (Cell _ align _ (ColSpan colspan) _) = replicate colspan align

parseAttrs :: PandocMonad m => MWParser m [(Text,Text)]
parseAttrs = many1 parseAttr
Expand Down Expand Up @@ -269,22 +269,16 @@ rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
<* skipMany htmlComment
<* blanklines

cellsep :: PandocMonad m => MWParser m ()
cellsep :: PandocMonad m => MWParser m [(Text,Text)]
cellsep = try $ do
col <- sourceColumn <$> getPosition
skipSpaces
let pipeSep = do
char '|'
notFollowedBy (oneOf "-}+")
if col == 1
then optional (char '|')
else void (char '|')
let exclSep = do
char '!'
if col == 1
then optional (char '!')
else void (char '!')
pipeSep <|> exclSep
skipMany spaceChar
c <- oneOf "|!"
when (col > 1) $ void $ char c
notFollowedBy (oneOf "-}+")
attribs <- option [] (parseAttrs <* skipMany spaceChar <* char '|')
skipMany spaceChar
pure attribs

tableCaption :: PandocMonad m => MWParser m Inlines
tableCaption = try $ do
Expand All @@ -294,20 +288,17 @@ tableCaption = try $ do
sym "|+"
optional (try $ parseAttrs *> skipSpaces *> char '|' *> blanklines)
trimInlines . mconcat <$>
many (notFollowedBy (cellsep <|> rowsep) *> inline)
many (notFollowedBy (void cellsep <|> rowsep) *> inline)

tableRow :: PandocMonad m => MWParser m [(Double, Cell)]
tableRow :: PandocMonad m => MWParser m [([Double], Cell)]
tableRow = try $ skipMany htmlComment *> many tableCell

tableCell :: PandocMonad m => MWParser m (Double, Cell)
-- multiple widths because cell might have colspan
tableCell :: PandocMonad m => MWParser m ([Double], Cell)
tableCell = try $ do
cellsep
skipMany spaceChar
attribs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
notFollowedBy (char '|')
skipMany spaceChar
attribs <- cellsep
pos' <- getPosition
ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
ls <- T.concat <$> many (notFollowedBy (void cellsep <|> rowsep <|> tableEnd) *>
((snd <$> withRaw table) <|> countChar 1 anyChar))
bs <- parseFromString (do setPosition pos'
mconcat <$> many block) ls
Expand All @@ -316,18 +307,22 @@ tableCell = try $ do
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
let width = case lookup "width" attribs of
Just xs -> fromMaybe 0.0 $ parseWidth xs
Nothing -> 0.0
let rowspan = RowSpan . fromMaybe 1 $
safeRead =<< lookup "rowspan" attribs
let colspan = ColSpan . fromMaybe 1 $
safeRead =<< lookup "colspan" attribs
let ColSpan rawcolspan = colspan
let handledAttribs = ["align", "colspan", "rowspan"]
attribs' = [ (k, v) | (k, v) <- attribs
, k `notElem` handledAttribs
]
return (width, B.cellWith (toAttr attribs') align rowspan colspan bs)
let widths = case lookup "width" attribs of
Just xs -> maybe (replicate rawcolspan 0.0)
(\w -> replicate rawcolspan
(w / fromIntegral rawcolspan))
(parseWidth xs)
Nothing -> replicate rawcolspan 0.0
return (widths, B.cellWith (toAttr attribs') align rowspan colspan bs)

parseWidth :: Text -> Maybe Double
parseWidth s =
Expand Down
2 changes: 1 addition & 1 deletion test/command/2649.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
% pandoc -f mediawiki -t html5
{| border="4" cellspacing="2" cellpadding="0" WIDTH="100%"
|-----
| peildatum Simbase || november 2005 || colspan=2 | '''uitslagen Flohrgambiet''' ||
| peildatum Simbase || november 2005 || colspan=2 | '''uitslagen Flohrgambiet'''
|-----
| totaal aantal partijen Simbase || 7.316.773
| wit wint || 53%
Expand Down
76 changes: 76 additions & 0 deletions test/command/6992.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
```
% pandoc -f mediawiki -t native
{| class="wikitable"
!colspan=4 | template request
|-
| mode || No || String || MUST be "template" or omitted
|}
^D
[ Table
( "" , [] , [] )
(Caption Nothing [])
[ ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
]
(TableHead
( "" , [] , [] )
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 4)
[ Para [ Str "template" , Space , Str "request" ] ]
]
])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
[]
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "mode" ] ]
, Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "No" ] ]
, Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "String" ] ]
, Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Para
[ Str "MUST"
, Space
, Str "be"
, Space
, Str "\"template\""
, Space
, Str "or"
, Space
, Str "omitted"
]
]
]
]
]
(TableFoot ( "" , [] , [] ) [])
]
```

0 comments on commit 0b51580

Please sign in to comment.