Skip to content

Commit

Permalink
vaev-markup: parse inbody elements closing <p> scope
Browse files Browse the repository at this point in the history
  • Loading branch information
pauloamed committed Jan 8, 2025
1 parent 8512335 commit 06a6e22
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 1 deletion.
40 changes: 39 additions & 1 deletion src/web/vaev-markup/html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4310,6 +4310,28 @@ void HtmlParser::_handleAfterHead(HtmlToken const &t) {
// 13.2.6.4.7 MARK: The "in body" insertion mode
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
void HtmlParser::_handleInBody(HtmlToken const &t) {
auto closePElementIfInButtonScope = [&]() {
// If the stack of open elements has a p element in button scope, then close a p element.
if (_hasElementInButtonScope(Html::P)) {

// Generate implied end tags, except for p elements.
generateImpliedEndTags(*this, Html::P);

// If the current node is not a p element, then this is a parse error.
if (last(_openElements)->tagName != Html::P)
_raise();

// Pop elements from the stack of open elements until a p element has been popped from the stack.
while (_openElements.len() > 0) {
auto lastEl = last(_openElements);

_openElements.popBack();
if (lastEl->tagName == Html::P)
break;
}
}
};

// A character token that is U+0000 NULL
if (t.type == HtmlToken::CHARACTER and t.rune == '\0') {
_raise();
Expand Down Expand Up @@ -4386,12 +4408,28 @@ void HtmlParser::_handleInBody(HtmlToken const &t) {

}

// TODO: A start tag whose tag name is one of:
// A start tag whose tag name is one of:
// "address", "article", "aside", "blockquote", "center",
// "details", "dialog", "dir", "div", "dl", "fieldset",
// "figcaption", "figure", "footer", "header", "hgroup",
// "main", "menu", "nav", "ol", "p", "search", "section",
// "summary", "ul"
else if (
t.type == HtmlToken::START_TAG and
(t.name == "address" or t.name == "article" or t.name == "aside" or t.name == "blockquote" or
t.name == "center" or t.name == "details" or t.name == "dialog" or t.name == "dir" or
t.name == "div" or t.name == "dl" or t.name == "fieldset" or t.name == "figcaption" or
t.name == "figure" or t.name == "footer" or t.name == "header" or t.name == "hgroup" or
t.name == "main" or t.name == "menu" or t.name == "nav" or t.name == "ol" or
t.name == "p" or t.name == "search" or t.name == "section" or t.name == "summary" or t.name == "ul"
)
) {
// If the stack of open elements has a p element in button scope, then close a p element.
closePElementIfInButtonScope();

// Insert an HTML element for the token.
insertHtmlElement(*this, t);
}

// TODO: A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"

Expand Down
20 changes: 20 additions & 0 deletions src/web/vaev-markup/tests/test-html-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,24 @@ test$("parse-p-after-comment") {
return Ok();
}

test$("parse-not-nested-p-and-els-inbody") {
auto dom = makeStrong<Markup::Document>(Mime::Url());
Markup::HtmlParser parser{dom};

parser.write("<div>b</div><p>a<div>b</div><p>a<p>a");

expect$(dom->nodeType() == NodeType::DOCUMENT);
expect$(dom->hasChildren());

auto html = try$(dom->firstChild().cast<Element>());
expect$(html->tagName == Html::HTML);
expect$(html->children().len() == 2);

auto body = try$(html->firstChild()->nextSibling().cast<Element>());
expect$(body->tagName == Html::BODY);
expect$(body->children().len() == 3);

return Ok();
}

} // namespace Vaev::Markup::Tests

0 comments on commit 06a6e22

Please sign in to comment.