Skip to content

Commit

Permalink
fix node issue 51593 (#583)
Browse files Browse the repository at this point in the history
* fix node issue 51593

* linting
  • Loading branch information
lemire authored Jan 29, 2024
1 parent 7ed703c commit f69e6c3
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 18 deletions.
6 changes: 4 additions & 2 deletions include/ada/url_aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,12 @@ struct url_aggregator : url_base {
}

/**
* Return true on success.
* Return true on success. The 'in_place' parameter indicates whether the
* string_view input points into the buffer. When in_place is false,
* we must nearly always update the buffer.
* @see https://url.spec.whatwg.org/#concept-ipv4-parser
*/
[[nodiscard]] bool parse_ipv4(std::string_view input);
[[nodiscard]] bool parse_ipv4(std::string_view input, bool in_place);

/**
* Return true on success.
Expand Down
12 changes: 7 additions & 5 deletions src/url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace ada {

bool url::parse_opaque_host(std::string_view input) {
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
if (std::any_of(input.begin(), input.end(),
ada::unicode::is_forbidden_host_code_point)) {
return is_valid = false;
Expand All @@ -23,7 +23,7 @@ bool url::parse_opaque_host(std::string_view input) {
}

bool url::parse_ipv4(std::string_view input) {
ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]");
ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]");
if (input.back() == '.') {
input.remove_suffix(1);
}
Expand Down Expand Up @@ -98,7 +98,7 @@ bool url::parse_ipv4(std::string_view input) {
}

bool url::parse_ipv6(std::string_view input) {
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");

if (input.empty()) {
return is_valid = false;
Expand Down Expand Up @@ -422,7 +422,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) {
}

ada_really_inline bool url::parse_host(std::string_view input) {
ada_log("parse_host ", input, "[", input.size(), " bytes]");
ada_log("parse_host ", input, " [", input.size(), " bytes]");
if (input.empty()) {
return is_valid = false;
} // technically unnecessary.
Expand Down Expand Up @@ -474,6 +474,8 @@ ada_really_inline bool url::parse_host(std::string_view input) {
ada_log("parse_host to_ascii returns false");
return is_valid = false;
}
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
" bytes]");

if (std::any_of(host.value().begin(), host.value().end(),
ada::unicode::is_forbidden_domain_code_point)) {
Expand All @@ -484,7 +486,7 @@ ada_really_inline bool url::parse_host(std::string_view input) {
// If asciiDomain ends in a number, then return the result of IPv4 parsing
// asciiDomain.
if (checkers::is_ipv4(host.value())) {
ada_log("parse_host got ipv4", *host);
ada_log("parse_host got ipv4 ", *host);
return parse_ipv4(host.value());
}

Expand Down
38 changes: 27 additions & 11 deletions src/url_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ void url_aggregator::set_hash(const std::string_view input) {

bool url_aggregator::set_href(const std::string_view input) {
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
ada_log("url_aggregator::set_href ", input, "[", input.size(), " bytes]");
ada_log("url_aggregator::set_href ", input, " [", input.size(), " bytes]");
ada::result<url_aggregator> out = ada::parse<url_aggregator>(input);
ada_log("url_aggregator::set_href, success :", out.has_value());

Expand All @@ -425,7 +425,8 @@ bool url_aggregator::set_href(const std::string_view input) {
}

ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
ada_log("url_aggregator:parse_host ", input, "[", input.size(), " bytes]");
ada_log("url_aggregator:parse_host \"", input, "\" [", input.size(),
" bytes]");
ADA_ASSERT_TRUE(validate());
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
if (input.empty()) {
Expand Down Expand Up @@ -475,7 +476,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
update_base_hostname(input);
if (checkers::is_ipv4(get_hostname())) {
ada_log("parse_host fast path ipv4");
return parse_ipv4(get_hostname());
return parse_ipv4(get_hostname(), true);
}
ada_log("parse_host fast path ", get_hostname());
return true;
Expand All @@ -491,6 +492,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
ada_log("parse_host to_ascii returns false");
return is_valid = false;
}
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
" bytes]");

if (std::any_of(host.value().begin(), host.value().end(),
ada::unicode::is_forbidden_domain_code_point)) {
Expand All @@ -500,8 +503,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
// If asciiDomain ends in a number, then return the result of IPv4 parsing
// asciiDomain.
if (checkers::is_ipv4(host.value())) {
ada_log("parse_host got ipv4", *host);
return parse_ipv4(host.value());
ada_log("parse_host got ipv4 ", *host);
return parse_ipv4(host.value(), false);
}

update_base_hostname(host.value());
Expand Down Expand Up @@ -754,7 +757,7 @@ bool url_aggregator::set_hostname(const std::string_view input) {
}

[[nodiscard]] std::string ada::url_aggregator::to_string() const {
ada_log("url_aggregator::to_string buffer:", buffer, "[", buffer.size(),
ada_log("url_aggregator::to_string buffer:", buffer, " [", buffer.size(),
" bytes]");
if (!is_valid) {
return "null";
Expand Down Expand Up @@ -853,8 +856,8 @@ bool url_aggregator::set_hostname(const std::string_view input) {
return checkers::verify_dns_length(get_hostname());
}

bool url_aggregator::parse_ipv4(std::string_view input) {
ada_log("parse_ipv4 ", input, "[", input.size(),
bool url_aggregator::parse_ipv4(std::string_view input, bool in_place) {
ada_log("parse_ipv4 ", input, " [", input.size(),
" bytes], overlaps with buffer: ",
helpers::overlaps(input, buffer) ? "yes" : "no");
ADA_ASSERT_TRUE(validate());
Expand All @@ -878,20 +881,25 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
} else {
std::from_chars_result r;
if (is_hex) {
ada_log("parse_ipv4 trying to parse hex number");
r = std::from_chars(input.data() + 2, input.data() + input.size(),
segment_result, 16);
} else if ((input.length() >= 2) && input[0] == '0' &&
checkers::is_digit(input[1])) {
ada_log("parse_ipv4 trying to parse octal number");
r = std::from_chars(input.data() + 1, input.data() + input.size(),
segment_result, 8);
} else {
ada_log("parse_ipv4 trying to parse decimal number");
pure_decimal_count++;
r = std::from_chars(input.data(), input.data() + input.size(),
segment_result, 10);
}
if (r.ec != std::errc()) {
ada_log("parse_ipv4 parsing failed");
return is_valid = false;
}
ada_log("parse_ipv4 parsed ", segment_result);
input.remove_prefix(r.ptr - input.data());
}
if (input.empty()) {
Expand All @@ -916,17 +924,22 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
}
}
if ((digit_count != 4) || (!input.empty())) {
ada_log("parse_ipv4 found invalid (more than 4 numbers or empty) ");
return is_valid = false;
}
final:
ada_log("url_aggregator::parse_ipv4 completed ", get_href(),
" host: ", get_host());

// We could also check r.ptr to see where the parsing ended.
if (pure_decimal_count == 4 && !trailing_dot) {
if (in_place && pure_decimal_count == 4 && !trailing_dot) {
ada_log(
"url_aggregator::parse_ipv4 completed and was already correct in the "
"buffer");
// The original input was already all decimal and we validated it. So we
// don't need to do anything.
} else {
ada_log("url_aggregator::parse_ipv4 completed and we need to update it");
// Optimization opportunity: Get rid of unnecessary string return in ipv4
// serializer.
// TODO: This is likely a bug because it goes back update_base_hostname, not
Expand All @@ -940,8 +953,11 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
}

bool url_aggregator::parse_ipv6(std::string_view input) {
// TODO: Implement in_place optimization: we know that input points
// in the buffer, so we can just check whether the buffer is already
// well formatted.
// TODO: Find a way to merge parse_ipv6 with url.cpp implementation.
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
ADA_ASSERT_TRUE(validate());
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
if (input.empty()) {
Expand Down Expand Up @@ -1175,7 +1191,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) {
}

bool url_aggregator::parse_opaque_host(std::string_view input) {
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
ADA_ASSERT_TRUE(validate());
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
if (std::any_of(input.begin(), input.end(),
Expand Down
7 changes: 7 additions & 0 deletions tests/basic_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,3 +402,10 @@ TYPED_TEST(basic_tests, nodejs_51514) {
auto out = ada::parse<TypeParam>("http://1.1.1.256");
ASSERT_FALSE(out);
}
// https://github.com/nodejs/node/issues/51593
// Regression test: the host in the input is prefixed with U+200B (zero width
// space). Parsing must succeed, and the serialized href must contain only the
// dotted-decimal IPv4 address as its host. TypeParam is the URL type under
// test.
TYPED_TEST(basic_tests, nodejs_51593) {
// Host is U+200B immediately followed by 123.123.123.123.
auto out = ada::parse<TypeParam>("http://\u200b123.123.123.123");
ASSERT_TRUE(out);
// The U+200B code point must not appear in the resulting href.
ASSERT_EQ(out->get_href(), "http://123.123.123.123/");
SUCCEED();
}

0 comments on commit f69e6c3

Please sign in to comment.