Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

have the C++ compiler generate the tables. #556

Merged
merged 9 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/lint_and_format_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ jobs:

- name: Install clang-format
run: |
sudo apt update && sudo apt install clang-format-15 -y
sudo ln -sf /usr/bin/clang-format-15 /usr/bin/clang-format
sudo apt update && sudo apt install clang-format-14 -y
sudo ln -sf /usr/bin/clang-format-14 /usr/bin/clang-format

- name: Build with Lint and Format Check
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/visual_studio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,4 @@ jobs:
run: cmake --build build --config "${{matrix.config}}" --verbose
- name: Run tests
working-directory: build
run: ctest -C "${{matrix.config}}" --output-on-failure
run: ctest -C "${{matrix.config}}" --output-on-failure
54 changes: 21 additions & 33 deletions src/checkers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,39 +57,27 @@ ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept {

// for use with path_signature, we include all characters that need percent
// encoding.
static constexpr uint8_t path_signature_table[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
static_assert(path_signature_table[uint8_t('?')] == 1);
static_assert(path_signature_table[uint8_t('`')] == 1);
static_assert(path_signature_table[uint8_t('{')] == 1);
static_assert(path_signature_table[uint8_t('}')] == 1);
//
static_assert(path_signature_table[uint8_t(' ')] == 1);
static_assert(path_signature_table[uint8_t('?')] == 1);
static_assert(path_signature_table[uint8_t('"')] == 1);
static_assert(path_signature_table[uint8_t('#')] == 1);
static_assert(path_signature_table[uint8_t('<')] == 1);
static_assert(path_signature_table[uint8_t('>')] == 1);
static_assert(path_signature_table[uint8_t('\\')] == 2);
static_assert(path_signature_table[uint8_t('.')] == 4);
static_assert(path_signature_table[uint8_t('%')] == 8);

//
static_assert(path_signature_table[0] == 1);
static_assert(path_signature_table[31] == 1);
static_assert(path_signature_table[127] == 1);
static_assert(path_signature_table[128] == 1);
static_assert(path_signature_table[255] == 1);
// Per-byte flags for use with path_signature:
//   1 -> byte belongs to the set that needs percent encoding
//        (0x00-0x20, '"', '#', '<', '>', '?', '`', '{', '}', and > 0x7e),
//   2 -> backslash, 4 -> dot, 8 -> percent sign, 0 -> every other byte.
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      std::array<uint8_t, 256> table{};
      for (size_t code = 0; code < table.size(); ++code) {
        switch (code) {
          case 0x25:  // '%'
            table[code] = 8;
            break;
          case 0x2e:  // '.'
            table[code] = 4;
            break;
          case 0x5c:  // '\\'
            table[code] = 2;
            break;
          case 0x22:  // '"'
          case 0x23:  // '#'
          case 0x3c:  // '<'
          case 0x3e:  // '>'
          case 0x3f:  // '?'
          case 0x60:  // '`'
          case 0x7b:  // '{'
          case 0x7d:  // '}'
            table[code] = 1;
            break;
          default:
            // Controls/space at the low end, DEL and non-ASCII at the high end.
            table[code] = uint8_t(code <= 0x20 || code > 0x7e);
            break;
        }
      }
      return table;
    }();

ada_really_inline constexpr uint8_t path_signature(
std::string_view input) noexcept {
Expand Down
86 changes: 38 additions & 48 deletions src/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,18 +302,15 @@ ada_really_inline size_t find_next_host_delimiter_special(
}
#else
// : / [ \\ ?
static constexpr bool special_host_delimiters[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Byte-indexed membership table: the entry is 1 for ':', '/', '[', '\\' and
// '?', and 0 for every other byte value.
static constexpr std::array<uint8_t, 256> special_host_delimiters =
    []() constexpr {
      std::array<uint8_t, 256> table{};
      for (size_t code = 0; code < table.size(); ++code) {
        const bool is_delimiter = code == ':' || code == '/' || code == '[' ||
                                  code == '\\' || code == '?';
        table[code] = uint8_t(is_delimiter);
      }
      return table;
    }();
// credit: @the-moisrex recommended a table-based approach
ada_really_inline size_t find_next_host_delimiter_special(
std::string_view view, size_t location) noexcept {
Expand Down Expand Up @@ -436,18 +433,14 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
}
#else
// : / [ ?
static constexpr bool host_delimiters[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Byte-indexed membership table: the entry is 1 for ':', '/', '?' and '[',
// and 0 for every other byte value.
static constexpr std::array<uint8_t, 256> host_delimiters = []() constexpr {
  std::array<uint8_t, 256> table{};  // value-initialized, so all entries are 0
  table[uint8_t(':')] = 1;
  table[uint8_t('/')] = 1;
  table[uint8_t('?')] = 1;
  table[uint8_t('[')] = 1;
  return table;
}();
// credit: @the-moisrex recommended a table-based approach
ada_really_inline size_t find_next_host_delimiter(std::string_view view,
size_t location) noexcept {
Expand Down Expand Up @@ -735,18 +728,19 @@ ada_really_inline void strip_trailing_spaces_from_opaque_path(
}

// @ / \\ ?
static constexpr bool authority_delimiter_special[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Byte-indexed membership table: the entry is 1 for '@', '/', '\\' and '?',
// and 0 for every other byte value. Built the same way as the other
// delimiter tables in this file: start from an all-zero array and flag the
// listed characters.
static constexpr std::array<uint8_t, 256> authority_delimiter_special =
    []() constexpr {
      std::array<uint8_t, 256> result{};
      for (int i : {'@', '/', '\\', '?'}) {
        result[i] = 1;
      }
      return result;
    }();
// credit: @the-moisrex recommended a table-based approach
ada_really_inline size_t
find_authority_delimiter_special(std::string_view view) noexcept {
Expand All @@ -761,18 +755,14 @@ find_authority_delimiter_special(std::string_view view) noexcept {
}

// @ / ?
static constexpr bool authority_delimiter[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Byte-indexed membership table: the entry is 1 for '@', '/' and '?', and 0
// for every other byte value.
static constexpr std::array<uint8_t, 256> authority_delimiter = []() constexpr {
  std::array<uint8_t, 256> table{};
  for (size_t code = 0; code < table.size(); ++code) {
    if (code == '@' || code == '/' || code == '?') {
      table[code] = 1;
    }
  }
  return table;
}();
// credit: @the-moisrex recommended a table-based approach
ada_really_inline size_t
find_authority_delimiter(std::string_view view) noexcept {
Expand Down
164 changes: 62 additions & 102 deletions src/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,56 +150,38 @@ ada_really_inline bool has_tabs_or_newline(
// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
// U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or
// U+007C (|).
constexpr static bool is_forbidden_host_code_point_table[] = {
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
static_assert(sizeof(is_forbidden_host_code_point_table) == 256);
// Byte-indexed membership table: the entry is 1 for NUL, TAB, LF, CR, space,
// '#', '/', ':', '<', '>', '?', '@', '[', '\\', ']', '^' and '|', and 0 for
// every other byte value.
constexpr static std::array<uint8_t, 256> is_forbidden_host_code_point_table =
    []() constexpr {
      std::array<uint8_t, 256> table{};
      for (size_t code = 0; code < table.size(); ++code) {
        switch (code) {
          case '\0':
          case '\t':
          case '\n':
          case '\r':
          case ' ':
          case '#':
          case '/':
          case ':':
          case '<':
          case '>':
          case '?':
          case '@':
          case '[':
          case '\\':
          case ']':
          case '^':
          case '|':
            table[code] = 1;
            break;
          default:
            break;
        }
      }
      return table;
    }();

// Returns whether `c` is flagged in is_forbidden_host_code_point_table. The
// char is first reinterpreted as an unsigned byte so that it can be used as an
// index into the 256-entry table.
ada_really_inline constexpr bool is_forbidden_host_code_point(
    const char c) noexcept {
  const auto index = static_cast<uint8_t>(c);
  return is_forbidden_host_code_point_table[index] != 0;
}

static_assert(unicode::is_forbidden_host_code_point('\0'));
static_assert(unicode::is_forbidden_host_code_point('\t'));
static_assert(unicode::is_forbidden_host_code_point('\n'));
static_assert(unicode::is_forbidden_host_code_point('\r'));
static_assert(unicode::is_forbidden_host_code_point(' '));
static_assert(unicode::is_forbidden_host_code_point('#'));
static_assert(unicode::is_forbidden_host_code_point('/'));
static_assert(unicode::is_forbidden_host_code_point(':'));
static_assert(unicode::is_forbidden_host_code_point('?'));
static_assert(unicode::is_forbidden_host_code_point('@'));
static_assert(unicode::is_forbidden_host_code_point('['));
static_assert(unicode::is_forbidden_host_code_point('?'));
static_assert(unicode::is_forbidden_host_code_point('<'));
static_assert(unicode::is_forbidden_host_code_point('>'));
static_assert(unicode::is_forbidden_host_code_point('\\'));
static_assert(unicode::is_forbidden_host_code_point(']'));
static_assert(unicode::is_forbidden_host_code_point('^'));
static_assert(unicode::is_forbidden_host_code_point('|'));

constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
// Byte-indexed membership table: the entry is 1 for
//   - every byte in 0x00-0x20 (controls and space),
//   - '#', '%', '/', ':', '<', '>', '?', '@', '[', '\\', ']', '^', '|',
//   - every byte in 0x7f-0xff,
// and 0 for every other byte value.
constexpr static std::array<uint8_t, 256> is_forbidden_domain_code_point_table =
    []() constexpr {
      std::array<uint8_t, 256> result{};
      for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<',
                        '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) {
        result[c] = true;
      }
      for (size_t c = 0; c <= 32; c++) {
        result[c] = true;
      }
      // The upper range runs through the last index (255); an exclusive bound
      // of 255 would silently leave byte 0xff unflagged.
      for (size_t c = 127; c < result.size(); c++) {
        result[c] = true;
      }
      return result;
    }();

static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
// Pin both ends of the upper range so an off-by-one cannot come back.
static_assert(is_forbidden_domain_code_point_table[127] == 1);
static_assert(is_forbidden_domain_code_point_table[255] == 1);

Expand All @@ -224,22 +206,25 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
return accumulator;
}

constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);
// Byte-indexed classification table:
//   1 -> byte in 0x00-0x20, one of '#', '%', '/', ':', '<', '>', '?', '@',
//        '[', '\\', ']', '^', '|', or byte in 0x7f-0xff,
//   2 -> ASCII uppercase letter 'A'-'Z',
//   0 -> every other byte value.
constexpr static std::array<uint8_t, 256>
    is_forbidden_domain_code_point_table_or_upper = []() constexpr {
      std::array<uint8_t, 256> result{};
      for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<',
                        '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) {
        result[c] = 1;
      }
      for (uint8_t c = 'A'; c <= 'Z'; c++) {
        result[c] = 2;
      }
      for (size_t c = 0; c <= 32; c++) {
        result[c] = 1;
      }
      // The upper range runs through the last index (255); an exclusive bound
      // of 255 would silently leave byte 0xff unflagged.
      for (size_t c = 127; c < result.size(); c++) {
        result[c] = 1;
      }
      return result;
    }();

ada_really_inline constexpr uint8_t
contains_forbidden_domain_code_point_or_upper(const char* input,
Expand All @@ -263,55 +248,30 @@ contains_forbidden_domain_code_point_or_upper(const char* input,
return accumulator;
}

static_assert(unicode::is_forbidden_domain_code_point('%'));
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
static_assert(unicode::is_forbidden_domain_code_point('\0'));
static_assert(unicode::is_forbidden_domain_code_point('\t'));
static_assert(unicode::is_forbidden_domain_code_point('\n'));
static_assert(unicode::is_forbidden_domain_code_point('\r'));
static_assert(unicode::is_forbidden_domain_code_point(' '));
static_assert(unicode::is_forbidden_domain_code_point('#'));
static_assert(unicode::is_forbidden_domain_code_point('/'));
static_assert(unicode::is_forbidden_domain_code_point(':'));
static_assert(unicode::is_forbidden_domain_code_point('?'));
static_assert(unicode::is_forbidden_domain_code_point('@'));
static_assert(unicode::is_forbidden_domain_code_point('['));
static_assert(unicode::is_forbidden_domain_code_point('?'));
static_assert(unicode::is_forbidden_domain_code_point('<'));
static_assert(unicode::is_forbidden_domain_code_point('>'));
static_assert(unicode::is_forbidden_domain_code_point('\\'));
static_assert(unicode::is_forbidden_domain_code_point(']'));
static_assert(unicode::is_forbidden_domain_code_point('^'));
static_assert(unicode::is_forbidden_domain_code_point('|'));

constexpr static bool is_alnum_plus_table[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

static_assert(sizeof(is_alnum_plus_table) == 256);
// Byte-indexed membership table: the entry is true exactly for the ASCII
// digits '0'-'9', the ASCII letters 'a'-'z' and 'A'-'Z', and the characters
// '+', '-' and '.'; every other byte value maps to false.
constexpr static std::array<bool, 256> is_alnum_plus_table = []() constexpr {
  std::array<bool, 256> result{};
  for (size_t c = 0; c < 256; c++) {
    const bool is_digit = c >= '0' && c <= '9';
    const bool is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    const bool is_extra = c == '+' || c == '-' || c == '.';
    result[c] = is_digit || is_letter || is_extra;
  }
  return result;
}();

// Looks `c` up in is_alnum_plus_table, indexing by its unsigned byte value.
// A table is almost surely much faster, under most compilers, than the
// equivalent expression:
//   std::isalnum(c) || c == '+' || c == '-' || c == '.'
ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept {
  const auto index = static_cast<uint8_t>(c);
  return is_alnum_plus_table[index];
}
static_assert(unicode::is_alnum_plus('+'));
static_assert(unicode::is_alnum_plus('-'));
static_assert(unicode::is_alnum_plus('.'));
static_assert(unicode::is_alnum_plus('0'));
static_assert(unicode::is_alnum_plus('1'));
static_assert(unicode::is_alnum_plus('a'));
static_assert(unicode::is_alnum_plus('b'));

ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept {
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
Expand Down
Loading