Skip to content

Commit

Permalink
refactor percent_encode_index to be more simd friendly
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Nov 17, 2024
1 parent 6518b54 commit a4e2ed3
Showing 1 changed file with 25 additions and 4 deletions.
29 changes: 25 additions & 4 deletions include/ada/unicode-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,31 @@
namespace ada::unicode {
/**
 * Returns the index of the first byte of `input` for which
 * character_sets::bit_at(character_set, byte) is true, or input.size() when
 * no byte matches (including when `input` is empty).
 *
 * NOTE(review): bit_at is defined elsewhere; presumably it tests the byte's
 * bit in the `character_set` bitmap, i.e. "this byte must be
 * percent-encoded" -- confirm against character_sets.
 *
 * @param input         bytes to scan.
 * @param character_set table handed unchanged to character_sets::bit_at.
 * @return offset of the first matching byte, or input.size() if none.
 */
ada_really_inline size_t percent_encode_index(const std::string_view input,
                                              const uint8_t character_set[]) {
  const char* data = input.data();
  const size_t size = input.size();

  // Scan in blocks of 8 bytes. The fixed-trip-count inner loop is meant to
  // be easy for the compiler to unroll; bytes are still tested one by one,
  // in order, so the first match is always the one reported.
  size_t i = 0;
  for (; i + 8 <= size; i += 8) {
    // Copy the block into a local unsigned buffer so every byte reaches
    // bit_at as an unsigned value.
    unsigned char chunk[8];
    std::memcpy(chunk, data + i, sizeof(chunk));

    for (size_t j = 0; j < 8; j++) {
      if (character_sets::bit_at(character_set, chunk[j])) {
        return i + j;
      }
    }
  }

  // Fewer than 8 bytes remain: finish byte by byte, passing the same
  // unsigned byte type as the block loop above.
  for (; i < size; i++) {
    if (character_sets::bit_at(character_set,
                               static_cast<unsigned char>(data[i]))) {
      return i;
    }
  }

  return size;
}
}  // namespace ada::unicode

Expand Down

0 comments on commit a4e2ed3

Please sign in to comment.