Skip to content

Commit

Permalink
add simd string quote seeking
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhewitt committed Dec 5, 2023
1 parent 849d5b8 commit ca4cf3e
Showing 1 changed file with 77 additions and 0 deletions.
77 changes: 77 additions & 0 deletions src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,83 @@ where
let mut found_escape = false;
let mut ascii_only = true;

#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
'simd_aarch64: {
#[cfg(target_arch = "aarch64")]
mod impl_ {
    // NEON (128-bit) building blocks for the quote-seeking fast path. Every
    // helper is exposed under an architecture-neutral `simd_*` name so the
    // scanning loop can be written once.
    pub use std::arch::aarch64::{
        uint8x16_t, vceqq_u8 as simd_eq, vcltq_s8, vdupq_n_u8 as simd_duplicate,
        vld1q_u8 as simd_load, vmaxvq_u8, vorrq_u8 as simd_or, vreinterpretq_s8_u8,
    };

    /// Number of bytes examined per vector load.
    pub const SIMD_STEP: usize = 16;

    /// Returns `true` when at least one lane of `vec` is non-zero.
    pub fn is_vector_nonzero(vec: uint8x16_t) -> bool {
        // The horizontal maximum is zero only if every lane is zero. The
        // intrinsic works purely on the register value; no memory is touched.
        unsafe { vmaxvq_u8(vec) != 0 }
    }

    /// Lane-wise `lhs < rhs`, comparing the bytes as *signed* integers.
    ///
    /// Each result lane is `0xFF` where the comparison holds and `0x00`
    /// otherwise. The signed interpretation is deliberate: when `rhs` is a
    /// small positive threshold such as 32, bytes `0x80..=0xFF` are negative
    /// and therefore also compare as "less than". The caller's single
    /// `simd_lt(chunk, 32)` test thus flags both ASCII control characters and
    /// non-ASCII bytes, so neither can slip through to code that assumes the
    /// scanned bytes are plain ASCII.
    ///
    /// # Safety
    ///
    /// Same requirements as the underlying NEON intrinsics.
    pub unsafe fn simd_lt(lhs: uint8x16_t, rhs: uint8x16_t) -> uint8x16_t {
        vcltq_s8(vreinterpretq_s8_u8(lhs), vreinterpretq_s8_u8(rhs))
    }
}

#[cfg(target_arch = "x86_64")]
mod impl_ {
    // AVX2 (256-bit) building blocks for the quote-seeking fast path. Every
    // helper is exposed under an architecture-neutral `simd_*` name so the
    // scanning loop can be written once.
    //
    // NOTE(review): these intrinsics need AVX2 at run time; nothing in this
    // module checks for it, so the caller must do so before using any helper.
    pub use std::arch::x86_64::{
        __m256i, _mm256_cmpeq_epi8 as simd_eq, _mm256_cmpgt_epi8, _mm256_loadu_si256,
        _mm256_or_si256 as simd_or, _mm256_set1_epi8, _mm256_testz_si256,
    };

    /// Number of bytes examined per vector load.
    pub const SIMD_STEP: usize = 32;

    /// Returns `true` when at least one bit of `vec` is set.
    pub fn is_vector_nonzero(vec: __m256i) -> bool {
        // `_mm256_testz_si256(a, b)` yields 1 when `a & b` is all zeros and 0
        // otherwise, so "non-zero" corresponds to a result of 0.
        unsafe { _mm256_testz_si256(vec, vec) == 0 }
    }

    /// Lane-wise `lhs < rhs`, comparing the bytes as *signed* integers.
    ///
    /// AVX2 only offers a signed greater-than for bytes, so the operands are
    /// swapped. Each result lane is `0xFF` where the comparison holds and
    /// `0x00` otherwise. Because the comparison is signed, bytes
    /// `0x80..=0xFF` are negative and compare as less than any small positive
    /// threshold: `simd_lt(chunk, 32)` flags both ASCII control characters
    /// and non-ASCII bytes.
    ///
    /// # Safety
    ///
    /// The CPU must support AVX2.
    pub unsafe fn simd_lt(lhs: __m256i, rhs: __m256i) -> __m256i {
        _mm256_cmpgt_epi8(rhs, lhs)
    }

    /// Broadcasts `val` into every byte lane.
    ///
    /// # Safety
    ///
    /// The CPU must support AVX.
    pub unsafe fn simd_duplicate(val: u8) -> __m256i {
        // Bit-for-bit reinterpretation; the intrinsic merely takes an `i8`.
        _mm256_set1_epi8(val as i8)
    }

    /// Loads `SIMD_STEP` bytes starting at `ptr`; no alignment is required.
    ///
    /// # Safety
    ///
    /// `ptr` must be valid for reading `SIMD_STEP` bytes and the CPU must
    /// support AVX.
    pub unsafe fn simd_load(ptr: *const u8) -> __m256i {
        _mm256_loadu_si256(ptr as *const __m256i)
    }
}

use impl_::*;

let simd_quote = unsafe { simd_duplicate(b'"') };
let simd_backslash = unsafe { simd_duplicate(b'\\') };
let simd_mask_32 = unsafe { simd_duplicate(32) };

for remaining_chunk in data
.get(index..)
.into_iter()
.flat_map(|remaining| remaining.chunks_exact(SIMD_STEP))
{
let remaining_chunk_v = unsafe { simd_load(remaining_chunk.as_ptr()) };

let backslash = unsafe { simd_eq(remaining_chunk_v, simd_backslash) };
let mask = unsafe { simd_lt(remaining_chunk_v, simd_mask_32) };
let backslash_or_mask = unsafe { simd_or(backslash, mask) };

// go slow if backslash or mask found
if is_vector_nonzero(backslash_or_mask) {
break 'simd_aarch64;
}

// Compare the remaining chunk with the special characters
let compare_result = unsafe { simd_eq(remaining_chunk_v, simd_quote) };

// Check if any element in the comparison result is true
if is_vector_nonzero(compare_result) {
// Found a match, return the index
let j = unsafe { remaining_chunk.iter().position(|&x| x == b'"').unwrap_unchecked() };
return Ok((
StringOutput::Data(unsafe { std::str::from_utf8_unchecked(&data[start..index + j]) }),
index + j + 1,
));
}

index += remaining_chunk.len();
}
}

while let Some(next) = data.get(index) {
match next {
b'"' => {
Expand Down

0 comments on commit ca4cf3e

Please sign in to comment.