From 6f392c35aba47c295d25741ef8e1a8de6bdf98fb Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 11 Feb 2020 13:17:51 -0800 Subject: [PATCH 01/20] Fix decode output buffer length calculation. The output length only needs to be half the input length. --- src/decode.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 80aec6a..635f029 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -145,9 +145,9 @@ pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { if src.is_empty() { return Err(Error::InvalidLength(0)); } - let len = dst.len().checked_mul(2).unwrap(); - if src.len() < len || ((src.len() & 1) != 0) { - return Err(Error::InvalidLength(len)); + let decoded_len = src.len().checked_div(2).unwrap(); + if dst.len() < decoded_len || ((src.len() & 1) != 0) { + return Err(Error::InvalidLength(src.len())); } if !hex_check(src) { return Err(Error::InvalidChar); From a81190187b588a61666b384bc9c2862f1ecc1bdb Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 11 Feb 2020 13:18:58 -0800 Subject: [PATCH 02/20] Upgrade criterion and test a variety of byte lengths. This uses criterions benchmark groups to compare the relative performance between the different implementations at various byte lengths. The criterion reports now provide graphs that overlay the different implementation so you can easily see the relative performance. --- Cargo.toml | 5 +- benches/check.rs | 70 +++++++++-------- benches/hex.rs | 197 ++++++++++++++++++++++++++++------------------- 3 files changed, 161 insertions(+), 111 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a5d0cd1..2c211a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,11 @@ exclude = [ [dev-dependencies] -criterion = "0.2" +criterion = "0.3" rustc-hex = "1.0" -hex = "0.3.2" +hex = "0.4" proptest = "0.8" +rand = "0.7.3" [[bench]] name = "hex" diff --git a/benches/check.rs b/benches/check.rs index 60052b9..5a0a06d 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -1,38 +1,46 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; use faster_hex::{hex_check_fallback, hex_check_sse}; +use std::time::Duration; -fn bench(c: &mut Criterion) { - let s1 = "Bf9E2d38aceDeeCbbAfccc4B4B7AE"; - let s2 = "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D"; - let s3 = " \u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก"; - let s4 = "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D\u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก"; +const INPUT: &[&str] = &[ + "Bf9E2d38aceDeeCbbAfccc4B4B7AE", + "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D", + " \u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก", + "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D\u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก", +]; - c.bench_function("bench_check_fallback", move |b| { - b.iter(|| { - let ret1 = hex_check_fallback(s1.as_bytes()); - black_box(ret1); - let ret2 = hex_check_fallback(s2.as_bytes()); - black_box(ret2); - let ret3 = hex_check_fallback(s3.as_bytes()); - black_box(ret3); - let ret4 = hex_check_fallback(s4.as_bytes()); - black_box(ret4); - }) - }); +fn bench(c: &mut Criterion) { + let mut check_fallback_group = c.benchmark_group("check"); + for (idx, input) in INPUT.iter().enumerate() { + check_fallback_group.bench_with_input( + BenchmarkId::new("fallback", idx), + input, + |b, &input| { + b.iter(|| { + let ret = hex_check_fallback(input.as_bytes()); + black_box(ret); + }) + }, + ); + check_fallback_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { + b.iter(|| { + let ret = unsafe { hex_check_sse(input.as_bytes()) }; + black_box(ret); + }) + }); + } + check_fallback_group.finish(); +} - c.bench_function("bench_check_sse", move |b| { - b.iter(|| { - let ret1 = unsafe { hex_check_sse(s1.as_bytes()) }; - black_box(ret1); - let ret2 = unsafe { hex_check_sse(s2.as_bytes()) }; - black_box(ret2); - let ret3 = unsafe { hex_check_sse(s3.as_bytes()) }; - black_box(ret3); - let ret4 = unsafe { hex_check_sse(s4.as_bytes()) }; - black_box(ret4); - }) - }); +fn quicker() -> Criterion { + Criterion::default() + .warm_up_time(Duration::from_millis(500)) + .measurement_time(Duration::from_secs(1)) } -criterion_group!(benches, bench); +criterion_group! { + name = benches; + config = quicker(); + targets = bench +} criterion_main!(benches); diff --git a/benches/hex.rs b/benches/hex.rs index 018db82..9a09f4f 100644 --- a/benches/hex.rs +++ b/benches/hex.rs @@ -1,91 +1,132 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use faster_hex::{ hex_decode, hex_decode_fallback, hex_decode_unchecked, hex_encode_fallback, hex_string, }; use rustc_hex::{FromHex, ToHex}; +use std::time::Duration; -fn bench(c: &mut Criterion) { - let s = "Day before yesterday I saw a rabbit, and yesterday a deer, and today, you."; - - c.bench_function("bench_rustc_hex_encode", move |b| { - b.iter(|| { - let ret = s.as_bytes().to_hex(); - black_box(ret); - }) - }); - - c.bench_function("bench_hex_encode", move |b| { - b.iter(|| { - let ret = hex::encode(s); - black_box(ret); - }) - }); - - c.bench_function("bench_faster_hex_encode", move |b| { - b.iter(|| { - let ret = hex_string(s.as_bytes()).unwrap(); - black_box(ret); - }) - }); - - c.bench_function("bench_faster_hex_encode_fallback", move |b| { - b.iter(|| { - let bytes = s.as_bytes(); - let mut buffer = vec![0; bytes.len() * 2]; - let ret = hex_encode_fallback(bytes, &mut buffer); - black_box(ret); - }) - }); +const BYTE_SIZES: [usize; 5] = [2, 16, 32, 128, 4096]; - c.bench_function("bench_rustc_hex_decode", move |b| { - let hex = s.as_bytes().to_hex(); - b.iter(|| { - let ret: Vec = hex.from_hex().unwrap(); - black_box(ret); - }) - }); +fn rand_slice(size: usize) -> Vec { + use rand::Rng; + let mut input: Vec = vec![0; size]; + rand::thread_rng().fill(input.as_mut_slice()); + input +} - c.bench_function("bench_hex_decode", move |b| { - let hex = s.as_bytes().to_hex(); - b.iter(|| { - let ret: Vec = hex::decode(&hex).unwrap(); - black_box(ret); - }) - }); +fn rand_hex_encoded(size: usize) -> String { + use rand::seq::SliceRandom; + String::from_utf8( + std::iter::repeat(()) + .map(|_| *b"0123456789abcdef".choose(&mut rand::thread_rng()).unwrap()) + .take(size) + .collect(), + ) + .unwrap() +} - c.bench_function("bench_faster_hex_decode", move |b| { - let hex = hex_string(s.as_bytes()).unwrap(); - let len = s.as_bytes().len(); - b.iter(|| { - let mut dst = Vec::with_capacity(len); - dst.resize(len, 0); - let ret = hex_decode(hex.as_bytes(), &mut dst); - black_box(ret); - }) - }); +fn bench(c: &mut Criterion) { + let mut encode_group = c.benchmark_group("encode"); + for size in &BYTE_SIZES[..] { + encode_group.throughput(Throughput::Bytes(*size as u64)); + encode_group.bench_with_input(BenchmarkId::new("rustc", size), size, |b, &size| { + let input = rand_slice(size); + b.iter(|| { + let ret = input.to_hex(); + black_box(ret); + }) + }); + encode_group.bench_with_input(BenchmarkId::new("hex", size), size, |b, &size| { + let input = rand_slice(size); + b.iter(|| { + let ret = hex::encode(&input); + black_box(ret); + }) + }); + encode_group.bench_with_input(BenchmarkId::new("faster_hex", size), size, |b, &size| { + let input = rand_slice(size); + b.iter(|| { + let ret = hex_string(&input).unwrap(); + black_box(ret); + }) + }); + encode_group.bench_with_input( + BenchmarkId::new("faster_hex_fallback", size), + size, + |b, &size| { + let input = rand_slice(size); + let mut buffer = vec![0; input.len() * 2]; + b.iter(|| { + let ret = hex_encode_fallback(&input, buffer.as_mut_slice()); + black_box(ret); + }) + }, + ); + } + encode_group.finish(); - c.bench_function("bench_faster_hex_decode_unchecked", move |b| { - let hex = hex_string(s.as_bytes()).unwrap(); - let len = s.as_bytes().len(); - b.iter(|| { - let mut dst = Vec::with_capacity(len); - dst.resize(len, 0); - let ret = hex_decode_unchecked(hex.as_bytes(), &mut dst); - black_box(ret); - }) - }); + let mut decode_group = c.benchmark_group("decode"); + for size in &BYTE_SIZES[..] { + decode_group.throughput(Throughput::Bytes(*size as u64)); + decode_group.bench_with_input(BenchmarkId::new("rustc", size), size, |b, &size| { + let hex_input = rand_hex_encoded(size); + b.iter(|| { + let ret: Vec = hex_input.from_hex().unwrap(); + black_box(ret); + }) + }); + decode_group.bench_with_input(BenchmarkId::new("hex", size), size, |b, &size| { + let hex_input = rand_hex_encoded(size); + b.iter(|| { + let ret: Vec = hex::decode(&hex_input).unwrap(); + black_box(ret); + }) + }); + decode_group.bench_with_input(BenchmarkId::new("faster_hex", size), size, |b, &size| { + let hex_input = rand_hex_encoded(size); + let mut dst = vec![0; size / 2]; + b.iter(|| { + let ret = hex_decode(hex_input.as_bytes(), &mut dst).unwrap(); + black_box(ret); + }) + }); + decode_group.bench_with_input( + BenchmarkId::new("faster_hex_unchecked", size), + size, + |b, &size| { + let hex_input = rand_hex_encoded(size); + let mut dst = vec![0; size / 2]; + b.iter(|| { + let ret = hex_decode_unchecked(hex_input.as_bytes(), &mut dst); + black_box(ret); + }) + }, + ); + decode_group.bench_with_input( + BenchmarkId::new("faster_hex_fallback", size), + size, + |b, &size| { + let hex_input = rand_hex_encoded(size); + let mut dst = vec![0; size / 2]; + b.iter(|| { + let ret = hex_decode_fallback(hex_input.as_bytes(), &mut dst); + black_box(ret); + }) + }, + ); + } + decode_group.finish(); +} - c.bench_function("bench_faster_hex_decode_fallback", move |b| { - let hex = hex_string(s.as_bytes()).unwrap(); - let len = s.as_bytes().len(); - b.iter(|| { - let mut dst = Vec::with_capacity(len); - dst.resize(len, 0); - let ret = hex_decode_fallback(hex.as_bytes(), &mut dst); - black_box(ret); - }) - }); +fn quicker() -> Criterion { + Criterion::default() + .warm_up_time(Duration::from_millis(500)) + .measurement_time(Duration::from_secs(1)) } -criterion_group!(benches, bench); +criterion_group! { + name = benches; + config = quicker(); + targets = bench +} criterion_main!(benches); From 50f9d467b333fe6ca658b9750172b01832cfe071 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 11 Feb 2020 13:24:10 -0800 Subject: [PATCH 03/20] Fix compiler warnings. Range patterns are deprecated, switch to inclusive ranges instead. Re-export the deprecated function `hex_to` in a separate use statement that allows deprecation warnings. --- src/decode.rs | 2 +- src/lib.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 635f029..55c9b72 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -95,7 +95,7 @@ pub fn hex_check(src: &[u8]) -> bool { pub fn hex_check_fallback(src: &[u8]) -> bool { for byte in src { match byte { - b'A'...b'F' | b'a'...b'f' | b'0'...b'9' => continue, + b'A'..=b'F' | b'a'..=b'f' | b'0'..=b'9' => continue, _ => { return false; } diff --git a/src/lib.rs b/src/lib.rs index 3c4e825..4f6ea3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,12 +4,15 @@ mod error; pub use crate::decode::{ hex_check_fallback, hex_decode, hex_decode_fallback, hex_decode_unchecked, }; -pub use crate::encode::{hex_encode, hex_encode_fallback, hex_string, hex_to}; +pub use crate::encode::{hex_encode, hex_encode_fallback, hex_string}; pub use crate::error::Error; #[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "sse4.1"))] pub use crate::decode::hex_check_sse; +#[allow(deprecated)] +pub use crate::encode::hex_to; + #[cfg(test)] mod tests { use crate::decode::hex_decode; From a5898da4245a68b518b397d1a4f6cb4fab2edcc1 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 11 Feb 2020 13:34:37 -0800 Subject: [PATCH 04/20] Don't return an error when decoding an empty slice. This isn't an error condition. A decoded empty slice is just an empty slice. --- src/decode.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 55c9b72..79a2d33 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -142,9 +142,6 @@ pub unsafe fn hex_check_sse(mut src: &[u8]) -> bool { } pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - if src.is_empty() { - return Err(Error::InvalidLength(0)); - } let decoded_len = src.len().checked_div(2).unwrap(); if dst.len() < decoded_len || ((src.len() & 1) != 0) { return Err(Error::InvalidLength(src.len())); From 59e5b285e71e31f7fc36336c3c3e8cada1c4d7ef Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 11 Feb 2020 13:35:20 -0800 Subject: [PATCH 05/20] Add a basic roundtrip property test. --- src/lib.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 4f6ea3f..2fb0402 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,4 +77,15 @@ mod tests { _test_hex_decode_check(s, false); } } + + proptest! { + #[test] + fn test_roundtrip(input: Vec) { + let mut encoded = vec![0; input.len() * 2]; + hex_encode(&input, &mut encoded).unwrap(); + let mut decoded = vec![0; input.len()]; + hex_decode(&encoded, &mut decoded).unwrap(); + assert_eq!(&decoded, &input); + } + } } From 2605d11e943fd45e47902fc04710ef65984b2cfb Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 09:12:16 -0800 Subject: [PATCH 06/20] Reorganize decode into submodules. This also removes functions from the public api. hex_decode and hex_decode_unchecked are the only things exposed by default. hex_check_sse, hex_check_fallback, and hex_decode_fallback are only visible when compiled with the 'bench' feature. This means that benches now need to specify --features=bench when running. $ cargo bench --features=bench This makes no changes to the actual implementation and benchmarks confirm that. --- Cargo.toml | 5 + src/decode.rs | 480 +++++++++++++++++++++++++------------------------- src/lib.rs | 13 +- 3 files changed, 255 insertions(+), 243 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2c211a6..9a023dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,9 @@ exclude = [ "fuzz/*" ] +[features] +bench = [] + [dev-dependencies] criterion = "0.3" @@ -26,8 +29,10 @@ rand = "0.7.3" [[bench]] name = "hex" harness = false +required-features = ["bench"] [[bench]] name = "check" harness = false +required-features = ["bench"] diff --git a/src/decode.rs b/src/decode.rs index 79a2d33..50e7918 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,286 +1,292 @@ -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - use crate::error::Error; -const NIL: u8 = u8::max_value(); -const T_MASK: i32 = 65535; - -// ASCII -> hex -pub(crate) static UNHEX: [u8; 256] = [ - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, 10, 11, 12, 13, 14, 15, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 10, 11, 12, 13, - 14, 15, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, -]; - -// ASCII -> hex << 4 -pub(crate) static UNHEX4: [u8; 256] = [ - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, 160, 176, 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, 160, 176, 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, -]; - -const _0213: i32 = 0b11011000; - -// lower nibble -#[inline] -fn unhex_b(x: usize) -> u8 { - UNHEX[x] -} - -// upper nibble, logically equivalent to unhex_b(x) << 4 -#[inline] -fn unhex_a(x: usize) -> u8 { - UNHEX4[x] +pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + let decoded_len = src.len().checked_div(2).unwrap(); + if dst.len() < decoded_len || ((src.len() & 1) != 0) { + return Err(Error::InvalidLength(src.len())); + } + if !hex_check(src) { + return Err(Error::InvalidChar); + } + hex_decode_unchecked(src, dst); + Ok(()) } -#[inline] -#[target_feature(enable = "avx2")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn unhex_avx2(value: __m256i) -> __m256i { - let sr6 = _mm256_srai_epi16(value, 6); - let and15 = _mm256_and_si256(value, _mm256_set1_epi16(0xf)); - let mul = _mm256_maddubs_epi16(sr6, _mm256_set1_epi16(9)); - _mm256_add_epi16(mul, and15) -} +pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("avx2") { + return unsafe { arch::avx2::hex_decode(src, dst) }; + } + } -// (a << 4) | b; -#[inline] -#[target_feature(enable = "avx2")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn nib2byte_avx2(a1: __m256i, b1: __m256i, a2: __m256i, b2: __m256i) -> __m256i { - let a4_1 = _mm256_slli_epi16(a1, 4); - let a4_2 = _mm256_slli_epi16(a2, 4); - let a4orb_1 = _mm256_or_si256(a4_1, b1); - let a4orb_2 = _mm256_or_si256(a4_2, b2); - let pck1 = _mm256_packus_epi16(a4orb_1, a4orb_2); - _mm256_permute4x64_epi64(pck1, _0213) + arch::fallback::hex_decode(src, dst); } -pub fn hex_check(src: &[u8]) -> bool { +fn hex_check(src: &[u8]) -> bool { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("sse4.1") { - return unsafe { hex_check_sse(src) }; + return unsafe { arch::avx2::hex_check_sse(src) }; } } - hex_check_fallback(src) + arch::fallback::hex_check(src) } -pub fn hex_check_fallback(src: &[u8]) -> bool { - for byte in src { - match byte { - b'A'..=b'F' | b'a'..=b'f' | b'0'..=b'9' => continue, - _ => { - return false; +pub mod arch { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub mod avx2 { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + #[target_feature(enable = "avx2")] + pub unsafe fn hex_decode(mut src: &[u8], mut dst: &mut [u8]) { + // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, + // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1 + let mask_a = _mm256_setr_epi8( + 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 0, -1, 2, -1, 4, -1, 6, + -1, 8, -1, 10, -1, 12, -1, 14, -1, + ); + + // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, + // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1 + let mask_b = _mm256_setr_epi8( + 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 1, -1, 3, -1, 5, -1, 7, + -1, 9, -1, 11, -1, 13, -1, 15, -1, + ); + + while dst.len() >= 32 { + let av1 = _mm256_loadu_si256(src.as_ptr() as *const _); + let av2 = _mm256_loadu_si256(src[32..].as_ptr() as *const _); + + let mut a1 = _mm256_shuffle_epi8(av1, mask_a); + let mut b1 = _mm256_shuffle_epi8(av1, mask_b); + let mut a2 = _mm256_shuffle_epi8(av2, mask_a); + let mut b2 = _mm256_shuffle_epi8(av2, mask_b); + + a1 = unhex(a1); + a2 = unhex(a2); + b1 = unhex(b1); + b2 = unhex(b2); + + let bytes = nib2byte(a1, b1, a2, b2); + + //dst does not need to be aligned on any particular boundary + _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, bytes); + dst = &mut dst[32..]; + src = &src[64..]; } + crate::decode::arch::fallback::hex_decode(&src, &mut dst) } - } - true -} -#[target_feature(enable = "sse4.1")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub unsafe fn hex_check_sse(mut src: &[u8]) -> bool { - let ascii_zero = _mm_set1_epi8((b'0' - 1) as i8); - let ascii_nine = _mm_set1_epi8((b'9' + 1) as i8); - let ascii_ua = _mm_set1_epi8((b'A' - 1) as i8); - let ascii_uf = _mm_set1_epi8((b'F' + 1) as i8); - let ascii_la = _mm_set1_epi8((b'a' - 1) as i8); - let ascii_lf = _mm_set1_epi8((b'f' + 1) as i8); + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn unhex(value: __m256i) -> __m256i { + let sr6 = _mm256_srai_epi16(value, 6); + let and15 = _mm256_and_si256(value, _mm256_set1_epi16(0xf)); + let mul = _mm256_maddubs_epi16(sr6, _mm256_set1_epi16(9)); + _mm256_add_epi16(mul, and15) + } - while src.len() >= 16 { - let unchecked = _mm_loadu_si128(src.as_ptr() as *const _); + // (a << 4) | b; + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn nib2byte(a1: __m256i, b1: __m256i, a2: __m256i, b2: __m256i) -> __m256i { + let a4_1 = _mm256_slli_epi16(a1, 4); + let a4_2 = _mm256_slli_epi16(a2, 4); + let a4orb_1 = _mm256_or_si256(a4_1, b1); + let a4orb_2 = _mm256_or_si256(a4_2, b2); + let pck1 = _mm256_packus_epi16(a4orb_1, a4orb_2); + _mm256_permute4x64_epi64(pck1, 0b11011000) + } - let gt0 = _mm_cmpgt_epi8(unchecked, ascii_zero); - let lt9 = _mm_cmplt_epi8(unchecked, ascii_nine); - let outside1 = _mm_and_si128(gt0, lt9); + #[target_feature(enable = "sse4.1")] + pub unsafe fn hex_check_sse(mut src: &[u8]) -> bool { + let ascii_zero = _mm_set1_epi8((b'0' - 1) as i8); + let ascii_nine = _mm_set1_epi8((b'9' + 1) as i8); + let ascii_ua = _mm_set1_epi8((b'A' - 1) as i8); + let ascii_uf = _mm_set1_epi8((b'F' + 1) as i8); + let ascii_la = _mm_set1_epi8((b'a' - 1) as i8); + let ascii_lf = _mm_set1_epi8((b'f' + 1) as i8); - let gtua = _mm_cmpgt_epi8(unchecked, ascii_ua); - let ltuf = _mm_cmplt_epi8(unchecked, ascii_uf); - let outside2 = _mm_and_si128(gtua, ltuf); + while src.len() >= 16 { + let unchecked = _mm_loadu_si128(src.as_ptr() as *const _); - let gtla = _mm_cmpgt_epi8(unchecked, ascii_la); - let ltlf = _mm_cmplt_epi8(unchecked, ascii_lf); - let outside3 = _mm_and_si128(gtla, ltlf); + let gt0 = _mm_cmpgt_epi8(unchecked, ascii_zero); + let lt9 = _mm_cmplt_epi8(unchecked, ascii_nine); + let outside1 = _mm_and_si128(gt0, lt9); - let tmp = _mm_or_si128(outside1, outside2); - let ret = _mm_movemask_epi8(_mm_or_si128(tmp, outside3)); + let gtua = _mm_cmpgt_epi8(unchecked, ascii_ua); + let ltuf = _mm_cmplt_epi8(unchecked, ascii_uf); + let outside2 = _mm_and_si128(gtua, ltuf); - if ret != T_MASK { - return false; - } + let gtla = _mm_cmpgt_epi8(unchecked, ascii_la); + let ltlf = _mm_cmplt_epi8(unchecked, ascii_lf); + let outside3 = _mm_and_si128(gtla, ltlf); - src = &src[16..]; - } - hex_check_fallback(src) -} + let tmp = _mm_or_si128(outside1, outside2); + let ret = _mm_movemask_epi8(_mm_or_si128(tmp, outside3)); -pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - let decoded_len = src.len().checked_div(2).unwrap(); - if dst.len() < decoded_len || ((src.len() & 1) != 0) { - return Err(Error::InvalidLength(src.len())); - } - if !hex_check(src) { - return Err(Error::InvalidChar); - } - hex_decode_unchecked(src, dst); - Ok(()) -} + if ret != 0x0000_ffff { + return false; + } -pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - if is_x86_feature_detected!("avx2") { - return unsafe { hex_decode_avx2(src, dst) }; + src = &src[16..]; + } + crate::decode::arch::fallback::hex_check(src) } - } - hex_decode_fallback(src, dst); -} + #[cfg(test)] + mod tests { + use super::*; + use proptest::{proptest, proptest_helper}; -#[target_feature(enable = "avx2")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_decode_avx2(mut src: &[u8], mut dst: &mut [u8]) { - // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, - // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1 - let mask_a = _mm256_setr_epi8( - 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 0, -1, 2, -1, 4, -1, 6, -1, 8, - -1, 10, -1, 12, -1, 14, -1, - ); - - // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, - // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1 - let mask_b = _mm256_setr_epi8( - 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, - -1, 11, -1, 13, -1, 15, -1, - ); - - while dst.len() >= 32 { - let av1 = _mm256_loadu_si256(src.as_ptr() as *const _); - let av2 = _mm256_loadu_si256(src[32..].as_ptr() as *const _); - - let mut a1 = _mm256_shuffle_epi8(av1, mask_a); - let mut b1 = _mm256_shuffle_epi8(av1, mask_b); - let mut a2 = _mm256_shuffle_epi8(av2, mask_a); - let mut b2 = _mm256_shuffle_epi8(av2, mask_b); - - a1 = unhex_avx2(a1); - a2 = unhex_avx2(a2); - b1 = unhex_avx2(b1); - b2 = unhex_avx2(b2); - - let bytes = nib2byte_avx2(a1, b1, a2, b2); - - //dst does not need to be aligned on any particular boundary - _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, bytes); - dst = &mut dst[32..]; - src = &src[64..]; - } - hex_decode_fallback(&src, &mut dst) -} + fn _test_check_sse_true(s: &String) { + assert!(unsafe { hex_check_sse(s.as_bytes()) }); + } -pub fn hex_decode_fallback(src: &[u8], dst: &mut [u8]) { - for (slot, bytes) in dst.iter_mut().zip(src.chunks(2)) { - let a = unhex_a(bytes[0] as usize); - let b = unhex_b(bytes[1] as usize); - *slot = a | b; - } -} + proptest! { + #[test] + fn test_check_sse_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { + _test_check_sse_true(s); + } + } -#[cfg(test)] -mod tests { - use crate::decode::hex_check_fallback; - use crate::decode::hex_check_sse; - use crate::decode::hex_decode_fallback; - use crate::encode::hex_string; - use proptest::{proptest, proptest_helper}; + fn _test_check_sse_false(s: &String) { + assert!(!unsafe { hex_check_sse(s.as_bytes()) }); + } - fn _test_decode_fallback(s: &String) { - let len = s.as_bytes().len(); - let mut dst = Vec::with_capacity(len); - dst.resize(len, 0); + proptest! { + #[test] + fn test_check_sse_false(ref s in ".{16}[^0-9a-fA-F]+") { + _test_check_sse_false(s); + } + } + } + } - let hex_string = hex_string(s.as_bytes()).unwrap(); + pub mod fallback { + pub fn hex_check(src: &[u8]) -> bool { + for byte in src { + match byte { + b'A'..=b'F' | b'a'..=b'f' | b'0'..=b'9' => continue, + _ => { + return false; + } + } + } + true + } - hex_decode_fallback(hex_string.as_bytes(), &mut dst); + pub fn hex_decode(src: &[u8], dst: &mut [u8]) { + for (slot, bytes) in dst.iter_mut().zip(src.chunks(2)) { + let a = unhex_a(bytes[0] as usize); + let b = unhex_b(bytes[1] as usize); + *slot = a | b; + } + } - assert_eq!(&dst[..], s.as_bytes()); - } + // lower nibble + #[inline] + fn unhex_b(x: usize) -> u8 { + const NIL: u8 = u8::max_value(); + // ASCII -> hex + static UNHEX: [u8; 256] = [ + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 10, 11, 12, 13, 14, + 15, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 10, 11, 12, 13, 14, 15, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, + ]; + UNHEX[x] + } - proptest! { - #[test] - fn test_decode_fallback(ref s in ".+") { - _test_decode_fallback(s); + // upper nibble, logically equivalent to unhex_b(x) << 4 + #[inline] + fn unhex_a(x: usize) -> u8 { + const NIL: u8 = u8::max_value(); + // ASCII -> hex + // ASCII -> hex << 4 + static UNHEX4: [u8; 256] = [ + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, + 16, 32, 48, 64, 80, 96, 112, 128, 144, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 160, 176, + 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 160, 176, + 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + ]; + UNHEX4[x] } - } - fn _test_check_fallback_true(s: &String) { - assert!(hex_check_fallback(s.as_bytes())); - } + #[cfg(test)] + mod tests { + use super::*; + use proptest::{proptest, proptest_helper}; - proptest! { - #[test] - fn test_check_fallback_true(ref s in "[0-9a-fA-F]+") { - _test_check_fallback_true(s); - } - } + fn _test_decode(s: &String) { + let len = s.as_bytes().len(); + let mut dst = Vec::with_capacity(len); + dst.resize(len, 0); - fn _test_check_fallback_false(s: &String) { - assert!(!hex_check_fallback(s.as_bytes())); - } + let hex_string = crate::hex_string(s.as_bytes()).unwrap(); - proptest! { - #[test] - fn test_check_fallback_false(ref s in ".{16}[^0-9a-fA-F]+") { - _test_check_fallback_false(s); - } - } + hex_decode(hex_string.as_bytes(), &mut dst); - fn _test_check_sse_true(s: &String) { - assert!(unsafe { hex_check_sse(s.as_bytes()) }); - } + assert_eq!(&dst[..], s.as_bytes()); + } - proptest! { - #[test] - fn test_check_sse_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { - _test_check_sse_true(s); - } - } + proptest! { + #[test] + fn test_decode(ref s in ".+") { + _test_decode(s); + } + } - fn _test_check_sse_false(s: &String) { - assert!(!unsafe { hex_check_sse(s.as_bytes()) }); - } + fn _test_check_true(s: &String) { + assert!(hex_check(s.as_bytes())); + } - proptest! { - #[test] - fn test_check_sse_false(ref s in ".{16}[^0-9a-fA-F]+") { - _test_check_sse_false(s); + proptest! { + #[test] + fn test_check_true(ref s in "[0-9a-fA-F]+") { + _test_check_true(s); + } + } + + fn _test_check_false(s: &String) { + assert!(!hex_check(s.as_bytes())); + } + + proptest! { + #[test] + fn test_check_false(ref s in ".{16}[^0-9a-fA-F]+") { + _test_check_false(s); + } + } } } } diff --git a/src/lib.rs b/src/lib.rs index 2fb0402..1141c5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,19 @@ mod decode; mod encode; mod error; -pub use crate::decode::{ - hex_check_fallback, hex_decode, hex_decode_fallback, hex_decode_unchecked, -}; +pub use crate::decode::{hex_decode, hex_decode_unchecked}; pub use crate::encode::{hex_encode, hex_encode_fallback, hex_string}; pub use crate::error::Error; -#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "sse4.1"))] -pub use crate::decode::hex_check_sse; - #[allow(deprecated)] pub use crate::encode::hex_to; +#[cfg(feature = "bench")] +pub use crate::decode::{ + arch::avx2::hex_check_sse, + arch::fallback::{hex_check as hex_check_fallback, hex_decode as hex_decode_fallback}, +}; + #[cfg(test)] mod tests { use crate::decode::hex_decode; From ca461859e2509760e1e5a7d0888f3f2a5380f06c Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 09:41:38 -0800 Subject: [PATCH 07/20] migrate hex_check from sse to avx2 --- benches/check.rs | 4 +-- src/decode.rs | 78 +++++++++++++++++++++++++----------------------- src/lib.rs | 2 +- 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/benches/check.rs b/benches/check.rs index 5a0a06d..0a92009 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use faster_hex::{hex_check_fallback, hex_check_sse}; +use faster_hex::{hex_check_fallback, hex_check_avx2}; use std::time::Duration; const INPUT: &[&str] = &[ @@ -24,7 +24,7 @@ fn bench(c: &mut Criterion) { ); check_fallback_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { b.iter(|| { - let ret = unsafe { hex_check_sse(input.as_bytes()) }; + let ret = unsafe { hex_check_avx2(input.as_bytes()) }; black_box(ret); }) }); diff --git a/src/decode.rs b/src/decode.rs index 50e7918..6675b35 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -27,7 +27,7 @@ fn hex_check(src: &[u8]) -> bool { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("sse4.1") { - return unsafe { arch::avx2::hex_check_sse(src) }; + return unsafe { arch::avx2::hex_check(src) }; } } @@ -103,38 +103,40 @@ pub mod arch { _mm256_permute4x64_epi64(pck1, 0b11011000) } - #[target_feature(enable = "sse4.1")] - pub unsafe fn hex_check_sse(mut src: &[u8]) -> bool { - let ascii_zero = _mm_set1_epi8((b'0' - 1) as i8); - let ascii_nine = _mm_set1_epi8((b'9' + 1) as i8); - let ascii_ua = _mm_set1_epi8((b'A' - 1) as i8); - let ascii_uf = _mm_set1_epi8((b'F' + 1) as i8); - let ascii_la = _mm_set1_epi8((b'a' - 1) as i8); - let ascii_lf = _mm_set1_epi8((b'f' + 1) as i8); - - while src.len() >= 16 { - let unchecked = _mm_loadu_si128(src.as_ptr() as *const _); - - let gt0 = _mm_cmpgt_epi8(unchecked, ascii_zero); - let lt9 = _mm_cmplt_epi8(unchecked, ascii_nine); - let outside1 = _mm_and_si128(gt0, lt9); - - let gtua = _mm_cmpgt_epi8(unchecked, ascii_ua); - let ltuf = _mm_cmplt_epi8(unchecked, ascii_uf); - let outside2 = _mm_and_si128(gtua, ltuf); - - let gtla = _mm_cmpgt_epi8(unchecked, ascii_la); - let ltlf = _mm_cmplt_epi8(unchecked, ascii_lf); - let outside3 = _mm_and_si128(gtla, ltlf); - - let tmp = _mm_or_si128(outside1, outside2); - let ret = _mm_movemask_epi8(_mm_or_si128(tmp, outside3)); - - if ret != 0x0000_ffff { + #[target_feature(enable = "avx2")] + #[allow(overflowing_literals)] + pub unsafe fn hex_check(mut src: &[u8]) -> bool { + let ascii_zero = _mm256_set1_epi8((b'0' - 1) as i8); + let ascii_nine = _mm256_set1_epi8((b'9' + 1) as i8); + let ascii_ua = _mm256_set1_epi8((b'A' - 1) as i8); + let ascii_uf = _mm256_set1_epi8((b'F' + 1) as i8); + let ascii_la = _mm256_set1_epi8((b'a' - 1) as i8); + let ascii_lf = _mm256_set1_epi8((b'f' + 1) as i8); + + while src.len() >= 32 { + let unchecked = _mm256_loadu_si256(src.as_ptr() as *const _); + + let gt0 = _mm256_cmpgt_epi8(unchecked, ascii_zero); + let lt9 = _mm256_cmpgt_epi8(ascii_nine, unchecked); + let outside1 = _mm256_and_si256(gt0, lt9); + + let gtua = _mm256_cmpgt_epi8(unchecked, ascii_ua); + let ltuf = _mm256_cmpgt_epi8(ascii_uf, unchecked); + let outside2 = _mm256_and_si256(gtua, ltuf); + + let gtla = _mm256_cmpgt_epi8(unchecked, ascii_la); + let ltlf = _mm256_cmpgt_epi8(ascii_lf, unchecked); + let outside3 = _mm256_and_si256(gtla, ltlf); + + let tmp = _mm256_or_si256(outside1, outside2); + let ret = _mm256_movemask_epi8(_mm256_or_si256(tmp, outside3)); + + eprintln!("{:x}", ret); + if ret != 0xffff_ffff { return false; } - src = &src[16..]; + src = &src[32..]; } crate::decode::arch::fallback::hex_check(src) } @@ -144,25 +146,25 @@ pub mod arch { use super::*; use proptest::{proptest, proptest_helper}; - fn _test_check_sse_true(s: &String) { - assert!(unsafe { hex_check_sse(s.as_bytes()) }); + fn _test_check_true(s: &String) { + assert!(unsafe { hex_check(s.as_bytes()) }); } proptest! { #[test] - fn test_check_sse_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { - _test_check_sse_true(s); + fn test_check_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { + _test_check_true(s); } } - fn _test_check_sse_false(s: &String) { - assert!(!unsafe { hex_check_sse(s.as_bytes()) }); + fn _test_check_false(s: &String) { + assert!(!unsafe { hex_check(s.as_bytes()) }); } proptest! { #[test] - fn test_check_sse_false(ref s in ".{16}[^0-9a-fA-F]+") { - _test_check_sse_false(s); + fn test_check_false(ref s in ".{32}[^0-9a-fA-F]+") { + _test_check_false(s); } } } diff --git a/src/lib.rs b/src/lib.rs index 1141c5a..b7a5b4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ pub use crate::encode::hex_to; #[cfg(feature = "bench")] pub use crate::decode::{ - arch::avx2::hex_check_sse, + arch::avx2::hex_check as hex_check_avx2, arch::fallback::{hex_check as hex_check_fallback, hex_decode as hex_decode_fallback}, }; From 21cacdd3e1a136d7c929cd9e7d35eb04c4b4fac2 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 10:03:31 -0800 Subject: [PATCH 08/20] Modify the avx2::hex_check Rather than checking to ensure all bytes are within valid ranges, this now checks to ensure no bytes are within invalid ranges. This is mostly to avoid compiler warning when comparing an i32 to 0xffff because we now compare the i32 to zero. The performance is identical. --- src/decode.rs | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 6675b35..f36f710 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -106,36 +106,33 @@ pub mod arch { #[target_feature(enable = "avx2")] #[allow(overflowing_literals)] pub unsafe fn hex_check(mut src: &[u8]) -> bool { - let ascii_zero = _mm256_set1_epi8((b'0' - 1) as i8); - let ascii_nine = _mm256_set1_epi8((b'9' + 1) as i8); - let ascii_ua = _mm256_set1_epi8((b'A' - 1) as i8); - let ascii_uf = _mm256_set1_epi8((b'F' + 1) as i8); - let ascii_la = _mm256_set1_epi8((b'a' - 1) as i8); - let ascii_lf = _mm256_set1_epi8((b'f' + 1) as i8); + let ascii_zero = _mm256_set1_epi8(b'0' as i8); + let ascii_nine = _mm256_set1_epi8(b'9' as i8); + let ascii_ua = _mm256_set1_epi8(b'A' as i8); + let ascii_uf = _mm256_set1_epi8(b'F' as i8); + let ascii_la = _mm256_set1_epi8(b'a' as i8); + let ascii_lf = _mm256_set1_epi8(b'f' as i8); while src.len() >= 32 { let unchecked = _mm256_loadu_si256(src.as_ptr() as *const _); - let gt0 = _mm256_cmpgt_epi8(unchecked, ascii_zero); - let lt9 = _mm256_cmpgt_epi8(ascii_nine, unchecked); - let outside1 = _mm256_and_si256(gt0, lt9); - - let gtua = _mm256_cmpgt_epi8(unchecked, ascii_ua); - let ltuf = _mm256_cmpgt_epi8(ascii_uf, unchecked); - let outside2 = _mm256_and_si256(gtua, ltuf); - - let gtla = _mm256_cmpgt_epi8(unchecked, ascii_la); - let ltlf = _mm256_cmpgt_epi8(ascii_lf, unchecked); - let outside3 = _mm256_and_si256(gtla, ltlf); - - let tmp = _mm256_or_si256(outside1, outside2); - let ret = _mm256_movemask_epi8(_mm256_or_si256(tmp, outside3)); - - eprintln!("{:x}", ret); - if ret != 0xffff_ffff { + let lt0 = _mm256_cmpgt_epi8(ascii_zero, unchecked); + let gt9 = _mm256_cmpgt_epi8(unchecked, ascii_nine); + let ltua = _mm256_cmpgt_epi8(ascii_ua, unchecked); + let gtuf = _mm256_cmpgt_epi8(unchecked, ascii_uf); + let ltla = _mm256_cmpgt_epi8(ascii_la, unchecked); + let gtlf = _mm256_cmpgt_epi8(unchecked, ascii_lf); + + let between_nine_ua = _mm256_and_si256(gt9, ltua); + let between_uf_la = _mm256_and_si256(gtuf, ltla); + + let any_invalid = _mm256_or_si256(lt0, between_nine_ua); + let any_invalid = _mm256_or_si256(any_invalid, between_uf_la); + let any_invalid = _mm256_or_si256(any_invalid, gtlf); + let ret = _mm256_movemask_epi8(any_invalid); + if ret != 0 { return false; } - src = &src[32..]; } crate::decode::arch::fallback::hex_check(src) From 0de392a49c4c38a507b0d40f173b7cb24e95dd25 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 12:45:03 -0800 Subject: [PATCH 09/20] Modify decode to check the input as decoding rather than as a separate pass over the input data. This speeds up checked decoding substantially, while not changing the performance of unchecked decoding. On my machine this increases throughput when decoding 2 bytes of data by 20%, and when decoding 4096 bytes of data by 116%. --- benches/check.rs | 2 +- benches/hex.rs | 15 ++- src/decode.rs | 253 ++++++++++++++++++++++++++++++----------------- src/lib.rs | 5 +- 4 files changed, 179 insertions(+), 96 deletions(-) diff --git a/benches/check.rs b/benches/check.rs index 0a92009..f480ab6 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use faster_hex::{hex_check_fallback, hex_check_avx2}; +use faster_hex::{hex_check_avx2, hex_check_fallback}; use std::time::Duration; const INPUT: &[&str] = &[ diff --git a/benches/hex.rs b/benches/hex.rs index 9a09f4f..9606ad4 100644 --- a/benches/hex.rs +++ b/benches/hex.rs @@ -1,6 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use faster_hex::{ - hex_decode, hex_decode_fallback, hex_decode_unchecked, hex_encode_fallback, hex_string, + hex_decode, hex_decode_fallback, hex_decode_unchecked, hex_decode_unchecked_fallback, + hex_encode_fallback, hex_string, }; use rustc_hex::{FromHex, ToHex}; use std::time::Duration; @@ -114,6 +115,18 @@ fn bench(c: &mut Criterion) { }) }, ); + decode_group.bench_with_input( + BenchmarkId::new("faster_hex_unchecked_fallback", size), + size, + |b, &size| { + let hex_input = rand_hex_encoded(size); + let mut dst = vec![0; size / 2]; + b.iter(|| { + let ret = hex_decode_unchecked_fallback(hex_input.as_bytes(), &mut dst); + black_box(ret); + }) + }, + ); } decode_group.finish(); } diff --git a/src/decode.rs b/src/decode.rs index f36f710..b32bc05 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,39 +1,41 @@ use crate::error::Error; pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - let decoded_len = src.len().checked_div(2).unwrap(); - if dst.len() < decoded_len || ((src.len() & 1) != 0) { - return Err(Error::InvalidLength(src.len())); - } - if !hex_check(src) { - return Err(Error::InvalidChar); - } - hex_decode_unchecked(src, dst); - Ok(()) -} - -pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + validate_buffer_length(src, dst)?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("avx2") { + if is_x86_feature_detected!("avx2") && src.len() >= 64 { return unsafe { arch::avx2::hex_decode(src, dst) }; } } - - arch::fallback::hex_decode(src, dst); + arch::fallback::hex_decode(src, dst) } -fn hex_check(src: &[u8]) -> bool { +pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + validate_buffer_length(src, dst).unwrap(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("sse4.1") { - return unsafe { arch::avx2::hex_check(src) }; + if is_x86_feature_detected!("avx2") && src.len() >= 64 { + return unsafe { + arch::avx2::hex_decode_unchecked(src, dst); + }; } } + arch::fallback::hex_decode_unchecked(src, dst) +} - arch::fallback::hex_check(src) +#[inline] +fn validate_buffer_length(src: &[u8], dst: &[u8]) -> Result<(), Error> { + let decoded_len = src.len().checked_div(2).unwrap(); + if dst.len() < decoded_len || ((src.len() & 1) != 0) { + return Err(Error::InvalidLength(src.len())); + } + Ok(()) } +struct Checked; +struct Unchecked; + pub mod arch { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub mod avx2 { @@ -42,8 +44,24 @@ pub mod arch { #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; + use crate::decode::{Checked, Error, Unchecked}; + + #[target_feature(enable = "avx2")] + pub unsafe fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + } + + #[target_feature(enable = "avx2")] + pub unsafe fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _hex_decode::(src, dst); + } + + #[inline] #[target_feature(enable = "avx2")] - pub unsafe fn hex_decode(mut src: &[u8], mut dst: &mut [u8]) { + pub unsafe fn _hex_decode( + mut src: &[u8], + mut dst: &mut [u8], + ) -> Result<(), ()> { // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1 let mask_a = _mm256_setr_epi8( @@ -62,6 +80,10 @@ pub mod arch { let av1 = _mm256_loadu_si256(src.as_ptr() as *const _); let av2 = _mm256_loadu_si256(src[32..].as_ptr() as *const _); + if !::is_valid(av1) && !::is_valid(av2) { + return Err(()); + } + let mut a1 = _mm256_shuffle_epi8(av1, mask_a); let mut b1 = _mm256_shuffle_epi8(av1, mask_b); let mut a2 = _mm256_shuffle_epi8(av2, mask_a); @@ -79,7 +101,7 @@ pub mod arch { dst = &mut dst[32..]; src = &src[64..]; } - crate::decode::arch::fallback::hex_decode(&src, &mut dst) + crate::decode::arch::fallback::_hex_decode::(&src, &mut dst) } #[inline] @@ -104,24 +126,38 @@ pub mod arch { } #[target_feature(enable = "avx2")] - #[allow(overflowing_literals)] pub unsafe fn hex_check(mut src: &[u8]) -> bool { - let ascii_zero = _mm256_set1_epi8(b'0' as i8); - let ascii_nine = _mm256_set1_epi8(b'9' as i8); - let ascii_ua = _mm256_set1_epi8(b'A' as i8); - let ascii_uf = _mm256_set1_epi8(b'F' as i8); - let ascii_la = _mm256_set1_epi8(b'a' as i8); - let ascii_lf = _mm256_set1_epi8(b'f' as i8); - while src.len() >= 32 { let unchecked = _mm256_loadu_si256(src.as_ptr() as *const _); + if !Checked::is_valid(unchecked) { + return false; + } + src = &src[32..]; + } + crate::decode::arch::fallback::hex_check(src) + } + + pub trait IsValid: crate::decode::arch::fallback::IsValid { + unsafe fn is_valid(input: __m256i) -> bool; + } - let lt0 = _mm256_cmpgt_epi8(ascii_zero, unchecked); - let gt9 = _mm256_cmpgt_epi8(unchecked, ascii_nine); - let ltua = _mm256_cmpgt_epi8(ascii_ua, unchecked); - let gtuf = _mm256_cmpgt_epi8(unchecked, ascii_uf); - let ltla = _mm256_cmpgt_epi8(ascii_la, unchecked); - let gtlf = _mm256_cmpgt_epi8(unchecked, ascii_lf); + impl IsValid for Checked { + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn is_valid(input: __m256i) -> bool { + let ascii_zero = _mm256_set1_epi8(b'0' as i8); + let ascii_nine = _mm256_set1_epi8(b'9' as i8); + let ascii_ua = _mm256_set1_epi8(b'A' as i8); + let ascii_uf = _mm256_set1_epi8(b'F' as i8); + let ascii_la = _mm256_set1_epi8(b'a' as i8); + let ascii_lf = _mm256_set1_epi8(b'f' as i8); + + let lt0 = _mm256_cmpgt_epi8(ascii_zero, input); + let gt9 = _mm256_cmpgt_epi8(input, ascii_nine); + let ltua = _mm256_cmpgt_epi8(ascii_ua, input); + let gtuf = _mm256_cmpgt_epi8(input, ascii_uf); + let ltla = _mm256_cmpgt_epi8(ascii_la, input); + let gtlf = _mm256_cmpgt_epi8(input, ascii_lf); let between_nine_ua = _mm256_and_si256(gt9, ltua); let between_uf_la = _mm256_and_si256(gtuf, ltla); @@ -129,13 +165,16 @@ pub mod arch { let any_invalid = _mm256_or_si256(lt0, between_nine_ua); let any_invalid = _mm256_or_si256(any_invalid, between_uf_la); let any_invalid = _mm256_or_si256(any_invalid, gtlf); - let ret = _mm256_movemask_epi8(any_invalid); - if ret != 0 { - return false; - } - src = &src[32..]; + _mm256_movemask_epi8(any_invalid) == 0 + } + } + + impl IsValid for Unchecked { + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn is_valid(_: __m256i) -> bool { + true } - crate::decode::arch::fallback::hex_check(src) } #[cfg(test)] @@ -168,77 +207,105 @@ pub mod arch { } pub mod fallback { + use crate::decode::{Checked, Error, Unchecked}; + pub fn hex_check(src: &[u8]) -> bool { - for byte in src { - match byte { - b'A'..=b'F' | b'a'..=b'f' | b'0'..=b'9' => continue, - _ => { - return false; - } - } - } - true + src.iter().cloned().all(|b| unhex_a(b) != 0xff) + } + + #[inline] + pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + } + + #[inline] + pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _hex_decode::(src, dst); } - pub fn hex_decode(src: &[u8], dst: &mut [u8]) { + #[inline] + pub fn _hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), ()> { for (slot, bytes) in dst.iter_mut().zip(src.chunks(2)) { - let a = unhex_a(bytes[0] as usize); - let b = unhex_b(bytes[1] as usize); + if !V::is_valid(bytes[0], bytes[1]) { + return Err(()); + } + let a = unhex_a(bytes[0]); + let b = unhex_b(bytes[1]); *slot = a | b; } + Ok(()) + } + + pub trait IsValid { + fn is_valid(a: u8, b: u8) -> bool; + } + + impl IsValid for Checked { + #[inline] + fn is_valid(a: u8, b: u8) -> bool { + (unhex_a(a) | unhex_a(b)) != 0xff + } + } + + impl IsValid for Unchecked { + #[inline] + fn is_valid(_: u8, _: u8) -> bool { + return true; + } } // lower nibble #[inline] - fn unhex_b(x: usize) -> u8 { - const NIL: u8 = u8::max_value(); + fn unhex_b(x: u8) -> u8 { // ASCII -> hex static UNHEX: [u8; 256] = [ - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, - 1, 2, 3, 4, 5, 6, 7, 8, 9, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 10, 11, 12, 13, 14, - 15, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 10, 11, 12, 13, 14, 15, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, ]; - UNHEX[x] + UNHEX[x as usize] } // upper nibble, logically equivalent to unhex_b(x) << 4 #[inline] - fn unhex_a(x: usize) -> u8 { - const NIL: u8 = u8::max_value(); - // ASCII -> hex + fn unhex_a(x: u8) -> u8 { // ASCII -> hex << 4 static UNHEX4: [u8; 256] = [ - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 0, - 16, 32, 48, 64, 80, 96, 112, 128, 144, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 160, 176, - 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, 160, 176, - 192, 208, 224, 240, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, - NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 160, 176, 192, 208, 224, 240, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 160, 176, 192, 208, 224, 240, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, ]; - UNHEX4[x] + UNHEX4[x as usize] } #[cfg(test)] @@ -253,7 +320,7 @@ pub mod arch { let hex_string = crate::hex_string(s.as_bytes()).unwrap(); - hex_decode(hex_string.as_bytes(), &mut dst); + hex_decode(hex_string.as_bytes(), &mut dst).unwrap(); assert_eq!(&dst[..], s.as_bytes()); } diff --git a/src/lib.rs b/src/lib.rs index b7a5b4b..05cbd6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,10 @@ pub use crate::encode::hex_to; #[cfg(feature = "bench")] pub use crate::decode::{ arch::avx2::hex_check as hex_check_avx2, - arch::fallback::{hex_check as hex_check_fallback, hex_decode as hex_decode_fallback}, + arch::fallback::{ + hex_check as hex_check_fallback, hex_decode as hex_decode_fallback, + hex_decode_unchecked as hex_decode_unchecked_fallback, + }, }; #[cfg(test)] From fcb43b29f92e7a18e85f6ae142069b0fa5f4ce3e Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 13:37:47 -0800 Subject: [PATCH 10/20] Faster avx2 is_valid implementation --- src/decode.rs | 70 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index b32bc05..3836a6c 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -145,27 +145,55 @@ pub mod arch { #[inline] #[target_feature(enable = "avx2")] unsafe fn is_valid(input: __m256i) -> bool { - let ascii_zero = _mm256_set1_epi8(b'0' as i8); - let ascii_nine = _mm256_set1_epi8(b'9' as i8); - let ascii_ua = _mm256_set1_epi8(b'A' as i8); - let ascii_uf = _mm256_set1_epi8(b'F' as i8); - let ascii_la = _mm256_set1_epi8(b'a' as i8); - let ascii_lf = _mm256_set1_epi8(b'f' as i8); - - let lt0 = _mm256_cmpgt_epi8(ascii_zero, input); - let gt9 = _mm256_cmpgt_epi8(input, ascii_nine); - let ltua = _mm256_cmpgt_epi8(ascii_ua, input); - let gtuf = _mm256_cmpgt_epi8(input, ascii_uf); - let ltla = _mm256_cmpgt_epi8(ascii_la, input); - let gtlf = _mm256_cmpgt_epi8(input, ascii_lf); - - let between_nine_ua = _mm256_and_si256(gt9, ltua); - let between_uf_la = _mm256_and_si256(gtuf, ltla); - - let any_invalid = _mm256_or_si256(lt0, between_nine_ua); - let any_invalid = _mm256_or_si256(any_invalid, between_uf_la); - let any_invalid = _mm256_or_si256(any_invalid, gtlf); - _mm256_movemask_epi8(any_invalid) == 0 + let hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0x0f)); + let low_nibbles = _mm256_and_si256(input, _mm256_set1_epi8(0x0f)); + let mask_lut = _mm256_setr_epi8( + 0b0000_1000, // 0 + 0b0101_1000, // 1 .. 6 + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0000_1000, // 7 .. 9 + 0b0000_1000, // + 0b0000_1000, // + 0b0000_0000, // 10 .. 15 + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + // + 0b0000_1000, // 0 + 0b0101_1000, // 1 .. 6 + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0000_1000, // 7 .. 9 + 0b0000_1000, // + 0b0000_1000, // + 0b0000_0000, // 10 .. 15 + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + ); + + #[allow(overflowing_literals)] + let bit_pos_lut = _mm256_setr_epi8( + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ); + + let m = _mm256_shuffle_epi8(mask_lut, low_nibbles); + let bit = _mm256_shuffle_epi8(bit_pos_lut, hi_nibbles); + let non_match = _mm256_cmpeq_epi8(_mm256_and_si256(m, bit), _mm256_setzero_si256()); + _mm256_movemask_epi8(non_match) == 0 } } From ad34b9ef3717ee05d3963a9e19d9ffbac692f7b7 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 15:08:54 -0800 Subject: [PATCH 11/20] New avx2 decode algorithm. Use a lookup table to determine the offset to add to each byte. This improves performance substantially. decode/faster_hex/4096 thrpt increased 15% decode/faster_hex_unchecked/4096 thrpt increased 54% --- src/decode.rs | 105 +++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 57 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 3836a6c..97c31da 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -62,42 +62,17 @@ pub mod arch { mut src: &[u8], mut dst: &mut [u8], ) -> Result<(), ()> { - // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, - // 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1 - let mask_a = _mm256_setr_epi8( - 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 0, -1, 2, -1, 4, -1, 6, - -1, 8, -1, 10, -1, 12, -1, 14, -1, - ); - - // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, - // 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1 - let mask_b = _mm256_setr_epi8( - 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 1, -1, 3, -1, 5, -1, 7, - -1, 9, -1, 11, -1, 13, -1, 15, -1, - ); - - while dst.len() >= 32 { + while src.len() >= 64 { let av1 = _mm256_loadu_si256(src.as_ptr() as *const _); let av2 = _mm256_loadu_si256(src[32..].as_ptr() as *const _); - - if !::is_valid(av1) && !::is_valid(av2) { - return Err(()); - } - - let mut a1 = _mm256_shuffle_epi8(av1, mask_a); - let mut b1 = _mm256_shuffle_epi8(av1, mask_b); - let mut a2 = _mm256_shuffle_epi8(av2, mask_a); - let mut b2 = _mm256_shuffle_epi8(av2, mask_b); - - a1 = unhex(a1); - a2 = unhex(a2); - b1 = unhex(b1); - b2 = unhex(b2); - - let bytes = nib2byte(a1, b1, a2, b2); - - //dst does not need to be aligned on any particular boundary - _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, bytes); + let av1 = decode_chunk::(av1)?; + let av1 = + _mm256_permutevar8x32_epi32(av1, _mm256_setr_epi32(0, 1, 4, 5, -1, -1, -1, -1)); + let av2 = decode_chunk::(av2)?; + let av2 = + _mm256_permutevar8x32_epi32(av2, _mm256_setr_epi32(-1, -1, -1, -1, 0, 1, 4, 5)); + let decoded = _mm256_or_si256(av1, av2); + _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, decoded); dst = &mut dst[32..]; src = &src[64..]; } @@ -106,30 +81,47 @@ pub mod arch { #[inline] #[target_feature(enable = "avx2")] - unsafe fn unhex(value: __m256i) -> __m256i { - let sr6 = _mm256_srai_epi16(value, 6); - let and15 = _mm256_and_si256(value, _mm256_set1_epi16(0xf)); - let mul = _mm256_maddubs_epi16(sr6, _mm256_set1_epi16(9)); - _mm256_add_epi16(mul, and15) - } + unsafe fn decode_chunk(input: __m256i) -> Result<__m256i, ()> { + #[allow(overflowing_literals)] + let hi_nibbles = + _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0b00001111)); + let low_nibbles = _mm256_and_si256(input, _mm256_set1_epi8(0b00001111)); + + if !::is_valid(hi_nibbles, low_nibbles) { + return Err(()); + } - // (a << 4) | b; - #[inline] - #[target_feature(enable = "avx2")] - unsafe fn nib2byte(a1: __m256i, b1: __m256i, a2: __m256i, b2: __m256i) -> __m256i { - let a4_1 = _mm256_slli_epi16(a1, 4); - let a4_2 = _mm256_slli_epi16(a2, 4); - let a4orb_1 = _mm256_or_si256(a4_1, b1); - let a4orb_2 = _mm256_or_si256(a4_2, b2); - let pck1 = _mm256_packus_epi16(a4orb_1, a4orb_2); - _mm256_permute4x64_epi64(pck1, 0b11011000) + let shift_lut = _mm256_setr_epi8( + 0, 0, 0, -48, -55, 0, -87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -48, -55, 0, -87, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ); + + let sh = _mm256_shuffle_epi8(shift_lut, hi_nibbles); + let input = _mm256_add_epi8(input, sh); + #[allow(overflowing_literals)] + let input = _mm256_maddubs_epi16( + input, + _mm256_set1_epi32(0b00000001_00010000_00000001_00010000), + ); + let input = _mm256_shuffle_epi8( + input, + _mm256_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 4, 6, 8, 10, + 12, 14, -1, -1, -1, -1, -1, -1, -1, -1, + ), + ); + Ok(input) } + #[cfg(any(test, feature = "bench"))] #[target_feature(enable = "avx2")] pub unsafe fn hex_check(mut src: &[u8]) -> bool { while src.len() >= 32 { - let unchecked = _mm256_loadu_si256(src.as_ptr() as *const _); - if !Checked::is_valid(unchecked) { + let input = _mm256_loadu_si256(src.as_ptr() as *const _); + let hi_nibbles = + _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0b00001111)); + let low_nibbles = _mm256_and_si256(input, _mm256_set1_epi8(0b00001111)); + if !Checked::is_valid(hi_nibbles, low_nibbles) { return false; } src = &src[32..]; @@ -138,15 +130,13 @@ pub mod arch { } pub trait IsValid: crate::decode::arch::fallback::IsValid { - unsafe fn is_valid(input: __m256i) -> bool; + unsafe fn is_valid(hi_nibbles: __m256i, low_nibbles: __m256i) -> bool; } impl IsValid for Checked { #[inline] #[target_feature(enable = "avx2")] - unsafe fn is_valid(input: __m256i) -> bool { - let hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0x0f)); - let low_nibbles = _mm256_and_si256(input, _mm256_set1_epi8(0x0f)); + unsafe fn is_valid(hi_nibbles: __m256i, low_nibbles: __m256i) -> bool { let mask_lut = _mm256_setr_epi8( 0b0000_1000, // 0 0b0101_1000, // 1 .. 6 @@ -200,7 +190,7 @@ pub mod arch { impl IsValid for Unchecked { #[inline] #[target_feature(enable = "avx2")] - unsafe fn is_valid(_: __m256i) -> bool { + unsafe fn is_valid(_: __m256i, _: __m256i) -> bool { true } } @@ -237,6 +227,7 @@ pub mod arch { pub mod fallback { use crate::decode::{Checked, Error, Unchecked}; + #[cfg(any(test, feature = "bench"))] pub fn hex_check(src: &[u8]) -> bool { src.iter().cloned().all(|b| unhex_a(b) != 0xff) } From 4d1af868df0e5a857ff9d3939284c8a36f64bf06 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Fri, 14 Feb 2020 15:19:09 -0800 Subject: [PATCH 12/20] Update the name of the check bench to reflect it's now avx2 rather than sse. --- benches/check.rs | 2 +- benches/hex.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benches/check.rs b/benches/check.rs index f480ab6..47a2178 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -22,7 +22,7 @@ fn bench(c: &mut Criterion) { }) }, ); - check_fallback_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { + check_fallback_group.bench_with_input(BenchmarkId::new("avx2", idx), input, |b, &input| { b.iter(|| { let ret = unsafe { hex_check_avx2(input.as_bytes()) }; black_box(ret); diff --git a/benches/hex.rs b/benches/hex.rs index 9606ad4..1950dff 100644 --- a/benches/hex.rs +++ b/benches/hex.rs @@ -110,7 +110,7 @@ fn bench(c: &mut Criterion) { let hex_input = rand_hex_encoded(size); let mut dst = vec![0; size / 2]; b.iter(|| { - let ret = hex_decode_fallback(hex_input.as_bytes(), &mut dst); + let ret = hex_decode_fallback(hex_input.as_bytes(), &mut dst).unwrap(); black_box(ret); }) }, From 2773009197f16c18c13bad502be96834c05d0060 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Sun, 16 Feb 2020 13:25:22 -0800 Subject: [PATCH 13/20] New encode algorithm --- src/encode.rs | 105 +++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/src/encode.rs b/src/encode.rs index 345b59a..64b37ab 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -43,84 +43,83 @@ pub fn hex_to(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { #[target_feature(enable = "avx2")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_avx2(mut src: &[u8], dst: &mut [u8]) { - let ascii_zero = _mm256_set1_epi8(b'0' as i8); - let nines = _mm256_set1_epi8(9); - let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8); - let and4bits = _mm256_set1_epi8(0xf); - - let mut i = 0_isize; +unsafe fn hex_encode_avx2(mut src: &[u8], mut dst: &mut [u8]) { while src.len() >= 32 { - // https://stackoverflow.com/questions/47425851/whats-the-difference-between-mm256-lddqu-si256-and-mm256-loadu-si256 - let invec = _mm256_loadu_si256(src.as_ptr() as *const _); - - let masked1 = _mm256_and_si256(invec, and4bits); - let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits); - - // return 0xff corresponding to the elements > 9, or 0x00 otherwise - let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines); - let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines); - - // add '0' or the offset depending on the masks - let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); - let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); - - // interleave masked1 and masked2 bytes - let res1 = _mm256_unpacklo_epi8(masked2, masked1); - let res2 = _mm256_unpackhi_epi8(masked2, masked1); - - // Store everything into the right destination now - let base = dst.as_mut_ptr().offset(i * 2); - let base1 = base.offset(0) as *mut _; - let base2 = base.offset(16) as *mut _; - let base3 = base.offset(32) as *mut _; - let base4 = base.offset(48) as *mut _; - _mm256_storeu2_m128i(base3, base1, res1); - _mm256_storeu2_m128i(base4, base2, res2); + let input = _mm256_loadu_si256(src.as_ptr() as *const _); + _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, encode_chunk_avx2(_mm256_castsi256_si128(input))); + _mm256_storeu_si256(dst.as_mut_ptr().offset(32) as *mut _, encode_chunk_avx2(_mm256_extracti128_si256(input, 1))); src = &src[32..]; - i += 32; + dst = &mut dst[64..]; } + if src.len() >= 16 { + let chunk = _mm_loadu_si128(src.as_ptr() as *const _); + _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, encode_chunk_avx2(chunk)); + src = &src[16..]; + dst = &mut dst[32..]; + } + hex_encode_fallback(src, dst); +} - let i = i as usize; - hex_encode_sse41(src, &mut dst[i * 2..]); +#[target_feature(enable="avx2")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +unsafe fn encode_chunk_avx2(input: __m128i) -> __m256i { + let hi = _mm_shuffle_epi8(input, _mm_setr_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, + )); + let lo = _mm_shuffle_epi8(input, _mm_setr_epi8( + 8, 9, 10, 11, 12, 13, 14, 15, + 8, 9, 10, 11, 12, 13, 14, 15, + )); + let joined = _mm256_set_m128i(lo, hi); + let shifted = _mm256_srlv_epi64(joined, _mm256_setr_epi64x(4, 0, 4, 0)); + let masked = _mm256_and_si256(shifted, _mm256_set1_epi8(0xf)); + let shuffled = _mm256_shuffle_epi8(masked, _mm256_setr_epi8( + 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, + 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, + )); + let offset_lut = _mm256_setr_epi8( + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 87, 87, 87, 87, 87, 87, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 87, 87, 87, 87, 87, 87, + ); + let offsets = _mm256_shuffle_epi8(offset_lut, shuffled); + _mm256_add_epi8(shuffled, offsets) } // copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp #[target_feature(enable = "sse4.1")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) { - let ascii_zero = _mm_set1_epi8(b'0' as i8); - let nines = _mm_set1_epi8(9); - let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8); +unsafe fn hex_encode_sse41(mut src: &[u8], mut dst: &mut [u8]) { let and4bits = _mm_set1_epi8(0xf); - let mut i = 0_isize; while src.len() >= 16 { let invec = _mm_loadu_si128(src.as_ptr() as *const _); let masked1 = _mm_and_si128(invec, and4bits); let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); - // return 0xff corresponding to the elements > 9, or 0x00 otherwise - let cmpmask1 = _mm_cmpgt_epi8(masked1, nines); - let cmpmask2 = _mm_cmpgt_epi8(masked2, nines); + let offset_lut = _mm_setr_epi8( + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 87, 87, 87, 87, 87, 87, + ); + let offsets1 = _mm_shuffle_epi8(offset_lut, masked1); + let offsets2 = _mm_shuffle_epi8(offset_lut, masked2); - // add '0' or the offset depending on the masks - let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1)); - let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2)); + let masked1 = _mm_add_epi8(masked1, offsets1); + let masked2 = _mm_add_epi8(masked2, offsets2); // interleave masked1 and masked2 bytes let res1 = _mm_unpacklo_epi8(masked2, masked1); let res2 = _mm_unpackhi_epi8(masked2, masked1); - _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1); - _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2); + _mm_storeu_si128(dst.as_mut_ptr() as *mut _, res1); + _mm_storeu_si128(dst.as_mut_ptr().offset(16) as *mut _, res2); src = &src[16..]; - i += 16; + dst = &mut dst[32..]; } - - let i = i as usize; - hex_encode_fallback(src, &mut dst[i * 2..]); + hex_encode_fallback(src, dst); } #[inline] From e5c516fddd49f7565968b50cb90f9ca6ff389645 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Sun, 16 Feb 2020 15:55:49 -0800 Subject: [PATCH 14/20] hex_string doesn't need to return an Result --- src/decode.rs | 2 +- src/encode.rs | 6 ++++-- src/lib.rs | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 97c31da..91106c6 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -337,7 +337,7 @@ pub mod arch { let mut dst = Vec::with_capacity(len); dst.resize(len, 0); - let hex_string = crate::hex_string(s.as_bytes()).unwrap(); + let hex_string = crate::hex_string(s.as_bytes()); hex_decode(hex_string.as_bytes(), &mut dst).unwrap(); diff --git a/src/encode.rs b/src/encode.rs index 64b37ab..8f6c9c8 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -9,9 +9,11 @@ use crate::error::Error; static TABLE: &[u8] = b"0123456789abcdef"; -pub fn hex_string(src: &[u8]) -> Result { +pub fn hex_string(src: &[u8]) -> String { let mut buffer = vec![0; src.len() * 2]; - hex_encode(src, &mut buffer).map(|_| unsafe { String::from_utf8_unchecked(buffer) }) + // should never panic because the destination buffer is large enough. + hex_encode(src, &mut buffer).unwrap(); + unsafe { String::from_utf8_unchecked(buffer) } } pub fn hex_encode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { diff --git a/src/lib.rs b/src/lib.rs index 05cbd6e..b6b0b6a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,7 @@ mod tests { hex_encode(s.as_bytes(), &mut buffer).unwrap(); let encode = unsafe { str::from_utf8_unchecked(&buffer[..s.as_bytes().len() * 2]) }; - let hex_string = hex_string(s.as_bytes()).unwrap(); + let hex_string = hex_string(s.as_bytes()); assert_eq!(encode, hex::encode(s)); assert_eq!(hex_string, hex::encode(s)); @@ -47,7 +47,7 @@ mod tests { let mut dst = Vec::with_capacity(len); dst.resize(len, 0); - let hex_string = hex_string(s.as_bytes()).unwrap(); + let hex_string = hex_string(s.as_bytes()); hex_decode(hex_string.as_bytes(), &mut dst).unwrap(); From 1558c78e02535face81e3b29aa1180bd8e9a3a3e Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Mon, 17 Feb 2020 15:50:58 -0800 Subject: [PATCH 15/20] Add sse decoding --- benches/check.rs | 30 +++++---- benches/hex.rs | 2 +- src/decode.rs | 166 +++++++++++++++++++++++++++++++++++++++++++++++ src/encode.rs | 52 ++++++++------- src/lib.rs | 1 + 5 files changed, 213 insertions(+), 38 deletions(-) diff --git a/benches/check.rs b/benches/check.rs index 47a2178..bfec2be 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use faster_hex::{hex_check_avx2, hex_check_fallback}; +use faster_hex::{hex_check_avx2, hex_check_fallback, hex_check_sse}; use std::time::Duration; const INPUT: &[&str] = &[ @@ -10,26 +10,28 @@ const INPUT: &[&str] = &[ ]; fn bench(c: &mut Criterion) { - let mut check_fallback_group = c.benchmark_group("check"); + let mut check_group = c.benchmark_group("check"); for (idx, input) in INPUT.iter().enumerate() { - check_fallback_group.bench_with_input( - BenchmarkId::new("fallback", idx), - input, - |b, &input| { - b.iter(|| { - let ret = hex_check_fallback(input.as_bytes()); - black_box(ret); - }) - }, - ); - check_fallback_group.bench_with_input(BenchmarkId::new("avx2", idx), input, |b, &input| { + check_group.bench_with_input(BenchmarkId::new("fallback", idx), input, |b, &input| { + b.iter(|| { + let ret = hex_check_fallback(input.as_bytes()); + black_box(ret); + }) + }); + check_group.bench_with_input(BenchmarkId::new("avx2", idx), input, |b, &input| { b.iter(|| { let ret = unsafe { hex_check_avx2(input.as_bytes()) }; black_box(ret); }) }); + check_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { + b.iter(|| { + let ret = unsafe { hex_check_sse(input.as_bytes()) }; + black_box(ret); + }) + }); } - check_fallback_group.finish(); + check_group.finish(); } fn quicker() -> Criterion { diff --git a/benches/hex.rs b/benches/hex.rs index 1950dff..03ef725 100644 --- a/benches/hex.rs +++ b/benches/hex.rs @@ -47,7 +47,7 @@ fn bench(c: &mut Criterion) { encode_group.bench_with_input(BenchmarkId::new("faster_hex", size), size, |b, &size| { let input = rand_slice(size); b.iter(|| { - let ret = hex_string(&input).unwrap(); + let ret = hex_string(&input); black_box(ret); }) }); diff --git a/src/decode.rs b/src/decode.rs index 91106c6..4f1e73c 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -7,6 +7,9 @@ pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { if is_x86_feature_detected!("avx2") && src.len() >= 64 { return unsafe { arch::avx2::hex_decode(src, dst) }; } + if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { + return unsafe { arch::sse::hex_decode(src, dst) }; + } } arch::fallback::hex_decode(src, dst) } @@ -20,6 +23,11 @@ pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { arch::avx2::hex_decode_unchecked(src, dst); }; } + if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { + return unsafe { + arch::sse::hex_decode_unchecked(src, dst); + }; + } } arch::fallback::hex_decode_unchecked(src, dst) } @@ -224,6 +232,164 @@ pub mod arch { } } + pub mod sse { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + use crate::decode::{Checked, Error, Unchecked}; + + #[target_feature(enable = "sse4.1")] + pub unsafe fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + } + + #[target_feature(enable = "sse4.1")] + pub unsafe fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _hex_decode::(src, dst); + } + + #[inline] + #[target_feature(enable = "sse4.1")] + pub unsafe fn _hex_decode( + mut src: &[u8], + mut dst: &mut [u8], + ) -> Result<(), ()> { + while src.len() >= 32 { + let av1 = _mm_loadu_si128(src.as_ptr() as *const _); + let av2 = _mm_loadu_si128(src[16..].as_ptr() as *const _); + let av1 = decode_chunk::(av1)?; + let av1 = _mm_shuffle_epi8( + av1, + _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1), + ); + let av2 = decode_chunk::(av2)?; + let av2 = _mm_shuffle_epi8( + av2, + _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 4, 6, 8, 10, 12, 14), + ); + let decoded = _mm_or_si128(av1, av2); + _mm_storeu_si128(dst.as_mut_ptr() as *mut _, decoded); + dst = &mut dst[16..]; + src = &src[32..]; + } + crate::decode::arch::fallback::_hex_decode::(&src, &mut dst) + } + + #[inline] + #[target_feature(enable = "sse4.1")] + unsafe fn decode_chunk(input: __m128i) -> Result<__m128i, ()> { + #[allow(overflowing_literals)] + let hi_nibbles = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0b00001111)); + let low_nibbles = _mm_and_si128(input, _mm_set1_epi8(0b00001111)); + + if !::is_valid(hi_nibbles, low_nibbles) { + return Err(()); + } + + let shift_lut = _mm_setr_epi8(0, 0, 0, -48, -55, 0, -87, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + let sh = _mm_shuffle_epi8(shift_lut, hi_nibbles); + let input = _mm_add_epi8(input, sh); + #[allow(overflowing_literals)] + let input = + _mm_maddubs_epi16(input, _mm_set1_epi32(0b00000001_00010000_00000001_00010000)); + Ok(input) + } + + #[cfg(any(test, feature = "bench"))] + #[target_feature(enable = "sse4.1")] + pub unsafe fn hex_check(mut src: &[u8]) -> bool { + while src.len() >= 16 { + let input = _mm_loadu_si128(src.as_ptr() as *const _); + let hi_nibbles = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0b00001111)); + let low_nibbles = _mm_and_si128(input, _mm_set1_epi8(0b00001111)); + if !Checked::is_valid(hi_nibbles, low_nibbles) { + return false; + } + src = &src[16..]; + } + crate::decode::arch::fallback::hex_check(src) + } + + pub trait IsValid: crate::decode::arch::fallback::IsValid { + unsafe fn is_valid(hi_nibbles: __m128i, low_nibbles: __m128i) -> bool; + } + + impl IsValid for Checked { + #[inline] + #[target_feature(enable = "sse4.1")] + unsafe fn is_valid(hi_nibbles: __m128i, low_nibbles: __m128i) -> bool { + let mask_lut = _mm_setr_epi8( + 0b0000_1000, // 0 + 0b0101_1000, // 1 .. 6 + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0101_1000, // + 0b0000_1000, // 7 .. 9 + 0b0000_1000, // + 0b0000_1000, // + 0b0000_0000, // 10 .. 15 + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + 0b0000_0000, // + ); + + #[allow(overflowing_literals)] + let bit_pos_lut = _mm_setr_epi8( + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, + ); + + let m = _mm_shuffle_epi8(mask_lut, low_nibbles); + let bit = _mm_shuffle_epi8(bit_pos_lut, hi_nibbles); + let non_match = _mm_cmpeq_epi8(_mm_and_si128(m, bit), _mm_setzero_si128()); + _mm_movemask_epi8(non_match) == 0 + } + } + + impl IsValid for Unchecked { + #[inline] + #[target_feature(enable = "sse4.1")] + unsafe fn is_valid(_: __m128i, _: __m128i) -> bool { + true + } + } + + #[cfg(test)] + mod tests { + use super::*; + use proptest::{proptest, proptest_helper}; + + fn _test_check_true(s: &String) { + assert!(unsafe { hex_check(s.as_bytes()) }); + } + + proptest! { + #[test] + fn test_check_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { + _test_check_true(s); + } + } + + fn _test_check_false(s: &String) { + assert!(!unsafe { hex_check(s.as_bytes()) }); + } + + proptest! { + #[test] + fn test_check_false(ref s in ".{32}[^0-9a-fA-F]+") { + _test_check_false(s); + } + } + } + } + pub mod fallback { use crate::decode::{Checked, Error, Unchecked}; diff --git a/src/encode.rs b/src/encode.rs index 8f6c9c8..07f4ec8 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -24,11 +24,11 @@ pub fn hex_encode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("avx2") { + if is_x86_feature_detected!("avx2") && src.len() >= 16 { unsafe { hex_encode_avx2(src, dst) }; return Ok(()); } - if is_x86_feature_detected!("sse4.1") { + if is_x86_feature_detected!("sse4.1") && src.len() >= 16 { unsafe { hex_encode_sse41(src, dst) }; return Ok(()); } @@ -48,8 +48,14 @@ pub fn hex_to(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { unsafe fn hex_encode_avx2(mut src: &[u8], mut dst: &mut [u8]) { while src.len() >= 32 { let input = _mm256_loadu_si256(src.as_ptr() as *const _); - _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, encode_chunk_avx2(_mm256_castsi256_si128(input))); - _mm256_storeu_si256(dst.as_mut_ptr().offset(32) as *mut _, encode_chunk_avx2(_mm256_extracti128_si256(input, 1))); + _mm256_storeu_si256( + dst.as_mut_ptr() as *mut _, + encode_chunk_avx2(_mm256_castsi256_si128(input)), + ); + _mm256_storeu_si256( + dst.as_mut_ptr().offset(32) as *mut _, + encode_chunk_avx2(_mm256_extracti128_si256(input, 1)), + ); src = &src[32..]; dst = &mut dst[64..]; } @@ -62,29 +68,30 @@ unsafe fn hex_encode_avx2(mut src: &[u8], mut dst: &mut [u8]) { hex_encode_fallback(src, dst); } -#[target_feature(enable="avx2")] +#[target_feature(enable = "avx2")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe fn encode_chunk_avx2(input: __m128i) -> __m256i { - let hi = _mm_shuffle_epi8(input, _mm_setr_epi8( - 0, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6, 7, - )); - let lo = _mm_shuffle_epi8(input, _mm_setr_epi8( - 8, 9, 10, 11, 12, 13, 14, 15, - 8, 9, 10, 11, 12, 13, 14, 15, - )); + let hi = _mm_shuffle_epi8( + input, + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7), + ); + let lo = _mm_shuffle_epi8( + input, + _mm_setr_epi8(8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15), + ); let joined = _mm256_set_m128i(lo, hi); let shifted = _mm256_srlv_epi64(joined, _mm256_setr_epi64x(4, 0, 4, 0)); let masked = _mm256_and_si256(shifted, _mm256_set1_epi8(0xf)); - let shuffled = _mm256_shuffle_epi8(masked, _mm256_setr_epi8( - 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, - 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, - )); + let shuffled = _mm256_shuffle_epi8( + masked, + _mm256_setr_epi8( + 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, + 5, 13, 6, 14, 7, 15, + ), + ); let offset_lut = _mm256_setr_epi8( - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 87, 87, 87, 87, 87, 87, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 87, 87, 87, 87, 87, 87, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 87, 87, 87, 87, 87, 87, ); let offsets = _mm256_shuffle_epi8(offset_lut, shuffled); _mm256_add_epi8(shuffled, offsets) @@ -103,8 +110,7 @@ unsafe fn hex_encode_sse41(mut src: &[u8], mut dst: &mut [u8]) { let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); let offset_lut = _mm_setr_epi8( - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 87, 87, 87, 87, 87, 87, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, ); let offsets1 = _mm_shuffle_epi8(offset_lut, masked1); let offsets2 = _mm_shuffle_epi8(offset_lut, masked2); diff --git a/src/lib.rs b/src/lib.rs index b6b0b6a..13f14e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ pub use crate::decode::{ hex_check as hex_check_fallback, hex_decode as hex_decode_fallback, hex_decode_unchecked as hex_decode_unchecked_fallback, }, + arch::sse::hex_check as hex_check_sse, }; #[cfg(test)] From b7e178b394f3d7b00952f7c45bf074f95b4d080b Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Mon, 17 Feb 2020 16:07:37 -0800 Subject: [PATCH 16/20] Add features for avx2 and sse41 --- Cargo.toml | 4 +- src/decode.rs | 41 ++++++++---- src/encode.rs | 179 +++++++++++++++++++++++++++----------------------- src/lib.rs | 2 +- 4 files changed, 127 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9a023dc..0ec1215 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,8 +16,10 @@ exclude = [ ] [features] +default = ["avx2", "sse41"] bench = [] - +avx2 = [] +sse41 = [] [dev-dependencies] criterion = "0.3" diff --git a/src/decode.rs b/src/decode.rs index 4f1e73c..649a991 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -4,11 +4,17 @@ pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { validate_buffer_length(src, dst)?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("avx2") && src.len() >= 64 { - return unsafe { arch::avx2::hex_decode(src, dst) }; + #[cfg(feature = "avx2")] + { + if is_x86_feature_detected!("avx2") && src.len() >= 64 { + return unsafe { arch::avx2::hex_decode(src, dst) }; + } } - if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { - return unsafe { arch::sse::hex_decode(src, dst) }; + #[cfg(feature = "sse41")] + { + if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { + return unsafe { arch::sse41::hex_decode(src, dst) }; + } } } arch::fallback::hex_decode(src, dst) @@ -18,15 +24,21 @@ pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { validate_buffer_length(src, dst).unwrap(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("avx2") && src.len() >= 64 { - return unsafe { - arch::avx2::hex_decode_unchecked(src, dst); - }; + #[cfg(feature = "avx2")] + { + if is_x86_feature_detected!("avx2") && src.len() >= 64 { + return unsafe { + arch::avx2::hex_decode_unchecked(src, dst); + }; + } } - if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { - return unsafe { - arch::sse::hex_decode_unchecked(src, dst); - }; + #[cfg(feature = "sse41")] + { + if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { + return unsafe { + arch::sse41::hex_decode_unchecked(src, dst); + }; + } } } arch::fallback::hex_decode_unchecked(src, dst) @@ -45,7 +57,7 @@ struct Checked; struct Unchecked; pub mod arch { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(all(feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))] pub mod avx2 { #[cfg(target_arch = "x86")] use std::arch::x86::*; @@ -232,7 +244,8 @@ pub mod arch { } } - pub mod sse { + #[cfg(all(feature = "sse41", any(target_arch = "x86", target_arch = "x86_64")))] + pub mod sse41 { #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] diff --git a/src/encode.rs b/src/encode.rs index 07f4ec8..d1a1bcc 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -1,10 +1,5 @@ #![allow(clippy::cast_ptr_alignment)] -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - use crate::error::Error; static TABLE: &[u8] = b"0123456789abcdef"; @@ -24,13 +19,19 @@ pub fn hex_encode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - if is_x86_feature_detected!("avx2") && src.len() >= 16 { - unsafe { hex_encode_avx2(src, dst) }; - return Ok(()); + #[cfg(feature = "avx2")] + { + if is_x86_feature_detected!("avx2") && src.len() >= 16 { + unsafe { avx2::hex_encode(src, dst) }; + return Ok(()); + } } - if is_x86_feature_detected!("sse4.1") && src.len() >= 16 { - unsafe { hex_encode_sse41(src, dst) }; - return Ok(()); + #[cfg(feature = "sse41")] + { + if is_x86_feature_detected!("sse4.1") && src.len() >= 16 { + unsafe { sse41::hex_encode(src, dst) }; + return Ok(()); + } } } @@ -43,91 +44,103 @@ pub fn hex_to(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { hex_encode(src, dst) } -#[target_feature(enable = "avx2")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_avx2(mut src: &[u8], mut dst: &mut [u8]) { - while src.len() >= 32 { - let input = _mm256_loadu_si256(src.as_ptr() as *const _); - _mm256_storeu_si256( - dst.as_mut_ptr() as *mut _, - encode_chunk_avx2(_mm256_castsi256_si128(input)), +#[cfg(all(feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))] +mod avx2 { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + #[target_feature(enable = "avx2")] + pub(super) unsafe fn hex_encode(mut src: &[u8], mut dst: &mut [u8]) { + while src.len() >= 32 { + let input = _mm256_loadu_si256(src.as_ptr() as *const _); + _mm256_storeu_si256( + dst.as_mut_ptr() as *mut _, + encode_chunk(_mm256_castsi256_si128(input)), + ); + _mm256_storeu_si256( + dst.as_mut_ptr().offset(32) as *mut _, + encode_chunk(_mm256_extracti128_si256(input, 1)), + ); + src = &src[32..]; + dst = &mut dst[64..]; + } + if src.len() >= 16 { + let chunk = _mm_loadu_si128(src.as_ptr() as *const _); + _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, encode_chunk(chunk)); + src = &src[16..]; + dst = &mut dst[32..]; + } + super::hex_encode_fallback(src, dst); + } + + #[target_feature(enable = "avx2")] + unsafe fn encode_chunk(input: __m128i) -> __m256i { + let hi = _mm_shuffle_epi8( + input, + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7), ); - _mm256_storeu_si256( - dst.as_mut_ptr().offset(32) as *mut _, - encode_chunk_avx2(_mm256_extracti128_si256(input, 1)), + let lo = _mm_shuffle_epi8( + input, + _mm_setr_epi8(8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15), ); - src = &src[32..]; - dst = &mut dst[64..]; - } - if src.len() >= 16 { - let chunk = _mm_loadu_si128(src.as_ptr() as *const _); - _mm256_storeu_si256(dst.as_mut_ptr() as *mut _, encode_chunk_avx2(chunk)); - src = &src[16..]; - dst = &mut dst[32..]; + let joined = _mm256_set_m128i(lo, hi); + let shifted = _mm256_srlv_epi64(joined, _mm256_setr_epi64x(4, 0, 4, 0)); + let masked = _mm256_and_si256(shifted, _mm256_set1_epi8(0xf)); + let shuffled = _mm256_shuffle_epi8( + masked, + _mm256_setr_epi8( + 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 0, 8, 1, 9, 2, 10, 3, 11, 4, + 12, 5, 13, 6, 14, 7, 15, + ), + ); + let offset_lut = _mm256_setr_epi8( + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, + ); + let offsets = _mm256_shuffle_epi8(offset_lut, shuffled); + _mm256_add_epi8(shuffled, offsets) } - hex_encode_fallback(src, dst); } -#[target_feature(enable = "avx2")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn encode_chunk_avx2(input: __m128i) -> __m256i { - let hi = _mm_shuffle_epi8( - input, - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7), - ); - let lo = _mm_shuffle_epi8( - input, - _mm_setr_epi8(8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15), - ); - let joined = _mm256_set_m128i(lo, hi); - let shifted = _mm256_srlv_epi64(joined, _mm256_setr_epi64x(4, 0, 4, 0)); - let masked = _mm256_and_si256(shifted, _mm256_set1_epi8(0xf)); - let shuffled = _mm256_shuffle_epi8( - masked, - _mm256_setr_epi8( - 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, - 5, 13, 6, 14, 7, 15, - ), - ); - let offset_lut = _mm256_setr_epi8( - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 87, 87, 87, 87, 87, 87, - ); - let offsets = _mm256_shuffle_epi8(offset_lut, shuffled); - _mm256_add_epi8(shuffled, offsets) -} +#[cfg(all(feature = "sse41", any(target_arch = "x86", target_arch = "x86_64")))] +mod sse41 { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; -// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp -#[target_feature(enable = "sse4.1")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -unsafe fn hex_encode_sse41(mut src: &[u8], mut dst: &mut [u8]) { - let and4bits = _mm_set1_epi8(0xf); + #[target_feature(enable = "sse4.1")] + pub(super) unsafe fn hex_encode(mut src: &[u8], mut dst: &mut [u8]) { + let and4bits = _mm_set1_epi8(0xf); - while src.len() >= 16 { - let invec = _mm_loadu_si128(src.as_ptr() as *const _); + while src.len() >= 16 { + let invec = _mm_loadu_si128(src.as_ptr() as *const _); - let masked1 = _mm_and_si128(invec, and4bits); - let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); + let masked1 = _mm_and_si128(invec, and4bits); + let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); - let offset_lut = _mm_setr_epi8( - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, - ); - let offsets1 = _mm_shuffle_epi8(offset_lut, masked1); - let offsets2 = _mm_shuffle_epi8(offset_lut, masked2); + let offset_lut = _mm_setr_epi8( + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 87, 87, 87, 87, 87, 87, + ); + let offsets1 = _mm_shuffle_epi8(offset_lut, masked1); + let offsets2 = _mm_shuffle_epi8(offset_lut, masked2); - let masked1 = _mm_add_epi8(masked1, offsets1); - let masked2 = _mm_add_epi8(masked2, offsets2); + let masked1 = _mm_add_epi8(masked1, offsets1); + let masked2 = _mm_add_epi8(masked2, offsets2); - // interleave masked1 and masked2 bytes - let res1 = _mm_unpacklo_epi8(masked2, masked1); - let res2 = _mm_unpackhi_epi8(masked2, masked1); + // interleave masked1 and masked2 bytes + let res1 = _mm_unpacklo_epi8(masked2, masked1); + let res2 = _mm_unpackhi_epi8(masked2, masked1); - _mm_storeu_si128(dst.as_mut_ptr() as *mut _, res1); - _mm_storeu_si128(dst.as_mut_ptr().offset(16) as *mut _, res2); - src = &src[16..]; - dst = &mut dst[32..]; + _mm_storeu_si128(dst.as_mut_ptr() as *mut _, res1); + _mm_storeu_si128(dst.as_mut_ptr().offset(16) as *mut _, res2); + src = &src[16..]; + dst = &mut dst[32..]; + } + super::hex_encode_fallback(src, dst); } - hex_encode_fallback(src, dst); } #[inline] diff --git a/src/lib.rs b/src/lib.rs index 13f14e5..397b2d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ pub use crate::decode::{ hex_check as hex_check_fallback, hex_decode as hex_decode_fallback, hex_decode_unchecked as hex_decode_unchecked_fallback, }, - arch::sse::hex_check as hex_check_sse, + arch::sse41::hex_check as hex_check_sse, }; #[cfg(test)] From ae677f1aafd7635629a0ed1209f78777b97fbdbe Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 18 Feb 2020 06:50:48 -0800 Subject: [PATCH 17/20] Add a decode function that returns a Vec and a couple proptests to compare the implementation against the hex crate. --- src/decode.rs | 6 ++++++ src/lib.rs | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 649a991..4955e9f 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,5 +1,11 @@ use crate::error::Error; +pub fn decode(src: &[u8]) -> Result, Error> { + let mut output = vec![0u8; src.len() / 2]; + hex_decode(src, &mut output)?; + Ok(output) +} + pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { validate_buffer_length(src, dst)?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/src/lib.rs b/src/lib.rs index 397b2d4..5ed6d6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ mod decode; mod encode; mod error; -pub use crate::decode::{hex_decode, hex_decode_unchecked}; +pub use crate::decode::{decode, hex_decode, hex_decode_unchecked}; pub use crate::encode::{hex_encode, hex_encode_fallback, hex_string}; pub use crate::error::Error; @@ -20,7 +20,7 @@ pub use crate::decode::{ #[cfg(test)] mod tests { - use crate::decode::hex_decode; + use crate::decode::{decode, hex_decode}; use crate::encode::{hex_encode, hex_string}; use proptest::{proptest, proptest_helper}; use std::str; @@ -92,5 +92,19 @@ mod tests { hex_decode(&encoded, &mut decoded).unwrap(); assert_eq!(&decoded, &input); } + + #[test] + fn test_encode_matches(input: Vec) { + let encoded = hex_string(&input); + let expected = hex::encode(&input); + assert_eq!(encoded, expected); + } + + #[test] + fn test_decode_matches(input: Vec) { + let decoded = decode(&input).map_err(|_| ()); + let expected = hex::decode(&input).map_err(|_| ()); + assert_eq!(decoded, expected); + } } } From 9bd7c34a36cd723cea363aeeee6872a3801bf54b Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 18 Feb 2020 07:04:45 -0800 Subject: [PATCH 18/20] Rename functions removing the hex prefix as it stutters with the name of the crate. --- Cargo.toml | 4 +-- benches/check.rs | 8 ++--- benches/hex.rs | 16 ++++----- src/decode.rs | 84 ++++++++++++++++++++++-------------------------- src/encode.rs | 31 ++++++++---------- src/lib.rs | 77 +++++++++++++++----------------------------- 6 files changed, 92 insertions(+), 128 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ec1215..2356566 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,10 +31,10 @@ rand = "0.7.3" [[bench]] name = "hex" harness = false -required-features = ["bench"] +required-features = ["bench", "avx2", "sse41"] [[bench]] name = "check" harness = false -required-features = ["bench"] +required-features = ["bench", "avx2", "sse41"] diff --git a/benches/check.rs b/benches/check.rs index bfec2be..b1fe60d 100644 --- a/benches/check.rs +++ b/benches/check.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use faster_hex::{hex_check_avx2, hex_check_fallback, hex_check_sse}; +use faster_hex::{check_avx2, check_fallback, check_sse}; use std::time::Duration; const INPUT: &[&str] = &[ @@ -14,19 +14,19 @@ fn bench(c: &mut Criterion) { for (idx, input) in INPUT.iter().enumerate() { check_group.bench_with_input(BenchmarkId::new("fallback", idx), input, |b, &input| { b.iter(|| { - let ret = hex_check_fallback(input.as_bytes()); + let ret = check_fallback(input.as_bytes()); black_box(ret); }) }); check_group.bench_with_input(BenchmarkId::new("avx2", idx), input, |b, &input| { b.iter(|| { - let ret = unsafe { hex_check_avx2(input.as_bytes()) }; + let ret = unsafe { check_avx2(input.as_bytes()) }; black_box(ret); }) }); check_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { b.iter(|| { - let ret = unsafe { hex_check_sse(input.as_bytes()) }; + let ret = unsafe { check_sse(input.as_bytes()) }; black_box(ret); }) }); diff --git a/benches/hex.rs b/benches/hex.rs index 03ef725..458a581 100644 --- a/benches/hex.rs +++ b/benches/hex.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use faster_hex::{ - hex_decode, hex_decode_fallback, hex_decode_unchecked, hex_decode_unchecked_fallback, - hex_encode_fallback, hex_string, + decode_fallback, decode_to_slice, decode_to_slice_unchecked, decode_unchecked_fallback, encode, + encode_fallback, }; use rustc_hex::{FromHex, ToHex}; use std::time::Duration; @@ -47,7 +47,7 @@ fn bench(c: &mut Criterion) { encode_group.bench_with_input(BenchmarkId::new("faster_hex", size), size, |b, &size| { let input = rand_slice(size); b.iter(|| { - let ret = hex_string(&input); + let ret = encode(&input); black_box(ret); }) }); @@ -58,7 +58,7 @@ fn bench(c: &mut Criterion) { let input = rand_slice(size); let mut buffer = vec![0; input.len() * 2]; b.iter(|| { - let ret = hex_encode_fallback(&input, buffer.as_mut_slice()); + let ret = encode_fallback(&input, buffer.as_mut_slice()); black_box(ret); }) }, @@ -87,7 +87,7 @@ fn bench(c: &mut Criterion) { let hex_input = rand_hex_encoded(size); let mut dst = vec![0; size / 2]; b.iter(|| { - let ret = hex_decode(hex_input.as_bytes(), &mut dst).unwrap(); + let ret = decode_to_slice(hex_input.as_bytes(), &mut dst).unwrap(); black_box(ret); }) }); @@ -98,7 +98,7 @@ fn bench(c: &mut Criterion) { let hex_input = rand_hex_encoded(size); let mut dst = vec![0; size / 2]; b.iter(|| { - let ret = hex_decode_unchecked(hex_input.as_bytes(), &mut dst); + let ret = decode_to_slice_unchecked(hex_input.as_bytes(), &mut dst); black_box(ret); }) }, @@ -110,7 +110,7 @@ fn bench(c: &mut Criterion) { let hex_input = rand_hex_encoded(size); let mut dst = vec![0; size / 2]; b.iter(|| { - let ret = hex_decode_fallback(hex_input.as_bytes(), &mut dst).unwrap(); + let ret = decode_fallback(hex_input.as_bytes(), &mut dst).unwrap(); black_box(ret); }) }, @@ -122,7 +122,7 @@ fn bench(c: &mut Criterion) { let hex_input = rand_hex_encoded(size); let mut dst = vec![0; size / 2]; b.iter(|| { - let ret = hex_decode_unchecked_fallback(hex_input.as_bytes(), &mut dst); + let ret = decode_unchecked_fallback(hex_input.as_bytes(), &mut dst); black_box(ret); }) }, diff --git a/src/decode.rs b/src/decode.rs index 4955e9f..4e9f99c 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -2,31 +2,31 @@ use crate::error::Error; pub fn decode(src: &[u8]) -> Result, Error> { let mut output = vec![0u8; src.len() / 2]; - hex_decode(src, &mut output)?; + decode_to_slice(src, &mut output)?; Ok(output) } -pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { +pub fn decode_to_slice(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { validate_buffer_length(src, dst)?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { #[cfg(feature = "avx2")] { if is_x86_feature_detected!("avx2") && src.len() >= 64 { - return unsafe { arch::avx2::hex_decode(src, dst) }; + return unsafe { arch::avx2::decode(src, dst) }; } } #[cfg(feature = "sse41")] { if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { - return unsafe { arch::sse41::hex_decode(src, dst) }; + return unsafe { arch::sse41::decode(src, dst) }; } } } - arch::fallback::hex_decode(src, dst) + arch::fallback::decode(src, dst) } -pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { +pub fn decode_to_slice_unchecked(src: &[u8], dst: &mut [u8]) { validate_buffer_length(src, dst).unwrap(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { @@ -34,7 +34,7 @@ pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { { if is_x86_feature_detected!("avx2") && src.len() >= 64 { return unsafe { - arch::avx2::hex_decode_unchecked(src, dst); + arch::avx2::decode_unchecked(src, dst); }; } } @@ -42,12 +42,12 @@ pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { { if is_x86_feature_detected!("sse4.1") && src.len() >= 32 { return unsafe { - arch::sse41::hex_decode_unchecked(src, dst); + arch::sse41::decode_unchecked(src, dst); }; } } } - arch::fallback::hex_decode_unchecked(src, dst) + arch::fallback::decode_unchecked(src, dst) } #[inline] @@ -73,21 +73,18 @@ pub mod arch { use crate::decode::{Checked, Error, Unchecked}; #[target_feature(enable = "avx2")] - pub unsafe fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + pub unsafe fn decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _decode::(src, dst).map_err(|_| Error::InvalidChar) } #[target_feature(enable = "avx2")] - pub unsafe fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { - let _ = _hex_decode::(src, dst); + pub unsafe fn decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _decode::(src, dst); } #[inline] #[target_feature(enable = "avx2")] - pub unsafe fn _hex_decode( - mut src: &[u8], - mut dst: &mut [u8], - ) -> Result<(), ()> { + pub unsafe fn _decode(mut src: &[u8], mut dst: &mut [u8]) -> Result<(), ()> { while src.len() >= 64 { let av1 = _mm256_loadu_si256(src.as_ptr() as *const _); let av2 = _mm256_loadu_si256(src[32..].as_ptr() as *const _); @@ -102,7 +99,7 @@ pub mod arch { dst = &mut dst[32..]; src = &src[64..]; } - crate::decode::arch::fallback::_hex_decode::(&src, &mut dst) + crate::decode::arch::fallback::_decode::(&src, &mut dst) } #[inline] @@ -141,7 +138,7 @@ pub mod arch { #[cfg(any(test, feature = "bench"))] #[target_feature(enable = "avx2")] - pub unsafe fn hex_check(mut src: &[u8]) -> bool { + pub unsafe fn check(mut src: &[u8]) -> bool { while src.len() >= 32 { let input = _mm256_loadu_si256(src.as_ptr() as *const _); let hi_nibbles = @@ -152,7 +149,7 @@ pub mod arch { } src = &src[32..]; } - crate::decode::arch::fallback::hex_check(src) + crate::decode::arch::fallback::check(src) } pub trait IsValid: crate::decode::arch::fallback::IsValid { @@ -227,7 +224,7 @@ pub mod arch { use proptest::{proptest, proptest_helper}; fn _test_check_true(s: &String) { - assert!(unsafe { hex_check(s.as_bytes()) }); + assert!(unsafe { check(s.as_bytes()) }); } proptest! { @@ -238,7 +235,7 @@ pub mod arch { } fn _test_check_false(s: &String) { - assert!(!unsafe { hex_check(s.as_bytes()) }); + assert!(!unsafe { check(s.as_bytes()) }); } proptest! { @@ -260,21 +257,18 @@ pub mod arch { use crate::decode::{Checked, Error, Unchecked}; #[target_feature(enable = "sse4.1")] - pub unsafe fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + pub unsafe fn decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _decode::(src, dst).map_err(|_| Error::InvalidChar) } #[target_feature(enable = "sse4.1")] - pub unsafe fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { - let _ = _hex_decode::(src, dst); + pub unsafe fn decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _decode::(src, dst); } #[inline] #[target_feature(enable = "sse4.1")] - pub unsafe fn _hex_decode( - mut src: &[u8], - mut dst: &mut [u8], - ) -> Result<(), ()> { + pub unsafe fn _decode(mut src: &[u8], mut dst: &mut [u8]) -> Result<(), ()> { while src.len() >= 32 { let av1 = _mm_loadu_si128(src.as_ptr() as *const _); let av2 = _mm_loadu_si128(src[16..].as_ptr() as *const _); @@ -293,7 +287,7 @@ pub mod arch { dst = &mut dst[16..]; src = &src[32..]; } - crate::decode::arch::fallback::_hex_decode::(&src, &mut dst) + crate::decode::arch::fallback::_decode::(&src, &mut dst) } #[inline] @@ -319,7 +313,7 @@ pub mod arch { #[cfg(any(test, feature = "bench"))] #[target_feature(enable = "sse4.1")] - pub unsafe fn hex_check(mut src: &[u8]) -> bool { + pub unsafe fn check(mut src: &[u8]) -> bool { while src.len() >= 16 { let input = _mm_loadu_si128(src.as_ptr() as *const _); let hi_nibbles = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0b00001111)); @@ -329,7 +323,7 @@ pub mod arch { } src = &src[16..]; } - crate::decode::arch::fallback::hex_check(src) + crate::decode::arch::fallback::check(src) } pub trait IsValid: crate::decode::arch::fallback::IsValid { @@ -386,7 +380,7 @@ pub mod arch { use proptest::{proptest, proptest_helper}; fn _test_check_true(s: &String) { - assert!(unsafe { hex_check(s.as_bytes()) }); + assert!(unsafe { check(s.as_bytes()) }); } proptest! { @@ -397,7 +391,7 @@ pub mod arch { } fn _test_check_false(s: &String) { - assert!(!unsafe { hex_check(s.as_bytes()) }); + assert!(!unsafe { check(s.as_bytes()) }); } proptest! { @@ -413,22 +407,22 @@ pub mod arch { use crate::decode::{Checked, Error, Unchecked}; #[cfg(any(test, feature = "bench"))] - pub fn hex_check(src: &[u8]) -> bool { + pub fn check(src: &[u8]) -> bool { src.iter().cloned().all(|b| unhex_a(b) != 0xff) } #[inline] - pub fn hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - _hex_decode::(src, dst).map_err(|_| Error::InvalidChar) + pub fn decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { + _decode::(src, dst).map_err(|_| Error::InvalidChar) } #[inline] - pub fn hex_decode_unchecked(src: &[u8], dst: &mut [u8]) { - let _ = _hex_decode::(src, dst); + pub fn decode_unchecked(src: &[u8], dst: &mut [u8]) { + let _ = _decode::(src, dst); } #[inline] - pub fn _hex_decode(src: &[u8], dst: &mut [u8]) -> Result<(), ()> { + pub fn _decode(src: &[u8], dst: &mut [u8]) -> Result<(), ()> { for (slot, bytes) in dst.iter_mut().zip(src.chunks(2)) { if !V::is_valid(bytes[0], bytes[1]) { return Err(()); @@ -522,9 +516,9 @@ pub mod arch { let mut dst = Vec::with_capacity(len); dst.resize(len, 0); - let hex_string = crate::hex_string(s.as_bytes()); + let hex_string = crate::encode(s.as_bytes()); - hex_decode(hex_string.as_bytes(), &mut dst).unwrap(); + decode(hex_string.as_bytes(), &mut dst).unwrap(); assert_eq!(&dst[..], s.as_bytes()); } @@ -537,7 +531,7 @@ pub mod arch { } fn _test_check_true(s: &String) { - assert!(hex_check(s.as_bytes())); + assert!(check(s.as_bytes())); } proptest! { @@ -548,7 +542,7 @@ pub mod arch { } fn _test_check_false(s: &String) { - assert!(!hex_check(s.as_bytes())); + assert!(!check(s.as_bytes())); } proptest! { diff --git a/src/encode.rs b/src/encode.rs index d1a1bcc..f43a42a 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -4,14 +4,14 @@ use crate::error::Error; static TABLE: &[u8] = b"0123456789abcdef"; -pub fn hex_string(src: &[u8]) -> String { +pub fn encode(src: &[u8]) -> String { let mut buffer = vec![0; src.len() * 2]; // should never panic because the destination buffer is large enough. - hex_encode(src, &mut buffer).unwrap(); + encode_to_slice(src, &mut buffer).unwrap(); unsafe { String::from_utf8_unchecked(buffer) } } -pub fn hex_encode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { +pub fn encode_to_slice(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { let len = src.len().checked_mul(2).unwrap(); if dst.len() < len { return Err(Error::InvalidLength(len)); @@ -22,28 +22,23 @@ pub fn hex_encode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { #[cfg(feature = "avx2")] { if is_x86_feature_detected!("avx2") && src.len() >= 16 { - unsafe { avx2::hex_encode(src, dst) }; + unsafe { avx2::encode(src, dst) }; return Ok(()); } } #[cfg(feature = "sse41")] { if is_x86_feature_detected!("sse4.1") && src.len() >= 16 { - unsafe { sse41::hex_encode(src, dst) }; + unsafe { sse41::encode(src, dst) }; return Ok(()); } } } - hex_encode_fallback(src, dst); + encode_fallback(src, dst); Ok(()) } -#[deprecated(since = "0.3.0", note = "please use `hex_encode` instead")] -pub fn hex_to(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { - hex_encode(src, dst) -} - #[cfg(all(feature = "avx2", any(target_arch = "x86", target_arch = "x86_64")))] mod avx2 { #[cfg(target_arch = "x86")] @@ -52,7 +47,7 @@ mod avx2 { use std::arch::x86_64::*; #[target_feature(enable = "avx2")] - pub(super) unsafe fn hex_encode(mut src: &[u8], mut dst: &mut [u8]) { + pub(super) unsafe fn encode(mut src: &[u8], mut dst: &mut [u8]) { while src.len() >= 32 { let input = _mm256_loadu_si256(src.as_ptr() as *const _); _mm256_storeu_si256( @@ -72,7 +67,7 @@ mod avx2 { src = &src[16..]; dst = &mut dst[32..]; } - super::hex_encode_fallback(src, dst); + super::encode_fallback(src, dst); } #[target_feature(enable = "avx2")] @@ -112,7 +107,7 @@ mod sse41 { use std::arch::x86_64::*; #[target_feature(enable = "sse4.1")] - pub(super) unsafe fn hex_encode(mut src: &[u8], mut dst: &mut [u8]) { + pub(super) unsafe fn encode(mut src: &[u8], mut dst: &mut [u8]) { let and4bits = _mm_set1_epi8(0xf); while src.len() >= 16 { @@ -139,7 +134,7 @@ mod sse41 { src = &src[16..]; dst = &mut dst[32..]; } - super::hex_encode_fallback(src, dst); + super::encode_fallback(src, dst); } } @@ -148,7 +143,7 @@ fn hex(byte: u8) -> u8 { TABLE[byte as usize] } -pub fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) { +pub fn encode_fallback(src: &[u8], dst: &mut [u8]) { for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { slots[0] = hex((*byte >> 4) & 0xf); slots[1] = hex(*byte & 0xf); @@ -157,13 +152,13 @@ pub fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) { #[cfg(test)] mod tests { - use crate::encode::hex_encode_fallback; + use crate::encode::encode_fallback; use proptest::{proptest, proptest_helper}; use std::str; fn _test_encode_fallback(s: &String) { let mut buffer = vec![0; s.as_bytes().len() * 2]; - hex_encode_fallback(s.as_bytes(), &mut buffer); + encode_fallback(s.as_bytes(), &mut buffer); let encode = unsafe { str::from_utf8_unchecked(&buffer[..s.as_bytes().len() * 2]) }; assert_eq!(encode, hex::encode(s)); } diff --git a/src/lib.rs b/src/lib.rs index 5ed6d6b..754b295 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,101 +1,76 @@ mod decode; mod encode; mod error; -pub use crate::decode::{decode, hex_decode, hex_decode_unchecked}; -pub use crate::encode::{hex_encode, hex_encode_fallback, hex_string}; +pub use crate::decode::{decode, decode_to_slice, decode_to_slice_unchecked}; +pub use crate::encode::{encode, encode_to_slice}; pub use crate::error::Error; -#[allow(deprecated)] -pub use crate::encode::hex_to; - #[cfg(feature = "bench")] pub use crate::decode::{ - arch::avx2::hex_check as hex_check_avx2, + arch::avx2::check as check_avx2, arch::fallback::{ - hex_check as hex_check_fallback, hex_decode as hex_decode_fallback, - hex_decode_unchecked as hex_decode_unchecked_fallback, + check as check_fallback, decode as decode_fallback, + decode_unchecked as decode_unchecked_fallback, }, - arch::sse41::hex_check as hex_check_sse, + arch::sse41::check as check_sse, }; +#[cfg(feature = "bench")] +pub use crate::encode::encode_fallback; #[cfg(test)] mod tests { - use crate::decode::{decode, hex_decode}; - use crate::encode::{hex_encode, hex_string}; + use crate::decode::decode; + use crate::encode::{encode, encode_to_slice}; use proptest::{proptest, proptest_helper}; use std::str; - fn _test_hex_encode(s: &String) { + fn _test_encode(s: &String) { let mut buffer = vec![0; s.as_bytes().len() * 2]; - hex_encode(s.as_bytes(), &mut buffer).unwrap(); - let encode = unsafe { str::from_utf8_unchecked(&buffer[..s.as_bytes().len() * 2]) }; + encode_to_slice(s.as_bytes(), &mut buffer).unwrap(); + let encoded = unsafe { str::from_utf8_unchecked(&buffer[..s.as_bytes().len() * 2]) }; - let hex_string = hex_string(s.as_bytes()); + let hex_string = encode(s.as_bytes()); - assert_eq!(encode, hex::encode(s)); + assert_eq!(encoded, hex::encode(s)); assert_eq!(hex_string, hex::encode(s)); } proptest! { #[test] - fn test_hex_encode(ref s in ".*") { - _test_hex_encode(s); - } - } - - fn _test_hex_decode(s: &String) { - let len = s.as_bytes().len(); - let mut dst = Vec::with_capacity(len); - dst.resize(len, 0); - - let hex_string = hex_string(s.as_bytes()); - - hex_decode(hex_string.as_bytes(), &mut dst).unwrap(); - - assert_eq!(&dst[..], s.as_bytes()); - } - - proptest! { - #[test] - fn test_hex_decode(ref s in ".+") { - _test_hex_decode(s); + fn test_encode(ref s in ".*") { + _test_encode(s); } } - fn _test_hex_decode_check(s: &String, ok: bool) { - let len = s.as_bytes().len(); - let mut dst = Vec::with_capacity(len / 2); - dst.resize(len / 2, 0); - assert!(hex_decode(s.as_bytes(), &mut dst).is_ok() == ok); + fn _test_decode_check(s: &String, ok: bool) { + assert!(decode(s.as_bytes()).is_ok() == ok); } proptest! { #[test] - fn test_hex_decode_check(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { - _test_hex_decode_check(s, true); + fn test_decode_check(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { + _test_decode_check(s, true); } } proptest! { #[test] - fn test_hex_decode_check_odd(ref s in "[0-9a-fA-F]{11}") { - _test_hex_decode_check(s, false); + fn test_decode_check_odd(ref s in "[0-9a-fA-F]{11}") { + _test_decode_check(s, false); } } proptest! { #[test] fn test_roundtrip(input: Vec) { - let mut encoded = vec![0; input.len() * 2]; - hex_encode(&input, &mut encoded).unwrap(); - let mut decoded = vec![0; input.len()]; - hex_decode(&encoded, &mut decoded).unwrap(); + let encoded = encode(&input); + let decoded = decode(encoded.as_bytes()).unwrap(); assert_eq!(&decoded, &input); } #[test] fn test_encode_matches(input: Vec) { - let encoded = hex_string(&input); + let encoded = encode(&input); let expected = hex::encode(&input); assert_eq!(encoded, expected); } From 124a106f0e2601df8264cc2e97dfa9a5d56f86fe Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 18 Feb 2020 07:25:33 -0800 Subject: [PATCH 19/20] Accept input as AsRef<[u8]> to be more convenient. --- src/decode.rs | 18 +++++++++++++++--- src/encode.rs | 12 ++++++++++-- src/lib.rs | 6 +++--- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 4e9f99c..ba5e52b 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,12 +1,20 @@ use crate::error::Error; -pub fn decode(src: &[u8]) -> Result, Error> { +pub fn decode(src: &I) -> Result, Error> +where + I: AsRef<[u8]> + ?Sized, +{ + let src = src.as_ref(); let mut output = vec![0u8; src.len() / 2]; decode_to_slice(src, &mut output)?; Ok(output) } -pub fn decode_to_slice(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { +pub fn decode_to_slice(src: &I, dst: &mut [u8]) -> Result<(), Error> +where + I: AsRef<[u8]> + ?Sized, +{ + let src = src.as_ref(); validate_buffer_length(src, dst)?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { @@ -26,7 +34,11 @@ pub fn decode_to_slice(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { arch::fallback::decode(src, dst) } -pub fn decode_to_slice_unchecked(src: &[u8], dst: &mut [u8]) { +pub fn decode_to_slice_unchecked(src: &I, dst: &mut [u8]) +where + I: AsRef<[u8]> + ?Sized, +{ + let src = src.as_ref(); validate_buffer_length(src, dst).unwrap(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { diff --git a/src/encode.rs b/src/encode.rs index f43a42a..d240277 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -4,14 +4,22 @@ use crate::error::Error; static TABLE: &[u8] = b"0123456789abcdef"; -pub fn encode(src: &[u8]) -> String { +pub fn encode(src: &I) -> String +where + I: AsRef<[u8]> + ?Sized, +{ + let src = src.as_ref(); let mut buffer = vec![0; src.len() * 2]; // should never panic because the destination buffer is large enough. encode_to_slice(src, &mut buffer).unwrap(); unsafe { String::from_utf8_unchecked(buffer) } } -pub fn encode_to_slice(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { +pub fn encode_to_slice(src: &I, dst: &mut [u8]) -> Result<(), Error> +where + I: AsRef<[u8]> + ?Sized, +{ + let src = src.as_ref(); let len = src.len().checked_mul(2).unwrap(); if dst.len() < len { return Err(Error::InvalidLength(len)); diff --git a/src/lib.rs b/src/lib.rs index 754b295..c4acb07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,7 @@ mod tests { encode_to_slice(s.as_bytes(), &mut buffer).unwrap(); let encoded = unsafe { str::from_utf8_unchecked(&buffer[..s.as_bytes().len() * 2]) }; - let hex_string = encode(s.as_bytes()); + let hex_string = encode(s); assert_eq!(encoded, hex::encode(s)); assert_eq!(hex_string, hex::encode(s)); @@ -43,7 +43,7 @@ mod tests { } fn _test_decode_check(s: &String, ok: bool) { - assert!(decode(s.as_bytes()).is_ok() == ok); + assert!(decode(s).is_ok() == ok); } proptest! { @@ -64,7 +64,7 @@ mod tests { #[test] fn test_roundtrip(input: Vec) { let encoded = encode(&input); - let decoded = decode(encoded.as_bytes()).unwrap(); + let decoded = decode(&encoded).unwrap(); assert_eq!(&decoded, &input); } From 8d8ace96a8871958eae2a520afb7c646493b4402 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Tue, 18 Feb 2020 07:30:35 -0800 Subject: [PATCH 20/20] Remove the check bench and implementations. The benches added unnecessary code that isn't available via the public api and is only benchmarked as a side-effect of the decode benchmark. --- Cargo.toml | 6 --- benches/check.rs | 48 -------------------- src/decode.rs | 114 ----------------------------------------------- src/lib.rs | 12 ++--- 4 files changed, 4 insertions(+), 176 deletions(-) delete mode 100644 benches/check.rs diff --git a/Cargo.toml b/Cargo.toml index 2356566..ec776a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,9 +32,3 @@ rand = "0.7.3" name = "hex" harness = false required-features = ["bench", "avx2", "sse41"] - - -[[bench]] -name = "check" -harness = false -required-features = ["bench", "avx2", "sse41"] diff --git a/benches/check.rs b/benches/check.rs deleted file mode 100644 index b1fe60d..0000000 --- a/benches/check.rs +++ /dev/null @@ -1,48 +0,0 @@ -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use faster_hex::{check_avx2, check_fallback, check_sse}; -use std::time::Duration; - -const INPUT: &[&str] = &[ - "Bf9E2d38aceDeeCbbAfccc4B4B7AE", - "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D", - " \u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก", - "ed136fFDdCcC1DbaFE8CB6Df1AdDBAea44aCcC17b0DbC2741F9CeEeaFbE7A51D\u{0} ๐€€G\u{0}๐€€ GG\u{0}๐€€G\u{0}Gเ €\u{0} ๐€€ \u{0}:\u{0}\u{0}gเ €G G::GG::g๐€€G๐€€\u{0}\u{0}ยก๐€€เ €\u{0}:GGG Gg๐€€ :\u{0}:gG ยก", -]; - -fn bench(c: &mut Criterion) { - let mut check_group = c.benchmark_group("check"); - for (idx, input) in INPUT.iter().enumerate() { - check_group.bench_with_input(BenchmarkId::new("fallback", idx), input, |b, &input| { - b.iter(|| { - let ret = check_fallback(input.as_bytes()); - black_box(ret); - }) - }); - check_group.bench_with_input(BenchmarkId::new("avx2", idx), input, |b, &input| { - b.iter(|| { - let ret = unsafe { check_avx2(input.as_bytes()) }; - black_box(ret); - }) - }); - check_group.bench_with_input(BenchmarkId::new("sse", idx), input, |b, &input| { - b.iter(|| { - let ret = unsafe { check_sse(input.as_bytes()) }; - black_box(ret); - }) - }); - } - check_group.finish(); -} - -fn quicker() -> Criterion { - Criterion::default() - .warm_up_time(Duration::from_millis(500)) - .measurement_time(Duration::from_secs(1)) -} - -criterion_group! { - name = benches; - config = quicker(); - targets = bench -} -criterion_main!(benches); diff --git a/src/decode.rs b/src/decode.rs index ba5e52b..95b1752 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -148,22 +148,6 @@ pub mod arch { Ok(input) } - #[cfg(any(test, feature = "bench"))] - #[target_feature(enable = "avx2")] - pub unsafe fn check(mut src: &[u8]) -> bool { - while src.len() >= 32 { - let input = _mm256_loadu_si256(src.as_ptr() as *const _); - let hi_nibbles = - _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0b00001111)); - let low_nibbles = _mm256_and_si256(input, _mm256_set1_epi8(0b00001111)); - if !Checked::is_valid(hi_nibbles, low_nibbles) { - return false; - } - src = &src[32..]; - } - crate::decode::arch::fallback::check(src) - } - pub trait IsValid: crate::decode::arch::fallback::IsValid { unsafe fn is_valid(hi_nibbles: __m256i, low_nibbles: __m256i) -> bool; } @@ -229,34 +213,6 @@ pub mod arch { true } } - - #[cfg(test)] - mod tests { - use super::*; - use proptest::{proptest, proptest_helper}; - - fn _test_check_true(s: &String) { - assert!(unsafe { check(s.as_bytes()) }); - } - - proptest! { - #[test] - fn test_check_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { - _test_check_true(s); - } - } - - fn _test_check_false(s: &String) { - assert!(!unsafe { check(s.as_bytes()) }); - } - - proptest! { - #[test] - fn test_check_false(ref s in ".{32}[^0-9a-fA-F]+") { - _test_check_false(s); - } - } - } } #[cfg(all(feature = "sse41", any(target_arch = "x86", target_arch = "x86_64")))] @@ -323,21 +279,6 @@ pub mod arch { Ok(input) } - #[cfg(any(test, feature = "bench"))] - #[target_feature(enable = "sse4.1")] - pub unsafe fn check(mut src: &[u8]) -> bool { - while src.len() >= 16 { - let input = _mm_loadu_si128(src.as_ptr() as *const _); - let hi_nibbles = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0b00001111)); - let low_nibbles = _mm_and_si128(input, _mm_set1_epi8(0b00001111)); - if !Checked::is_valid(hi_nibbles, low_nibbles) { - return false; - } - src = &src[16..]; - } - crate::decode::arch::fallback::check(src) - } - pub trait IsValid: crate::decode::arch::fallback::IsValid { unsafe fn is_valid(hi_nibbles: __m128i, low_nibbles: __m128i) -> bool; } @@ -385,44 +326,11 @@ pub mod arch { true } } - - #[cfg(test)] - mod tests { - use super::*; - use proptest::{proptest, proptest_helper}; - - fn _test_check_true(s: &String) { - assert!(unsafe { check(s.as_bytes()) }); - } - - proptest! { - #[test] - fn test_check_true(ref s in "([0-9a-fA-F][0-9a-fA-F])+") { - _test_check_true(s); - } - } - - fn _test_check_false(s: &String) { - assert!(!unsafe { check(s.as_bytes()) }); - } - - proptest! { - #[test] - fn test_check_false(ref s in ".{32}[^0-9a-fA-F]+") { - _test_check_false(s); - } - } - } } pub mod fallback { use crate::decode::{Checked, Error, Unchecked}; - #[cfg(any(test, feature = "bench"))] - pub fn check(src: &[u8]) -> bool { - src.iter().cloned().all(|b| unhex_a(b) != 0xff) - } - #[inline] pub fn decode(src: &[u8], dst: &mut [u8]) -> Result<(), Error> { _decode::(src, dst).map_err(|_| Error::InvalidChar) @@ -541,28 +449,6 @@ pub mod arch { _test_decode(s); } } - - fn _test_check_true(s: &String) { - assert!(check(s.as_bytes())); - } - - proptest! { - #[test] - fn test_check_true(ref s in "[0-9a-fA-F]+") { - _test_check_true(s); - } - } - - fn _test_check_false(s: &String) { - assert!(!check(s.as_bytes())); - } - - proptest! { - #[test] - fn test_check_false(ref s in ".{16}[^0-9a-fA-F]+") { - _test_check_false(s); - } - } } } } diff --git a/src/lib.rs b/src/lib.rs index c4acb07..89659c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,16 +6,12 @@ pub use crate::encode::{encode, encode_to_slice}; pub use crate::error::Error; #[cfg(feature = "bench")] -pub use crate::decode::{ - arch::avx2::check as check_avx2, - arch::fallback::{ - check as check_fallback, decode as decode_fallback, - decode_unchecked as decode_unchecked_fallback, +pub use crate::{ + decode::arch::fallback::{ + decode as decode_fallback, decode_unchecked as decode_unchecked_fallback, }, - arch::sse41::check as check_sse, + encode::encode_fallback, }; -#[cfg(feature = "bench")] -pub use crate::encode::encode_fallback; #[cfg(test)] mod tests {