From aad4d347ba575387ab17299713222316c637f18b Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 11 Sep 2023 05:32:48 -0400 Subject: [PATCH] Update xxhash functions to use a more optimized single-arg paths --- Cargo.toml | 1 + udf-xxhash/src/lib.rs | 87 +++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 45 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b4d94..cc1ce1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,4 +11,5 @@ members = [ [profile.release] # Minimize binary size by removing all symbols strip = "symbols" +codegen-units = 1 lto = "fat" diff --git a/udf-xxhash/src/lib.rs b/udf-xxhash/src/lib.rs index fc86d02..d76e440 100644 --- a/udf-xxhash/src/lib.rs +++ b/udf-xxhash/src/lib.rs @@ -14,21 +14,20 @@ //! ``` use udf::prelude::*; -use xxhash_rust::xxh3::Xxh3; -use xxhash_rust::xxh32::Xxh32; -use xxhash_rust::xxh64::Xxh64; +use xxhash_rust::xxh3::{xxh3_64, Xxh3}; +use xxhash_rust::xxh32::{xxh32, Xxh32}; +use xxhash_rust::xxh64::{xxh64, Xxh64}; -/// We will reuse our state between runs to save a few cycles -struct XxHash3(Xxh3); -struct XxHash32(Xxh32); -struct XxHash64(Xxh64); +struct XxHash3; +struct XxHash32; +struct XxHash64; #[register(name = "xxhash3")] impl BasicUdf for XxHash3 { type Returns<'a> = i64; fn init(_cfg: &UdfCfg, _args: &ArgList) -> Result { - Ok(Self(Xxh3::new())) + Ok(Self) } fn process<'a>( @@ -37,14 +36,14 @@ impl BasicUdf for XxHash3 { args: &ArgList, _error: Option, ) -> Result, ProcessError> { - let hasher = &mut self.0; - - hash_args(args, |buf| hasher.update(buf)); - - let result = hasher.digest(); - hasher.reset(); - - Ok(result as i64) + if args.len() == 1 { + Ok(hash_arg(args.get(0).unwrap(), xxh3_64) as i64) + } else { + let mut hasher = Xxh3::new(); + args.iter() + .for_each(|arg| hash_arg(arg, |buf| hasher.update(buf))); + Ok(hasher.digest() as i64) + } } } @@ -53,7 +52,7 @@ impl BasicUdf for XxHash32 { type Returns<'a> = i64; fn init(_cfg: &UdfCfg, _args: &ArgList) -> Result { - Ok(Self(Xxh32::new(0))) + Ok(Self) } fn process<'a>( @@ -62,14 +61,14 @@ impl BasicUdf for XxHash32 { args: &ArgList, _error: Option, ) -> Result, ProcessError> { - let hasher = &mut self.0; - - hash_args(args, |buf| hasher.update(buf)); - - let result = hasher.digest(); - hasher.reset(0); - - Ok(result.into()) + if args.len() == 1 { + Ok(hash_arg(args.get(0).unwrap(), |buf| xxh32(buf, 0)).into()) + } else { + let mut hasher = Xxh32::new(0); + args.iter() + .for_each(|arg| hash_arg(arg, |buf| hasher.update(buf))); + Ok(hasher.digest() as i64) + } } } @@ -78,7 +77,7 @@ impl BasicUdf for XxHash64 { type Returns<'a> = i64; fn init(_cfg: &UdfCfg, _args: &ArgList) -> Result { - Ok(Self(Xxh64::new(0))) + Ok(Self) } fn process<'a>( @@ -87,27 +86,25 @@ impl BasicUdf for XxHash64 { args: &ArgList, _error: Option, ) -> Result, ProcessError> { - let hasher = &mut self.0; - - hash_args(args, |buf| hasher.update(buf)); - - let result = hasher.digest(); - hasher.reset(0); - - Ok(result as i64) + if args.len() == 1 { + Ok(hash_arg(args.get(0).unwrap(), |buf| xxh64(buf, 0)) as i64) + } else { + let mut hasher = Xxh64::new(0); + args.iter() + .for_each(|arg| hash_arg(arg, |buf| hasher.update(buf))); + Ok(hasher.digest() as i64) + } } } -/// Hash all arguments to a hasher function -fn hash_args(args: &ArgList, mut update_fn: impl FnMut(&[u8])) { - for arg in args { - // Any non-null value will update the hash, null values do nothing. - match arg.value() { - SqlResult::String(Some(buf)) => update_fn(buf), - SqlResult::Real(Some(f)) => update_fn(&f.to_le_bytes()), - SqlResult::Int(Some(i)) => update_fn(&i.to_le_bytes()), - SqlResult::Decimal(Some(d)) => update_fn(d.as_bytes()), - _ => update_fn([].as_slice()), - } +/// Turn a SQL argument into a hashable buffer and pass it the given function +fn hash_arg(arg: SqlArg, mut hash_fn: impl FnMut(&[u8]) -> T) -> T { + // Any non-null value will update the hash, null values do nothing. + match arg.value() { + SqlResult::String(Some(buf)) => hash_fn(buf), + SqlResult::Real(Some(f)) => hash_fn(&f.to_le_bytes()), + SqlResult::Int(Some(i)) => hash_fn(&i.to_le_bytes()), + SqlResult::Decimal(Some(d)) => hash_fn(d.as_bytes()), + _ => hash_fn([].as_slice()), } }