Skip to content

Commit

Permalink
Merge pull request #21 from pluots/xxhash-rewrite
Browse files Browse the repository at this point in the history
Update xxhash functions to use a more optimized path for single arguments
  • Loading branch information
tgross35 authored Sep 11, 2023
2 parents 2ac8cf5 + aad4d34 commit 206f1e4
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 45 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ members = [
[profile.release]
# Minimize binary size by removing all symbols
strip = "symbols"
codegen-units = 1
lto = "fat"
87 changes: 42 additions & 45 deletions udf-xxhash/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,20 @@
//! ```
use udf::prelude::*;
use xxhash_rust::xxh3::Xxh3;
use xxhash_rust::xxh32::Xxh32;
use xxhash_rust::xxh64::Xxh64;
use xxhash_rust::xxh3::{xxh3_64, Xxh3};
use xxhash_rust::xxh32::{xxh32, Xxh32};
use xxhash_rust::xxh64::{xxh64, Xxh64};

/// We will reuse our state between runs to save a few cycles
struct XxHash3(Xxh3);
struct XxHash32(Xxh32);
struct XxHash64(Xxh64);
struct XxHash3;
struct XxHash32;
struct XxHash64;

#[register(name = "xxhash3")]
impl BasicUdf for XxHash3 {
type Returns<'a> = i64;

fn init(_cfg: &UdfCfg<Init>, _args: &ArgList<Init>) -> Result<Self, String> {
Ok(Self(Xxh3::new()))
Ok(Self)
}

fn process<'a>(
Expand All @@ -37,14 +36,14 @@ impl BasicUdf for XxHash3 {
args: &ArgList<Process>,
_error: Option<NonZeroU8>,
) -> Result<Self::Returns<'a>, ProcessError> {
let hasher = &mut self.0;

hash_args(args, |buf| hasher.update(buf));

let result = hasher.digest();
hasher.reset();

Ok(result as i64)
if args.len() == 1 {
Ok(hash_arg(args.get(0).unwrap(), xxh3_64) as i64)
} else {
let mut hasher = Xxh3::new();
args.iter()
.for_each(|arg| hash_arg(arg, |buf| hasher.update(buf)));
Ok(hasher.digest() as i64)
}
}
}

Expand All @@ -53,7 +52,7 @@ impl BasicUdf for XxHash32 {
type Returns<'a> = i64;

fn init(_cfg: &UdfCfg<Init>, _args: &ArgList<Init>) -> Result<Self, String> {
Ok(Self(Xxh32::new(0)))
Ok(Self)
}

fn process<'a>(
Expand All @@ -62,14 +61,14 @@ impl BasicUdf for XxHash32 {
args: &ArgList<Process>,
_error: Option<NonZeroU8>,
) -> Result<Self::Returns<'a>, ProcessError> {
let hasher = &mut self.0;

hash_args(args, |buf| hasher.update(buf));

let result = hasher.digest();
hasher.reset(0);

Ok(result.into())
if args.len() == 1 {
Ok(hash_arg(args.get(0).unwrap(), |buf| xxh32(buf, 0)).into())
} else {
let mut hasher = Xxh32::new(0);
args.iter()
.for_each(|arg| hash_arg(arg, |buf| hasher.update(buf)));
Ok(hasher.digest() as i64)
}
}
}

Expand All @@ -78,7 +77,7 @@ impl BasicUdf for XxHash64 {
type Returns<'a> = i64;

fn init(_cfg: &UdfCfg<Init>, _args: &ArgList<Init>) -> Result<Self, String> {
Ok(Self(Xxh64::new(0)))
Ok(Self)
}

fn process<'a>(
Expand All @@ -87,27 +86,25 @@ impl BasicUdf for XxHash64 {
args: &ArgList<Process>,
_error: Option<NonZeroU8>,
) -> Result<Self::Returns<'a>, ProcessError> {
let hasher = &mut self.0;

hash_args(args, |buf| hasher.update(buf));

let result = hasher.digest();
hasher.reset(0);

Ok(result as i64)
if args.len() == 1 {
Ok(hash_arg(args.get(0).unwrap(), |buf| xxh64(buf, 0)) as i64)
} else {
let mut hasher = Xxh64::new(0);
args.iter()
.for_each(|arg| hash_arg(arg, |buf| hasher.update(buf)));
Ok(hasher.digest() as i64)
}
}
}

/// Hash all arguments to a hasher function
fn hash_args(args: &ArgList<Process>, mut update_fn: impl FnMut(&[u8])) {
for arg in args {
// Any non-null value will update the hash, null values do nothing.
match arg.value() {
SqlResult::String(Some(buf)) => update_fn(buf),
SqlResult::Real(Some(f)) => update_fn(&f.to_le_bytes()),
SqlResult::Int(Some(i)) => update_fn(&i.to_le_bytes()),
SqlResult::Decimal(Some(d)) => update_fn(d.as_bytes()),
_ => update_fn([].as_slice()),
}
/// Turn a SQL argument into a hashable buffer and pass it the given function
fn hash_arg<T>(arg: SqlArg<Process>, mut hash_fn: impl FnMut(&[u8]) -> T) -> T {
// Any non-null value will update the hash, null values do nothing.
match arg.value() {
SqlResult::String(Some(buf)) => hash_fn(buf),
SqlResult::Real(Some(f)) => hash_fn(&f.to_le_bytes()),
SqlResult::Int(Some(i)) => hash_fn(&i.to_le_bytes()),
SqlResult::Decimal(Some(d)) => hash_fn(d.as_bytes()),
_ => hash_fn([].as_slice()),
}
}

0 comments on commit 206f1e4

Please sign in to comment.