Skip to content

Commit

Permalink
Improve like kernel by ~2% (#5390)
Browse files Browse the repository at this point in the history
* Rework like to use memchr

* Fix clippy

* Rename a function

* Incorporate review comment
  • Loading branch information
psvri authored Feb 13, 2024
1 parent 1b06d78 commit 0d96f1e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
1 change: 1 addition & 0 deletions arrow-string/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ arrow-select = { workspace = true }
regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] }
regex-syntax = { version = "0.8.0", default-features = false, features = ["unicode"] }
num = { version = "0.4", default-features = false, features = ["std"] }
memchr = "2.7.1"
19 changes: 12 additions & 7 deletions arrow-string/src/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use arrow_array::{BooleanArray, GenericStringArray, OffsetSizeTrait};
use arrow_schema::ArrowError;
use memchr::memchr2;
use regex::{Regex, RegexBuilder};

/// A string based predicate
Expand All @@ -39,19 +40,19 @@ pub enum Predicate<'a> {
impl<'a> Predicate<'a> {
/// Create a predicate for the given like pattern
pub fn like(pattern: &'a str) -> Result<Self, ArrowError> {
if !pattern.contains(is_like_pattern) {
if !contains_like_pattern(pattern) {
Ok(Self::Eq(pattern))
} else if pattern.ends_with('%')
&& !pattern.ends_with("\\%")
&& !pattern[..pattern.len() - 1].contains(is_like_pattern)
&& !contains_like_pattern(&pattern[..pattern.len() - 1])
{
Ok(Self::StartsWith(&pattern[..pattern.len() - 1]))
} else if pattern.starts_with('%') && !pattern[1..].contains(is_like_pattern) {
} else if pattern.starts_with('%') && !contains_like_pattern(&pattern[1..]) {
Ok(Self::EndsWith(&pattern[1..]))
} else if pattern.starts_with('%')
&& pattern.ends_with('%')
&& !pattern.ends_with("\\%")
&& !pattern[1..pattern.len() - 1].contains(is_like_pattern)
&& !contains_like_pattern(&pattern[1..pattern.len() - 1])
{
Ok(Self::Contains(&pattern[1..pattern.len() - 1]))
} else {
Expand All @@ -62,14 +63,14 @@ impl<'a> Predicate<'a> {
/// Create a predicate for the given ilike pattern
pub fn ilike(pattern: &'a str, is_ascii: bool) -> Result<Self, ArrowError> {
if is_ascii && pattern.is_ascii() {
if !pattern.contains(is_like_pattern) {
if !contains_like_pattern(pattern) {
return Ok(Self::IEqAscii(pattern));
} else if pattern.ends_with('%')
&& !pattern.ends_with("\\%")
&& !pattern[..pattern.len() - 1].contains(is_like_pattern)
&& !contains_like_pattern(&pattern[..pattern.len() - 1])
{
return Ok(Self::IStartsWithAscii(&pattern[..pattern.len() - 1]));
} else if pattern.starts_with('%') && !pattern[1..].contains(is_like_pattern) {
} else if pattern.starts_with('%') && !contains_like_pattern(&pattern[1..]) {
return Ok(Self::IEndsWithAscii(&pattern[1..]));
}
}
Expand Down Expand Up @@ -188,6 +189,10 @@ fn is_like_pattern(c: char) -> bool {
c == '%' || c == '_'
}

/// Reports whether `pattern` holds at least one LIKE wildcard byte
/// (`%` or `_`).
///
/// The search runs over the raw UTF-8 bytes of `pattern` using `memchr2`.
/// Both wildcards are single-byte ASCII characters, so a byte-level match
/// corresponds to a real `%` or `_` character in the string. A preceding
/// backslash is not treated specially here.
fn contains_like_pattern(pattern: &str) -> bool {
    let haystack = pattern.as_bytes();
    let first_wildcard = memchr2(b'%', b'_', haystack);
    first_wildcard.is_some()
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit 0d96f1e

Please sign in to comment.