Skip to content

Commit

Permalink
perf: rechunk before grouping on multiple keys (pola-rs#11711)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 13, 2023
1 parent f18583c commit abd7586
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ impl StringNameSpace {
}

/// Slice the string values.
pub fn str_slice(self, start: i64, length: Option<u64>) -> Expr {
pub fn slice(self, start: i64, length: Option<u64>) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::Slice(
start, length,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ impl OptimizationRule for DelayRechunk {
match lp_arena.get(node) {
// An aggregation can be partitioned; it's wasteful to rechunk before that partition.
#[allow(unused_mut)]
ALogicalPlan::Aggregate { input, .. } => {
if !self.processed.insert(node.0) {
ALogicalPlan::Aggregate { input, keys, .. } => {
// Multiple keys on multiple chunks is much slower, so rechunk.
if !self.processed.insert(node.0) || keys.len() > 1 {
return None;
};

Expand Down
6 changes: 3 additions & 3 deletions crates/polars-sql/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ impl SqlFunctionVisitor<'_> {
EndsWith => self.visit_binary(|e, s| e.str().ends_with(s)),
InitCap => self.visit_unary(|e| e.str().to_titlecase()),
Left => self.try_visit_binary(|e, length| {
Ok(e.str().str_slice(0, match length {
Ok(e.str().slice(0, match length {
Expr::Literal(LiteralValue::Int64(n)) => Some(n as u64),
_ => {
polars_bail!(InvalidOperation: "Invalid 'length' for Left: {}", function.args[1]);
Expand Down Expand Up @@ -729,15 +729,15 @@ impl SqlFunctionVisitor<'_> {
StartsWith => self.visit_binary(|e, s| e.str().starts_with(s)),
Substring => match function.args.len() {
2 => self.try_visit_binary(|e, start| {
Ok(e.str().str_slice(match start {
Ok(e.str().slice(match start {
Expr::Literal(LiteralValue::Int64(n)) => n,
_ => {
polars_bail!(InvalidOperation: "Invalid 'start' for Substring: {}", function.args[1]);
}
}, None))
}),
3 => self.try_visit_ternary(|e, start, length| {
Ok(e.str().str_slice(
Ok(e.str().slice(
match start {
Expr::Literal(LiteralValue::Int64(n)) => n,
_ => {
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ impl PyExpr {
}

fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
self.inner.clone().str().str_slice(start, length).into()
self.inner.clone().str().slice(start, length).into()
}

fn str_explode(&self) -> Self {
Expand Down

0 comments on commit abd7586

Please sign in to comment.