-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add ConfigOptions to ScalarFunctionArgs #13527
base: main
Are you sure you want to change the base?
Changes from all commits
a18c82f
ad50ae6
42cd8dc
e84ca6d
1e87f7e
cc1f540
a733c8f
87b388e
6a6d86c
b8bf209
dc9f5db
1041525
5557f64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,7 @@ use datafusion::functions_aggregate::first_last::first_value_udaf; | |
use datafusion::optimizer::simplify_expressions::ExprSimplifier; | ||
use datafusion::physical_expr::{analyze, AnalysisContext, ExprBoundaries}; | ||
use datafusion::prelude::*; | ||
use datafusion_common::config::ConfigOptions; | ||
use datafusion_common::tree_node::{Transformed, TreeNode}; | ||
use datafusion_common::{ScalarValue, ToDFSchema}; | ||
use datafusion_expr::execution_props::ExecutionProps; | ||
|
@@ -169,7 +170,8 @@ fn simplify_demo() -> Result<()> { | |
// expressions, such as the current time (to evaluate `now()` | ||
// correctly) | ||
let props = ExecutionProps::new(); | ||
let context = SimplifyContext::new(&props).with_schema(schema); | ||
let config_options = ConfigOptions::default(); | ||
let context = SimplifyContext::new(&props, &config_options).with_schema(schema); | ||
let simplifier = ExprSimplifier::new(context); | ||
|
||
// And then call the simplify_expr function: | ||
|
@@ -184,7 +186,8 @@ fn simplify_demo() -> Result<()> { | |
|
||
// here are some other examples of what DataFusion is capable of | ||
let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?; | ||
let context = SimplifyContext::new(&props).with_schema(schema.clone()); | ||
let context = | ||
SimplifyContext::new(&props, &config_options).with_schema(schema.clone()); | ||
let simplifier = ExprSimplifier::new(context); | ||
|
||
// basic arithmetic simplification | ||
|
@@ -356,8 +359,13 @@ fn type_coercion_demo() -> Result<()> { | |
|
||
// Evaluation with an expression that has not been type coerced cannot succeed. | ||
let props = ExecutionProps::default(); | ||
let physical_expr = | ||
datafusion_physical_expr::create_physical_expr(&expr, &df_schema, &props)?; | ||
let config_options = ConfigOptions::default(); | ||
let physical_expr = datafusion_physical_expr::create_physical_expr( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems somewhat inevitable that creating a physical expr will require the config options However, I also think threading through the config options down through to the physical creation will (finally) permit people to pass things from the session down to function implementations (I think @cisaacson also was trying to do this in the past) |
||
&expr, | ||
&df_schema, | ||
&props, | ||
&config_options, | ||
)?; | ||
let e = physical_expr.evaluate(&batch).unwrap_err(); | ||
assert!(e | ||
.find_root() | ||
|
@@ -370,13 +378,15 @@ fn type_coercion_demo() -> Result<()> { | |
assert!(physical_expr.evaluate(&batch).is_ok()); | ||
|
||
// 2. Type coercion with `ExprSimplifier::coerce`. | ||
let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); | ||
let context = SimplifyContext::new(&props, &config_options) | ||
.with_schema(Arc::new(df_schema.clone())); | ||
let simplifier = ExprSimplifier::new(context); | ||
let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; | ||
let physical_expr = datafusion_physical_expr::create_physical_expr( | ||
&coerced_expr, | ||
&df_schema, | ||
&props, | ||
&config_options, | ||
)?; | ||
assert!(physical_expr.evaluate(&batch).is_ok()); | ||
|
||
|
@@ -389,6 +399,7 @@ fn type_coercion_demo() -> Result<()> { | |
&coerced_expr, | ||
&df_schema, | ||
&props, | ||
&config_options, | ||
)?; | ||
assert!(physical_expr.evaluate(&batch).is_ok()); | ||
|
||
|
@@ -417,6 +428,7 @@ fn type_coercion_demo() -> Result<()> { | |
&coerced_expr, | ||
&df_schema, | ||
&props, | ||
&config_options, | ||
)?; | ||
assert!(physical_expr.evaluate(&batch).is_ok()); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ use datafusion::execution::context::ExecutionProps; | |
use datafusion::physical_expr::create_physical_expr; | ||
use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; | ||
use datafusion::prelude::*; | ||
use datafusion_common::config::ConfigOptions; | ||
use std::collections::HashSet; | ||
use std::sync::Arc; | ||
|
||
|
@@ -187,7 +188,9 @@ impl PruningStatistics for MyCatalog { | |
fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate { | ||
let df_schema = DFSchema::try_from(schema.as_ref().clone()).unwrap(); | ||
let props = ExecutionProps::new(); | ||
let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap(); | ||
let config_options = ConfigOptions::default(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Something we could potentially do to make this API slightly easier to use might be to create a static default Something like impl ConfigOptions {
/// returns a reference to default ConfigOptions
pub fn default_singleton() -> &'static ConfigOptions
} This would then make it easier to return For example, then in |
||
let physical_expr = | ||
create_physical_expr(&expr, &df_schema, &props, &config_options).unwrap(); | ||
PruningPredicate::try_new(physical_expr, schema.clone()).unwrap() | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -241,6 +241,7 @@ async fn prune_partitions( | |
partitions: Vec<Partition>, | ||
filters: &[Expr], | ||
partition_cols: &[(String, DataType)], | ||
ctx: &SessionState, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recommend passing |
||
) -> Result<Vec<Partition>> { | ||
if filters.is_empty() { | ||
return Ok(partitions); | ||
|
@@ -286,13 +287,12 @@ async fn prune_partitions( | |
)?; | ||
|
||
let batch = RecordBatch::try_new(schema, arrays)?; | ||
|
||
// TODO: Plumb this down | ||
let props = ExecutionProps::new(); | ||
let config_options = ctx.config_options(); | ||
|
||
// Applies `filter` to `batch` returning `None` on error | ||
let do_filter = |filter| -> Result<ArrayRef> { | ||
let expr = create_physical_expr(filter, &df_schema, &props)?; | ||
let expr = create_physical_expr(filter, &df_schema, &props, config_options)?; | ||
expr.evaluate(&batch)?.into_array(partitions.len()) | ||
}; | ||
|
||
|
@@ -436,7 +436,7 @@ pub async fn pruned_partition_list<'a>( | |
debug!("Listed {} partitions", partitions.len()); | ||
|
||
let pruned = | ||
prune_partitions(table_path, partitions, filters, partition_cols).await?; | ||
prune_partitions(table_path, partitions, filters, partition_cols, ctx).await?; | ||
|
||
debug!("Pruning yielded {} partitions", pruned.len()); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an API change, but I think it is required to thread the config options through as each argument is specific
We could potentially improve the
try_into_physical_plan
API (as a follow on PR) to make it easier to update the API in the future using a trait or something likehttps://github.com/apache/datafusion/blob/e99e02b9b9093ceb0c13a2dd32a2a89beba47930/datafusion/expr/src/expr_schema.rs#L39-L38
So this would look something like