Skip to content

Commit

Permalink
refactor(core): remove rule wrapper (#229)
Browse files Browse the repository at this point in the history
yet another thing we can remove after the predicate refactor.

---------

Signed-off-by: Alex Chi <iskyzh@gmail.com>
  • Loading branch information
skyzh authored Nov 8, 2024
1 parent 75f9fe6 commit c77a719
Show file tree
Hide file tree
Showing 13 changed files with 76 additions and 163 deletions.
17 changes: 9 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.0"
version = "0.1.1"
edition = "2021"
homepage = "https://github.com/cmu-db/optd"
keywords = ["sql", "database", "optimizer", "datafusion"]
Expand Down
11 changes: 5 additions & 6 deletions datafusion-optd-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
name = "datafusion-optd-cli"
description = "Command Line Client for DataFusion query engine."
version = "32.0.0"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
edition = "2021"
keywords = ["arrow", "datafusion", "query", "sql"]
license = "Apache-2.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
homepage = "https://github.com/cmu-db/optd"
repository = "https://github.com/cmu-db/optd"
rust-version = "1.70"
readme = "README.md"

Expand Down Expand Up @@ -57,9 +56,9 @@ tokio = { version = "1.24", features = [
"parking_lot",
] }
url = "2.2"
optd-datafusion-bridge = { path = "../optd-datafusion-bridge" }
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" }
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" }
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" }
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
tracing-subscriber = "0.3"
tracing = "0.1"

Expand Down
10 changes: 5 additions & 5 deletions optd-core/src/cascades/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::nodes::{
};
use crate::optimizer::Optimizer;
use crate::property::{PropertyBuilder, PropertyBuilderAny};
use crate::rules::RuleWrapper;
use crate::rules::Rule;

pub type RuleId = usize;

Expand All @@ -47,7 +47,7 @@ pub struct CascadesOptimizer<T: NodeType, M: Memo<T> = NaiveMemo<T>> {
explored_group: HashSet<GroupId>,
explored_expr: HashSet<ExprId>,
fired_rules: HashMap<ExprId, HashSet<RuleId>>,
rules: Arc<[Arc<RuleWrapper<T, Self>>]>,
rules: Arc<[Arc<dyn Rule<T, Self>>]>,
disabled_rules: HashSet<usize>,
cost: Arc<dyn CostModel<T, M>>,
property_builders: Arc<[Box<dyn PropertyBuilderAny<T>>]>,
Expand Down Expand Up @@ -94,15 +94,15 @@ impl Display for PredId {

impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
pub fn new(
rules: Vec<Arc<RuleWrapper<T, Self>>>,
rules: Vec<Arc<dyn Rule<T, Self>>>,
cost: Box<dyn CostModel<T, NaiveMemo<T>>>,
property_builders: Vec<Box<dyn PropertyBuilderAny<T>>>,
) -> Self {
Self::new_with_prop(rules, cost, property_builders, Default::default())
}

pub fn new_with_prop(
rules: Vec<Arc<RuleWrapper<T, Self>>>,
rules: Vec<Arc<dyn Rule<T, Self>>>,
cost: Box<dyn CostModel<T, NaiveMemo<T>>>,
property_builders: Vec<Box<dyn PropertyBuilderAny<T>>>,
prop: OptimizerProperties,
Expand Down Expand Up @@ -153,7 +153,7 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
self.cost.clone()
}

pub fn rules(&self) -> Arc<[Arc<RuleWrapper<T, Self>>]> {
pub fn rules(&self) -> Arc<[Arc<dyn Rule<T, Self>>]> {
self.rules.clone()
}

Expand Down
11 changes: 3 additions & 8 deletions optd-core/src/cascades/tasks/apply_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::cascades::optimizer::{CascadesOptimizer, ExprId, RuleId};
use crate::cascades::tasks::{OptimizeExpressionTask, OptimizeInputsTask};
use crate::cascades::{GroupId, Memo};
use crate::nodes::{ArcPlanNode, NodeType, PlanNode, PlanNodeOrGroup};
use crate::rules::{OptimizeType, RuleMatcher};
use crate::rules::RuleMatcher;

pub struct ApplyRuleTask {
rule_id: RuleId,
Expand Down Expand Up @@ -164,21 +164,16 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ApplyRuleTask {
return Ok(vec![]);
}

let rule_wrapper = optimizer.rules()[self.rule_id].clone();
let rule = rule_wrapper.rule();
let rule = optimizer.rules()[self.rule_id].clone();

trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name(), optimize_type=%rule_wrapper.optimize_type());
trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name());
let group_id = optimizer.get_group_id(self.expr_id);
let mut tasks = vec![];
let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer);
for binding in binding_exprs {
trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding);
let applied = rule.apply(optimizer, binding);

if rule_wrapper.optimize_type() == OptimizeType::Heuristics {
panic!("no more heuristics rule in cascades");
}

for expr in applied {
trace!(event = "after_apply_rule", task = "apply_rule", output_binding=%expr);
// TODO: remove clone in the below line
Expand Down
3 changes: 1 addition & 2 deletions optd-core/src/cascades/tasks/optimize_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
let expr = optimizer.get_expr_memoed(self.expr_id);
trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr);
let mut tasks = vec![];
for (rule_id, rule_wrapper) in optimizer.rules().iter().enumerate() {
let rule = rule_wrapper.rule();
for (rule_id, rule) in optimizer.rules().iter().enumerate() {
if optimizer.is_rule_fired(self.expr_id, rule_id) {
continue;
}
Expand Down
50 changes: 0 additions & 50 deletions optd-core/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,11 @@

mod ir;

use std::fmt::{Display, Formatter};
use std::sync::Arc;

pub use ir::RuleMatcher;

use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup};
use crate::optimizer::Optimizer;

#[derive(Clone, Copy, Debug, PartialEq)]
pub enum OptimizeType {
Cascades,
Heuristics,
}

impl Display for OptimizeType {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Cascades => write!(f, "cascades"),
Self::Heuristics => write!(f, "heuristics"),
}
}
}

pub struct RuleWrapper<T: NodeType, O: Optimizer<T>> {
pub rule: Arc<dyn Rule<T, O>>,
pub optimize_type: OptimizeType,
}

impl<T: NodeType, O: Optimizer<T>> RuleWrapper<T, O> {
pub fn new(rule: Arc<dyn Rule<T, O>>, optimizer_type: OptimizeType) -> Self {
Self {
rule,
optimize_type: optimizer_type,
}
}
pub fn new_cascades(rule: Arc<dyn Rule<T, O>>) -> Arc<Self> {
Arc::new(Self {
rule,
optimize_type: OptimizeType::Cascades,
})
}
pub fn new_heuristic(rule: Arc<dyn Rule<T, O>>) -> Arc<Self> {
Arc::new(Self {
rule,
optimize_type: OptimizeType::Heuristics,
})
}
pub fn rule(&self) -> Arc<dyn Rule<T, O>> {
self.rule.clone()
}
pub fn optimize_type(&self) -> OptimizeType {
self.optimize_type
}
}

// TODO: docs, possible renames.
// TODO: Why do we have all of these match types? Seems like possible overkill.
pub trait Rule<T: NodeType, O: Optimizer<T>>: 'static + Send + Sync {
Expand Down
4 changes: 2 additions & 2 deletions optd-datafusion-bridge/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ datafusion = "32.0.0"
datafusion-expr = "32.0.0"
async-trait = "0.1"
itertools = "0.13"
optd-core = { path = "../optd-core" }
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
optd-core = { path = "../optd-core", version = "0.1" }
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
anyhow = "1"
async-recursion = "1"
futures-lite = "2"
Expand Down
17 changes: 10 additions & 7 deletions optd-datafusion-repr-adv-cost/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
[package]
name = "optd-datafusion-repr-adv-cost"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
description = "datafusion plan representation for optd"
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
repository = { workspace = true }

[dependencies]
anyhow = "1"
arrow-schema = "47.0.0"
assert_approx_eq = "1.1.0"
datafusion = "32.0.0"
ordered-float = "4"
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
optd-core = { path = "../optd-core" }
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
optd-core = { path = "../optd-core", version = "0.1" }
serde = { version = "1.0", features = ["derive"] }
rayon = "1.10"
itertools = "0.13"
test-case = "3.3"
tracing = "0.1"
tracing-subscriber = "0.3"
optd-gungnir = { path = "../optd-gungnir" }
optd-gungnir = { path = "../optd-gungnir", version = "0.1" }
serde_with = { version = "3.7.0", features = ["json"] }
2 changes: 1 addition & 1 deletion optd-datafusion-repr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ tracing = "0.1"
tracing-subscriber = "0.3"
pretty-xmlish = "0.1"
itertools = "0.13"
optd-core = { path = "../optd-core" }
optd-core = { path = "../optd-core", version = "0.1" }
camelpaste = "0.1"
datafusion-expr = "32.0.0"
serde = { version = "1.0", features = ["derive"] }
Expand Down
Loading

0 comments on commit c77a719

Please sign in to comment.