Skip to content

Commit

Permalink
Complex Expression handling for uncorrelated IN and NOT IN subqueries (
Browse files Browse the repository at this point in the history
…#16439)

Signed-off-by: Manan Gupta <manan@planetscale.com>
  • Loading branch information
GuptaManan100 authored Jul 23, 2024
1 parent 00cba23 commit 2e847cd
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 6 deletions.
42 changes: 42 additions & 0 deletions go/test/endtoend/vtgate/vitess_tester/subquery/subquery.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Schema and data for testing uncorrelated IN / NOT IN subqueries that are combined with
# other predicates (AND / OR) in the WHERE clause of an aggregation query.
create table t1
(
id1 bigint,
id2 bigint,
primary key (id1)
) Engine = InnoDB;

create table t2
(
id3 bigint,
id4 bigint,
primary key (id3)
) Engine = InnoDB;

# t1 holds id1 values 0 through 4.
INSERT INTO t1 (id1, id2) VALUES
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4);

# t2 holds id4 values 0 and 1, so `SELECT id4 FROM t2` yields (0, 1) and
# `SELECT id4 FROM t2 where id4 = 3` yields no rows.
INSERT INTO t2 (id3, id4) VALUES
(0, 0),
(1, 1);

# Aggregation query with multiple expressions, one of which is an IN subquery. Expected count: 1.
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2);
# Aggregation query with a complex expression that has an IN subquery. Expected count: 3.
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2);
# Aggregation query with multiple expressions, one of which is an IN subquery that returns empty results. Expected count: 0.
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2 where id4 = 3);
# Aggregation query with a complex expression that has an IN subquery that returns empty results. Expected count: 1.
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2 where id4 = 3);

# Aggregation query with multiple expressions, one of which is a NOT IN subquery. Expected count: 1.
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2);
# Aggregation query with a complex expression that has a NOT IN subquery. Expected count: 4.
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2);
# Aggregation query with multiple expressions, one of which is a NOT IN subquery that returns empty results. Expected count: 1.
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);
# Aggregation query with a complex expression that has a NOT IN subquery that returns empty results. Expected count: 5.
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);
33 changes: 27 additions & 6 deletions go/vt/vtgate/planbuilder/operators/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ func (sq *SubQuery) GetMergePredicates() []sqlparser.Expr {
}

func (sq *SubQuery) settle(ctx *plancontext.PlanningContext, outer Operator) Operator {
if !sq.TopLevel {
// We can allow uncorrelated subqueries even when the subquery isn't the top-level construct,
// e.g. when it's underneath an Aggregator, because they will be pulled out and run separately.
if !sq.TopLevel && sq.correlated {
panic(subqueryNotAtTopErr)
}
if sq.correlated && sq.FilterType != opcode.PulloutExists {
Expand Down Expand Up @@ -253,6 +255,20 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
}
post := func(cursor *sqlparser.CopyOnWriteCursor) {
node := cursor.Node()
// For IN and NOT IN type filters, we have to add an expression that checks whether we got any rows back,
// for correctness. For IN, that check is ANDed with the comparison; for NOT IN, its negation is ORed with the comparison.
if compExpr, isCompExpr := node.(*sqlparser.ComparisonExpr); sq.FilterType.NeedsListArg() && isCompExpr {
if listArg, isListArg := compExpr.Right.(sqlparser.ListArg); isListArg && listArg.String() == sq.ArgName {
if sq.FilterType == opcode.PulloutIn {
cursor.Replace(sqlparser.AndExpressions(sqlparser.NewArgument(hasValuesArg()), compExpr))
} else {
cursor.Replace(&sqlparser.OrExpr{
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
Right: compExpr,
})
}
}
}
if _, ok := node.(*sqlparser.Subquery); !ok {
return
}
Expand All @@ -277,13 +293,18 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
sq.FilterType = opcode.PulloutExists // it's the same pullout as EXISTS, just with a NOT in front of the predicate
predicates = append(predicates, sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())))
case opcode.PulloutIn:
predicates = append(predicates, sqlparser.NewArgument(hasValuesArg()), rhsPred)
// Because we replace the comparison expression with an AND expression, that AND might be the top-level construct here.
// In that case, it is better to send the two sides of the AND expression separately in the predicates, because it can
// lead to better routing. This is not always the case, however: for example, rhsPred can be something like
// `user.id = 2 OR (:__sq_has_values AND user.id IN ::sql1)`, where the top-level construct is an OR.
if andExpr, isAndExpr := rhsPred.(*sqlparser.AndExpr); isAndExpr {
predicates = append(predicates, andExpr.Left, andExpr.Right)
} else {
predicates = append(predicates, rhsPred)
}
sq.SubqueryValueName = sq.ArgName
case opcode.PulloutNotIn:
predicates = append(predicates, &sqlparser.OrExpr{
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
Right: rhsPred,
})
predicates = append(predicates, rhsPred)
sq.SubqueryValueName = sq.ArgName
case opcode.PulloutValue:
predicates = append(predicates, rhsPred)
Expand Down
106 changes: 106 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -1883,6 +1883,112 @@
]
}
},
{
"comment": "Complex expression in a subquery used in IN clause of an aggregate query",
"query": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
"plan": {
"QueryType": "SELECT",
"Original": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "sum_count_star(0) AS count(*)",
"Inputs": [
{
"OperatorType": "UncorrelatedSubquery",
"Variant": "PulloutIn",
"PulloutVars": [
"__sq_has_values",
"__sq1"
],
"Inputs": [
{
"InputName": "SubQuery",
"OperatorType": "Route",
"Variant": "Unsharded",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select id from unsharded_a where 1 != 1",
"Query": "select id from unsharded_a where colb = 2",
"Table": "unsharded_a"
},
{
"InputName": "Outer",
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select count(*) from `user` where 1 != 1",
"Query": "select count(*) from `user` where `user`.id = 2 or :__sq_has_values and `user`.id in ::__sq1",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"main.unsharded_a",
"user.user"
]
}
},
{
"comment": "Complex expression in a subquery used in NOT IN clause of an aggregate query",
"query": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
"plan": {
"QueryType": "SELECT",
"Original": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "sum_count_star(0) AS count(*)",
"Inputs": [
{
"OperatorType": "UncorrelatedSubquery",
"Variant": "PulloutNotIn",
"PulloutVars": [
"__sq_has_values",
"__sq1"
],
"Inputs": [
{
"InputName": "SubQuery",
"OperatorType": "Route",
"Variant": "Unsharded",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select id from unsharded_a where 1 != 1",
"Query": "select id from unsharded_a where colb = 2",
"Table": "unsharded_a"
},
{
"InputName": "Outer",
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select count(*) from `user` where 1 != 1",
"Query": "select count(*) from `user` where `user`.id = 2 or (not :__sq_has_values or `user`.id not in ::__sq1)",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"main.unsharded_a",
"user.user"
]
}
},
{
"comment": "testing SingleRow Projection with arithmetics",
"query": "select 42+2",
Expand Down

0 comments on commit 2e847cd

Please sign in to comment.