Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize multiple consecutive LIMITs #35384

Merged
merged 5 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ protected override ShapedQueryExpression TranslateDistinct(ShapedQueryExpression
}

selectExpression.ApplyOffset(translation);
selectExpression.ApplyLimit(TranslateExpression(Expression.Constant(1))!);
ApplyLimit(selectExpression, TranslateExpression(Expression.Constant(1))!);

return source;
}
Expand Down Expand Up @@ -693,7 +693,7 @@ protected override ShapedQueryExpression TranslateExcept(ShapedQueryExpression s
_queryCompilationContext.Logger.FirstWithoutOrderByAndFilterWarning();
}

selectExpression.ApplyLimit(TranslateExpression(Expression.Constant(1))!);
ApplyLimit(selectExpression, TranslateExpression(Expression.Constant(1))!);

return source.ShaperExpression.Type != returnType
? source.UpdateShaperExpression(Expression.Convert(source.ShaperExpression, returnType))
Expand Down Expand Up @@ -940,7 +940,7 @@ private SqlExpression CreateJoinPredicate(Expression outerKey, Expression innerK
}

selectExpression.ReverseOrderings();
selectExpression.ApplyLimit(TranslateExpression(Expression.Constant(1))!);
ApplyLimit(selectExpression, TranslateExpression(Expression.Constant(1))!);

return source.ShaperExpression.Type != returnType
? source.UpdateShaperExpression(Expression.Convert(source.ShaperExpression, returnType))
Expand Down Expand Up @@ -1208,7 +1208,7 @@ protected override Expression VisitMethodCall(MethodCallExpression methodCallExp
}

var selectExpression = (SelectExpression)source.QueryExpression;
selectExpression.ApplyLimit(TranslateExpression(Expression.Constant(_subquery ? 1 : 2))!);
ApplyLimit(selectExpression, TranslateExpression(Expression.Constant(_subquery ? 1 : 2))!);

return source.ShaperExpression.Type != returnType
? source.UpdateShaperExpression(Expression.Convert(source.ShaperExpression, returnType))
Expand Down Expand Up @@ -1258,11 +1258,49 @@ protected override Expression VisitMethodCall(MethodCallExpression methodCallExp
_queryCompilationContext.Logger.RowLimitingOperationWithoutOrderByWarning();
}

selectExpression.ApplyLimit(translation);
ApplyLimit(selectExpression, translation);

return source;
}

private void ApplyLimit(SelectExpression selectExpression, SqlExpression limit)
{
var oldLimit = selectExpression.Limit;

if (oldLimit is null)
{
selectExpression.SetLimit(limit);
return;
}

if (oldLimit is SqlConstantExpression { Value: int oldConst } && limit is SqlConstantExpression { Value: int newConst })
{
// if both the old and new limit are constants, use the smaller one
// (aka constant-fold LEAST(constA, constB))
if (oldConst > newConst)
{
selectExpression.SetLimit(limit);
}

return;
}

if (oldLimit.Equals(limit))
{
return;
}

// if possible, use LEAST(oldLimit, limit); otherwise, use nested queries
if (_sqlTranslator.GenerateLeast([oldLimit, limit], limit.Type) is { } newLimit)
{
selectExpression.SetLimit(newLimit);
}
else
{
selectExpression.ApplyLimit(limit);
}
}

/// <inheritdoc />
protected override ShapedQueryExpression? TranslateTakeWhile(ShapedQueryExpression source, LambdaExpression predicate)
=> null;
Expand Down
10 changes: 10 additions & 0 deletions src/EFCore.Relational/Query/SqlExpressions/SelectExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1924,6 +1924,16 @@ public void ApplyLimit(SqlExpression sqlExpression)
PushdownIntoSubquery();
Copy link
Contributor Author

@ranma42 ranma42 Dec 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could actually aim for never doing a pushdown and instead replace the limit with LEAST(Limit, sqlExpression, but I am unsure if this is easy to do here (in this context we don't have access to TranslateMin)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And you can't guarantee LEAST is supported at this stage

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exactly :(

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am experimenting with:

  • adding a SelectExpression.SetLimit method
  • replacing the ApplyLimit invocations with SetLimits as they are done from a context that also invokes GenerateLeast (and if that returns null, I can easily fall back to ApplyLimit)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm probably in the minority but it probably will break EFCore.Jet. I can only have a constant expression for TOP. And since I don't have LIMIT/OFFSET, TOP= LIMIT. And I do have a translation for LEAST

Copy link
Contributor Author

@ranma42 ranma42 Dec 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uhm... how does that work with non-constant Take()? (parameters are likely the most common, but in theory any kind of expression is allowed there)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm probably in the minority but it probably will break EFCore.Jet. I can only have a constant expression for TOP. And since I don't have LIMIT/OFFSET, TOP= LIMIT. And I do have a translation for LEAST

Also, note that as long as the code is only LIMITing on constants, you still get a (combined) constant as the new LIMIT.
LEAST/MIN are emitted when combining non-constants (or a constant and a non-constant), so it might be possible to handle it in the same way as you are doing now (I would assume something with row numbers)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I throw a Not Translated error (no row_number or equivlalent in ms access).

But I'm happy as long as a constant expression stays as a plain constant expression

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uhm... how does that work with non-constant Take()? (parameters are likely the most common, but in theory any kind of expression is allowed there)

Just before I sent it off to Oledb/ODBC, the query is modified to inline the parameter. If it refers to a column, well it just fails

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guys, one of my major gripes with the current query pipeline design is the non-openness of SelectExpression, i.e. code there isn't overridable by providers; this case with integrating LEAST() instead of pushing down is a great example of why that's needed. I do have a plan in mind: I'd like to first make SelectExpression fully mutable, and then extract most/all actual logic out of it, so that SelectExpression becomes a regular, pure, immutable expression just like any other. The logic could go to QueryableMethodTranslatingEV, but some of these operations are probably also needed in postprocessing, so possible a separate service is maybe needed, that would be accessible from everywhere and be overridable by the provider. I hope to make at least some progress in this general direction in 10 (e.g. making SelectExpression fully immutable).

Until then, we're a bit stuck hacking around... @ranma42 what you've done in this draft looks good to me - let me know when this is ready for review.

}

SetLimit(sqlExpression);
}

/// <summary>
/// Sets a new limit of the <see cref="SelectExpression" /> to limit the number of rows returned in the result set.
/// </summary>
/// <param name="sqlExpression">An expression representing limit row count.</param>
[EntityFrameworkInternal]
public void SetLimit(SqlExpression sqlExpression)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add [EntityFrameworkInternal] as while this looks good for now, I don't see this API sticking around (e.g. once SelectExpression is immutable).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed I assume that making SelectExpression immutable will change its API quite a bit (which might also be a very good opportunity to re-organize some code around it 😉 )

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely... That's high up on my list for 10.

{
Limit = sqlExpression;

if (Offset is null && Limit is SqlConstantExpression { Value: 1 })
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,22 @@ public override async Task SelectMany_correlated_with_outer_7(bool async)
AssertSql();
}

public override async Task SelectMany_with_multiple_Take(bool async)
{
// Cosmos client evaluation. Issue #17246.
await AssertTranslationFailed(() => base.SelectMany_with_multiple_Take(async));

AssertSql();
}

public override async Task Select_with_multiple_Take(bool async)
{
// Cosmos client evaluation. Issue #17246.
await AssertTranslationFailed(() => base.Select_with_multiple_Take(async));

AssertSql();
}

public override async Task FirstOrDefault_over_empty_collection_of_value_type_returns_correct_results(bool async)
{
// Cosmos client evaluation. Issue #17246.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,20 @@ from o in ss.Set<Order>().Where(o => c.CustomerID.Length >= o.CustomerID.Length)
AssertEqual(e.o, a.o);
});

[ConditionalTheory]
[MemberData(nameof(IsAsyncData))]
public virtual Task SelectMany_with_multiple_Take(bool async)
=> AssertQuery(
async,
ss => ss.Set<Customer>().SelectMany(c => c.Orders.OrderBy(o => o.OrderID).Take(5).Take(3)));

[ConditionalTheory]
[MemberData(nameof(IsAsyncData))]
public virtual Task Select_with_multiple_Take(bool async)
=> AssertQuery(
async,
ss => ss.Set<Customer>().OrderBy(o => o.CustomerID).Take(5).Take(3));

[ConditionalTheory]
[MemberData(nameof(IsAsyncData))]
public virtual Task FirstOrDefault_over_empty_collection_of_value_type_returns_correct_results(bool async)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,15 @@ public override async Task Skip_navigation_order_by_single_or_default(bool async
"""
SELECT [s0].[Id], [s0].[Name]
FROM [EntityOnes] AS [e]
OUTER APPLY (
SELECT TOP(1) [s].[Id], [s].[Name]
LEFT JOIN (
SELECT [s].[Id], [s].[Name], [s].[LeftId]
FROM (
SELECT TOP(1) [e0].[Id], [e0].[Name]
SELECT [e0].[Id], [e0].[Name], [j].[LeftId], ROW_NUMBER() OVER(PARTITION BY [j].[LeftId] ORDER BY [e0].[Id]) AS [row]
FROM [JoinOneSelfPayload] AS [j]
INNER JOIN [EntityOnes] AS [e0] ON [j].[RightId] = [e0].[Id]
WHERE [e].[Id] = [j].[LeftId]
ORDER BY [e0].[Id]
) AS [s]
ORDER BY [s].[Id]
) AS [s0]
WHERE [s].[row] <= 1
) AS [s0] ON [e].[Id] = [s0].[LeftId]
""");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,15 @@ public override async Task Skip_navigation_order_by_single_or_default(bool async
"""
SELECT [s0].[Id], [s0].[Name]
FROM [EntityOnes] AS [e]
OUTER APPLY (
SELECT TOP(1) [s].[Id], [s].[Name]
LEFT JOIN (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting change... The LEFT JOIN is obviously better than the OUTER APPLY (and also unlocks SQLite which doesn't support APPLY), but then we get the row number inside... Looks OK in any case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am afraid we have that in multiple places; I might eventually investigate, but seems like something that is already happening :\

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. BTW CROSS/OUTER APPLY vs. ROW_NUMBER() is something that's already tracked in #30450, it's the kind of deep performance investigations we haven't gotten around to yet. I hope that once the basic query architecture refactoring is farther ahead, all this will be much easier to tackle.

SELECT [s].[Id], [s].[Name], [s].[LeftId]
FROM (
SELECT TOP(1) [e0].[Id], [e0].[Name]
SELECT [e0].[Id], [e0].[Name], [j].[LeftId], ROW_NUMBER() OVER(PARTITION BY [j].[LeftId] ORDER BY [e0].[Id]) AS [row]
FROM [JoinOneSelfPayload] AS [j]
INNER JOIN [EntityOnes] AS [e0] ON [j].[RightId] = [e0].[Id]
WHERE [e].[Id] = [j].[LeftId]
ORDER BY [e0].[Id]
) AS [s]
ORDER BY [s].[Id]
) AS [s0]
WHERE [s].[row] <= 1
) AS [s0] ON [e].[Id] = [s0].[LeftId]
""");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,14 +760,10 @@ public override async Task Project_single_element_from_collection_with_OrderBy_T
AssertSql(
"""
SELECT (
SELECT TOP(1) [o0].[CustomerID]
FROM (
SELECT TOP(1) [o].[CustomerID], [o].[OrderID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID]
) AS [o0]
ORDER BY [o0].[OrderID])
SELECT TOP(1) [o].[CustomerID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID])
FROM [Customers] AS [c]
""");
}
Expand Down Expand Up @@ -828,14 +824,10 @@ public override async Task Project_single_element_from_collection_with_OrderBy_T
AssertSql(
"""
SELECT (
SELECT TOP(1) [o0].[CustomerID]
FROM (
SELECT TOP(1) [o].[CustomerID], [o].[OrderID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID]
) AS [o0]
ORDER BY [o0].[OrderID])
SELECT TOP(1) [o].[CustomerID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID])
FROM [Customers] AS [c]
WHERE [c].[CustomerID] = N'ALFKI'
""");
Expand Down Expand Up @@ -869,14 +861,10 @@ public override async Task Project_single_element_from_collection_with_multiple_
AssertSql(
"""
SELECT (
SELECT TOP(1) [o0].[CustomerID]
FROM (
SELECT TOP(2) [o].[CustomerID], [o].[OrderID], [o].[OrderDate]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID], [o].[OrderDate] DESC
) AS [o0]
ORDER BY [o0].[OrderID], [o0].[OrderDate] DESC)
SELECT TOP(1) [o].[CustomerID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID], [o].[OrderDate] DESC)
FROM [Customers] AS [c]
""");
}
Expand All @@ -892,14 +880,10 @@ await base
AssertSql(
"""
SELECT (
SELECT TOP(1) [o0].[c]
FROM (
SELECT TOP(2) CAST(LEN([o].[CustomerID]) AS int) AS [c], [o].[OrderID], [o].[OrderDate]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID], [o].[OrderDate] DESC
) AS [o0]
ORDER BY [o0].[OrderID], [o0].[OrderDate] DESC)
SELECT TOP(1) CAST(LEN([o].[CustomerID]) AS int)
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[OrderID], [o].[OrderDate] DESC)
FROM [Customers] AS [c]
""");
}
Expand All @@ -911,14 +895,10 @@ public override async Task Project_single_element_from_collection_with_multiple_
AssertSql(
"""
SELECT (
SELECT TOP(1) [o0].[CustomerID]
FROM (
SELECT TOP(2) [o].[CustomerID], [o].[OrderDate]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[CustomerID], [o].[OrderDate] DESC
) AS [o0]
ORDER BY [o0].[CustomerID], [o0].[OrderDate] DESC)
SELECT TOP(1) [o].[CustomerID]
FROM [Orders] AS [o]
WHERE [c].[CustomerID] = [o].[CustomerID]
ORDER BY [o].[CustomerID], [o].[OrderDate] DESC)
FROM [Customers] AS [c]
""");
}
Expand All @@ -930,15 +910,11 @@ public override async Task Project_single_element_from_collection_with_OrderBy_o
AssertSql(
"""
SELECT COALESCE((
SELECT TOP(1) [s].[OrderID]
FROM (
SELECT TOP(1) [o0].[OrderID], [p].[ProductName]
FROM [Order Details] AS [o0]
INNER JOIN [Products] AS [p] ON [o0].[ProductID] = [p].[ProductID]
WHERE [o].[OrderID] = [o0].[OrderID]
ORDER BY [p].[ProductName]
) AS [s]
ORDER BY [s].[ProductName]), 0)
SELECT TOP(1) [o0].[OrderID]
FROM [Order Details] AS [o0]
INNER JOIN [Products] AS [p] ON [o0].[ProductID] = [p].[ProductID]
WHERE [o].[OrderID] = [o0].[OrderID]
ORDER BY [p].[ProductName]), 0)
FROM [Orders] AS [o]
WHERE [o].[OrderID] < 10300
""");
Expand All @@ -953,17 +929,15 @@ public override async Task Project_single_element_from_collection_with_OrderBy_o
"""
SELECT [s0].[OrderID], [s0].[ProductID], [s0].[Discount], [s0].[Quantity], [s0].[UnitPrice]
FROM [Orders] AS [o]
OUTER APPLY (
SELECT TOP(1) [s].[OrderID], [s].[ProductID], [s].[Discount], [s].[Quantity], [s].[UnitPrice]
LEFT JOIN (
SELECT [s].[OrderID], [s].[ProductID], [s].[Discount], [s].[Quantity], [s].[UnitPrice]
FROM (
SELECT TOP(1) [o0].[OrderID], [o0].[ProductID], [o0].[Discount], [o0].[Quantity], [o0].[UnitPrice], [p].[ProductName]
SELECT [o0].[OrderID], [o0].[ProductID], [o0].[Discount], [o0].[Quantity], [o0].[UnitPrice], ROW_NUMBER() OVER(PARTITION BY [o0].[OrderID] ORDER BY [p].[ProductName]) AS [row]
FROM [Order Details] AS [o0]
INNER JOIN [Products] AS [p] ON [o0].[ProductID] = [p].[ProductID]
WHERE [o].[OrderID] = [o0].[OrderID]
ORDER BY [p].[ProductName]
) AS [s]
ORDER BY [s].[ProductName]
) AS [s0]
WHERE [s].[row] <= 1
) AS [s0] ON [o].[OrderID] = [s0].[OrderID]
WHERE [o].[OrderID] < 10250
""");
}
Expand Down Expand Up @@ -1336,6 +1310,44 @@ WHERE CAST(LEN([c].[CustomerID]) AS int) >= CAST(LEN([o].[CustomerID]) AS int)
""");
}

public override async Task SelectMany_with_multiple_Take(bool async)
{
await base.SelectMany_with_multiple_Take(async);

AssertSql(
"""
SELECT [o1].[OrderID], [o1].[CustomerID], [o1].[EmployeeID], [o1].[OrderDate]
FROM [Customers] AS [c]
INNER JOIN (
SELECT [o0].[OrderID], [o0].[CustomerID], [o0].[EmployeeID], [o0].[OrderDate]
FROM (
SELECT [o].[OrderID], [o].[CustomerID], [o].[EmployeeID], [o].[OrderDate], ROW_NUMBER() OVER(PARTITION BY [o].[CustomerID] ORDER BY [o].[OrderID]) AS [row]
FROM [Orders] AS [o]
) AS [o0]
WHERE [o0].[row] <= 3
) AS [o1] ON [c].[CustomerID] = [o1].[CustomerID]
""");
}

public override async Task Select_with_multiple_Take(bool async)
{
await base.Select_with_multiple_Take(async);

AssertSql(
"""
@p0='3'
@p='5'
SELECT TOP(@p0) [c0].[CustomerID], [c0].[Address], [c0].[City], [c0].[CompanyName], [c0].[ContactName], [c0].[ContactTitle], [c0].[Country], [c0].[Fax], [c0].[Phone], [c0].[PostalCode], [c0].[Region]
FROM (
SELECT TOP(@p) [c].[CustomerID], [c].[Address], [c].[City], [c].[CompanyName], [c].[ContactName], [c].[ContactTitle], [c].[Country], [c].[Fax], [c].[Phone], [c].[PostalCode], [c].[Region]
FROM [Customers] AS [c]
ORDER BY [c].[CustomerID]
) AS [c0]
ORDER BY [c0].[CustomerID]
""");
}

public override async Task FirstOrDefault_over_empty_collection_of_value_type_returns_correct_results(bool async)
{
await base.FirstOrDefault_over_empty_collection_of_value_type_returns_correct_results(async);
Expand Down
Loading