From bd14e0e40782dbd0fa49de597ec30217b48961f2 Mon Sep 17 00:00:00 2001 From: JiaKe Date: Tue, 22 Aug 2023 02:20:35 +0800 Subject: [PATCH] feat: add windowrel support in proto (#399) Co-authored-by: Weston Pace Co-authored-by: Victor Barua --- proto/substrait/algebra.proto | 52 +++++++++++++++++++++++ site/docs/relations/physical_relations.md | 17 ++++++++ 2 files changed, 69 insertions(+) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 578eb381d..8dd0a336d 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -230,6 +230,42 @@ message AggregateRel { } } +// ConsistentPartitionWindowRel provides the ability to perform calculations across sets of rows +// that are related to the current query row. It can be used to execute window functions where +// all the windows share the same partitioning and ordering. +message ConsistentPartitionWindowRel { + RelCommon common = 1; + Rel input = 2; + repeated WindowRelFunction window_functions = 3; + repeated Expression partition_expressions = 4; + repeated SortField sorts = 5; + + substrait.extensions.AdvancedExtension advanced_extension = 10; + + // This message mirrors the `WindowFunction` message but removes the fields defining the partition, + // sorts, and bounds, since those must be consistent across the various functions in this rel. Refer + // to the `WindowFunction` message for a description of these fields. + message WindowRelFunction { + uint32 function_reference = 1; + + repeated FunctionArgument arguments = 9; + + repeated FunctionOption options = 11; + + Type output_type = 7; + + AggregationPhase phase = 6; + + AggregateFunction.AggregationInvocation invocation = 10; + + Expression.WindowFunction.Bound lower_bound = 5; + + Expression.WindowFunction.Bound upper_bound = 4; + + Expression.WindowFunction.BoundsType bounds_type = 12; + } +} + // The ORDERY BY (or sorting) relational operator. Beside describing a base relation, it includes a list of fields to sort on message SortRel { RelCommon common = 1; @@ -398,6 +434,7 @@ message Rel { //Physical relations HashJoinRel hash_join = 13; MergeJoinRel merge_join = 14; + ConsistentPartitionWindowRel window = 17; ExchangeRel exchange = 15; ExpandRel expand = 16; } @@ -860,6 +897,9 @@ message Expression { // the same partition as the current record are aggregated. repeated Expression partitions = 2; + // Defines the bounds type: ROWS, RANGE + BoundsType bounds_type = 12; + // Defines the record relative to the current record from which the window // extends. The bound is inclusive. If the lower bound indexes a record // greater than the upper bound, TODO (null range/no records passed? @@ -877,6 +917,18 @@ message Expression { // Deprecated; use arguments instead. repeated Expression args = 8 [deprecated = true]; + enum BoundsType { + BOUNDS_TYPE_UNSPECIFIED = 0; + // The lower and upper bound specify how many rows before and after the current row + // the window should extend. + BOUNDS_TYPE_ROWS = 1; + // The lower and upper bound describe a range of values. The window should include all rows + // where the value of the ordering column is greater than or equal to (current_value - lower bound) + // and less than or equal to (current_value + upper bound). This bounds type is only valid if there + // is a single ordering column. + BOUNDS_TYPE_RANGE = 2; + } + // Defines one of the two boundaries for the window of a window function. message Bound { // Defines that the bound extends this far back from the current record. diff --git a/site/docs/relations/physical_relations.md b/site/docs/relations/physical_relations.md index fe311fbdf..74f5cd09c 100644 --- a/site/docs/relations/physical_relations.md +++ b/site/docs/relations/physical_relations.md @@ -205,6 +205,23 @@ The streaming aggregate operation leverages data ordered by the grouping express | Per Grouping Set | A list of expression grouping that the aggregation measured should be calculated for. | Optional, defaults to 0. | | Measures | A list of one or more aggregate expressions. Aggregate expressions ordering requirements must be compatible with expected ordering. | Optional, required if no grouping sets. | +## Consistent Partition Window Operation +A consistent partition window operation is a special type of project operation where every function is a window function and all of the window functions share the same sorting and partitioning. This allows for the sort and partition to be calculated once and shared between the various function evaluations. + +| Signature | Value | +| -------------------- |----------------------------------------------------------------------| +| Inputs | 1 | +| Outputs | 1 | +| Property Maintenance | Maintains distribution and ordering. | +| Direct Output Order | Same as Project operator (input followed by each window expression). | + +### Window Properties + +| Property | Description | Required | +| ------------------ | ------------------------------- | ---------------------- | +| Input | The relational input. | Required | +| Window Functions | One or more window functions. | At least one required. | + ## Expand Operation