diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 60c1d2090..578eb381d 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -342,6 +342,34 @@ message ExchangeRel { } } +// Duplicates records, possibly switching output expressions between each duplicate. +// Default output is all of the fields declared followed by one int64 field that contains the +// duplicate_id which is a zero-index ordinal of which duplicate of the original record this +// corresponds to. +message ExpandRel { + RelCommon common = 1; + Rel input = 2; + repeated ExpandField fields = 4; + + message ExpandField { + oneof field_type { + // Field that switches output based on which duplicate_id we're outputting + SwitchingField switching_field = 2; + + // Field that outputs the same value no matter which duplicate_id we're on. + Expression consistent_field = 3; + } + } + + message SwitchingField { + // Array that contains an expression to output per duplicate_id + // each `switching_field` must have the same number of expressions + // all expressions within a switching field must be the same type class but can differ in nullability. + // this column will be nullable if any of the expressions are nullable. + repeated Expression duplicates = 1; + } +} + // A relation with output field names. // // This is for use at the root of a `Rel` tree. @@ -367,11 +395,11 @@ message Rel { ExtensionMultiRel extension_multi = 10; ExtensionLeafRel extension_leaf = 11; CrossRel cross = 12; - //Physical relations HashJoinRel hash_join = 13; MergeJoinRel merge_join = 14; ExchangeRel exchange = 15; + ExpandRel expand = 16; } } diff --git a/site/docs/relations/physical_relations.md b/site/docs/relations/physical_relations.md index c3d8da102..fe311fbdf 100644 --- a/site/docs/relations/physical_relations.md +++ b/site/docs/relations/physical_relations.md @@ -206,6 +206,31 @@ The streaming aggregate operation leverages data ordered by the grouping express | Measures | A list of one or more aggregate expressions. Aggregate expressions ordering requirements must be compatible with expected ordering. | Optional, required if no grouping sets. | +## Expand Operation + +The expand operation creates duplicates of input records based on the Expand Fields. Each Expand Field can be a Switching Field or an expression. Switching Fields are described below. If an Expand Field is an expression then its value is consistent across all duplicate rows. + +| Signature | Value | +| -------------------- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Inputs | 1 | +| Outputs | 1 | +| Property Maintenance | Distribution is maintained if all the distribution fields are consistent fields with direct references. Ordering can only be maintained down to the level of consistent fields that are kept.| +| Direct Output Order | The expand fields followed by an i32 column describing the index of the duplicate that the row is derived from. | + +### Expand Properties + +| Property | Description | Required | +| --------- |--------------------------------------| -------- | +| Input | The relational input. | Required | +| Direct Fields | Expressions describing the output fields. These refer to the schema of the input. Each Direct Field must be an expression or a Switching Field | Required | + +### Switching Field Properties + +A switching field is a field whose value is different in each duplicated row. All switching fields in an Expand Operation must have the same number of duplicates. + +| Property | Description | Required | +| --------- |--------------------------------------| -------- | +| Duplicates | List of one or more expressions. The output will contain a row for each expression. | Required | ## Hashing Window Operation