-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
Multijoin
feature to community (#4170)
* Initial commit, added major chunk-hashing files, now compiles. * Added tests and fillChunk fixes. * MultiJoin static OA Hashing implemented and tests passing. * MultiJoin incremental OA Hashing implemented and tests passing.
- Loading branch information
Showing
53 changed files
with
7,618 additions
and
119 deletions.
There are no files selected for viewing
97 changes: 97 additions & 0 deletions
97
engine/api/src/main/java/io/deephaven/engine/table/MultiJoinFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package io.deephaven.engine.table; | ||
|
||
import io.deephaven.api.JoinMatch; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.ServiceLoader; | ||
|
||
/** | ||
* <p> | ||
* Join unique rows from a set of tables onto a set of common keys. | ||
* </p> | ||
* | ||
* <p> | ||
* The multiJoin operation collects the set of distinct keys from the input tables, then joins at most one row per key | ||
* from each of the input tables onto the result. Input tables need not have a matching row for each key, but they may | ||
* not have multiple matching rows for a given key. | ||
* </p> | ||
* | ||
* <p> | ||
* Input tables with non-matching key column names must use the {@link JoinMatch} format to map keys to the common | ||
* output table key column names (e.g. "OutputKey=SourceKey"). Also, individual columns to include from input tables may | ||
* be specified and optionally renamed using {@link io.deephaven.api.JoinAddition} format (e.g. "NewCol=OldColName"). If | ||
* no output columns are specified then every non-key column from the input table will be included in the multi-join | ||
* output table. | ||
* </p> | ||
* | ||
* <p> | ||
* The multiJoin operation can be thought of as a merge of the key columns, followed by a selectDistinct and then a | ||
* series of iterative naturalJoin operations as follows (this example has common key column names and includes all | ||
* columns from the input tables): | ||
* </p> | ||
* | ||
* <pre>{@code | ||
* private Table doIterativeMultiJoin(String [] keyColumns, List<? extends Table> inputTables) { | ||
* final List<Table> keyTables = inputTables.stream().map(t -> t.view(keyColumns)).collect(Collectors.toList()); | ||
* final Table base = TableTools.merge(keyTables).selectDistinct(keyColumns); | ||
* | ||
* Table result = base; | ||
* for (int ii = 0; ii < inputTables.size(); ++ii) { | ||
* result = result.naturalJoin(inputTables.get(ii), Arrays.asList(keyColumns)); | ||
* } | ||
* | ||
* return result; | ||
* } | ||
* } | ||
* </pre> | ||
*/ | ||
|
||
public class MultiJoinFactory { | ||
|
||
/** | ||
* Creator interface for runtime-supplied implementation. | ||
*/ | ||
public interface Creator { | ||
MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs); | ||
} | ||
|
||
/** | ||
* Creator provider to supply the implementation at runtime. | ||
*/ | ||
@FunctionalInterface | ||
public interface CreatorProvider { | ||
Creator get(); | ||
} | ||
|
||
private static final class MultiJoinTableCreatorHolder { | ||
private static final MultiJoinFactory.Creator creator = | ||
ServiceLoader.load(MultiJoinFactory.CreatorProvider.class).iterator().next().get(); | ||
} | ||
|
||
private static MultiJoinFactory.Creator multiJoinTableCreator() { | ||
return MultiJoinTableCreatorHolder.creator; | ||
} | ||
|
||
/** | ||
* Join tables that have common key column names; include all columns from the input tables. | ||
* <p> | ||
* | ||
* @param keys the key column pairs in the format "Result=Source" or "ColumnInBoth" | ||
* @param inputTables the tables to join together | ||
* @return a MultiJoinTable with one row for each key and the corresponding row in each input table | ||
*/ | ||
public static MultiJoinTable of(@NotNull final String[] keys, @NotNull final Table... inputTables) { | ||
return multiJoinTableCreator().of(MultiJoinInput.from(keys, inputTables)); | ||
} | ||
|
||
/** | ||
* Perform a multiJoin for one or more tables; allows renaming of key column names and specifying individual input | ||
* table columns to include in the final output table. | ||
* | ||
* @param multiJoinInputs the description of each table that contributes to the result | ||
* @return a MultiJoinTable with one row for each key and the corresponding row in each input table | ||
*/ | ||
public static MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs) { | ||
return multiJoinTableCreator().of(multiJoinInputs); | ||
} | ||
} |
116 changes: 116 additions & 0 deletions
116
engine/api/src/main/java/io/deephaven/engine/table/MultiJoinInput.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
package io.deephaven.engine.table; | ||
|
||
import io.deephaven.annotations.SimpleStyle; | ||
import io.deephaven.api.JoinAddition; | ||
import io.deephaven.api.JoinMatch; | ||
import org.immutables.value.Value.Immutable; | ||
import org.immutables.value.Value.Parameter; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
|
||
/** | ||
* An input to a multiJoin. | ||
* <p> | ||
* The table, key columns, and columns to add are encapsulated in the join descriptor. | ||
*/ | ||
@Immutable | ||
@SimpleStyle | ||
public abstract class MultiJoinInput { | ||
/** | ||
* Create a multiJoin table input. | ||
* | ||
* @param inputTable The table to include in a multiJoin | ||
* @param columnsToMatch An array of {@link JoinMatch} specifying match conditions | ||
* @param columnsToAdd An array of {@link JoinAddition} specifying the columns to add | ||
*/ | ||
public static MultiJoinInput of( | ||
@NotNull final Table inputTable, | ||
@NotNull JoinMatch[] columnsToMatch, | ||
@NotNull JoinAddition[] columnsToAdd) { | ||
return ImmutableMultiJoinInput.of(inputTable, columnsToMatch, columnsToAdd); | ||
} | ||
|
||
/** | ||
* Create a multiJoin table input. | ||
* | ||
* @param inputTable The table to include in a multiJoin | ||
* @param columnsToMatch A collection of {@link JoinMatch} specifying the key columns | ||
* @param columnsToAdd A collection of {@link JoinAddition} specifying the columns to add | ||
*/ | ||
public static MultiJoinInput of( | ||
@NotNull final Table inputTable, | ||
@NotNull final Collection<? extends JoinMatch> columnsToMatch, | ||
@NotNull final Collection<? extends JoinAddition> columnsToAdd) { | ||
return of(inputTable, columnsToMatch.toArray(JoinMatch[]::new), columnsToAdd.toArray(JoinAddition[]::new)); | ||
} | ||
|
||
/** | ||
* Create a multiJoin table input. | ||
* | ||
* @param inputTable The table to include in a multiJoin | ||
* @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth"). | ||
* @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or | ||
* "SourceColumnToAddWithSameName"); empty for all columns | ||
*/ | ||
public static MultiJoinInput of( | ||
@NotNull final Table inputTable, | ||
@NotNull final String[] columnsToMatch, | ||
@NotNull final String[] columnsToAdd) { | ||
return of(inputTable, JoinMatch.from(columnsToMatch), JoinAddition.from(columnsToAdd)); | ||
} | ||
|
||
/** | ||
* Create a multiJoin table input. | ||
* <p> | ||
* | ||
* @param inputTable The table to include in a multiJoin | ||
* @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth"). | ||
*/ | ||
public static MultiJoinInput of(@NotNull final Table inputTable, @NotNull final String... columnsToMatch) { | ||
return of(inputTable, JoinMatch.from(columnsToMatch), Collections.emptyList()); | ||
} | ||
|
||
/** | ||
* Create a multiJoin table input. | ||
* | ||
* @param inputTable The table to include in a multiJoin | ||
* @param columnsToMatch A comma separated list of key columns, in string format (e.g. "ResultKey=SourceKey" or | ||
* "KeyInBoth"). | ||
* @param columnsToAdd A comma separated list of columns to add, in string format (e.g. "ResultColumn=SourceColumn" | ||
* or "SourceColumnToAddWithSameName"); empty for all columns | ||
*/ | ||
public static MultiJoinInput of(@NotNull final Table inputTable, String columnsToMatch, String columnsToAdd) { | ||
return of(inputTable, | ||
columnsToMatch == null || columnsToMatch.isEmpty() | ||
? Collections.emptyList() | ||
: JoinMatch.from(columnsToMatch), | ||
columnsToAdd == null || columnsToAdd.isEmpty() | ||
? Collections.emptyList() | ||
: JoinAddition.from(columnsToAdd)); | ||
} | ||
|
||
/** | ||
* Create an array of {@link MultiJoinInput} with common keys; includes all non-key columns as output columns. | ||
* | ||
* @param keys The key columns, common to all tables | ||
* @param inputTables An array of tables to include in the output | ||
*/ | ||
@NotNull | ||
public static MultiJoinInput[] from(@NotNull final String[] keys, @NotNull final Table[] inputTables) { | ||
return Arrays.stream(inputTables) | ||
.map(t -> MultiJoinInput.of(t, keys)) | ||
.toArray(MultiJoinInput[]::new); | ||
} | ||
|
||
@Parameter | ||
public abstract Table inputTable(); | ||
|
||
@Parameter | ||
public abstract JoinMatch[] columnsToMatch(); | ||
|
||
@Parameter | ||
public abstract JoinAddition[] columnsToAdd(); | ||
} |
19 changes: 19 additions & 0 deletions
19
engine/api/src/main/java/io/deephaven/engine/table/MultiJoinTable.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package io.deephaven.engine.table; | ||
|
||
import java.util.Collection; | ||
|
||
public interface MultiJoinTable { | ||
/** | ||
* Get the output {@link Table table} from this multi-join table. | ||
* | ||
* @return The output {@link Table table} | ||
*/ | ||
Table table(); | ||
|
||
/** | ||
* Get the key column names from this multi-join table. | ||
* | ||
* @return The key column names as a collection of strings | ||
*/ | ||
Collection<String> keyColumns(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.