Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Multijoin feature to community #4170

Merged
merged 19 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package io.deephaven.engine.table;

import io.deephaven.api.JoinMatch;
import io.deephaven.util.annotations.TestUseOnly;
import org.jetbrains.annotations.NotNull;

import java.util.Arrays;
import java.util.ServiceLoader;

/**
* <p>
* Join unique rows from a set of tables onto a set of common keys.
* </p>
*
* <p>
* The multiJoin operation collects the set of distinct keys from the input tables, then joins one row from each of the
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
* input tables onto the result. Input tables need not have a matching row for each key, but they may not have multiple
* matching rows for a given key.
* </p>
*
* <p>
* Input tables with distinct key column names must use the {@link JoinMatch} format to map keys to the common output
* table key column names (e.g. "OutputKey=UniqueRHSKey"). Also, individual columns to include from input tables may be
* specified and optionally renamed using {@link io.deephaven.api.JoinAddition} format (e.g. "NewCol=OldColName"). If no
* output columns are specified then every column from the input table will be included in the multi-join output table.
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
* </p>
*
* <p>
* The multiJoin operation can be thought of as a merge of the key columns, followed by a selectDistinct and then a
* series of iterative naturalJoin operations as follows (this example has common key column names and includes all
* columns from the input tables):
* </p>
*
* <pre>{@code
* private Table doIterativeMultiJoin(String [] keyColumns, List<? extends Table> inputTables) {
* final List<Table> keyTables = inputTables.stream().map(t -> t.view(keyColumns)).collect(Collectors.toList());
* final Table base = TableTools.merge(keyTables).selectDistinct(keyColumns);
*
* Table result = base;
* for (int ii = 0; ii < inputTables.size(); ++ii) {
* result = result.naturalJoin(inputTables.get(ii), Arrays.asList(keyColumns));
* }
*
* return result;
* }
* }
* </pre>
*/

public class MultiJoinFactory {

/**
* Creator interface for runtime-supplied implementation.
*/
public interface Creator {
MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs);
}

/**
* Creator provider to supply the implementation at runtime.
*/
@FunctionalInterface
public interface CreatorProvider {
Creator get();
}

private static final class MultiJoinTableCreatorHolder {
private static final MultiJoinFactory.Creator creator =
ServiceLoader.load(MultiJoinFactory.CreatorProvider.class).iterator().next().get();
}

private static MultiJoinFactory.Creator multiJoinTableCreator() {
return MultiJoinTableCreatorHolder.creator;
}

/**
* Join tables that have common key column names; include all columns from the input tables.
* <p>
*
* @param keys the key column pairs in the format "Result=Source" or "ColumnInBoth"
* @param inputTables the tables to join together
* @return a MultiJoinTable with one row for each key and the corresponding row in each input table
*/
public static MultiJoinTable of(@NotNull final String[] keys, @NotNull final Table... inputTables) {
return multiJoinTableCreator().of(createSimpleJoinInput(keys, inputTables));
}

/**
* Perform a multiJoin for one or more tables; allows renaming of key column names and specifying individual input
* table columns to include in the final output table.
*
* @param multiJoinInputs the description of each table that contributes to the result
* @return a MultiJoinTable with one row for each key and the corresponding row in each input table
*/
public static MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs) {
return multiJoinTableCreator().of(multiJoinInputs);
}

@TestUseOnly
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
@NotNull
public static MultiJoinInput[] createSimpleJoinInput(@NotNull final String[] keys,
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
@NotNull final Table[] inputTables) {
return Arrays.stream(inputTables)
.map(t -> MultiJoinInput.of(t, keys))
.toArray(MultiJoinInput[]::new);
}
}
102 changes: 102 additions & 0 deletions engine/api/src/main/java/io/deephaven/engine/table/MultiJoinInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package io.deephaven.engine.table;

import io.deephaven.annotations.SimpleStyle;
import io.deephaven.api.JoinAddition;
import io.deephaven.api.JoinMatch;
import org.immutables.value.Value.Immutable;
import org.immutables.value.Value.Parameter;
import org.jetbrains.annotations.NotNull;

import java.util.Collection;
import java.util.Collections;

/**
* A descriptor of an input to a multiJoin.
* <p>
* The table, key columns, and columns to add are encapsulated in the join descriptor.
*/
@Immutable
@SimpleStyle
public abstract class MultiJoinInput {
/**
* Create a multiJoin table descriptor.
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch An array of {@link JoinMatch} specifying match conditions
* @param columnsToAdd An array of {@link JoinAddition} specifying the columns from the right side that need to be
* added to the table as a result of the match.
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
@NotNull JoinMatch[] columnsToMatch,
@NotNull JoinAddition[] columnsToAdd) {
return ImmutableMultiJoinInput.of(inputTable, columnsToMatch, columnsToAdd);
}


/**
* Create a multiJoin table descriptor.
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch A collection of {@link JoinMatch} specifying the key columns
* @param columnsToAdd A collection of {@link JoinAddition} specifying the columns from the right side that need to
* be added to the table as a result of the match.
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
@NotNull final Collection<? extends JoinMatch> columnsToMatch,
@NotNull final Collection<? extends JoinAddition> columnsToAdd) {
return of(inputTable, columnsToMatch.toArray(JoinMatch[]::new), columnsToAdd.toArray(JoinAddition[]::new));
}

/**
* Create a multiJoin table descriptor.
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
* @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or "ColumnInBoth"),
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
* empty for all columns
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
@NotNull final String[] columnsToMatch,
@NotNull final String[] columnsToAdd) {
return of(inputTable, JoinMatch.from(columnsToMatch), JoinAddition.from(columnsToAdd));
}

/**
* Create a multiJoin table descriptor.
* <p>
*
* @param inputTable the table to include in a multiJoin
* @param columnsToMatch The match conditions ("leftColumn=rightColumn" or "columnFoundInBoth")
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
*/
public static MultiJoinInput of(@NotNull final Table inputTable, @NotNull final String... columnsToMatch) {
return of(inputTable, JoinMatch.from(columnsToMatch), Collections.emptyList());
}

/**
* Create a multiJoin table descriptor.
*
* @param inputTable the table to include in a multiJoin
* @param columnsToMatch A comma separated list of match conditions ("leftColumn=rightColumn" or
* "columnFoundInBoth")
* @param columnsToAdd A comma separated list with the columns from the right side that need to be added to the left
* side as a result of the match.
*/
public static MultiJoinInput of(@NotNull final Table inputTable, String columnsToMatch, String columnsToAdd) {
return of(inputTable,
columnsToMatch == null || columnsToMatch.isEmpty()
? Collections.emptyList()
: JoinMatch.from(columnsToMatch),
columnsToAdd == null || columnsToAdd.isEmpty()
? Collections.emptyList()
: JoinAddition.from(columnsToAdd));
}

@Parameter
public abstract Table inputTable();

@Parameter
public abstract JoinMatch[] columnsToMatch();

@Parameter
public abstract JoinAddition[] columnsToAdd();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package io.deephaven.engine.table;

import java.util.Collection;

/**
* The result of a multi-join, exposing the output {@link Table table} and the names of its key columns.
*/
public interface MultiJoinTable {
/**
* Get the output {@link Table table} from this multi-join table.
*
* @return The output {@link Table table}
*/
Table table();

/**
* Get the key column names from this multi-join table.
*
* @return The key column names as a collection of strings
*/
Collection<String> keyColumns();
}
2 changes: 2 additions & 0 deletions engine/table/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ spotless {
'**/asofjoin/typed/rightincopen/gen/*.java',
'**/asofjoin/typed/staticopen/gen/*.java',
'**/updateby/hashing/typed/open/gen/*.java',
'**/multijoin/typed/staticopen/gen/*.java',
'**/multijoin/typed/incopen/gen/*.java',
'src/main/java/io/deephaven/engine/table/impl/SymbolTableCombiner.java',
'src/main/java/io/deephaven/libs/GroovyStaticImports.java',
'src/test/java/**/*Sample.java'
Expand Down
Loading
Loading