Skip to content

Commit

Permalink
Add Multijoin feature to community (#4170)
Browse files Browse the repository at this point in the history
* Initial commit, added major chunk-hashing files, now compiles.

* Added tests and fillChunk fixes.

* MultiJoin static OA Hashing implemented and tests passing.

* MultiJoin incremental OA Hashing implemented and tests passing.
  • Loading branch information
lbooker42 authored Aug 4, 2023
1 parent dbf9c85 commit f15c8ff
Show file tree
Hide file tree
Showing 53 changed files with 7,618 additions and 119 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package io.deephaven.engine.table;

import io.deephaven.api.JoinMatch;
import org.jetbrains.annotations.NotNull;

import java.util.ServiceLoader;

/**
* <p>
* Join unique rows from a set of tables onto a set of common keys.
* </p>
*
* <p>
* The multiJoin operation collects the set of distinct keys from the input tables, then joins at most one row per key
* from each of the input tables onto the result. Input tables need not have a matching row for each key, but they may
* not have multiple matching rows for a given key.
* </p>
*
* <p>
* Input tables with non-matching key column names must use the {@link JoinMatch} format to map keys to the common
* output table key column names (e.g. "OutputKey=SourceKey"). Also, individual columns to include from input tables may
* be specified and optionally renamed using {@link io.deephaven.api.JoinAddition} format (e.g. "NewCol=OldColName"). If
* no output columns are specified then every non-key column from the input table will be included in the multi-join
* output table.
* </p>
*
* <p>
* The multiJoin operation can be thought of as a merge of the key columns, followed by a selectDistinct and then a
* series of iterative naturalJoin operations as follows (this example has common key column names and includes all
* columns from the input tables):
* </p>
*
* <pre>{@code
* private Table doIterativeMultiJoin(String [] keyColumns, List<? extends Table> inputTables) {
* final List<Table> keyTables = inputTables.stream().map(t -> t.view(keyColumns)).collect(Collectors.toList());
* final Table base = TableTools.merge(keyTables).selectDistinct(keyColumns);
*
* Table result = base;
* for (int ii = 0; ii < inputTables.size(); ++ii) {
* result = result.naturalJoin(inputTables.get(ii), Arrays.asList(keyColumns));
* }
*
* return result;
* }
* }
* </pre>
*/

public class MultiJoinFactory {

    /**
     * Creator interface for runtime-supplied implementation.
     */
    public interface Creator {
        /**
         * Build a multiJoin result from the supplied input descriptors.
         *
         * @param multiJoinInputs the description of each table that contributes to the result
         * @return the {@link MultiJoinTable} produced by the runtime-supplied implementation
         */
        MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs);
    }

    /**
     * Creator provider to supply the implementation at runtime. Providers are looked up with {@link ServiceLoader}.
     */
    @FunctionalInterface
    public interface CreatorProvider {
        /**
         * @return the {@link Creator} that this provider supplies
         */
        Creator get();
    }

    /**
     * Holder for the {@link Creator} obtained from the first {@link CreatorProvider} that {@link ServiceLoader}
     * finds. The lookup runs once, when this holder class is initialized.
     */
    private static final class MultiJoinTableCreatorHolder {
        private static final MultiJoinFactory.Creator creator = loadCreator();

        /**
         * Locate the first registered {@link CreatorProvider} and ask it for its {@link Creator}.
         *
         * @return the creator supplied by the first discovered provider
         * @throws IllegalStateException if no provider is registered
         */
        private static MultiJoinFactory.Creator loadCreator() {
            // Fail with a message that names the missing service, rather than the bare NoSuchElementException
            // that iterator().next() raises when no provider is registered.
            return ServiceLoader.load(MultiJoinFactory.CreatorProvider.class)
                    .findFirst()
                    .orElseThrow(() -> new IllegalStateException(
                            "No implementation of " + MultiJoinFactory.CreatorProvider.class.getName()
                                    + " was found by ServiceLoader; unable to create a MultiJoinTable"))
                    .get();
        }
    }

    /**
     * @return the creator held by {@link MultiJoinTableCreatorHolder}
     */
    private static MultiJoinFactory.Creator multiJoinTableCreator() {
        return MultiJoinTableCreatorHolder.creator;
    }

    /**
     * Join tables that have common key column names; include all columns from the input tables.
     *
     * @param keys the key column pairs in the format "Result=Source" or "ColumnInBoth"
     * @param inputTables the tables to join together
     * @return a MultiJoinTable with one row for each key and the corresponding row in each input table
     */
    public static MultiJoinTable of(@NotNull final String[] keys, @NotNull final Table... inputTables) {
        return multiJoinTableCreator().of(MultiJoinInput.from(keys, inputTables));
    }

    /**
     * Perform a multiJoin for one or more tables; allows renaming of key column names and specifying individual input
     * table columns to include in the final output table.
     *
     * @param multiJoinInputs the description of each table that contributes to the result
     * @return a MultiJoinTable with one row for each key and the corresponding row in each input table
     */
    public static MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs) {
        return multiJoinTableCreator().of(multiJoinInputs);
    }
}
116 changes: 116 additions & 0 deletions engine/api/src/main/java/io/deephaven/engine/table/MultiJoinInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package io.deephaven.engine.table;

import io.deephaven.annotations.SimpleStyle;
import io.deephaven.api.JoinAddition;
import io.deephaven.api.JoinMatch;
import org.immutables.value.Value.Immutable;
import org.immutables.value.Value.Parameter;
import org.jetbrains.annotations.NotNull;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;

/**
* An input to a multiJoin.
* <p>
* The table, key columns, and columns to add are encapsulated in the join descriptor.
*/
@Immutable
@SimpleStyle
public abstract class MultiJoinInput {
    /**
     * Build a multiJoin input from already-parsed match and addition arrays.
     *
     * @param inputTable The table to include in a multiJoin
     * @param columnsToMatch An array of {@link JoinMatch} specifying match conditions
     * @param columnsToAdd An array of {@link JoinAddition} specifying the columns to add
     * @return the input descriptor wrapping the three arguments
     */
    public static MultiJoinInput of(
            @NotNull final Table inputTable,
            @NotNull JoinMatch[] columnsToMatch,
            @NotNull JoinAddition[] columnsToAdd) {
        return ImmutableMultiJoinInput.of(inputTable, columnsToMatch, columnsToAdd);
    }

    /**
     * Build a multiJoin input from collections of matches and additions; each collection is copied into an array
     * before delegating to the array-based factory.
     *
     * @param inputTable The table to include in a multiJoin
     * @param columnsToMatch A collection of {@link JoinMatch} specifying the key columns
     * @param columnsToAdd A collection of {@link JoinAddition} specifying the columns to add
     * @return the input descriptor for {@code inputTable}
     */
    public static MultiJoinInput of(
            @NotNull final Table inputTable,
            @NotNull final Collection<? extends JoinMatch> columnsToMatch,
            @NotNull final Collection<? extends JoinAddition> columnsToAdd) {
        final JoinMatch[] matchArray = columnsToMatch.toArray(JoinMatch[]::new);
        final JoinAddition[] additionArray = columnsToAdd.toArray(JoinAddition[]::new);
        return of(inputTable, matchArray, additionArray);
    }

    /**
     * Build a multiJoin input from string descriptions of the key columns and the columns to add.
     *
     * @param inputTable The table to include in a multiJoin
     * @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
     * @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or
     *        "SourceColumnToAddWithSameName"); empty for all columns
     * @return the input descriptor for {@code inputTable}
     */
    public static MultiJoinInput of(
            @NotNull final Table inputTable,
            @NotNull final String[] columnsToMatch,
            @NotNull final String[] columnsToAdd) {
        final Collection<? extends JoinMatch> parsedMatches = JoinMatch.from(columnsToMatch);
        final Collection<? extends JoinAddition> parsedAdditions = JoinAddition.from(columnsToAdd);
        return of(inputTable, parsedMatches, parsedAdditions);
    }

    /**
     * Build a multiJoin input from string key columns only; no columns to add are specified.
     *
     * @param inputTable The table to include in a multiJoin
     * @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
     * @return the input descriptor for {@code inputTable}, with an empty set of columns to add
     */
    public static MultiJoinInput of(@NotNull final Table inputTable, @NotNull final String... columnsToMatch) {
        final Collection<? extends JoinMatch> parsedMatches = JoinMatch.from(columnsToMatch);
        final Collection<? extends JoinAddition> noAdditions = Collections.emptyList();
        return of(inputTable, parsedMatches, noAdditions);
    }

    /**
     * Build a multiJoin input from one string of key columns and one string of columns to add. A {@code null} or
     * empty string is treated as "nothing specified" and becomes an empty collection.
     * <p>
     * NOTE(review): each non-empty string is handed to {@code JoinMatch.from} / {@code JoinAddition.from} as a single
     * value. The original documentation describes these as comma separated lists; confirm that those parsers split
     * on commas.
     *
     * @param inputTable The table to include in a multiJoin
     * @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
     * @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or
     *        "SourceColumnToAddWithSameName"); empty for all columns
     * @return the input descriptor for {@code inputTable}
     */
    public static MultiJoinInput of(@NotNull final Table inputTable, String columnsToMatch, String columnsToAdd) {
        final boolean matchesMissing = columnsToMatch == null || columnsToMatch.isEmpty();
        final Collection<? extends JoinMatch> parsedMatches =
                matchesMissing ? Collections.emptyList() : JoinMatch.from(columnsToMatch);

        final boolean additionsMissing = columnsToAdd == null || columnsToAdd.isEmpty();
        final Collection<? extends JoinAddition> parsedAdditions =
                additionsMissing ? Collections.emptyList() : JoinAddition.from(columnsToAdd);

        return of(inputTable, parsedMatches, parsedAdditions);
    }

    /**
     * Create an array of {@link MultiJoinInput} with common keys, one per input table and in the same order; no
     * columns to add are specified for any of them.
     *
     * @param keys The key columns, common to all tables
     * @param inputTables An array of tables to include in the output
     * @return one input descriptor per element of {@code inputTables}
     */
    @NotNull
    public static MultiJoinInput[] from(@NotNull final String[] keys, @NotNull final Table[] inputTables) {
        final MultiJoinInput[] inputs = new MultiJoinInput[inputTables.length];
        Arrays.setAll(inputs, idx -> of(inputTables[idx], keys));
        return inputs;
    }

    /**
     * @return the table that this input contributes to the multiJoin
     */
    @Parameter
    public abstract Table inputTable();

    /**
     * @return the match conditions for this input's key columns
     */
    @Parameter
    public abstract JoinMatch[] columnsToMatch();

    /**
     * @return the columns to add from this input
     */
    @Parameter
    public abstract JoinAddition[] columnsToAdd();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package io.deephaven.engine.table;

import java.util.Collection;

/**
* The value returned by the {@code MultiJoinFactory.of} methods: gives access to the multi-join output {@link Table}
* and to the names of its key columns.
*/
public interface MultiJoinTable {
/**
* Get the output {@link Table table} from this multi-join table.
*
* @return The output {@link Table table}
*/
Table table();

/**
* Get the key column names from this multi-join table.
*
* @return The key column names as a collection of strings
*/
Collection<String> keyColumns();
}
2 changes: 2 additions & 0 deletions engine/table/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ spotless {
'**/asofjoin/typed/rightincopen/gen/*.java',
'**/asofjoin/typed/staticopen/gen/*.java',
'**/updateby/hashing/typed/open/gen/*.java',
'**/multijoin/typed/staticopen/gen/*.java',
'**/multijoin/typed/incopen/gen/*.java',
'src/main/java/io/deephaven/engine/table/impl/SymbolTableCombiner.java',
'src/main/java/io/deephaven/libs/GroovyStaticImports.java',
'src/test/java/**/*Sample.java'
Expand Down
Loading

0 comments on commit f15c8ff

Please sign in to comment.