Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Multijoin feature to community #4170

Merged
merged 19 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package io.deephaven.engine.table;

import io.deephaven.api.JoinMatch;
import io.deephaven.util.annotations.TestUseOnly;
import org.jetbrains.annotations.NotNull;

import java.util.Arrays;
import java.util.ServiceLoader;

/**
Expand All @@ -13,16 +11,17 @@
* </p>
*
* <p>
* The multiJoin operation collects the set of distinct keys from the input tables, then joins one row from each of the
* input tables onto the result. Input tables need not have a matching row for each key, but they may not have multiple
* matching rows for a given key.
* The multiJoin operation collects the set of distinct keys from the input tables, then joins at most one row per key
* from each of the input tables onto the result. Input tables need not have a matching row for each key, but they may
* not have multiple matching rows for a given key.
* </p>
*
* <p>
* Input tables with distinct key column names must use the {@link JoinMatch} format to map keys to the common output
* table key column names (e.g. "OutputKey=UniqueRHSKey"). Also, individual columns to include from input tables may be
* specified and optionally renamed using {@link io.deephaven.api.JoinAddition} format (e.g. "NewCol=OldColName"). If no
* output columns are specified then every column from the input table will be included in the multi-join output table.
* Input tables with non-matching key column names must use the {@link JoinMatch} format to map keys to the common
* output table key column names (e.g. "OutputKey=SourceKey"). Also, individual columns to include from input tables may
* be specified and optionally renamed using {@link io.deephaven.api.JoinAddition} format (e.g. "NewCol=OldColName"). If
* no output columns are specified then every non-key column from the input table will be included in the multi-join
* output table.
* </p>
*
* <p>
Expand Down Expand Up @@ -82,7 +81,7 @@ private static MultiJoinFactory.Creator multiJoinTableCreator() {
* @return a MultiJoinTable with one row for each key and the corresponding row in each input table
*/
public static MultiJoinTable of(@NotNull final String[] keys, @NotNull final Table... inputTables) {
return multiJoinTableCreator().of(createSimpleJoinInput(keys, inputTables));
return multiJoinTableCreator().of(MultiJoinInput.from(keys, inputTables));
}

/**
Expand All @@ -95,13 +94,4 @@ public static MultiJoinTable of(@NotNull final String[] keys, @NotNull final Tab
public static MultiJoinTable of(@NotNull final MultiJoinInput... multiJoinInputs) {
    // Obtain the creator first, then hand it the descriptors unchanged.
    final MultiJoinFactory.Creator creator = multiJoinTableCreator();
    return creator.of(multiJoinInputs);
}

@TestUseOnly
@NotNull
public static MultiJoinInput[] createSimpleJoinInput(@NotNull final String[] keys,
        @NotNull final Table[] inputTables) {
    // Build one descriptor per input table, in input order, all using the same key columns.
    final MultiJoinInput[] descriptors = new MultiJoinInput[inputTables.length];
    for (int ti = 0; ti < inputTables.length; ++ti) {
        descriptors[ti] = MultiJoinInput.of(inputTables[ti], keys);
    }
    return descriptors;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.immutables.value.Value.Parameter;
import org.jetbrains.annotations.NotNull;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;

Expand All @@ -23,10 +24,10 @@ public abstract class MultiJoinInput {
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch An array of {@link JoinMatch} specifying match conditions
* @param columnsToAdd An array of {@link JoinAddition} specifying the columns from the right side that need to be
* added to the table as a result of the match.
* @param columnsToAdd An array of {@link JoinAddition} specifying the columns to add
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
public static MultiJoinInput of(
@NotNull final Table inputTable,
@NotNull JoinMatch[] columnsToMatch,
@NotNull JoinAddition[] columnsToAdd) {
return ImmutableMultiJoinInput.of(inputTable, columnsToMatch, columnsToAdd);
Expand All @@ -38,10 +39,10 @@ public static MultiJoinInput of(@NotNull final Table inputTable,
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch A collection of {@link JoinMatch} specifying the key columns
* @param columnsToAdd A collection of {@link JoinAddition} specifying the columns from the right side that need to
* be added to the table as a result of the match.
* @param columnsToAdd A collection of {@link JoinAddition} specifying the columns to add
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
public static MultiJoinInput of(
@NotNull final Table inputTable,
@NotNull final Collection<? extends JoinMatch> columnsToMatch,
@NotNull final Collection<? extends JoinAddition> columnsToAdd) {
return of(inputTable, columnsToMatch.toArray(JoinMatch[]::new), columnsToAdd.toArray(JoinAddition[]::new));
Expand All @@ -52,10 +53,11 @@ public static MultiJoinInput of(@NotNull final Table inputTable,
*
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
* @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or "ColumnInBoth"),
* empty for all columns
* @param columnsToAdd The columns to add, in string format (e.g. "ResultColumn=SourceColumn" or
* "SourceColumnToAddWithSameName"); empty for all columns
*/
public static MultiJoinInput of(@NotNull final Table inputTable,
public static MultiJoinInput of(
@NotNull final Table inputTable,
@NotNull final String[] columnsToMatch,
@NotNull final String[] columnsToAdd) {
return of(inputTable, JoinMatch.from(columnsToMatch), JoinAddition.from(columnsToAdd));
Expand All @@ -65,8 +67,8 @@ public static MultiJoinInput of(@NotNull final Table inputTable,
* Create a multiJoin table descriptor.
* <p>
*
* @param inputTable the table to include in a multiJoin
* @param columnsToMatch The match conditions ("leftColumn=rightColumn" or "columnFoundInBoth")
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch The key columns, in string format (e.g. "ResultKey=SourceKey" or "KeyInBoth").
*/
public static MultiJoinInput of(@NotNull final Table inputTable, @NotNull final String... columnsToMatch) {
// Parse the match strings with JoinMatch.from and pass an empty list of columns to add.
return of(inputTable, JoinMatch.from(columnsToMatch), Collections.emptyList());
}
/**
* Create a multiJoin table descriptor.
*
* @param inputTable the table to include in a multiJoin
* @param columnsToMatch A comma separated list of match conditions ("leftColumn=rightColumn" or
* "columnFoundInBoth")
* @param columnsToAdd A comma separated list with the columns from the right side that need to be added to the left
* side as a result of the match.
* @param inputTable The table to include in a multiJoin
* @param columnsToMatch A comma separated list of key columns, in string format (e.g. "ResultKey=SourceKey" or
* "KeyInBoth").
* @param columnsToAdd A comma separated list of columns to add, in string format (e.g. "ResultColumn=SourceColumn"
* or "SourceColumnToAddWithSameName"); empty for all columns
*/
public static MultiJoinInput of(@NotNull final Table inputTable, String columnsToMatch, String columnsToAdd) {
return of(inputTable,
Expand All @@ -91,6 +93,19 @@ public static MultiJoinInput of(@NotNull final Table inputTable, String columnsT
: JoinAddition.from(columnsToAdd));
}

/**
* Create an array of multiJoin table descriptors with common keys; includes all non-key columns as output columns.
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
*
* @param keys The key columns, common to all tables
* @param inputTables An array of tables to include in the output
*/
@NotNull
public static MultiJoinInput[] from(@NotNull final String[] keys, @NotNull final Table[] inputTables) {
    // Build one descriptor per input table, in input order, all using the same key columns.
    final MultiJoinInput[] descriptors = new MultiJoinInput[inputTables.length];
    for (int ti = 0; ti < inputTables.length; ++ti) {
        descriptors[ti] = MultiJoinInput.of(inputTables[ti], keys);
    }
    return descriptors;
}

@Parameter
public abstract Table inputTable();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@
import io.deephaven.engine.table.impl.util.WritableRowRedirection;

/**
* This is a common interface for the static and incremental state manager so that our bucketed MultiJoinTableImpl system is
* This is a common interface for the static and incremental state managers so that our bucketed MultiJoinTable system is
* capable of using them interchangeably to build the table.
*/
public interface MultiJoinStateManager {
/**
* Add the given table to this multijoin result.
* Add the given table to this multiJoin result.
*
* @param table the table to add
* @param sources the column sources that contain the keys
* @param tableNumber the table number that we are adding rows for
* @param tableNumber the table number for which we are adding rows
*/
void build(final Table table, ColumnSource<?>[] sources, int tableNumber);

/**
* How many rows are in our result table?
* Get the number of rows in the result table
lbooker42 marked this conversation as resolved.
Show resolved Hide resolved
*
* @return the number of rows in the result table
*/
Expand All @@ -31,19 +31,19 @@ public interface MultiJoinStateManager {
ColumnSource<?>[] getKeyHashTableSources();

/**
* Get the result RedirectionIndex for a given table
* Get the result {@link WritableRowRedirection row redirection} for a given table
*
* @param tableNumber the table to fetch
* @return the redirection index for the table
* @return the row redirection for the table
*/
WritableRowRedirection getRowRedirectionForTable(int tableNumber);

/**
* Ensure that this state manager can handle tables different tables as constituents of the multiJoin.
* Ensure that this state manager can handle {@code numTables} tables as constituents of the multiJoin.
*
* @param tables the number of tables that participate
* @param numTables the number of tables that participate
*/
void ensureTableCapacity(int tables);
void ensureTableCapacity(int numTables);

void setTargetLoadFactor(final double targetLoadFactor);

Expand Down
Loading
Loading