Add KafkaTools.Consume#objectProcessorSpec for custom KeyOrValueSpecs
This introduces a new public interface, ObjectProcessor, that provides a means for end-users to create custom KeyOrValueSpecs for use with Kafka ingestion. The interface isn't specific to Kafka, though, and should be extensible to other domains in the future. The existing KeyOrValue specs are left unchanged, but there may be value in porting the existing implementations over to it in the future. Fixes deephaven#4346
1 parent b14b349, commit b8bb1f3
Showing 11 changed files with 1,203 additions and 26 deletions.
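Before the diff itself, a minimal sketch of how an end-user might exercise the new entry point. The StringSplitProcessor class, its "key=value" two-column layout, and the column names are invented for illustration; only Consume.objectProcessorSpec(Deserializer, ObjectProcessor, List) comes from this commit, and the ObjectProcessor method shapes (outputTypes(), processAll(...)) are assumed from how the diff below uses them.

import io.deephaven.chunk.ObjectChunk;
import io.deephaven.chunk.WritableChunk;
import io.deephaven.chunk.WritableObjectChunk;
import io.deephaven.kafka.KafkaTools.Consume;
import io.deephaven.processor.ObjectProcessor;
import io.deephaven.qst.type.Type;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.List;

public final class ObjectProcessorSpecExample {
    // Illustrative processor: splits "key=value" payloads into two String columns.
    static final class StringSplitProcessor implements ObjectProcessor<String> {
        @Override
        public List<Type<?>> outputTypes() {
            // One output type per column the spec will publish.
            return List.of(Type.stringType(), Type.stringType());
        }

        @Override
        public void processAll(ObjectChunk<? extends String, ?> in, List<WritableChunk<?>> out) {
            final WritableObjectChunk<String, ?> keys = out.get(0).asWritableObjectChunk();
            final WritableObjectChunk<String, ?> values = out.get(1).asWritableObjectChunk();
            for (int i = 0; i < in.size(); ++i) {
                final String[] parts = in.get(i).split("=", 2);
                keys.add(parts[0]);
                values.add(parts.length == 2 ? parts[1] : null);
            }
        }
    }

    // Column names must be distinct and match outputTypes() in size, per the
    // constructor checks in KeyOrValueSpecObjectProcessorImpl below.
    static Consume.KeyOrValueSpec exampleValueSpec() {
        return Consume.objectProcessorSpec(
                new StringDeserializer(), new StringSplitProcessor(), List.of("Key", "Value"));
    }
}

The resulting spec is used wherever a Consume.KeyOrValueSpec is accepted; the implementation below adapts the processor's output chunks onto the publisher's chunk array.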
extensions/kafka/src/main/java/io/deephaven/kafka/KeyOrValueSpecObjectProcessorImpl.java (121 additions, 0 deletions)
@@ -0,0 +1,121 @@
package io.deephaven.kafka;

import io.confluent.kafka.schemaregistry.SchemaProvider;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.deephaven.chunk.ObjectChunk;
import io.deephaven.chunk.WritableChunk;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.engine.table.ColumnDefinition;
import io.deephaven.engine.table.TableDefinition;
import io.deephaven.kafka.KafkaTools.Consume.KeyOrValueSpec;
import io.deephaven.kafka.KafkaTools.KeyOrValue;
import io.deephaven.kafka.KafkaTools.KeyOrValueIngestData;
import io.deephaven.kafka.ingest.KafkaStreamPublisher;
import io.deephaven.kafka.ingest.KeyOrValueProcessor;
import io.deephaven.kafka.ingest.MultiFieldChunkAdapter;
import io.deephaven.processor.ObjectProcessor;
import io.deephaven.qst.type.Type;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.kafka.common.serialization.Deserializer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;

/**
 * This implementation is useful for presenting an easier onboarding ramp and better (and public) interface
 * {@link KafkaTools.Consume#objectProcessorSpec(Deserializer, ObjectProcessor, List)} for end-users. The
 * {@link ObjectProcessor} is a user-visible replacement for {@link KeyOrValueProcessor}. In the meantime though, we are
 * adapting into a {@link KeyOrValueProcessor} until such a time when {@link KafkaStreamPublisher} can be re-written to
 * take advantage of these better interfaces.
 */
class KeyOrValueSpecObjectProcessorImpl<T> extends KeyOrValueSpec implements KeyOrValueProcessor {
    private final Deserializer<T> deserializer;
    private final ObjectProcessor<T> processor;
    private final List<String> columnNames;
    private Function<WritableChunk<?>[], List<WritableChunk<?>>> offsetsAdapter;

    KeyOrValueSpecObjectProcessorImpl(
            Deserializer<T> deserializer, ObjectProcessor<T> processor, List<String> columnNames) {
        if (columnNames.size() != processor.outputTypes().size()) {
            throw new IllegalArgumentException("Expected columnNames and processor.outputTypes() to be the same size");
        }
        if (columnNames.stream().distinct().count() != columnNames.size()) {
            throw new IllegalArgumentException("Expected columnNames to have distinct values");
        }
        this.deserializer = Objects.requireNonNull(deserializer);
        this.processor = Objects.requireNonNull(processor);
        this.columnNames = List.copyOf(columnNames);
    }

    @Override
    public Optional<SchemaProvider> getSchemaProvider() {
        return Optional.empty();
    }

    @Override
    protected Deserializer<T> getDeserializer(KeyOrValue keyOrValue, SchemaRegistryClient schemaRegistryClient,
            Map<String, ?> configs) {
        return deserializer;
    }

    @Override
    protected KeyOrValueIngestData getIngestData(KeyOrValue keyOrValue, SchemaRegistryClient schemaRegistryClient,
            Map<String, ?> configs, MutableInt nextColumnIndexMut, List<ColumnDefinition<?>> columnDefinitionsOut) {
        final KeyOrValueIngestData data = new KeyOrValueIngestData();
        data.fieldPathToColumnName = new LinkedHashMap<>();
        final int L = columnNames.size();
        for (int i = 0; i < L; ++i) {
            final String columnName = columnNames.get(i);
            final Type<?> type = processor.outputTypes().get(i);
            data.fieldPathToColumnName.put(columnName, columnName);
            columnDefinitionsOut.add(ColumnDefinition.of(columnName, type));
        }
        return data;
    }

    @Override
    protected KeyOrValueProcessor getProcessor(TableDefinition tableDef, KeyOrValueIngestData data) {
        offsetsAdapter = offsetsFunction(MultiFieldChunkAdapter.chunkOffsets(tableDef, data.fieldPathToColumnName));
        return this;
    }

    @Override
    public void handleChunk(ObjectChunk<Object, Values> inputChunk, WritableChunk<Values>[] publisherChunks) {
        // noinspection unchecked
        final ObjectChunk<T, ?> in = (ObjectChunk<T, ?>) inputChunk;
        // we expect isInOrder to be true, so apply should be an O(1) op no matter how many columns there are.
        processor.processAll(in, offsetsAdapter.apply(publisherChunks));
    }

    private static <T> Function<T[], List<T>> offsetsFunction(int[] offsets) {
        return offsets.length == 0
                ? array -> Collections.emptyList()
                : isInOrder(offsets)
                        ? array -> Arrays.asList(array).subList(offsets[0], offsets[0] + offsets.length)
                        : array -> reorder(array, offsets);
    }

    private static boolean isInOrder(int[] offsets) {
        for (int i = 1; i < offsets.length; ++i) {
            if (offsets[i - 1] + 1 != offsets[i]) {
                return false;
            }
        }
        return true;
    }

    private static <T> List<T> reorder(T[] array, int[] offsets) {
        final List<T> out = new ArrayList<>(offsets.length);
        for (int offset : offsets) {
            out.add(array[offset]);
        }
        return out;
    }
}
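A note on the offsetsFunction dispatch at the bottom of the file: when the spec's columns land in consecutive positions of the publisher's chunk array, isInOrder holds and the adapter is a constant-time Arrays.asList(...).subList(...) view with no copying; only out-of-order offsets pay for an element-by-element gather. A self-contained sketch of the same dispatch, with invented sample data:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;

public final class OffsetsAdapterDemo {
    public static void main(String[] args) {
        final String[] chunks = {"chunk0", "chunk1", "chunk2", "chunk3"};

        // Contiguous offsets {1, 2, 3}: in-order, so a subList view suffices.
        System.out.println(adapt(new int[] {1, 2, 3}).apply(chunks)); // [chunk1, chunk2, chunk3]

        // Out-of-order offsets {3, 0}: elements are gathered one by one.
        System.out.println(adapt(new int[] {3, 0}).apply(chunks)); // [chunk3, chunk0]
    }

    // Same shape as the private offsetsFunction/isInOrder/reorder trio above.
    static <T> Function<T[], List<T>> adapt(int[] offsets) {
        if (offsets.length == 0) {
            return array -> Collections.emptyList();
        }
        boolean inOrder = true;
        for (int i = 1; i < offsets.length; ++i) {
            if (offsets[i - 1] + 1 != offsets[i]) {
                inOrder = false;
                break;
            }
        }
        if (inOrder) {
            final int start = offsets[0];
            final int length = offsets.length;
            return array -> Arrays.asList(array).subList(start, start + length);
        }
        return array -> {
            final List<T> out = new ArrayList<>(offsets.length);
            for (int offset : offsets) {
                out.add(array[offset]);
            }
            return out;
        };
    }
}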