diff --git a/extensions/json-jackson/src/test/java/io/deephaven/json/JsonStreamPublisherTest.java b/extensions/json-jackson/src/test/java/io/deephaven/json/JsonStreamPublisherTest.java index 7f5582c6c90..764d5db6385 100644 --- a/extensions/json-jackson/src/test/java/io/deephaven/json/JsonStreamPublisherTest.java +++ b/extensions/json-jackson/src/test/java/io/deephaven/json/JsonStreamPublisherTest.java @@ -20,6 +20,7 @@ import java.io.Closeable; import java.io.File; +import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.nio.ByteBuffer; @@ -228,6 +229,48 @@ void singleObjectArray() { } } + // This is harder to implement than it looks like; right now, we expect chunks to be handed off at the same time and + // do it in a streaming fashion. But in the case where we have a very large grouped array, we'd need to buffer all + // of the first column. + // + // { + // "prices": [1.0, 2.0, ..., 1000000.0], + // "sizes": [1, 2, ..., 1000000] + // } + // + // whereas with the logical equivalent (from data perspective): + // + // [ { "price": 1.0, "size": 1 }, { "price": 2.0, "size": 2 }, ..., { "price": 1000000.0, "size": 1000000 } ] + // + // can process this in streaming fashion. 
+ @Disabled("feature not implemented") + @Test + void singleObjectFieldArrayGroup() throws IOException { + final JsonStreamPublisher publisher = publisher(ObjectOptions.builder() + .addFields(ObjectFieldOptions.builder() + .name("names") + .options(StringOptions.standard().array()) + .arrayGroup("names_and_ages") + .build()) + .addFields(ObjectFieldOptions.builder() + .name("ages") + .options(IntOptions.standard().array()) + .arrayGroup("names_and_ages") + .build()) + .build(), false); + try (final StreamConsumerRecorder recorder = new StreamConsumerRecorder(publisher)) { + publisher.register(recorder); + // { "names": ["foo", null, "bar", "baz", null, null], "ages": [42, null, 43, null, 44, null] } + directExecute(publisher, Source.of( + "{ \"names\": [\"foo\", null, \"bar\", \"baz\", null, null], \"ages\": [42, null, 43, null, 44, null] }")); + recorder.flushPublisher(); + recorder.assertEquals( + ObjectChunk.chunkWrap(new String[] {"foo", null, "bar", "baz", null, null}), + IntChunk.chunkWrap(new int[] {42, NULL_INT, 43, NULL_INT, 44, NULL_INT})); + recorder.clear(); + } + } + @Test void singlePrimitiveKv() { final JsonStreamPublisher publisher = diff --git a/extensions/json-jackson/src/test/java/io/deephaven/json/ObjectOptionsTest.java b/extensions/json-jackson/src/test/java/io/deephaven/json/ObjectOptionsTest.java index 66ba5af9697..c263c12a620 100644 --- a/extensions/json-jackson/src/test/java/io/deephaven/json/ObjectOptionsTest.java +++ b/extensions/json-jackson/src/test/java/io/deephaven/json/ObjectOptionsTest.java @@ -134,6 +134,43 @@ void caseInsensitiveOverlap() { } } + @Test + void objectFields() throws IOException { + // { "prices": [1.1, 2.2, 3.3], "other": [2, 4, 8, 16] } + parse(ObjectOptions.builder() + .putFields("prices", DoubleOptions.standard().array()) + .putFields("other", LongOptions.standard().array()) + .build(), + "{ \"prices\": [1.1, 2.2, 3.3], \"other\": [2, 4, 8, 16] }", + ObjectChunk + .chunkWrap(new Object[] {new double[] {1.1, 2.2, 
3.3}}), + ObjectChunk.chunkWrap(new Object[] { + new long[] {2, 4, 8, 16}})); + } + + @Test + void objectFieldsArrayGroup() throws IOException { + // Note: array groups don't cause any difference wrt ObjectProcessor based destructuring + // { "prices": [1.1, 2.2, 3.3], "sizes": [2, 4, 8] } + parse(ObjectOptions.builder() + .addFields(ObjectFieldOptions.builder() + .name("prices") + .options(DoubleOptions.standard().array()) + .arrayGroup("prices_and_sizes") + .build()) + .addFields(ObjectFieldOptions.builder() + .name("sizes") + .options(LongOptions.standard().array()) + .arrayGroup("prices_and_sizes") + .build()) + .build(), + "{ \"prices\": [1.1, 2.2, 3.3], \"sizes\": [2, 4, 8] }", + ObjectChunk + .chunkWrap(new Object[] {new double[] {1.1, 2.2, 3.3}}), + ObjectChunk.chunkWrap(new Object[] { + new long[] {2, 4, 8}})); + } + @Test void columnNames() { assertThat(OBJECT_NAME_AGE_FIELD.named(String.class).columnNames()).containsExactly("name", "age"); diff --git a/extensions/json/src/main/java/io/deephaven/json/ObjectFieldOptions.java b/extensions/json/src/main/java/io/deephaven/json/ObjectFieldOptions.java index 079a59c2926..6e13e748f08 100644 --- a/extensions/json/src/main/java/io/deephaven/json/ObjectFieldOptions.java +++ b/extensions/json/src/main/java/io/deephaven/json/ObjectFieldOptions.java @@ -8,6 +8,7 @@ import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; +import java.util.Optional; import java.util.Set; import java.util.TreeSet; @@ -56,6 +57,20 @@ public RepeatedBehavior repeatedBehavior() { return RepeatedBehavior.USE_FIRST; } + /** + * The array group for {@code this} field. This is useful in scenarios where {@code this} field's array is + * guaranteed to have the same cardinality as one or more other array fields. For example, in the following snippet, + * we would model "prices" and "quantities" as having the same array group: + * + *
+     * <pre>
+     * {
+     *   "prices": [1.1, 2.2, 3.3],
+     *   "quantities": [9, 5, 42]
+     * }
+     * </pre>
+     */ + public abstract Optional<Object> arrayGroup(); + public enum RepeatedBehavior { /** * Throws an error if a repeated field is encountered @@ -88,6 +103,8 @@ public interface Builder { Builder caseInsensitiveMatch(boolean caseInsensitiveMatch); + Builder arrayGroup(Object arrayGroup); + ObjectFieldOptions build(); } @@ -109,4 +126,14 @@ final void checkNonOverlapping() { } } } + + @Check + final void checkArrayGroup() { + if (arrayGroup().isEmpty()) { + return; + } + if (!(options() instanceof ArrayOptions)) { + throw new IllegalArgumentException("arrayGroup is only valid with ArrayOptions"); + } + } }