Skip to content

Commit

Permalink
Add arrayGroup construct
Browse files Browse the repository at this point in the history
  • Loading branch information
devinrsmith committed Mar 18, 2024
1 parent db00ad5 commit caf160f
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -228,6 +229,48 @@ void singleObjectArray() {
}
}

// This is harder to implement than it looks like; right now, we expect chunks to be handed off at the same time and
// do it in a streaming fashion. But in the case where we have a very large grouped array, we'd need to buffer all
// of the first column.
//
// {
// "prices": [1.0, 2.0, ..., 1000000.0],
// "sizes": [1, 2, ..., 1000000]
// }
//
// whereas with the logical equivalent (from data perspective):
//
// [ { "price": 1.0, "size": 1 }, { "price": 2.0, "size": 2 }, ..., { "price": 1000000.0, "size": 1000000 } ]
//
// can process this in streaming fashion.
@Disabled("feature not implemented")
@Test
void singleObjectFieldArrayGroup() throws IOException {
final JsonStreamPublisher publisher = publisher(ObjectOptions.builder()
.addFields(ObjectFieldOptions.builder()
.name("names")
.options(StringOptions.standard().array())
.arrayGroup("names_and_ages")
.build())
.addFields(ObjectFieldOptions.builder()
.name("ages")
.options(IntOptions.standard().array())
.arrayGroup("names_and_ages")
.build())
.build(), false);
try (final StreamConsumerRecorder recorder = new StreamConsumerRecorder(publisher)) {
publisher.register(recorder);
// { "names": ["foo", null, "bar", "baz", null, null], "ages": [42, null, 43, null, 44, null] }
directExecute(publisher, Source.of(
"{ \"names\": [\"foo\", null, \"bar\", \"baz\", null, null], \"ages\": [42, null, 43, null, 44, null] }"));
recorder.flushPublisher();
recorder.assertEquals(
ObjectChunk.chunkWrap(new String[] {"foo", null, "bar", "baz", null, null}),
IntChunk.chunkWrap(new int[] {42, NULL_INT, 43, NULL_INT, 44, NULL_INT}));
recorder.clear();
}
}

@Test
void singlePrimitiveKv() {
final JsonStreamPublisher publisher =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,43 @@ void caseInsensitiveOverlap() {
}
}

@Test
void objectFields() throws IOException {
// { "prices": [1.1, 2.2, 3.3], "other": [2, 4, 8, 16] }
parse(ObjectOptions.builder()
.putFields("prices", DoubleOptions.standard().array())
.putFields("other", LongOptions.standard().array())
.build(),
"{ \"prices\": [1.1, 2.2, 3.3], \"other\": [2, 4, 8, 16] }",
ObjectChunk
.chunkWrap(new Object[] {new double[] {1.1, 2.2, 3.3}}),
ObjectChunk.chunkWrap(new Object[] {
new long[] {2, 4, 8, 16}}));
}

@Test
void objectFieldsArrayGroup() throws IOException {
// Note: array groups don't cause any difference wrt ObjectProcessor based destructuring
// { "prices": [1.1, 2.2, 3.3], "sizes": [2, 4, 8] }
parse(ObjectOptions.builder()
.addFields(ObjectFieldOptions.builder()
.name("prices")
.options(DoubleOptions.standard().array())
.arrayGroup("prices_and_sizes")
.build())
.addFields(ObjectFieldOptions.builder()
.name("sizes")
.options(LongOptions.standard().array())
.arrayGroup("prices_and_sizes")
.build())
.build(),
"{ \"prices\": [1.1, 2.2, 3.3], \"sizes\": [2, 4, 8] }",
ObjectChunk
.chunkWrap(new Object[] {new double[] {1.1, 2.2, 3.3}}),
ObjectChunk.chunkWrap(new Object[] {
new long[] {2, 4, 8}}));
}

@Test
void columnNames() {
assertThat(OBJECT_NAME_AGE_FIELD.named(String.class).columnNames()).containsExactly("name", "age");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.immutables.value.Value.Default;
import org.immutables.value.Value.Immutable;

import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;

Expand Down Expand Up @@ -56,6 +57,20 @@ public RepeatedBehavior repeatedBehavior() {
return RepeatedBehavior.USE_FIRST;
}

/**
* The array group for {@code this} field. This is useful in scenarios where {@code this} field's array is
* guaranteed to have the same cardinality as one or more other array fields. For example, in the following snippet,
* we would model "prices" and "quantities" as having the same array group:
*
* <pre>
* {
* "prices": [1.1, 2.2, 3.3],
* "quantities": [9, 5, 42],
* }
* </pre>
*/
public abstract Optional<Object> arrayGroup();

public enum RepeatedBehavior {
/**
* Throws an error if a repeated field is encountered
Expand Down Expand Up @@ -88,6 +103,8 @@ public interface Builder {

Builder caseInsensitiveMatch(boolean caseInsensitiveMatch);

Builder arrayGroup(Object arrayGroup);

ObjectFieldOptions build();
}

Expand All @@ -109,4 +126,14 @@ final void checkNonOverlapping() {
}
}
}

@Check
final void checkArrayGroup() {
if (arrayGroup().isEmpty()) {
return;
}
if (!(options() instanceof ArrayOptions)) {
throw new IllegalArgumentException("arrayGroup is only valid with ArrayOptions");
}
}
}

0 comments on commit caf160f

Please sign in to comment.