From 58783c0286263ffd354208546e8528bea4388466 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 8 Sep 2023 19:31:01 -0700 Subject: [PATCH 01/15] Bump to 0.29.0 (#4477) --- R/rdeephaven/DESCRIPTION | 2 +- authorization-codegen/protoc-gen-contextual-auth-wiring | 2 +- authorization-codegen/protoc-gen-service-auth-wiring | 2 +- .../src/main/groovy/io.deephaven.common-conventions.gradle | 2 +- py/client-ticking/README.md | 2 +- py/client-ticking/setup.py | 4 ++-- py/client/README.md | 2 +- py/client/pydeephaven/__init__.py | 2 +- py/client/setup.py | 2 +- py/embedded-server/deephaven_server/__init__.py | 2 +- py/server/deephaven/__init__.py | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index 4057416f746..cd4ce38d6cb 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdeephaven Type: Package Title: R Client for Deephaven Core -Version: 0.28 +Version: 0.29.0 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters diff --git a/authorization-codegen/protoc-gen-contextual-auth-wiring b/authorization-codegen/protoc-gen-contextual-auth-wiring index 3e08445799d..5b4cd3202ee 100755 --- a/authorization-codegen/protoc-gen-contextual-auth-wiring +++ b/authorization-codegen/protoc-gen-contextual-auth-wiring @@ -1,2 +1,2 @@ # protoc-gen-contextual-auth-wiring -java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.28.0-all.jar io.deephaven.auth.codegen.GenerateContextualAuthWiring +java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.29.0-all.jar io.deephaven.auth.codegen.GenerateContextualAuthWiring diff --git a/authorization-codegen/protoc-gen-service-auth-wiring b/authorization-codegen/protoc-gen-service-auth-wiring index 3d58c53d348..4a84af05baf 100755 --- a/authorization-codegen/protoc-gen-service-auth-wiring +++ b/authorization-codegen/protoc-gen-service-auth-wiring @@ -1,2 +1,2 @@ # protoc-gen-service-auth-wiring -java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.28.0-all.jar io.deephaven.auth.codegen.GenerateServiceAuthWiring +java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.29.0-all.jar io.deephaven.auth.codegen.GenerateServiceAuthWiring diff --git a/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle b/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle index 4cbaa1699e7..0b8d5c1c891 100644 --- a/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle +++ b/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle @@ -5,7 +5,7 @@ plugins { } group = 'io.deephaven' -version = '0.28.0' +version = '0.29.0' if (!name.startsWith('deephaven-')) { archivesBaseName = "deephaven-${name}" diff --git a/py/client-ticking/README.md b/py/client-ticking/README.md index 593e6d2ecf2..876a6a35cef 100644 --- a/py/client-ticking/README.md +++ b/py/client-ticking/README.md @@ -66,7 +66,7 @@ Then install the package. Note the actual name of the `.whl` file may be different depending on system details. 
``` -pip3 install --force --no-deps dist/pydeephaven_ticking-0.28.0-cp310-cp310-linux_x86_64.whl +pip3 install --force --no-deps dist/pydeephaven_ticking-0.29.0-cp310-cp310-linux_x86_64.whl ``` The reason for the "--force" flag is to overwrite any previously-built version of the package that diff --git a/py/client-ticking/setup.py b/py/client-ticking/setup.py index 140af2a27cb..b5fc690a040 100644 --- a/py/client-ticking/setup.py +++ b/py/client-ticking/setup.py @@ -13,7 +13,7 @@ setup( name='pydeephaven-ticking', - version='0.28.0.dev', + version='0.29.0.dev', description='The Deephaven Python Client for Ticking Tables', long_description=README, long_description_content_type="text/markdown", @@ -44,5 +44,5 @@ libraries=["dhcore"] )]), python_requires='>=3.8', - install_requires=['pydeephaven==0.28.0'] + install_requires=['pydeephaven==0.29.0'] ) diff --git a/py/client/README.md b/py/client/README.md index 6f20d85c613..70bb04bb3e3 100644 --- a/py/client/README.md +++ b/py/client/README.md @@ -38,7 +38,7 @@ $ python3 -m examples.demo_asof_join Note the actual name of the `.whl` file may be different depending on system details. ``` shell -$ pip3 install dist/pydeephaven-0.28.0-py3-none-any.whl +$ pip3 install dist/pydeephaven-0.29.0-py3-none-any.whl ``` ## Quick start diff --git a/py/client/pydeephaven/__init__.py b/py/client/pydeephaven/__init__.py index fa7a0639da5..e04095714ba 100644 --- a/py/client/pydeephaven/__init__.py +++ b/py/client/pydeephaven/__init__.py @@ -35,4 +35,4 @@ pass __all__ = ["Session", "DHError", "SortDirection"] -__version__ = "0.28.0" +__version__ = "0.29.0" diff --git a/py/client/setup.py b/py/client/setup.py index a3cbebe749c..be8954864fb 100644 --- a/py/client/setup.py +++ b/py/client/setup.py @@ -12,7 +12,7 @@ setup( name='pydeephaven', - version='0.28.0', + version='0.29.0', description='The Deephaven Python Client', long_description=README, long_description_content_type="text/markdown", diff --git a/py/embedded-server/deephaven_server/__init__.py b/py/embedded-server/deephaven_server/__init__.py index 3512bcc352c..66e5f0d8ec5 100644 --- a/py/embedded-server/deephaven_server/__init__.py +++ b/py/embedded-server/deephaven_server/__init__.py @@ -1,7 +1,7 @@ # # Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending # -__version__ = "0.28.0" +__version__ = "0.29.0" from .start_jvm import DEFAULT_JVM_PROPERTIES, DEFAULT_JVM_ARGS, start_jvm from .server import Server diff --git a/py/server/deephaven/__init__.py b/py/server/deephaven/__init__.py index fb740352e8c..a5ce6b412e4 100644 --- a/py/server/deephaven/__init__.py +++ b/py/server/deephaven/__init__.py @@ -7,7 +7,7 @@ """ -__version__ = "0.28.0" +__version__ = "0.29.0" from deephaven_internal import jvm From 0caa2c7d6f1f48fa376c9b2cdd7ab4a8b28d9dc0 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Mon, 11 Sep 2023 11:50:32 -0700 Subject: [PATCH 02/15] Improve generic types for TypedFunctions (#4463) --- extensions/protobuf/build.gradle | 2 + .../deephaven/functions/BooleanFunctions.java | 134 +++++++++++--- .../io/deephaven/functions/ByteFunctions.java | 16 +- .../io/deephaven/functions/CharFunctions.java | 16 +- .../deephaven/functions/DoubleFunctions.java | 22 ++- .../deephaven/functions/FloatFunctions.java | 16 +- .../io/deephaven/functions/IntFunctions.java | 22 ++- .../io/deephaven/functions/LongFunctions.java | 22 ++- .../deephaven/functions/ObjectFunctions.java | 168 ++++++------------ .../functions/PrimitiveFunctions.java | 72 ++++++++ .../deephaven/functions/ShortFunctions.java | 16 +- 
.../functions/ToBooleanFunction.java | 32 +++- .../deephaven/functions/ToByteFunction.java | 4 +- .../deephaven/functions/ToCharFunction.java | 4 +- .../deephaven/functions/ToDoubleFunction.java | 4 +- .../deephaven/functions/ToFloatFunction.java | 4 +- .../io/deephaven/functions/ToIntFunction.java | 4 +- .../deephaven/functions/ToLongFunction.java | 4 +- .../deephaven/functions/ToObjectFunction.java | 71 +++++--- .../deephaven/functions/ToShortFunction.java | 4 +- .../deephaven/functions/TypedFunctions.java | 40 +++++ .../functions/ToBooleanFunctionTest.java | 129 +++++++++++--- 22 files changed, 569 insertions(+), 237 deletions(-) create mode 100644 extensions/protobuf/src/main/java/io/deephaven/functions/PrimitiveFunctions.java create mode 100644 extensions/protobuf/src/main/java/io/deephaven/functions/TypedFunctions.java diff --git a/extensions/protobuf/build.gradle b/extensions/protobuf/build.gradle index 7c4dcd6eedd..5216aea91e2 100644 --- a/extensions/protobuf/build.gradle +++ b/extensions/protobuf/build.gradle @@ -11,6 +11,8 @@ dependencies { Classpaths.inheritImmutables(project) + compileOnly depAnnotations + Classpaths.inheritJUnitPlatform(project) Classpaths.inheritAssertJ(project) diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/BooleanFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/BooleanFunctions.java index 1a9e581cb01..f64cd61a474 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/BooleanFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/BooleanFunctions.java @@ -3,21 +3,31 @@ */ package io.deephaven.functions; +import org.jetbrains.annotations.NotNull; + import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.function.Function; import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; class BooleanFunctions { static ToBooleanFunction cast() { + return cast(PrimitiveBoolean.INSTANCE); + } + + static ToBooleanFunction cast(ToBooleanFunction f) { // noinspection unchecked - return (ToBooleanFunction) PrimitiveBoolean.INSTANCE; + return (ToBooleanFunction) f; } - static ToBooleanFunction of(Predicate predicate) { - return predicate instanceof ToBooleanFunction ? (ToBooleanFunction) predicate : predicate::test; + static ToBooleanFunction of(Predicate predicate) { + return predicate instanceof ToBooleanFunction + ? cast((ToBooleanFunction) predicate) + : predicate::test; } public static ToBooleanFunction ofTrue() { @@ -30,15 +40,19 @@ public static ToBooleanFunction ofFalse() { return (ToBooleanFunction) OfFalse.INSTANCE; } - static ToBooleanFunction map(Function f, Predicate g) { + static ToBooleanFunction map( + Function f, + Predicate g) { return new BooleanMap<>(f, g); } - static ToBooleanFunction not(ToBooleanFunction f) { - return f instanceof BooleanNot ? of(((BooleanNot) f).function()) : new BooleanNot<>(f); + static ToBooleanFunction not(Predicate f) { + return f instanceof BooleanNot + ? 
cast(((BooleanNot) f).negate()) + : new BooleanNot<>(f); } - static ToBooleanFunction or(Collection> functions) { + static ToBooleanFunction or(Collection> functions) { if (functions.isEmpty()) { return ofFalse(); } @@ -48,7 +62,7 @@ static ToBooleanFunction or(Collection> functions) { return new BooleanOr<>(functions); } - static ToBooleanFunction and(Collection> functions) { + static ToBooleanFunction and(Collection> functions) { if (functions.isEmpty()) { return ofTrue(); } @@ -65,6 +79,26 @@ private enum OfTrue implements ToBooleanFunction { public boolean test(Object value) { return true; } + + @Override + @NotNull + public ToBooleanFunction negate() { + return ofFalse(); + } + + @Override + @NotNull + public ToBooleanFunction and(@NotNull Predicate other) { + // always other + return of(other); + } + + @Override + @NotNull + public ToBooleanFunction or(@NotNull Predicate other) { + // always true + return this; + } } private enum OfFalse implements ToBooleanFunction { @@ -74,6 +108,26 @@ private enum OfFalse implements ToBooleanFunction { public boolean test(Object value) { return false; } + + @Override + @NotNull + public ToBooleanFunction negate() { + return ofTrue(); + } + + @Override + @NotNull + public ToBooleanFunction and(@NotNull Predicate other) { + // always false + return this; + } + + @Override + @NotNull + public ToBooleanFunction or(@NotNull Predicate other) { + // always other + return of(other); + } } private enum PrimitiveBoolean implements ToBooleanFunction { @@ -86,10 +140,10 @@ public boolean test(Object value) { } private static class BooleanMap implements ToBooleanFunction { - private final Function f; - private final Predicate g; + private final Function f; + private final Predicate g; - public BooleanMap(Function f, Predicate g) { + public BooleanMap(Function f, Predicate g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } @@ -98,58 +152,92 @@ public BooleanMap(Function f, Predicate g) { public boolean test(T value) { return g.test(f.apply(value)); } + + @Override + @NotNull + public ToBooleanFunction negate() { + return new BooleanMap<>(f, g.negate()); + } } private static class BooleanNot implements ToBooleanFunction { - private final Predicate function; + private final Predicate function; - public BooleanNot(ToBooleanFunction function) { + public BooleanNot(Predicate function) { this.function = Objects.requireNonNull(function); } - public Predicate function() { - return function; - } - @Override public boolean test(T value) { return !function.test(value); } + + @Override + @NotNull + public ToBooleanFunction negate() { + return of(function); + } } private static class BooleanAnd implements ToBooleanFunction { - private final Collection> functions; + private final Collection> functions; - public BooleanAnd(Collection> functions) { + public BooleanAnd(Collection> functions) { this.functions = List.copyOf(functions); } @Override public boolean test(T value) { - for (Predicate function : functions) { + for (Predicate function : functions) { if (!function.test(value)) { return false; } } return true; } + + @Override + @NotNull + public ToBooleanFunction negate() { + return new BooleanOr<>(functions.stream().map(Predicate::negate).collect(Collectors.toList())); + } + + @Override + @NotNull + public ToBooleanFunction and(@NotNull Predicate other) { + // noinspection Convert2Diamond + return new BooleanAnd(Stream.concat(functions.stream(), Stream.of(other)).collect(Collectors.toList())); + } } private static class BooleanOr implements 
ToBooleanFunction { - private final Collection> functions; + private final Collection> functions; - public BooleanOr(Collection> functions) { + public BooleanOr(Collection> functions) { this.functions = List.copyOf(functions); } @Override public boolean test(T value) { - for (Predicate function : functions) { + for (Predicate function : functions) { if (function.test(value)) { return true; } } return false; } + + @Override + @NotNull + public ToBooleanFunction negate() { + return new BooleanAnd<>(functions.stream().map(Predicate::negate).collect(Collectors.toList())); + } + + @Override + @NotNull + public ToBooleanFunction or(@NotNull Predicate other) { + // noinspection Convert2Diamond + return new BooleanOr(Stream.concat(functions.stream(), Stream.of(other)).collect(Collectors.toList())); + } } } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ByteFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ByteFunctions.java index b1b3cdf3e6b..d44cef49b45 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ByteFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ByteFunctions.java @@ -8,11 +8,17 @@ class ByteFunctions { static ToByteFunction cast() { + return cast(PrimitiveByte.INSTANCE); + } + + static ToByteFunction cast(ToByteFunction f) { // noinspection unchecked - return (ToByteFunction) PrimitiveByte.INSTANCE; + return (ToByteFunction) f; } - static ToByteFunction map(Function f, ToByteFunction g) { + static ToByteFunction map( + Function f, + ToByteFunction g) { return new ByteMap<>(f, g); } @@ -26,10 +32,10 @@ public byte applyAsByte(Object value) { } private static class ByteMap implements ToByteFunction { - private final Function f; - private final ToByteFunction g; + private final Function f; + private final ToByteFunction g; - public ByteMap(Function f, ToByteFunction g) { + public ByteMap(Function f, ToByteFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/CharFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/CharFunctions.java index 4921bbb7f6d..428465ff8e8 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/CharFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/CharFunctions.java @@ -8,11 +8,17 @@ class CharFunctions { static ToCharFunction cast() { + return cast(PrimitiveChar.INSTANCE); + } + + static ToCharFunction cast(ToCharFunction f) { // noinspection unchecked - return (ToCharFunction) PrimitiveChar.INSTANCE; + return (ToCharFunction) f; } - static ToCharFunction map(Function f, ToCharFunction g) { + static ToCharFunction map( + Function f, + ToCharFunction g) { return new CharMap<>(f, g); } @@ -26,10 +32,10 @@ public char applyAsChar(Object value) { } private static class CharMap implements ToCharFunction { - private final Function f; - private final ToCharFunction g; + private final Function f; + private final ToCharFunction g; - public CharMap(Function f, ToCharFunction g) { + public CharMap(Function f, ToCharFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/DoubleFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/DoubleFunctions.java index bbe3ff205ed..f1fab905a14 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/DoubleFunctions.java +++ 
b/extensions/protobuf/src/main/java/io/deephaven/functions/DoubleFunctions.java @@ -9,15 +9,23 @@ class DoubleFunctions { static ToDoubleFunction cast() { + return cast(PrimitiveDouble.INSTANCE); + } + + static ToDoubleFunction cast(ToDoubleFunction f) { // noinspection unchecked - return (ToDoubleFunction) PrimitiveDouble.INSTANCE; + return (ToDoubleFunction) f; } - static ToDoubleFunction of(java.util.function.ToDoubleFunction f) { - return f instanceof ToDoubleFunction ? (ToDoubleFunction) f : f::applyAsDouble; + static ToDoubleFunction of(java.util.function.ToDoubleFunction f) { + return f instanceof ToDoubleFunction + ? cast((ToDoubleFunction) f) + : f::applyAsDouble; } - static ToDoubleFunction map(Function f, java.util.function.ToDoubleFunction g) { + static ToDoubleFunction map( + Function f, + java.util.function.ToDoubleFunction g) { return new DoubleFunctionMap<>(f, g); } @@ -31,10 +39,10 @@ public double applyAsDouble(Object value) { } private static class DoubleFunctionMap implements ToDoubleFunction { - private final Function f; - private final java.util.function.ToDoubleFunction g; + private final Function f; + private final java.util.function.ToDoubleFunction g; - public DoubleFunctionMap(Function f, java.util.function.ToDoubleFunction g) { + public DoubleFunctionMap(Function f, java.util.function.ToDoubleFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/FloatFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/FloatFunctions.java index f3f6f9303a6..5e8c6d35f03 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/FloatFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/FloatFunctions.java @@ -8,11 +8,17 @@ class FloatFunctions { static ToFloatFunction cast() { + return cast(PrimitiveFloat.INSTANCE); + } + + static ToFloatFunction cast(ToFloatFunction f) { // noinspection unchecked - return (ToFloatFunction) PrimitiveFloat.INSTANCE; + return (ToFloatFunction) f; } - static ToFloatFunction map(Function f, ToFloatFunction g) { + static ToFloatFunction map( + Function f, + ToFloatFunction g) { return new FloatMap<>(f, g); } @@ -26,10 +32,10 @@ public float applyAsFloat(Object value) { } private static class FloatMap implements ToFloatFunction { - private final Function f; - private final ToFloatFunction g; + private final Function f; + private final ToFloatFunction g; - public FloatMap(Function f, ToFloatFunction g) { + public FloatMap(Function f, ToFloatFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/IntFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/IntFunctions.java index e97673a937e..f707d16f654 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/IntFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/IntFunctions.java @@ -8,15 +8,23 @@ class IntFunctions { static ToIntFunction cast() { + return cast(PrimitiveInt.INSTANCE); + } + + static ToIntFunction cast(ToIntFunction f) { // noinspection unchecked - return (ToIntFunction) PrimitiveInt.INSTANCE; + return (ToIntFunction) f; } - static ToIntFunction of(java.util.function.ToIntFunction f) { - return f instanceof ToIntFunction ? (ToIntFunction) f : f::applyAsInt; + static ToIntFunction of(java.util.function.ToIntFunction f) { + return f instanceof ToIntFunction + ? 
cast((ToIntFunction) f) + : f::applyAsInt; } - static ToIntFunction map(Function f, java.util.function.ToIntFunction g) { + static ToIntFunction map( + Function f, + java.util.function.ToIntFunction g) { return new IntMap<>(f, g); } @@ -30,10 +38,10 @@ public int applyAsInt(Object value) { } private static class IntMap implements ToIntFunction { - private final Function f; - private final java.util.function.ToIntFunction g; + private final Function f; + private final java.util.function.ToIntFunction g; - public IntMap(Function f, java.util.function.ToIntFunction g) { + public IntMap(Function f, java.util.function.ToIntFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/LongFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/LongFunctions.java index 20b135f8b88..30ba0608f65 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/LongFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/LongFunctions.java @@ -9,15 +9,23 @@ class LongFunctions { static ToLongFunction cast() { + return cast(PrimitiveLong.INSTANCE); + } + + static ToLongFunction cast(ToLongFunction f) { // noinspection unchecked - return (ToLongFunction) PrimitiveLong.INSTANCE; + return (ToLongFunction) f; } - static ToLongFunction of(java.util.function.ToLongFunction f) { - return f instanceof ToLongFunction ? (ToLongFunction) f : f::applyAsLong; + static ToLongFunction of(java.util.function.ToLongFunction f) { + return f instanceof ToLongFunction + ? cast((ToLongFunction) f) + : f::applyAsLong; } - static ToLongFunction map(Function f, java.util.function.ToLongFunction g) { + static ToLongFunction map( + Function f, + java.util.function.ToLongFunction g) { return new LongMap<>(f, g); } @@ -31,10 +39,10 @@ public long applyAsLong(Object value) { } private static class LongMap implements ToLongFunction { - private final Function f; - private final java.util.function.ToLongFunction g; + private final Function f; + private final java.util.function.ToLongFunction g; - public LongMap(Function f, java.util.function.ToLongFunction g) { + public LongMap(Function f, java.util.function.ToLongFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ObjectFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ObjectFunctions.java index bab369e5552..951441a0e09 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ObjectFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ObjectFunctions.java @@ -14,32 +14,46 @@ class ObjectFunctions { static ToObjectFunction identity() { + return cast(Identity.INSTANCE); + } + + static ToObjectFunction cast(ToObjectFunction f) { // noinspection unchecked - return (ToObjectFunction) Identity.INSTANCE; + return (ToObjectFunction) f; } static ToObjectFunction cast(GenericType type) { return new Casted<>(type); } - static ToObjectFunction of(Function f, GenericType returnType) { - return new FunctionImpl<>(f, returnType); + static ToObjectFunction of( + Function f, + GenericType returnType) { + return f instanceof ToObjectFunction + ? 
castOrMapCast((ToObjectFunction) f, returnType) + : new FunctionImpl<>(f, returnType); } - static ToObjectFunction map(Function f, ToObjectFunction g) { - return new ObjectMap<>(f, g, g.returnType()); - } - - static ToObjectFunction map(Function f, Function g, GenericType returnType) { - return new ObjectMap<>(f, g, returnType); + static ToObjectFunction castOrMapCast( + ToObjectFunction f, + GenericType returnType) { + // noinspection unchecked + return f.returnType().equals(returnType) + ? (ToObjectFunction) f + : f.mapToObj(ObjectFunctions.cast(returnType)); } - static ToPrimitiveFunction mapPrimitive(ToObjectFunction f, ToPrimitiveFunction g) { - return MapPrimitiveVisitor.of(f, g); + static ToObjectFunction map( + Function f, + ToObjectFunction g) { + return new ObjectMap<>(f, g, g.returnType()); } - static TypedFunction map(ToObjectFunction f, TypedFunction g) { - return MapVisitor.of(f, g); + static ToObjectFunction map( + Function f, + Function g, + GenericType returnType) { + return new ObjectMap<>(f, g, returnType); } private enum Identity implements ToObjectFunction { @@ -58,63 +72,65 @@ public Object apply(Object value) { } @Override - public ToBooleanFunction mapToBoolean(Predicate g) { + public ToBooleanFunction mapToBoolean(Predicate g) { return BooleanFunctions.of(g); } + @Override - public ToCharFunction mapToChar(ToCharFunction g) { - return g; + public ToCharFunction mapToChar(ToCharFunction g) { + return CharFunctions.cast(g); } @Override - public ToByteFunction mapToByte(ToByteFunction g) { - return g; + public ToByteFunction mapToByte(ToByteFunction g) { + return ByteFunctions.cast(g); } @Override - public ToShortFunction mapToShort(ToShortFunction g) { - return g; + public ToShortFunction mapToShort(ToShortFunction g) { + return ShortFunctions.cast(g); } @Override - public ToIntFunction mapToInt(java.util.function.ToIntFunction g) { + public ToIntFunction mapToInt(java.util.function.ToIntFunction g) { return IntFunctions.of(g); } @Override - public ToLongFunction mapToLong(java.util.function.ToLongFunction g) { + public ToLongFunction mapToLong(java.util.function.ToLongFunction g) { return LongFunctions.of(g); } @Override - public ToFloatFunction mapToFloat(ToFloatFunction g) { - return g; + public ToFloatFunction mapToFloat(ToFloatFunction g) { + return FloatFunctions.cast(g); } @Override - public ToDoubleFunction mapToDouble(java.util.function.ToDoubleFunction g) { + public ToDoubleFunction mapToDouble(java.util.function.ToDoubleFunction g) { return DoubleFunctions.of(g); } @Override - public ToObjectFunction mapToObj(ToObjectFunction g) { - return g; + public ToObjectFunction mapToObj(ToObjectFunction g) { + return ObjectFunctions.cast(g); } @Override - public ToObjectFunction mapToObj(Function g, GenericType returnType) { - return ToObjectFunction.of(g, returnType); + public ToObjectFunction mapToObj(Function g, + GenericType returnType) { + return ObjectFunctions.of(g, returnType); } @Override - public ToPrimitiveFunction mapToPrimitive(ToPrimitiveFunction g) { - return g; + public ToPrimitiveFunction mapToPrimitive(ToPrimitiveFunction g) { + return PrimitiveFunctions.cast(g); } @Override - public TypedFunction map(TypedFunction g) { - return g; + public TypedFunction map(TypedFunction g) { + return TypedFunctions.cast(g); } } @@ -137,10 +153,10 @@ public R apply(T value) { } private static final class FunctionImpl implements ToObjectFunction { - private final Function f; + private final Function f; private final GenericType returnType; - FunctionImpl(Function 
f, GenericType returnType) { + FunctionImpl(Function f, GenericType returnType) { this.f = Objects.requireNonNull(f); this.returnType = Objects.requireNonNull(returnType); } @@ -157,11 +173,12 @@ public R apply(T value) { } private static class ObjectMap implements ToObjectFunction { - private final Function f; - private final Function g; + private final Function f; + private final Function g; private final GenericType returnType; - public ObjectMap(Function f, Function g, GenericType returnType) { + public ObjectMap(Function f, Function g, + GenericType returnType) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); this.returnType = Objects.requireNonNull(returnType); @@ -178,79 +195,4 @@ public Z apply(T value) { } } - private static class MapPrimitiveVisitor implements ToPrimitiveFunction.Visitor> { - - public static ToPrimitiveFunction of(ToObjectFunction f, ToPrimitiveFunction g) { - return g.walk(new MapPrimitiveVisitor<>(f)); - } - - private final ToObjectFunction f; - - private MapPrimitiveVisitor(ToObjectFunction f) { - this.f = Objects.requireNonNull(f); - } - - @Override - public ToBooleanFunction visit(ToBooleanFunction g) { - return f.mapToBoolean(g); - } - - @Override - public ToCharFunction visit(ToCharFunction g) { - return f.mapToChar(g); - } - - @Override - public ToByteFunction visit(ToByteFunction g) { - return f.mapToByte(g); - } - - @Override - public ToShortFunction visit(ToShortFunction g) { - return f.mapToShort(g); - } - - @Override - public ToIntFunction visit(ToIntFunction g) { - return f.mapToInt(g); - } - - @Override - public ToLongFunction visit(ToLongFunction g) { - return f.mapToLong(g); - } - - @Override - public ToFloatFunction visit(ToFloatFunction g) { - return f.mapToFloat(g); - } - - @Override - public ToDoubleFunction visit(ToDoubleFunction g) { - return f.mapToDouble(g); - } - } - - private static class MapVisitor implements TypedFunction.Visitor> { - - public static TypedFunction of(ToObjectFunction f, TypedFunction g) { - return g.walk(new MapVisitor<>(f)); - } - - private final ToObjectFunction f; - - private MapVisitor(ToObjectFunction f) { - this.f = Objects.requireNonNull(f); - } - - @Override - public ToPrimitiveFunction visit(ToPrimitiveFunction g) { - return f.mapToPrimitive(g); - } - - @Override - public ToObjectFunction visit(ToObjectFunction g) { - return f.mapToObj(g); - } - } } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/PrimitiveFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/PrimitiveFunctions.java new file mode 100644 index 00000000000..c4ba0015685 --- /dev/null +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/PrimitiveFunctions.java @@ -0,0 +1,72 @@ +package io.deephaven.functions; + +import java.util.Objects; + +class PrimitiveFunctions { + static ToPrimitiveFunction cast(ToPrimitiveFunction f) { + // noinspection unchecked + return (ToPrimitiveFunction) f; + } + + static ToPrimitiveFunction map( + ToObjectFunction f, + ToPrimitiveFunction g) { + return MapPrimitiveVisitor.of(f, g); + } + + private static class MapPrimitiveVisitor + implements ToPrimitiveFunction.Visitor> { + + public static ToPrimitiveFunction of( + ToObjectFunction f, + ToPrimitiveFunction g) { + return g.walk(new MapPrimitiveVisitor<>(f)); + } + + private final ToObjectFunction f; + + private MapPrimitiveVisitor(ToObjectFunction f) { + this.f = Objects.requireNonNull(f); + } + + @Override + public ToBooleanFunction visit(ToBooleanFunction g) { + return f.mapToBoolean(g); + 
} + + @Override + public ToCharFunction visit(ToCharFunction g) { + return f.mapToChar(g); + } + + @Override + public ToByteFunction visit(ToByteFunction g) { + return f.mapToByte(g); + } + + @Override + public ToShortFunction visit(ToShortFunction g) { + return f.mapToShort(g); + } + + @Override + public ToIntFunction visit(ToIntFunction g) { + return f.mapToInt(g); + } + + @Override + public ToLongFunction visit(ToLongFunction g) { + return f.mapToLong(g); + } + + @Override + public ToFloatFunction visit(ToFloatFunction g) { + return f.mapToFloat(g); + } + + @Override + public ToDoubleFunction visit(ToDoubleFunction g) { + return f.mapToDouble(g); + } + } +} diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ShortFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ShortFunctions.java index 32f37824cc1..efe8543f915 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ShortFunctions.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ShortFunctions.java @@ -9,11 +9,17 @@ class ShortFunctions { static ToShortFunction cast() { + return cast(PrimitiveShort.INSTANCE); + } + + static ToShortFunction cast(ToShortFunction f) { // noinspection unchecked - return (ToShortFunction) PrimitiveShort.INSTANCE; + return (ToShortFunction) f; } - static ToShortFunction map(Function f, ToShortFunction g) { + static ToShortFunction map( + Function f, + ToShortFunction g) { return new ShortMap<>(f, g); } @@ -27,10 +33,10 @@ public short applyAsShort(Object value) { } private static class ShortMap implements ToShortFunction { - private final Function f; - private final ToShortFunction g; + private final Function f; + private final ToShortFunction g; - public ShortMap(Function f, ToShortFunction g) { + public ShortMap(Function f, ToShortFunction g) { this.f = Objects.requireNonNull(f); this.g = Objects.requireNonNull(g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToBooleanFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToBooleanFunction.java index bf0f1353b6b..11c727a0398 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToBooleanFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToBooleanFunction.java @@ -5,8 +5,10 @@ import io.deephaven.qst.type.BooleanType; import io.deephaven.qst.type.Type; +import org.jetbrains.annotations.NotNull; import java.util.Collection; +import java.util.List; import java.util.function.Function; import java.util.function.Predicate; @@ -60,7 +62,9 @@ static ToBooleanFunction ofFalse() { * @param the input type * @param the intermediate type */ - static ToBooleanFunction map(Function f, Predicate g) { + static ToBooleanFunction map( + Function f, + Predicate g) { return BooleanFunctions.map(f, g); } @@ -72,7 +76,7 @@ static ToBooleanFunction map(Function f, Predicate g) { * @return the or-function * @param the input type */ - static ToBooleanFunction or(Collection> functions) { + static ToBooleanFunction or(Collection> functions) { return BooleanFunctions.or(functions); } @@ -84,18 +88,18 @@ static ToBooleanFunction or(Collection> functions) { * @return the and-function * @param the input type */ - static ToBooleanFunction and(Collection> functions) { + static ToBooleanFunction and(Collection> functions) { return BooleanFunctions.and(functions); } /** - * Creates a function that is the opposite of {@code f}. Equivalent to {@code x -> !x}. + * Creates a function that is the opposite of {@code f}. 
Equivalent to {@code x -> !f.test(x)}. * * @param f the function * @return the not-function * @param the input type */ - static ToBooleanFunction not(ToBooleanFunction f) { + static ToBooleanFunction not(Predicate f) { return BooleanFunctions.not(f); } @@ -111,4 +115,22 @@ default BooleanType returnType() { default R walk(Visitor visitor) { return visitor.visit(this); } + + @Override + @NotNull + default ToBooleanFunction negate() { + return not(this); + } + + @Override + @NotNull + default ToBooleanFunction and(@NotNull Predicate other) { + return ToBooleanFunction.and(List.of(this, other)); + } + + @Override + @NotNull + default ToBooleanFunction or(@NotNull Predicate other) { + return ToBooleanFunction.or(List.of(this, other)); + } } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToByteFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToByteFunction.java index 93a7a5439a4..511938a68e2 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToByteFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToByteFunction.java @@ -38,7 +38,9 @@ static ToByteFunction cast() { * @param the input type * @param the intermediate type */ - static ToByteFunction map(Function f, ToByteFunction g) { + static ToByteFunction map( + Function f, + ToByteFunction g) { return ByteFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToCharFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToCharFunction.java index 33cf7931176..6e6cbc30bfb 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToCharFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToCharFunction.java @@ -37,7 +37,9 @@ static ToCharFunction cast() { * @param the input type * @param the intermediate type */ - static ToCharFunction map(Function f, ToCharFunction g) { + static ToCharFunction map( + Function f, + ToCharFunction g) { return CharFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToDoubleFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToDoubleFunction.java index 4d7a6c0d725..46a20e0e2c9 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToDoubleFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToDoubleFunction.java @@ -38,7 +38,9 @@ static ToDoubleFunction cast() { * @param the input type * @param the intermediate type */ - static ToDoubleFunction map(Function f, java.util.function.ToDoubleFunction g) { + static ToDoubleFunction map( + Function f, + java.util.function.ToDoubleFunction g) { return DoubleFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToFloatFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToFloatFunction.java index fc0d34e8ac3..ad131d34105 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToFloatFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToFloatFunction.java @@ -37,7 +37,9 @@ static ToFloatFunction cast() { * @param the input type * @param the intermediate type */ - static ToFloatFunction map(Function f, ToFloatFunction g) { + static ToFloatFunction map( + Function f, + ToFloatFunction g) { return FloatFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToIntFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToIntFunction.java index 
26da4dfad5a..4bf7f603230 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToIntFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToIntFunction.java @@ -38,7 +38,9 @@ static ToIntFunction cast() { * @param the input type * @param the intermediate type */ - static ToIntFunction map(Function f, java.util.function.ToIntFunction g) { + static ToIntFunction map( + Function f, + java.util.function.ToIntFunction g) { return IntFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToLongFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToLongFunction.java index f21c629a4e5..674431642dc 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToLongFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToLongFunction.java @@ -37,7 +37,9 @@ static ToLongFunction cast() { * @param the input type * @param the intermediate type */ - static ToLongFunction map(Function f, java.util.function.ToLongFunction g) { + static ToLongFunction map( + Function f, + java.util.function.ToLongFunction g) { return LongFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToObjectFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToObjectFunction.java index 9e476f903bc..80b61870ea9 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToObjectFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToObjectFunction.java @@ -28,7 +28,7 @@ public interface ToObjectFunction extends TypedFunction, Function * @param the input type * @param the return type */ - static ToObjectFunction of(Function f, GenericType returnType) { + static ToObjectFunction of(Function f, GenericType returnType) { return ObjectFunctions.of(f, returnType); } @@ -65,8 +65,11 @@ static ToObjectFunction identity(GenericType returnTyp * @return the object function * @param the input type * @param the intermediate type + * @param the return type */ - static ToObjectFunction map(Function f, ToObjectFunction g) { + static ToObjectFunction map( + Function f, + ToObjectFunction g) { return ObjectFunctions.map(f, g); } @@ -82,8 +85,12 @@ static ToObjectFunction map(Function f, ToObjectFunction the input type * @param the intermediate type + * @param the return type */ - static ToObjectFunction map(Function f, Function g, GenericType returnType) { + static ToObjectFunction map( + Function f, + Function g, + GenericType returnType) { return ObjectFunctions.map(f, g, returnType); } @@ -99,8 +106,9 @@ static ToObjectFunction map(Function f, Function g, * * @param g the outer function * @return the boolean function + * @param the returned function input type */ - default ToBooleanFunction mapToBoolean(Predicate g) { + default ToBooleanFunction mapToBoolean(Predicate g) { return ToBooleanFunction.map(this, g); } @@ -112,8 +120,9 @@ default ToBooleanFunction mapToBoolean(Predicate g) { * * @param g the outer function * @return the char function + * @param the returned function input type */ - default ToCharFunction mapToChar(ToCharFunction g) { + default ToCharFunction mapToChar(ToCharFunction g) { return ToCharFunction.map(this, g); } @@ -125,8 +134,9 @@ default ToCharFunction mapToChar(ToCharFunction g) { * * @param g the outer function * @return the byte function + * @param the returned function input type */ - default ToByteFunction mapToByte(ToByteFunction g) { + default ToByteFunction mapToByte(ToByteFunction g) { return 
ToByteFunction.map(this, g); } @@ -138,8 +148,9 @@ default ToByteFunction mapToByte(ToByteFunction g) { * * @param g the outer function * @return the short function + * @param the returned function input type */ - default ToShortFunction mapToShort(ToShortFunction g) { + default ToShortFunction mapToShort(ToShortFunction g) { return ToShortFunction.map(this, g); } @@ -151,8 +162,9 @@ default ToShortFunction mapToShort(ToShortFunction g) { * * @param g the outer function * @return the int function + * @param the returned function input type */ - default ToIntFunction mapToInt(java.util.function.ToIntFunction g) { + default ToIntFunction mapToInt(java.util.function.ToIntFunction g) { return ToIntFunction.map(this, g); } @@ -164,8 +176,9 @@ default ToIntFunction mapToInt(java.util.function.ToIntFunction g) { * * @param g the outer function * @return the long function + * @param the returned function input type */ - default ToLongFunction mapToLong(java.util.function.ToLongFunction g) { + default ToLongFunction mapToLong(java.util.function.ToLongFunction g) { return ToLongFunction.map(this, g); } @@ -177,8 +190,9 @@ default ToLongFunction mapToLong(java.util.function.ToLongFunction g) { * * @param g the outer function * @return the float function + * @param the returned function input type */ - default ToFloatFunction mapToFloat(ToFloatFunction g) { + default ToFloatFunction mapToFloat(ToFloatFunction g) { return ToFloatFunction.map(this, g); } @@ -190,8 +204,9 @@ default ToFloatFunction mapToFloat(ToFloatFunction g) { * * @param g the outer function * @return the double function + * @param the returned function input type */ - default ToDoubleFunction mapToDouble(java.util.function.ToDoubleFunction g) { + default ToDoubleFunction mapToDouble(java.util.function.ToDoubleFunction g) { return ToDoubleFunction.map(this, g); } @@ -203,8 +218,10 @@ default ToDoubleFunction mapToDouble(java.util.function.ToDoubleFunction g * * @param g the outer function * @return the object function + * @param the returned function input type + * @param the returned function return type */ - default ToObjectFunction mapToObj(ToObjectFunction g) { + default ToObjectFunction mapToObj(ToObjectFunction g) { return map(this, g); } @@ -216,8 +233,12 @@ default ToObjectFunction mapToObj(ToObjectFunction g) { * * @param g the outer function * @return the object function - */ - default ToObjectFunction mapToObj(Function g, GenericType returnType) { + * @param the returned function input type + * @param the returned function return type + **/ + default ToObjectFunction mapToObj( + Function g, + GenericType returnType) { return map(this, g, returnType); } @@ -237,9 +258,10 @@ default ToObjectFunction mapToObj(Function g, GenericType * @see #mapToLong(java.util.function.ToLongFunction) * @see #mapToFloat(ToFloatFunction) * @see #mapToDouble(java.util.function.ToDoubleFunction) + * @param the returned function input type */ - default ToPrimitiveFunction mapToPrimitive(ToPrimitiveFunction g) { - return ObjectFunctions.mapPrimitive(this, g); + default ToPrimitiveFunction mapToPrimitive(ToPrimitiveFunction g) { + return PrimitiveFunctions.map(this, g); } /** @@ -252,27 +274,26 @@ default ToPrimitiveFunction mapToPrimitive(ToPrimitiveFunction g) { * @return the function * @see #mapToPrimitive(ToPrimitiveFunction) * @see #mapToObj(ToObjectFunction) + * @param the returned function input type */ - default TypedFunction map(TypedFunction g) { - return ObjectFunctions.map(this, g); + default TypedFunction map(TypedFunction g) { + return 
TypedFunctions.map(this, g); } /** * Creates a function by casting to {@code returnType}. * *

- * In the case where {@code returnType().equals(returnType)}, the result is {@code (ObjectFunction) this}. + * In the case where {@code returnType().equals(returnType)}, the result is {@code (ObjectFunction) this}. * Otherwise, the result is equivalent to {@code x -> (R2) this.apply(x)}. * * @param returnType the return type * @return the object function - * @param the return type + * @param the returned function input type + * @param the returned function return type */ - default ToObjectFunction cast(GenericType returnType) { - // noinspection unchecked - return returnType().equals(returnType) - ? (ToObjectFunction) this - : mapToObj(ObjectFunctions.cast(returnType)); + default ToObjectFunction cast(GenericType returnType) { + return ObjectFunctions.castOrMapCast(this, returnType); } @Override diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/ToShortFunction.java b/extensions/protobuf/src/main/java/io/deephaven/functions/ToShortFunction.java index 312ed3cef45..7c4810a78ec 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/functions/ToShortFunction.java +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/ToShortFunction.java @@ -38,7 +38,9 @@ static ToShortFunction cast() { * @param the input type * @param the intermediate type */ - static ToShortFunction map(Function f, ToShortFunction g) { + static ToShortFunction map( + Function f, + ToShortFunction g) { return ShortFunctions.map(f, g); } diff --git a/extensions/protobuf/src/main/java/io/deephaven/functions/TypedFunctions.java b/extensions/protobuf/src/main/java/io/deephaven/functions/TypedFunctions.java new file mode 100644 index 00000000000..46d9bbeeb02 --- /dev/null +++ b/extensions/protobuf/src/main/java/io/deephaven/functions/TypedFunctions.java @@ -0,0 +1,40 @@ +package io.deephaven.functions; + +import java.util.Objects; + +class TypedFunctions { + static TypedFunction cast(TypedFunction f) { + // noinspection unchecked + return (TypedFunction) f; + } + + static TypedFunction map( + ToObjectFunction f, + TypedFunction g) { + return MapVisitor.of(f, g); + } + + private static class MapVisitor implements TypedFunction.Visitor> { + + public static TypedFunction of(ToObjectFunction f, + TypedFunction g) { + return g.walk(new MapVisitor<>(f)); + } + + private final ToObjectFunction f; + + private MapVisitor(ToObjectFunction f) { + this.f = Objects.requireNonNull(f); + } + + @Override + public ToPrimitiveFunction visit(ToPrimitiveFunction g) { + return f.mapToPrimitive(g); + } + + @Override + public ToObjectFunction visit(ToObjectFunction g) { + return f.mapToObj(g); + } + } +} diff --git a/extensions/protobuf/src/test/java/io/deephaven/functions/ToBooleanFunctionTest.java b/extensions/protobuf/src/test/java/io/deephaven/functions/ToBooleanFunctionTest.java index f9828e8ce52..316bf10dff0 100644 --- a/extensions/protobuf/src/test/java/io/deephaven/functions/ToBooleanFunctionTest.java +++ b/extensions/protobuf/src/test/java/io/deephaven/functions/ToBooleanFunctionTest.java @@ -14,51 +14,134 @@ public class ToBooleanFunctionTest { + private static final Object OBJ = new Object(); + + // Specifically create versions that aren't the library's internal objects + + private static ToBooleanFunction myTrue() { + return x -> true; + } + + private static ToBooleanFunction myFalse() { + return x -> false; + } + @Test void ofTrue_() { - assertThat(ofTrue().test(new Object())).isTrue(); + isTrue(ofTrue(), OBJ); + isTrue(myTrue(), OBJ); } @Test void ofFalse_() { - assertThat(ofFalse().test(new 
Object())).isFalse(); + isFalse(ofFalse(), OBJ); + isFalse(myFalse(), OBJ); } @Test void or_() { - assertThat(or(List.of()).test(new Object())).isFalse(); - assertThat(or(List.of(ofFalse())).test(new Object())).isFalse(); - assertThat(or(List.of(ofTrue())).test(new Object())).isTrue(); - assertThat(or(List.of(ofFalse(), ofFalse())).test(new Object())).isFalse(); - assertThat(or(List.of(ofFalse(), ofTrue())).test(new Object())).isTrue(); - assertThat(or(List.of(ofTrue(), ofFalse())).test(new Object())).isTrue(); - assertThat(or(List.of(ofTrue(), ofTrue())).test(new Object())).isTrue(); + isFalse(or(List.of()), OBJ); + + isFalse(or(List.of(ofFalse())), OBJ); + isTrue(or(List.of(ofTrue())), OBJ); + isFalse(or(List.of(ofFalse(), ofFalse())), OBJ); + isTrue(or(List.of(ofFalse(), ofTrue())), OBJ); + isTrue(or(List.of(ofTrue(), ofFalse())), OBJ); + isTrue(or(List.of(ofTrue(), ofTrue())), OBJ); + + isFalse(or(List.of(myFalse())), OBJ); + isTrue(or(List.of(myTrue())), OBJ); + isFalse(or(List.of(myFalse(), myFalse())), OBJ); + isTrue(or(List.of(myFalse(), myTrue())), OBJ); + isTrue(or(List.of(myTrue(), myFalse())), OBJ); + isTrue(or(List.of(myTrue(), myTrue())), OBJ); } @Test void and_() { - assertThat(and(List.of()).test(new Object())).isTrue(); - assertThat(and(List.of(ofFalse())).test(new Object())).isFalse(); - assertThat(and(List.of(ofTrue())).test(new Object())).isTrue(); - assertThat(and(List.of(ofFalse(), ofFalse())).test(new Object())).isFalse(); - assertThat(and(List.of(ofFalse(), ofTrue())).test(new Object())).isFalse(); - assertThat(and(List.of(ofTrue(), ofFalse())).test(new Object())).isFalse(); - assertThat(and(List.of(ofTrue(), ofTrue())).test(new Object())).isTrue(); + isTrue(and(List.of()), OBJ); + + isFalse(and(List.of(ofFalse())), OBJ); + isTrue(and(List.of(ofTrue())), OBJ); + isFalse(and(List.of(ofFalse(), ofFalse())), OBJ); + isFalse(and(List.of(ofFalse(), ofTrue())), OBJ); + isFalse(and(List.of(ofTrue(), ofFalse())), OBJ); + isTrue(and(List.of(ofTrue(), ofTrue())), OBJ); + + isFalse(and(List.of(myFalse())), OBJ); + isTrue(and(List.of(myTrue())), OBJ); + isFalse(and(List.of(myFalse(), myFalse())), OBJ); + isFalse(and(List.of(myFalse(), myTrue())), OBJ); + isFalse(and(List.of(myTrue(), myFalse())), OBJ); + isTrue(and(List.of(myTrue(), myTrue())), OBJ); } @Test void not_() { - assertThat(not(ofTrue()).test(new Object())).isFalse(); - assertThat(not(ofFalse()).test(new Object())).isTrue(); + isFalse(not(ofTrue()), OBJ); + isTrue(not(ofFalse()), OBJ); + + isFalse(not(myTrue()), OBJ); + isTrue(not(myFalse()), OBJ); } @Test void map_() { final ToBooleanFunction trimIsFoo = map(String::trim, "foo"::equals); - assertThat(trimIsFoo.test("")).isFalse(); - assertThat(trimIsFoo.test(" ")).isFalse(); - assertThat(trimIsFoo.test("foo")).isTrue(); - assertThat(trimIsFoo.test(" foo ")).isTrue(); - assertThat(trimIsFoo.test(" foo bar")).isFalse(); + isFalse(trimIsFoo, ""); + isFalse(trimIsFoo, " "); + isTrue(trimIsFoo, "foo"); + isTrue(trimIsFoo, " foo "); + isFalse(trimIsFoo, " foo bar"); + } + + private static void isTrue(ToBooleanFunction f, X x) { + assertThat(f.test(x)).isTrue(); + assertThat(f.negate().test(x)).isFalse(); + + assertThat(f.or(ofTrue()).test(x)).isTrue(); + assertThat(f.and(ofTrue()).test(x)).isTrue(); + assertThat(f.or(ofFalse()).test(x)).isTrue(); + assertThat(f.and(ofFalse()).test(x)).isFalse(); + + assertThat(ToBooleanFunction.ofTrue().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunction.ofTrue().and(f).test(x)).isTrue(); + 
assertThat(ToBooleanFunction.ofFalse().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunction.ofFalse().and(f).test(x)).isFalse(); + + assertThat(f.or(myTrue()).test(x)).isTrue(); + assertThat(f.and(myTrue()).test(x)).isTrue(); + assertThat(f.or(myFalse()).test(x)).isTrue(); + assertThat(f.and(myFalse()).test(x)).isFalse(); + + assertThat(ToBooleanFunctionTest.myTrue().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunctionTest.myTrue().and(f).test(x)).isTrue(); + assertThat(ToBooleanFunctionTest.myFalse().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunctionTest.myFalse().and(f).test(x)).isFalse(); + } + + private static void isFalse(ToBooleanFunction f, X x) { + assertThat(f.test(x)).isFalse(); + assertThat(f.negate().test(x)).isTrue(); + + assertThat(f.or(ofTrue()).test(x)).isTrue(); + assertThat(f.and(ofTrue()).test(x)).isFalse(); + assertThat(f.or(ofFalse()).test(x)).isFalse(); + assertThat(f.and(ofFalse()).test(x)).isFalse(); + + assertThat(ToBooleanFunction.ofTrue().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunction.ofTrue().and(f).test(x)).isFalse(); + assertThat(ToBooleanFunction.ofFalse().or(f).test(x)).isFalse(); + assertThat(ToBooleanFunction.ofFalse().and(f).test(x)).isFalse(); + + assertThat(f.or(myTrue()).test(x)).isTrue(); + assertThat(f.and(myTrue()).test(x)).isFalse(); + assertThat(f.or(myFalse()).test(x)).isFalse(); + assertThat(f.and(myFalse()).test(x)).isFalse(); + + assertThat(ToBooleanFunctionTest.myTrue().or(f).test(x)).isTrue(); + assertThat(ToBooleanFunctionTest.myTrue().and(f).test(x)).isFalse(); + assertThat(ToBooleanFunctionTest.myFalse().or(f).test(x)).isFalse(); + assertThat(ToBooleanFunctionTest.myFalse().and(f).test(x)).isFalse(); } } From 2bf5958f14033d685206ae23d0ca18d06f99eea3 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Tue, 12 Sep 2023 21:47:36 -0400 Subject: [PATCH 03/15] Tweaks to support building the cpp-client in older linux distros/GCC versions. (#4481) * Tweaks to support building the cpp-client in older linux distros/GCC versions. * Update cpp-client-base image version and link to build dependencies in cpp client README.md. * Indentation fix. * Followup to review comments from Corey. --- cpp-client/README.md | 34 +++++++++++++++---- cpp-client/build.gradle | 2 +- cpp-client/deephaven/CMakeLists.txt | 2 +- cpp-client/deephaven/dhclient/CMakeLists.txt | 2 +- .../include/public/deephaven/client/client.h | 8 ----- .../public/deephaven/client/client_options.h | 8 ++--- cpp-client/deephaven/dhclient/src/client.cc | 2 -- .../deephaven/dhclient/src/client_options.cc | 5 +-- cpp-client/deephaven/dhcore/CMakeLists.txt | 2 +- .../public/deephaven/dhcore/utility/utility.h | 8 +++-- .../CMakeLists.txt | 1 - .../CMakeLists.txt | 1 - .../deephaven/examples/demos/CMakeLists.txt | 1 - .../examples/hello_world/CMakeLists.txt | 1 - .../examples/read_csv/CMakeLists.txt | 1 - .../CMakeLists.txt | 1 - .../examples/table_cleanup/CMakeLists.txt | 2 +- cpp-client/deephaven/tests/CMakeLists.txt | 2 +- cpp-client/deephaven/tests/select_test.cc | 2 +- .../cpp-client-base/gradle.properties | 2 +- 20 files changed, 47 insertions(+), 40 deletions(-) diff --git a/cpp-client/README.md b/cpp-client/README.md index d1642cf6abb..55e426d5271 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -11,7 +11,7 @@ C++ compiler and tool suite (cmake etc). 3. 
Get build tools ``` sudo apt update - sudo apt install curl git g++ cmake make build-essential zlib1g-dev libssl-dev pkg-config + sudo apt install curl git g++ cmake make build-essential zlib1g-dev libbz2-dev libssl-dev pkg-config ``` 4. Make a new directory for the Deephaven source code and assign that directory @@ -33,7 +33,7 @@ C++ compiler and tool suite (cmake etc). Get the `build-dependencies.sh` script from Deephaven's base images repository at the correct version. You can download it directly from the link - https://raw.githubusercontent.com/deephaven/deephaven-base-images/53081b141aebea4c43238ddae233be49db28cf7b/cpp-client/build-dependencies.sh + https://raw.githubusercontent.com/deephaven/deephaven-base-images/166befad816acbc9dff55d2d8354d60612cd9a8a/cpp-client/build-dependencies.sh (this script is also used from our automated tools, to generate a docker image to support tests runs; that's why it lives in a separate repo). The script downloads, builds and installs the dependent libraries @@ -41,9 +41,9 @@ C++ compiler and tool suite (cmake etc). Decide on a directory for the dependencies to live (eg, "$HOME/dhcpp"). Create that directory and save the script there. - The two main build types of a standard cmake build are supported, - `Release` and `Debug`. By default. `build-dependencies.sh` - creates a `Debug` build. To create a `Release` build, set the + The three main build types of a standard cmake build are supported, + `Release`, `Debug` and `RelWithDebInfo`. By default. `build-dependencies.sh` + creates a `RelWithDebInfo` build. To create a `Release` build, set the environment variable `BUILD_TYPE=Release` (1) Edit your local copy of the script if necessary to reflect your selection @@ -60,7 +60,7 @@ C++ compiler and tool suite (cmake etc). # If the directory already exists from a previous attempt, ensure is clean/empty mkdir -p $DHCPP cd $DHCPP - wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/53081b141aebea4c43238ddae233be49db28cf7b/cpp-client/build-dependencies.sh + wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/166befad816acbc9dff55d2d8354d60612cd9a8a/cpp-client/build-dependencies.sh chmod +x ./build-dependencies.sh # Maybe edit build-dependencies.sh to reflect choices of build tools and build target, if you # want anything different than defaults; defaults are tested to work, @@ -102,7 +102,7 @@ C++ compiler and tool suite (cmake etc). cd $DHSRC/deephaven-core/cpp-client/deephaven/ mkdir build && cd build cmake -DCMAKE_INSTALL_PREFIX=${DHCPP}/local \ - -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=ON .. && \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON .. && \ make -j$NCPUS install ``` @@ -126,6 +126,26 @@ C++ compiler and tool suite (cmake etc). ./tests ``` +10. Building in different distributions or with older toolchains. + While we don't support other linux distributions or GCC versions earlier + than 11, this section provides some notes that may help you + in that situation. + + * GCC 8 mixed with older versions of GNU as/binutils may fail to compile + `roaring.c` with an error similar to: + ``` + /tmp/cczCvQKd.s: Assembler messages: + /tmp/cczCvQKd.s:45826: Error: no such instruction: `vpcompressb %zmm0,%zmm1{%k2}' + /tmp/cczCvQKd.s:46092: Error: no such instruction: `vpcompressb %zmm0,%zmm1{%k1}' + ``` + In that case, add `-DCMAKE_C_FLAGS=-DCROARING_COMPILER_SUPPORTS_AVX512=0` + to the list of arguments to `cmake`. 
+ + * Some platforms combining old versions of GCC and cmake may fail + to set the cmake C++ standard to 17 without explicitly adding + `-DCMAKE_CXX_STANDARD=17` to the list of arguments to `cmake`. + Note the default mode for C++ is `-std=gnu++17` for GCC 11. + Notes (1) The standard assumptions for `Debug` and `Release` apply here. With a `Debug` build you get debug information which is useful during diff --git a/cpp-client/build.gradle b/cpp-client/build.gradle index e0959beb4b5..faecbd330b7 100644 --- a/cpp-client/build.gradle +++ b/cpp-client/build.gradle @@ -90,7 +90,7 @@ def testCppClient = Docker.registerDockerTask(project, 'testCppClient') { cd /cpp-client/deephaven/build; \\ . /cpp-client/deps/env.sh; \\ cmake -DCMAKE_INSTALL_PREFIX=/cpp-client/install \\ - -DCMAKE_BUILD_TYPE=Debug \\ + -DCMAKE_BUILD_TYPE=Release \\ -DBUILD_SHARED_LIBS=ON \\ .. ; \\ make -j\$NCPUS install diff --git a/cpp-client/deephaven/CMakeLists.txt b/cpp-client/deephaven/CMakeLists.txt index 30154613b3c..71e2a305c28 100644 --- a/cpp-client/deephaven/CMakeLists.txt +++ b/cpp-client/deephaven/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.14) if(TARGET client) # The library has already been built (i.e. through some diff --git a/cpp-client/deephaven/dhclient/CMakeLists.txt b/cpp-client/deephaven/dhclient/CMakeLists.txt index 9e4d1080d8c..8b366e25cf2 100644 --- a/cpp-client/deephaven/dhclient/CMakeLists.txt +++ b/cpp-client/deephaven/dhclient/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.14) project(dhclient) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index 67de7de26ed..0c5e151ce94 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -87,18 +87,10 @@ class TableHandleManager { * Constructor. Used internally. */ explicit TableHandleManager(std::shared_ptr impl); - /** - * Copy constructor - */ - TableHandleManager(const TableHandleManager &other) noexcept; /** * Move constructor */ TableHandleManager(TableHandleManager &&other) noexcept; - /** - * Copy assigment operator. - */ - TableHandleManager &operator=(const TableHandleManager &other) noexcept; /** * Move assigment operator. */ diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client_options.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client_options.h index 93f65da80f2..3be53541786 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client_options.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client_options.h @@ -27,9 +27,9 @@ class ClientOptions { */ ClientOptions(); /** - * Copy constructor - */ - ClientOptions(const ClientOptions &other) noexcept; + * Copy constructor + */ + ClientOptions(const ClientOptions &other); /** * Move constructor */ @@ -37,7 +37,7 @@ class ClientOptions { /** * Copy assigment operator. */ - ClientOptions &operator=(const ClientOptions &other) noexcept; + ClientOptions &operator=(const ClientOptions &other); /** * Move assigment operator. 
*/ diff --git a/cpp-client/deephaven/dhclient/src/client.cc b/cpp-client/deephaven/dhclient/src/client.cc index bce5e8ac029..47bb40d3adb 100644 --- a/cpp-client/deephaven/dhclient/src/client.cc +++ b/cpp-client/deephaven/dhclient/src/client.cc @@ -104,9 +104,7 @@ bool Client::RemoveOnCloseCallback(OnCloseCbId cb_id) { TableHandleManager::TableHandleManager() = default; TableHandleManager::TableHandleManager(std::shared_ptr impl) : impl_(std::move(impl)) {} -TableHandleManager::TableHandleManager(const TableHandleManager &other) noexcept = default; TableHandleManager::TableHandleManager(TableHandleManager &&other) noexcept = default; -TableHandleManager &TableHandleManager::operator=(const TableHandleManager &other) noexcept = default; TableHandleManager &TableHandleManager::operator=(TableHandleManager &&other) noexcept = default; TableHandleManager::~TableHandleManager() = default; diff --git a/cpp-client/deephaven/dhclient/src/client_options.cc b/cpp-client/deephaven/dhclient/src/client_options.cc index 01f172eead4..95a935af17d 100644 --- a/cpp-client/deephaven/dhclient/src/client_options.cc +++ b/cpp-client/deephaven/dhclient/src/client_options.cc @@ -13,10 +13,11 @@ ClientOptions::ClientOptions() { SetSessionType("python"); } -ClientOptions::ClientOptions(const ClientOptions &other) noexcept = default; +ClientOptions::ClientOptions(const ClientOptions &other) = default; ClientOptions::ClientOptions(ClientOptions &&other) noexcept = default; -ClientOptions &ClientOptions::operator=(const ClientOptions &other) noexcept = default; +ClientOptions &ClientOptions::operator=(const ClientOptions &other) = default; ClientOptions &ClientOptions::operator=(ClientOptions &&other) noexcept = default; + ClientOptions::~ClientOptions() = default; ClientOptions &ClientOptions::SetDefaultAuthentication() { diff --git a/cpp-client/deephaven/dhcore/CMakeLists.txt b/cpp-client/deephaven/dhcore/CMakeLists.txt index 1ac7d98194f..e9c11beda83 100644 --- a/cpp-client/deephaven/dhcore/CMakeLists.txt +++ b/cpp-client/deephaven/dhcore/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.14) project(dhcore) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h index 828d5116255..7829c75c99a 100644 --- a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h +++ b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h @@ -61,9 +61,11 @@ bool DumpFormat(std::ostream &s, const char **fmt, bool placeholder_expected); std::ostream &Streamf(std::ostream &s, const char *fmt); template -std::ostream &Streamf(std::ostream &s, const char *fmt, const HEAD &head, REST &&... rest) { - (void) deephaven::dhcore::utility::internal::DumpFormat(s, &fmt, true); - s << head; +std::ostream &Streamf(std::ostream &s, const char *fmt, HEAD &&head, REST &&... 
rest) { + if (!deephaven::dhcore::utility::internal::DumpFormat(s, &fmt, true)) { + return s; + } + s << std::forward(head); return Streamf(s, fmt, std::forward(rest)...); } diff --git a/cpp-client/deephaven/examples/create_table_with_arrow_flight/CMakeLists.txt b/cpp-client/deephaven/examples/create_table_with_arrow_flight/CMakeLists.txt index 9f67ba39749..8654e461bc7 100644 --- a/cpp-client/deephaven/examples/create_table_with_arrow_flight/CMakeLists.txt +++ b/cpp-client/deephaven/examples/create_table_with_arrow_flight/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(create_table_with_arrow_flight) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/create_table_with_table_maker/CMakeLists.txt b/cpp-client/deephaven/examples/create_table_with_table_maker/CMakeLists.txt index cda5bc343b6..0f3623c939e 100644 --- a/cpp-client/deephaven/examples/create_table_with_table_maker/CMakeLists.txt +++ b/cpp-client/deephaven/examples/create_table_with_table_maker/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(create_table_with_table_maker) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/demos/CMakeLists.txt b/cpp-client/deephaven/examples/demos/CMakeLists.txt index bcecd05768d..542b90f8a9c 100644 --- a/cpp-client/deephaven/examples/demos/CMakeLists.txt +++ b/cpp-client/deephaven/examples/demos/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(demos) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/hello_world/CMakeLists.txt b/cpp-client/deephaven/examples/hello_world/CMakeLists.txt index 88bc6b84bf0..3c814dde1ec 100644 --- a/cpp-client/deephaven/examples/hello_world/CMakeLists.txt +++ b/cpp-client/deephaven/examples/hello_world/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(hello_world) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/read_csv/CMakeLists.txt b/cpp-client/deephaven/examples/read_csv/CMakeLists.txt index cdead265e81..3343d8542ae 100644 --- a/cpp-client/deephaven/examples/read_csv/CMakeLists.txt +++ b/cpp-client/deephaven/examples/read_csv/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(read_csv) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/read_table_with_arrow_flight/CMakeLists.txt b/cpp-client/deephaven/examples/read_table_with_arrow_flight/CMakeLists.txt index cc1a7aa4c19..0b2308318a0 100644 --- a/cpp-client/deephaven/examples/read_table_with_arrow_flight/CMakeLists.txt +++ b/cpp-client/deephaven/examples/read_table_with_arrow_flight/CMakeLists.txt @@ -1,4 +1,3 @@ -cmake_minimum_required(VERSION 3.16) project(read_table_with_arrow_flight) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/examples/table_cleanup/CMakeLists.txt b/cpp-client/deephaven/examples/table_cleanup/CMakeLists.txt index 253486d3559..80b93013d0a 100644 --- a/cpp-client/deephaven/examples/table_cleanup/CMakeLists.txt +++ b/cpp-client/deephaven/examples/table_cleanup/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.14) project(table_cleanup) set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/tests/CMakeLists.txt b/cpp-client/deephaven/tests/CMakeLists.txt index 4ed3c99f7f1..c5710d66392 100644 --- a/cpp-client/deephaven/tests/CMakeLists.txt +++ b/cpp-client/deephaven/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.14) project(tests) 
set(CMAKE_CXX_STANDARD 17) diff --git a/cpp-client/deephaven/tests/select_test.cc b/cpp-client/deephaven/tests/select_test.cc index 54b6b980234..b7e8d8ecefb 100644 --- a/cpp-client/deephaven/tests/select_test.cc +++ b/cpp-client/deephaven/tests/select_test.cc @@ -39,7 +39,7 @@ TEST_CASE("Support all types", "[select]") { byte_data.push_back(i * 11); short_data.push_back(i * 1000); int_data.push_back(i * 1'000'000); - long_data.push_back(i * 1'000'000'000); + long_data.push_back(static_cast(i) * 1'000'000'000); float_data.push_back(i * 123.456F); doubleData.push_back(i * 987654.321); stringData.push_back(Stringf("test %o", i)); diff --git a/docker/registry/cpp-client-base/gradle.properties b/docker/registry/cpp-client-base/gradle.properties index bb8d6d4af2a..72382af8d7d 100644 --- a/docker/registry/cpp-client-base/gradle.properties +++ b/docker/registry/cpp-client-base/gradle.properties @@ -7,6 +7,6 @@ deephaven.registry.imageName=ghcr.io/deephaven/cpp-client-base:latest # It is in two different parts of the file (text and command examples). # If you have the image sha for this file, you can get the commit sha for the README using: # docker buildx imagetools inspect ghcr.io/deephaven/cpp-client-base@sha256:$IMAGESHA --format '{{ $metadata := index .Provenance.SLSA.metadata "https://mobyproject.org/buildkit@v1#metadata" }} {{ $metadata.vcs.revision }}' -deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:105b358c20bd004daa67a5a947e0da1190568854da07adf7b2ac66e2c78f9287 +deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:74034b2c26a26033258a8d0808f45d6a010300c07a9135260fcb0c1691de04dd # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base deephaven.registry.platform=linux/amd64 From fa7ca6d1425f09bd3b3956f3b3da6b6474002eaa Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:29:56 -0500 Subject: [PATCH 04/15] Add `agg_all_by()` to the R client and add error handling around empty aggregations in `agg_by()` (#4465) * Support agg_all_by at the R level, provide a stable API that allows the underlying C++ to change freely * Snake case * A little more snake case * Helpful errors in agg_by * Code review suggestions --- R/rdeephaven/R/aggregate_wrapper.R | 54 ++-- R/rdeephaven/R/helper_functions.R | 2 +- R/rdeephaven/R/table_handle_wrapper.R | 12 +- .../inst/tests/testthat/test_agg_by.R | 230 ++++++++++++++++++ R/rdeephaven/src/client.cpp | 10 +- 5 files changed, 272 insertions(+), 36 deletions(-) diff --git a/R/rdeephaven/R/aggregate_wrapper.R b/R/rdeephaven/R/aggregate_wrapper.R index ccfb0689124..daafae48361 100644 --- a/R/rdeephaven/R/aggregate_wrapper.R +++ b/R/rdeephaven/R/aggregate_wrapper.R @@ -1,95 +1,85 @@ -#' @export Aggregation <- R6Class("Aggregation", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, - initialize = function(aggregation) { - if (class(aggregation) != "Rcpp_INTERNAL_Aggregate") { - stop("'aggregation' should be an internal Deephaven Aggregation. If you're seeing this,\n you are trying to call the constructor of an Aggregation directly, which is not advised.\n Please use one of the provided aggregation functions instead.") + .internal_num_cols = NULL, + .internal_agg_name = NULL, + initialize = function(aggregation, agg_name, ...) { + self$.internal_agg_name <- agg_name + args <- list(...) 
+ if (any(names(args) == "cols")) { + self$.internal_num_cols <- length(args$cols) } - self$.internal_rcpp_object <- aggregation + self$.internal_rcpp_object <- do.call(aggregation, args) } ) ) ### All of the functions below return an instance of the above class -#' @export agg_first <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_first(cols))) + return(Aggregation$new(INTERNAL_agg_first, "agg_first", cols=cols)) } -#' @export agg_last <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_last(cols))) + return(Aggregation$new(INTERNAL_agg_last, "agg_last", cols=cols)) } -#' @export agg_min <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_min(cols))) + return(Aggregation$new(INTERNAL_agg_min, "agg_min", cols=cols)) } -#' @export agg_max <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_max(cols))) + return(Aggregation$new(INTERNAL_agg_max, "agg_max", cols=cols)) } -#' @export agg_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_sum(cols))) + return(Aggregation$new(INTERNAL_agg_sum, "agg_sum", cols=cols)) } -#' @export agg_abs_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_abs_sum(cols))) + return(Aggregation$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols=cols)) } -#' @export agg_avg <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_avg(cols))) + return(Aggregation$new(INTERNAL_agg_avg, "agg_avg", cols=cols)) } -#' @export agg_w_avg <- function(wcol, cols = character()) { verify_string("wcol", wcol, TRUE) verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_w_avg(wcol, cols))) + return(Aggregation$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol=wcol, cols=cols)) } -#' @export agg_median <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_median(cols))) + return(Aggregation$new(INTERNAL_agg_median, "agg_median", cols=cols)) } -#' @export agg_var <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_var(cols))) + return(Aggregation$new(INTERNAL_agg_var, "agg_var", cols=cols)) } -#' @export agg_std <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_std(cols))) + return(Aggregation$new(INTERNAL_agg_std, "agg_std", cols=cols)) } -#' @export agg_percentile <- function(percentile, cols = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_percentile(percentile, cols))) + return(Aggregation$new(INTERNAL_agg_percentile, "agg_percentile", percentile=percentile, cols=cols)) } -#' @export agg_count <- function(col) { verify_string("col", col, TRUE) - return(Aggregation$new(INTERNAL_agg_count(col))) -} + return(Aggregation$new(INTERNAL_agg_count, "agg_count", col=col)) +} \ No newline at end of file diff --git a/R/rdeephaven/R/helper_functions.R b/R/rdeephaven/R/helper_functions.R index f378f089408..165834a97cb 100644 --- a/R/rdeephaven/R/helper_functions.R +++ b/R/rdeephaven/R/helper_functions.R @@ -20,7 +20,7 @@ verify_type <- function(arg_name, candidate, required_type, message_type_name, i } else { stop(paste0("'", arg_name, "' must 
be a single ", message_type_name, ". Got an object of class ", first_class(candidate), ".")) } - } else if (is_scalar && (length(candidate) != 1)) { + } else if (is_scalar && (length(c(candidate)) != 1)) { stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got a vector of length ", length(candidate), ".")) } } diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index 4e6c481279f..63c2b8071d9 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -107,9 +107,19 @@ TableHandle <- R6Class("TableHandle", verify_type("aggs", aggs, "Aggregation", "Deephaven Aggregation", FALSE) verify_string("by", by, FALSE) aggs <- c(aggs) + for (i in 1:length(aggs)) { + if (!is.null(aggs[[i]]$.internal_num_cols) && aggs[[i]]$.internal_num_cols == 0) { + stop(paste0("Aggregations with no columns cannot be used in 'agg_by'. Got '", aggs[[i]]$.internal_agg_name, "' at index ", i, " with an empty 'cols' argument.")) + } + } unwrapped_aggs <- lapply(aggs, strip_r6_wrapping) return(TableHandle$new(self$.internal_rcpp_object$agg_by(unwrapped_aggs, by))) }, + agg_all_by = function(agg, by = character()) { + verify_type("agg", agg, "Aggregation", "Deephaven Aggregation", TRUE) + verify_string("by", by, FALSE) + return(TableHandle$new(self$.internal_rcpp_object$agg_all_by(agg$.internal_rcpp_object, by))) + }, first_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$first_by(by))) @@ -170,7 +180,7 @@ TableHandle <- R6Class("TableHandle", verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$percentile_by(percentile, by))) }, - count_by = function(col = "n", by = character()) { + count_by = function(col, by = character()) { verify_string("col", col, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$count_by(col, by))) diff --git a/R/rdeephaven/inst/tests/testthat/test_agg_by.R b/R/rdeephaven/inst/tests/testthat/test_agg_by.R index 12f30300f53..4669f3440a5 100644 --- a/R/rdeephaven/inst/tests/testthat/test_agg_by.R +++ b/R/rdeephaven/inst/tests/testthat/test_agg_by.R @@ -55,6 +55,8 @@ setup <- function() { )) } +##### TESTING GOOD INPUTS ##### + test_that("agg_first behaves as expected", { data <- setup() @@ -105,6 +107,21 @@ test_that("agg_first behaves as expected", { agg_by(agg_first(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), first)) + new_th7 <- data$th5$ + agg_all_by(agg_first(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), first)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_first(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -159,6 +176,21 @@ test_that("agg_last behaves as expected", { agg_by(agg_last(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), last)) + new_th7 <- data$th5$ + agg_all_by(agg_last(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + 
group_by(X, Y) %>% + summarise(across(everything(), last)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_last(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -213,6 +245,21 @@ test_that("agg_min behaves as expected", { agg_by(agg_min(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), min)) + new_th7 <- data$th5$ + agg_all_by(agg_min(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), min)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_min(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -267,6 +314,21 @@ test_that("agg_max behaves as expected", { agg_by(agg_max(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), max)) + new_th7 <- data$th5$ + agg_all_by(agg_max(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), max)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_max(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -321,6 +383,23 @@ test_that("agg_sum behaves as expected", { agg_by(agg_sum(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), sum)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_sum(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), sum)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_sum(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -375,6 +454,23 @@ test_that("agg_abs_sum behaves as expected", { agg_by(agg_abs_sum(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), ~ sum(abs(.x)))) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_abs_sum(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), ~ sum(abs(.x)))) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_abs_sum(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -429,6 +525,23 @@ test_that("agg_avg behaves as expected", { agg_by(agg_avg(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + 
select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), mean)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_avg(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), mean)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_avg(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -493,6 +606,25 @@ test_that("agg_w_avg behaves as expected", { agg_by(agg_w_avg("weights", c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), ~ weighted.mean(.x, Number2))) %>% + select(-Number2) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_w_avg("Number2"), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), ~ weighted.mean(.x, Number2))) %>% + select(-Number2) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_w_avg("Number2"), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -547,6 +679,23 @@ test_that("agg_median behaves as expected", { agg_by(agg_median(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), median)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_median(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), median)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_median(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -601,6 +750,23 @@ test_that("agg_var behaves as expected", { agg_by(agg_var(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), var)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_var(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), var)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_var(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -655,6 +821,23 @@ test_that("agg_std behaves as expected", { agg_by(agg_std(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), sd)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_std(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + 
summarise(across(everything(), sd)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_std(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -690,6 +873,26 @@ test_that("agg_percentile behaves as expected", { agg_by(agg_percentile(0.3, c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th3), new_df3) + + new_df4 <- data.frame( + X = c("A", "B", "C"), + Number1 = c(50, -44, -70), + Number2 = c(-50, 76, 130) + ) + new_th4 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_percentile(0.4), "X") + expect_equal(as.data.frame(new_th4), new_df4) + + new_df5 <- data.frame( + X = c("A", "B", "A", "C", "B", "B", "C", "B", "A", "C"), + Y = c("M", "N", "O", "N", "P", "O", "M", "M", "P", "P"), + Number1 = c(50, -44, 1, 11, -66, 99, -70, 86, -45, 0), + Number2 = c(-55, 76, 12, 4, 137, 45, 214, -6, 34, -76) + ) + new_th5 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_percentile(0.4), c("X", "Y")) + expect_equal(as.data.frame(new_th4), new_df4) data$client$close() }) @@ -738,6 +941,33 @@ test_that("agg_count behaves as expected", { agg_by(agg_count("n"), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_th7 <- data$th5$ + agg_all_by(agg_count("n"), "X")$ + sort("X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb5)) + + new_th8 <- data$th6$ + agg_all_by(agg_count("n"), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb6)) + + data$client$close() +}) + +##### TESTING BAD INPUTS ##### + +test_that("agg_by behaves nicely when given bad input", { + data <- setup() + + expect_error(data$th1$agg_by(agg_first()), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_first' at index 1 with an empty 'cols' argument.") + + expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last())), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_last' at index 2 with an empty 'cols' argument.") + + expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last("int_col"), agg_count("n"), agg_avg())), + "Aggregations with no columns cannot be used in 'agg_by'. 
Got 'agg_avg' at index 4 with an empty 'cols' argument.") data$client$close() }) diff --git a/R/rdeephaven/src/client.cpp b/R/rdeephaven/src/client.cpp index df1f6a4360c..150ff74e966 100644 --- a/R/rdeephaven/src/client.cpp +++ b/R/rdeephaven/src/client.cpp @@ -152,7 +152,12 @@ class TableHandleWrapper { TableHandleWrapper* AggBy(Rcpp::List aggregations, std::vector group_by_columns) { std::vector converted_aggregations = convertRcppListToVectorOfTypeAggregate(aggregations); return new TableHandleWrapper(internal_tbl_hdl.By(deephaven::client::AggregateCombo::Create(converted_aggregations), group_by_columns)); - } + }; + + TableHandleWrapper* AggAllBy(AggregateWrapper &aggregation, std::vector group_by_columns) { + std::vector converted_aggregation = {aggregation.internal_agg_op}; + return new TableHandleWrapper(internal_tbl_hdl.By(deephaven::client::AggregateCombo::Create(converted_aggregation), group_by_columns)); + }; TableHandleWrapper* FirstBy(std::vector cols) { return new TableHandleWrapper(internal_tbl_hdl.FirstBy(cols)); @@ -404,7 +409,6 @@ class ClientWrapper { return new TableHandleWrapper(internal_tbl_hdl_mngr.TimeTable(period_ISO, start_time_ISO)); }; - TableHandleWrapper* MakeTableHandleFromTicket(std::string ticket) { return new TableHandleWrapper(internal_tbl_hdl_mngr.MakeTableHandleFromTicket(ticket)); } @@ -527,6 +531,8 @@ RCPP_MODULE(DeephavenInternalModule) { .method("ungroup", &TableHandleWrapper::Ungroup) .method("agg_by", &TableHandleWrapper::AggBy) + .method("agg_all_by", &TableHandleWrapper::AggAllBy) + .method("first_by", &TableHandleWrapper::FirstBy) .method("last_by", &TableHandleWrapper::LastBy) .method("head_by", &TableHandleWrapper::HeadBy) From a00fa05f366bf74d7222d58d65fe927e96dd18eb Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Wed, 13 Sep 2023 18:54:06 -0500 Subject: [PATCH 05/15] Removed ParquetHadoop module (#4457) Removed duplicate code copied from Apache parquet-hadoop project Co-authored-by: Colin Alworth --- NOTICE.md | 2 - ParquetHadoop/LICENSE | 202 --- ParquetHadoop/NOTICE | 116 -- ParquetHadoop/build.gradle | 62 - ParquetHadoop/gradle.properties | 1 - .../tempfix/ParquetMetadataConverter.java | 1538 ----------------- buildSrc/src/main/groovy/Classpaths.groovy | 28 + extensions/parquet/base/build.gradle | 2 +- .../parquet/base/ColumnWriterImpl.java | 2 +- .../parquet/base/ParquetFileWriter.java | 2 +- extensions/parquet/compression/build.gradle | 7 +- extensions/parquet/table/build.gradle | 2 + .../parquet/table/ParquetSchemaReader.java | 2 +- .../deephaven/parquet/table/ParquetTools.java | 2 +- .../layout/ParquetMetadataFileLayout.java | 2 +- .../table/location/ParquetColumnLocation.java | 2 +- .../location/ParquetTableLocationKey.java | 2 +- settings.gradle | 2 - 18 files changed, 43 insertions(+), 1933 deletions(-) delete mode 100644 ParquetHadoop/LICENSE delete mode 100644 ParquetHadoop/NOTICE delete mode 100644 ParquetHadoop/build.gradle delete mode 100644 ParquetHadoop/gradle.properties delete mode 100644 ParquetHadoop/src/main/java/io/deephaven/parquet/base/tempfix/ParquetMetadataConverter.java diff --git a/NOTICE.md b/NOTICE.md index 995681efa31..8e17bb71e23 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -17,6 +17,4 @@ Deephaven Community License. See the `LICENSE` and `NOTICE` files in these directories for more information. 
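
As a cross-reference for the `agg_all_by()` addition above: the Rcpp `AggAllBy` wrapper reduces to wrapping a single aggregation in an `AggregateCombo` and calling `TableHandle::By` with the grouping columns. The sketch below shows that same call pattern directly against the C++ client; `AggregateCombo::Create` and `By` appear in the wrapper code above, while the `Aggregate::Avg` factory name and the header include are assumptions about the C++ API at this version.

```cpp
// Rough sketch of the call pattern behind the R agg_all_by() wrapper: one aggregation,
// created with no explicit columns, wrapped in an AggregateCombo and applied with the
// grouping columns (mirrors AggAllBy in R/rdeephaven/src/client.cpp above).
// Aggregate::Avg is an assumed factory name.
#include <string>
#include <vector>
#include "deephaven/client/client.h"

using deephaven::client::Aggregate;
using deephaven::client::AggregateCombo;
using deephaven::client::TableHandle;

TableHandle AvgAllBy(TableHandle table, std::vector<std::string> group_by_columns) {
  // An aggregation built with an empty column list is applied to every non-grouping
  // column, which is the behavior the new agg_all_by R tests above exercise.
  auto combo = AggregateCombo::Create({Aggregate::Avg(std::vector<std::string>{})});
  return table.By(combo, std::move(group_by_columns));
}
```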
* [Container](Container): Apache-2.0 -* [py/jpy](py/jpy): Apache-2.0 * [style](style): Apache-2.0 -* [ParquetHadoop](ParquetHadoop): Apache-2.0 diff --git a/ParquetHadoop/LICENSE b/ParquetHadoop/LICENSE deleted file mode 100644 index d6456956733..00000000000 --- a/ParquetHadoop/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/ParquetHadoop/NOTICE b/ParquetHadoop/NOTICE deleted file mode 100644 index efdd44573d6..00000000000 --- a/ParquetHadoop/NOTICE +++ /dev/null @@ -1,116 +0,0 @@ -Deephaven ParquetHadoop -Copyright 2021 Deephaven Data Labs - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This product includes code from Apache Parquet Hadoop (https://github.com/apache/parquet-mr), -which is available under the Apache 2.0 License. The project includes the following notice: - - Apache Parquet MR (Incubating) - Copyright 2014 The Apache Software Foundation - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - - -------------------------------------------------------------------------------- - - This product includes parquet-tools, initially developed at ARRIS, Inc. with - the following copyright notice: - - Copyright 2013 ARRIS, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - -------------------------------------------------------------------------------- - - This product includes parquet-protobuf, initially developed by Lukas Nalezenc - with the following copyright notice: - - Copyright 2013 Lukas Nalezenec. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. - - -------------------------------------------------------------------------------- - - This product includes code from Apache Avro, which includes the following in - its NOTICE file: - - Apache Avro - Copyright 2010-2015 The Apache Software Foundation - - This product includes software developed at - The Apache Software Foundation (http://www.apache.org/). - - -------------------------------------------------------------------------------- - - This project includes code from Kite, developed at Cloudera, Inc. with - the following copyright notice: - - | Copyright 2013 Cloudera Inc. - | - | Licensed under the Apache License, Version 2.0 (the "License"); - | you may not use this file except in compliance with the License. - | You may obtain a copy of the License at - | - | http://www.apache.org/licenses/LICENSE-2.0 - | - | Unless required by applicable law or agreed to in writing, software - | distributed under the License is distributed on an "AS IS" BASIS, - | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - | See the License for the specific language governing permissions and - | limitations under the License. - - -------------------------------------------------------------------------------- - - This project includes code from Netflix, Inc. with the following copyright - notice: - - | Copyright 2016 Netflix, Inc. - | - | Licensed under the Apache License, Version 2.0 (the "License"); - | you may not use this file except in compliance with the License. - | You may obtain a copy of the License at - | - | http://www.apache.org/licenses/LICENSE-2.0 - | - | Unless required by applicable law or agreed to in writing, software - | distributed under the License is distributed on an "AS IS" BASIS, - | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - | See the License for the specific language governing permissions and - | limitations under the License. --------------------------------------------------------------------------------- - - - diff --git a/ParquetHadoop/build.gradle b/ParquetHadoop/build.gradle deleted file mode 100644 index a9faf546632..00000000000 --- a/ParquetHadoop/build.gradle +++ /dev/null @@ -1,62 +0,0 @@ -import nl.javadude.gradle.plugins.license.License - -plugins { - id 'java-library' - id 'io.deephaven.project.register' -} - -sourceSets { - main { - java { - srcDir 'java' - } - } -} -tasks.withType(License) { - enabled = false -} - -dependencies { - // TODO(deephaven-core#3148): LZ4_RAW parquet support - api('org.apache.parquet:parquet-hadoop:1.13.0') - - // TODO(deephaven-core#806): Remove dependency on hadoop-common - api('org.apache.hadoop:hadoop-common:3.3.3') { - // do not take any dependencies of this project, - // we just want a few classes (Configuration, Path) for - // simplified prototyping work, and api compatibility. - transitive = false - // if we actually need any more of hadoop at runtime, - // we can add more jars w/ transitive=false, - // or replace transitive=false here w/ more exclusions; - // (we want to avoid pulling in netty, loggers, jetty-util, guice and asm). - } - - /* A dependency to woodstox is triggered from the - * initialization of ParquetReader the first time - * we try to open a parquet file. 
Without this below, - * we get: - * - * java.lang.NoClassDefFoundError: com/ctc/wstx/io/InputBootstrapper - * at io.deephaven.parquet.ParquetFileReader.lambda$new$0(ParquetFileReader.java:44) - * at java.lang.ThreadLocal$SuppliedThreadLocal.initialValue(ThreadLocal.java:284) - * at java.lang.ThreadLocal.setInitialValue(ThreadLocal.java:180) - * at java.lang.ThreadLocal.get(ThreadLocal.java:170) - * at io.deephaven.parquet.ColumnChunkReaderImpl.lambda$new$0(ColumnChunkReaderImpl.java:49) - * [...] - * - * Similarly for hadoop-shaded-guava. - * - * lz4-pure-java - note that we can't _easily_ use aircompressor here, as the service loader sees - * the copy in hadoop-common. TODO use config instead of service loader - */ - runtimeOnly('com.fasterxml.woodstox:woodstox-core:6.4.0') { - because 'hadoop-common required dependency for Configuration' - } - runtimeOnly('org.apache.hadoop.thirdparty:hadoop-shaded-guava:1.1.1') { - because 'hadoop-common required dependency for Configuration' - } - runtimeOnly('commons-collections:commons-collections:3.2.2') { - because 'hadoop-common required dependency for Configuration' - } -} diff --git a/ParquetHadoop/gradle.properties b/ParquetHadoop/gradle.properties deleted file mode 100644 index 1c0cc01b600..00000000000 --- a/ParquetHadoop/gradle.properties +++ /dev/null @@ -1 +0,0 @@ -io.deephaven.project.ProjectType=JAVA_EXTERNAL diff --git a/ParquetHadoop/src/main/java/io/deephaven/parquet/base/tempfix/ParquetMetadataConverter.java b/ParquetHadoop/src/main/java/io/deephaven/parquet/base/tempfix/ParquetMetadataConverter.java deleted file mode 100644 index c800aebb1f7..00000000000 --- a/ParquetHadoop/src/main/java/io/deephaven/parquet/base/tempfix/ParquetMetadataConverter.java +++ /dev/null @@ -1,1538 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package io.deephaven.parquet.base.tempfix; - -/* -TODO(deephaven-core#901): Remove the hacked ParquetMetadataConverter.java and the need for the ParquetHadoop module. 
-NOTE this only exists for this line, inside addRowGroup -Without it the page offset is not being saved properly - if (columnMetaData.getDictionaryPageOffset() >= 0) { - columnChunk.meta_data.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset()); - } - - */ - -import static java.util.Optional.empty; - -import static java.util.Optional.of; -import static org.apache.parquet.format.Util.readFileMetaData; -import static org.apache.parquet.format.Util.writePageHeader; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.CorruptStatistics; -import org.apache.parquet.ParquetReadOptions; -import org.apache.parquet.format.BsonType; -import org.apache.parquet.format.CompressionCodec; -import org.apache.parquet.format.DateType; -import org.apache.parquet.format.DecimalType; -import org.apache.parquet.format.EnumType; -import org.apache.parquet.format.IntType; -import org.apache.parquet.format.JsonType; -import org.apache.parquet.format.ListType; -import org.apache.parquet.format.LogicalType; -import org.apache.parquet.format.MapType; -import org.apache.parquet.format.MicroSeconds; -import org.apache.parquet.format.MilliSeconds; -import org.apache.parquet.format.NanoSeconds; -import org.apache.parquet.format.NullType; -import org.apache.parquet.format.PageEncodingStats; -import org.apache.parquet.format.StringType; -import org.apache.parquet.format.TimeType; -import org.apache.parquet.format.TimeUnit; -import org.apache.parquet.format.TimestampType; -import org.apache.parquet.hadoop.metadata.ColumnPath; -import org.apache.parquet.format.BoundaryOrder; -import org.apache.parquet.format.ColumnChunk; -import org.apache.parquet.format.ColumnIndex; -import org.apache.parquet.format.ColumnMetaData; -import org.apache.parquet.format.ColumnOrder; -import org.apache.parquet.format.ConvertedType; -import org.apache.parquet.format.DataPageHeader; -import org.apache.parquet.format.DataPageHeaderV2; -import org.apache.parquet.format.DictionaryPageHeader; -import org.apache.parquet.format.Encoding; -import org.apache.parquet.format.FieldRepetitionType; -import org.apache.parquet.format.FileMetaData; -import org.apache.parquet.format.KeyValue; -import org.apache.parquet.format.OffsetIndex; -import org.apache.parquet.format.PageHeader; -import org.apache.parquet.format.PageLocation; -import org.apache.parquet.format.PageType; -import org.apache.parquet.format.RowGroup; -import org.apache.parquet.format.SchemaElement; -import org.apache.parquet.format.Statistics; -import org.apache.parquet.format.Type; -import org.apache.parquet.format.TypeDefinedOrder; -import org.apache.parquet.hadoop.metadata.BlockMetaData; -import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import org.apache.parquet.column.EncodingStats; -import org.apache.parquet.hadoop.metadata.ParquetMetadata; -import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder; -import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder; -import org.apache.parquet.internal.hadoop.metadata.IndexReference; 
-import org.apache.parquet.io.ParquetDecodingException; -import org.apache.parquet.schema.ColumnOrder.ColumnOrderName; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.apache.parquet.schema.Type.Repetition; -import org.apache.parquet.schema.TypeVisitor; -import org.apache.parquet.schema.Types; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -// TODO: This file has become too long! -// TODO: Lets split it up: https://issues.apache.org/jira/browse/PARQUET-310 -public class ParquetMetadataConverter { - - private static final TypeDefinedOrder TYPE_DEFINED_ORDER = new TypeDefinedOrder(); - public static final MetadataFilter NO_FILTER = new NoFilter(); - public static final MetadataFilter SKIP_ROW_GROUPS = new SkipMetadataFilter(); - public static final long MAX_STATS_SIZE = 4096; // limit stats to 4k - - private static final Logger LOG = LoggerFactory.getLogger(ParquetMetadataConverter.class); - private static final LogicalTypeConverterVisitor LOGICAL_TYPE_ANNOTATION_VISITOR = new LogicalTypeConverterVisitor(); - private static final ConvertedTypeConverterVisitor CONVERTED_TYPE_CONVERTER_VISITOR = new ConvertedTypeConverterVisitor(); - - private final boolean useSignedStringMinMax; - - public ParquetMetadataConverter() { - this(false); - } - - /** - * @param conf a configuration - * @deprecated will be removed in 2.0.0; use {@code ParquetMetadataConverter(ParquetReadOptions)} - */ - @Deprecated - public ParquetMetadataConverter(Configuration conf) { - this(conf.getBoolean("parquet.strings.signed-min-max.enabled", false)); - } - - public ParquetMetadataConverter(ParquetReadOptions options) { - this(options.useSignedStringMinMax()); - } - - private ParquetMetadataConverter(boolean useSignedStringMinMax) { - this.useSignedStringMinMax = useSignedStringMinMax; - } - - // NOTE: this cache is for memory savings, not cpu savings, and is used to de-duplicate - // sets of encodings. It is important that all collections inserted to this cache be - // immutable and have thread-safe read-only access. This can be achieved by wrapping - // an unsynchronized collection in Collections.unmodifiable*(), and making sure to not - // keep any references to the original collection. 
- private static final ConcurrentHashMap, Set> - cachedEncodingSets = new ConcurrentHashMap, Set>(); - - public FileMetaData toParquetMetadata(int currentVersion, ParquetMetadata parquetMetadata) { - List blocks = parquetMetadata.getBlocks(); - List rowGroups = new ArrayList(); - long numRows = 0; - for (BlockMetaData block : blocks) { - numRows += block.getRowCount(); - addRowGroup(parquetMetadata, rowGroups, block); - } - FileMetaData fileMetaData = new FileMetaData( - currentVersion, - toParquetSchema(parquetMetadata.getFileMetaData().getSchema()), - numRows, - rowGroups); - - Set> keyValues = parquetMetadata.getFileMetaData().getKeyValueMetaData().entrySet(); - for (Entry keyValue : keyValues) { - addKeyValue(fileMetaData, keyValue.getKey(), keyValue.getValue()); - } - - fileMetaData.setCreated_by(parquetMetadata.getFileMetaData().getCreatedBy()); - - fileMetaData.setColumn_orders(getColumnOrders(parquetMetadata.getFileMetaData().getSchema())); - - return fileMetaData; - } - - private List getColumnOrders(MessageType schema) { - List columnOrders = new ArrayList<>(); - // Currently, only TypeDefinedOrder is supported, so we create a column order for each columns with - // TypeDefinedOrder even if some types (e.g. INT96) have undefined column orders. - for (int i = 0, n = schema.getPaths().size(); i < n; ++i) { - ColumnOrder columnOrder = new ColumnOrder(); - columnOrder.setTYPE_ORDER(TYPE_DEFINED_ORDER); - columnOrders.add(columnOrder); - } - return columnOrders; - } - - // Visible for testing - List toParquetSchema(MessageType schema) { - List result = new ArrayList(); - addToList(result, schema); - return result; - } - - private void addToList(final List result, org.apache.parquet.schema.Type field) { - field.accept(new TypeVisitor() { - @Override - public void visit(PrimitiveType primitiveType) { - SchemaElement element = new SchemaElement(primitiveType.getName()); - element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition())); - element.setType(getType(primitiveType.getPrimitiveTypeName())); - if (primitiveType.getLogicalTypeAnnotation() != null) { - element.setConverted_type(convertToConvertedType(primitiveType.getLogicalTypeAnnotation())); - element.setLogicalType(convertToLogicalType(primitiveType.getLogicalTypeAnnotation())); - } - if (primitiveType.getDecimalMetadata() != null) { - element.setPrecision(primitiveType.getDecimalMetadata().getPrecision()); - element.setScale(primitiveType.getDecimalMetadata().getScale()); - } - if (primitiveType.getTypeLength() > 0) { - element.setType_length(primitiveType.getTypeLength()); - } - if (primitiveType.getId() != null) { - element.setField_id(primitiveType.getId().intValue()); - } - result.add(element); - } - - @Override - public void visit(MessageType messageType) { - SchemaElement element = new SchemaElement(messageType.getName()); - if (messageType.getId() != null) { - element.setField_id(messageType.getId().intValue()); - } - visitChildren(result, messageType.asGroupType(), element); - } - - @Override - public void visit(GroupType groupType) { - SchemaElement element = new SchemaElement(groupType.getName()); - element.setRepetition_type(toParquetRepetition(groupType.getRepetition())); - if (groupType.getLogicalTypeAnnotation() != null) { - element.setConverted_type(convertToConvertedType(groupType.getLogicalTypeAnnotation())); - element.setLogicalType(convertToLogicalType(groupType.getLogicalTypeAnnotation())); - } - if (groupType.getId() != null) { - element.setField_id(groupType.getId().intValue()); - } - 
visitChildren(result, groupType, element); - } - - private void visitChildren(final List result, - GroupType groupType, SchemaElement element) { - element.setNum_children(groupType.getFieldCount()); - result.add(element); - for (org.apache.parquet.schema.Type field : groupType.getFields()) { - addToList(result, field); - } - } - }); - } - - LogicalType convertToLogicalType(LogicalTypeAnnotation logicalTypeAnnotation) { - return logicalTypeAnnotation.accept(LOGICAL_TYPE_ANNOTATION_VISITOR).get(); - } - - ConvertedType convertToConvertedType(LogicalTypeAnnotation logicalTypeAnnotation) { - return logicalTypeAnnotation.accept(CONVERTED_TYPE_CONVERTER_VISITOR).orElse(null); - } - - static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.TimeUnit unit) { - switch (unit) { - case MICROS: - return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds()); - case MILLIS: - return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); - case NANOS: - return TimeUnit.NANOS(new NanoSeconds()); - default: - throw new RuntimeException("Unknown time unit " + unit); - } - } - - private static class ConvertedTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor { - @Override - public Optional visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { - return of(ConvertedType.UTF8); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) { - return of(ConvertedType.MAP); - } - - @Override - public Optional visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) { - return of(ConvertedType.LIST); - } - - @Override - public Optional visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { - return of(ConvertedType.ENUM); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return of(ConvertedType.DECIMAL); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { - return of(ConvertedType.DATE); - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - if (!timeLogicalType.isAdjustedToUTC()) { - return empty(); - } - switch (timeLogicalType.getUnit()) { - case MILLIS: - return of(ConvertedType.TIME_MILLIS); - case MICROS: - return of(ConvertedType.TIME_MICROS); - case NANOS: - return empty(); - default: - throw new RuntimeException("Unknown converted type for " + timeLogicalType.toOriginalType()); - } - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { - if (!timestampLogicalType.isAdjustedToUTC()) { - return empty(); - } - switch (timestampLogicalType.getUnit()) { - case MICROS: - return of(ConvertedType.TIMESTAMP_MICROS); - case MILLIS: - return of(ConvertedType.TIMESTAMP_MILLIS); - case NANOS: - return empty(); - default: - throw new RuntimeException("Unknown converted type for " + timestampLogicalType.toOriginalType()); - } - } - - @Override - public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { - boolean signed = intLogicalType.isSigned(); - switch (intLogicalType.getBitWidth()) { - case 8: - return of(signed ? ConvertedType.INT_8 : ConvertedType.UINT_8); - case 16: - return of(signed ? ConvertedType.INT_16 : ConvertedType.UINT_16); - case 32: - return of(signed ? ConvertedType.INT_32 : ConvertedType.UINT_32); - case 64: - return of(signed ? 
ConvertedType.INT_64 : ConvertedType.UINT_64); - default: - throw new RuntimeException("Unknown original type " + intLogicalType.toOriginalType()); - } - } - - @Override - public Optional visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) { - return of(ConvertedType.JSON); - } - - @Override - public Optional visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { - return of(ConvertedType.BSON); - } - - @Override - public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { - return of(ConvertedType.INTERVAL); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) { - return of(ConvertedType.MAP_KEY_VALUE); - } - } - - private static class LogicalTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor { - @Override - public Optional visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { - return of(LogicalType.STRING(new StringType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) { - return of(LogicalType.MAP(new MapType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) { - return of(LogicalType.LIST(new ListType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { - return of(LogicalType.ENUM(new EnumType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return of(LogicalType.DECIMAL(new DecimalType(decimalLogicalType.getScale(), decimalLogicalType.getPrecision()))); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { - return of(LogicalType.DATE(new DateType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - return of(LogicalType.TIME(new TimeType(timeLogicalType.isAdjustedToUTC(), convertUnit(timeLogicalType.getUnit())))); - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { - return of(LogicalType.TIMESTAMP(new TimestampType(timestampLogicalType.isAdjustedToUTC(), convertUnit(timestampLogicalType.getUnit())))); - } - - @Override - public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { - return of(LogicalType.INTEGER(new IntType((byte) intLogicalType.getBitWidth(), intLogicalType.isSigned()))); - } - - @Override - public Optional visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) { - return of(LogicalType.JSON(new JsonType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { - return of(LogicalType.BSON(new BsonType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { - return of(LogicalType.UNKNOWN(new NullType())); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) { - return of(LogicalType.UNKNOWN(new NullType())); - } - } - - private void addRowGroup(ParquetMetadata parquetMetadata, List rowGroups, BlockMetaData block) { - //rowGroup.total_byte_size = ; - List columns = block.getColumns(); - List parquetColumns = new ArrayList(); - for (ColumnChunkMetaData columnMetaData : columns) { - ColumnChunk columnChunk = new 
ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset - columnChunk.file_path = block.getPath(); // they are in the same file for now - columnChunk.meta_data = new ColumnMetaData( - getType(columnMetaData.getType()), - toFormatEncodings(columnMetaData.getEncodings()), - Arrays.asList(columnMetaData.getPath().toArray()), - toFormatCodec(columnMetaData.getCodec()), - columnMetaData.getValueCount(), - columnMetaData.getTotalUncompressedSize(), - columnMetaData.getTotalSize(), - columnMetaData.getFirstDataPageOffset()); - if (columnMetaData.getDictionaryPageOffset() >= 0) { - columnChunk.meta_data.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset()); - } - if (!columnMetaData.getStatistics().isEmpty()) { - columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics())); - } - if (columnMetaData.getEncodingStats() != null) { - columnChunk.meta_data.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats())); - } -// columnChunk.meta_data.index_page_offset = ; -// columnChunk.meta_data.key_value_metadata = ; // nothing yet - - IndexReference columnIndexRef = columnMetaData.getColumnIndexReference(); - if (columnIndexRef != null) { - columnChunk.setColumn_index_offset(columnIndexRef.getOffset()); - columnChunk.setColumn_index_length(columnIndexRef.getLength()); - } - IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference(); - if (offsetIndexRef != null) { - columnChunk.setOffset_index_offset(offsetIndexRef.getOffset()); - columnChunk.setOffset_index_length(offsetIndexRef.getLength()); - } - - parquetColumns.add(columnChunk); - } - RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount()); - rowGroups.add(rowGroup); - } - - private List toFormatEncodings(Set encodings) { - List converted = new ArrayList(encodings.size()); - for (org.apache.parquet.column.Encoding encoding : encodings) { - converted.add(getEncoding(encoding)); - } - return converted; - } - - // Visible for testing - Set fromFormatEncodings(List encodings) { - Set converted = new HashSet(); - - for (Encoding encoding : encodings) { - converted.add(getEncoding(encoding)); - } - - // make converted unmodifiable, drop reference to modifiable copy - converted = Collections.unmodifiableSet(converted); - - // atomically update the cache - Set cached = cachedEncodingSets.putIfAbsent(converted, converted); - - if (cached == null) { - // cached == null signifies that converted was *not* in the cache previously - // so we can return converted instead of throwing it away, it has now - // been cached - cached = converted; - } - - return cached; - } - - private CompressionCodecName fromFormatCodec(CompressionCodec codec) { - return CompressionCodecName.valueOf(codec.toString()); - } - - private CompressionCodec toFormatCodec(CompressionCodecName codec) { - return CompressionCodec.valueOf(codec.toString()); - } - - public org.apache.parquet.column.Encoding getEncoding(Encoding encoding) { - return org.apache.parquet.column.Encoding.valueOf(encoding.name()); - } - - public Encoding getEncoding(org.apache.parquet.column.Encoding encoding) { - return Encoding.valueOf(encoding.name()); - } - - public EncodingStats convertEncodingStats(List stats) { - if (stats == null) { - return null; - } - - EncodingStats.Builder builder = new EncodingStats.Builder(); - for (PageEncodingStats stat : stats) { - switch (stat.getPage_type()) { - case DATA_PAGE_V2: - builder.withV2Pages(); - // falls through - case DATA_PAGE: - 
builder.addDataEncoding( - getEncoding(stat.getEncoding()), stat.getCount()); - break; - case DICTIONARY_PAGE: - builder.addDictEncoding( - getEncoding(stat.getEncoding()), stat.getCount()); - break; - } - } - return builder.build(); - } - - public List convertEncodingStats(EncodingStats stats) { - if (stats == null) { - return null; - } - - List formatStats = new ArrayList(); - for (org.apache.parquet.column.Encoding encoding : stats.getDictionaryEncodings()) { - formatStats.add(new PageEncodingStats( - PageType.DICTIONARY_PAGE, getEncoding(encoding), - stats.getNumDictionaryPagesEncodedAs(encoding))); - } - PageType dataPageType = (stats.usesV2Pages() ? PageType.DATA_PAGE_V2 : PageType.DATA_PAGE); - for (org.apache.parquet.column.Encoding encoding : stats.getDataEncodings()) { - formatStats.add(new PageEncodingStats( - dataPageType, getEncoding(encoding), - stats.getNumDataPagesEncodedAs(encoding))); - } - return formatStats; - } - - public static Statistics toParquetStatistics( - org.apache.parquet.column.statistics.Statistics stats) { - Statistics formatStats = new Statistics(); - // Don't write stats larger than the max size rather than truncating. The - // rationale is that some engines may use the minimum value in the page as - // the true minimum for aggregations and there is no way to mark that a - // value has been truncated and is a lower bound and not in the page. - if (!stats.isEmpty() && stats.isSmallerThan(MAX_STATS_SIZE)) { - formatStats.setNull_count(stats.getNumNulls()); - if (stats.hasNonNullValue()) { - byte[] min = stats.getMinBytes(); - byte[] max = stats.getMaxBytes(); - - // Fill the former min-max statistics only if the comparison logic is - // signed so the logic of V1 and V2 stats are the same (which is - // trivially true for equal min-max values) - if (sortOrder(stats.type()) == SortOrder.SIGNED || Arrays.equals(min, max)) { - formatStats.setMin(min); - formatStats.setMax(max); - } - - if (isMinMaxStatsSupported(stats.type()) || Arrays.equals(min, max)) { - formatStats.setMin_value(min); - formatStats.setMax_value(max); - } - } - } - return formatStats; - } - - private static boolean isMinMaxStatsSupported(PrimitiveType type) { - return type.columnOrder().getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER; - } - - /** - * @param statistics parquet format statistics - * @param type a primitive type name - * @return the statistics - * @deprecated will be removed in 2.0.0. - */ - @Deprecated - public static org.apache.parquet.column.statistics.Statistics fromParquetStatistics(Statistics statistics, PrimitiveTypeName type) { - return fromParquetStatistics(null, statistics, type); - } - - /** - * @param createdBy the created-by string from the file - * @param statistics parquet format statistics - * @param type a primitive type name - * @return the statistics - * @deprecated will be removed in 2.0.0. 
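// Illustrative restatement of the write-side rules in toParquetStatistics above; the method name and
// boolean result are hypothetical, and SortOrder refers to the enum defined later in this file. Two
// decisions are made: oversized stats are dropped outright (never truncated, since a truncated bound
// could be misread as a true min or max), and the legacy min/max fields are only filled when old
// readers' signed comparison is actually valid for the column.
static boolean fillLegacyMinMax(org.apache.parquet.column.statistics.Statistics<?> stats, SortOrder order) {
    if (stats.isEmpty() || !stats.isSmallerThan(MAX_STATS_SIZE) || !stats.hasNonNullValue()) {
        return false; // nothing (or too much) worth writing at all
    }
    byte[] min = stats.getMinBytes();
    byte[] max = stats.getMaxBytes();
    // min_value/max_value always carry the type-defined order; the pre-2.4 min/max only when signed
    // comparison is correct, or trivially when both bounds are equal.
    return order == SortOrder.SIGNED || java.util.Arrays.equals(min, max);
}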
- */ - @Deprecated - public static org.apache.parquet.column.statistics.Statistics fromParquetStatistics - (String createdBy, Statistics statistics, PrimitiveTypeName type) { - return fromParquetStatisticsInternal(createdBy, statistics, - new PrimitiveType(Repetition.OPTIONAL, type, "fake_type"), defaultSortOrder(type)); - } - - // Visible for testing - static org.apache.parquet.column.statistics.Statistics fromParquetStatisticsInternal - (String createdBy, Statistics formatStats, PrimitiveType type, SortOrder typeSortOrder) { - // create stats object based on the column type - org.apache.parquet.column.statistics.Statistics.Builder statsBuilder = - org.apache.parquet.column.statistics.Statistics.getBuilderForReading(type); - - if (formatStats != null) { - // Use the new V2 min-max statistics over the former one if it is filled - if (formatStats.isSetMin_value() && formatStats.isSetMax_value()) { - byte[] min = formatStats.min_value.array(); - byte[] max = formatStats.max_value.array(); - if (isMinMaxStatsSupported(type) || Arrays.equals(min, max)) { - statsBuilder.withMin(min); - statsBuilder.withMax(max); - } - } else { - boolean isSet = formatStats.isSetMax() && formatStats.isSetMin(); - boolean maxEqualsMin = isSet ? Arrays.equals(formatStats.getMin(), formatStats.getMax()) : false; - boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder; - // NOTE: See docs in CorruptStatistics for explanation of why this check is needed - // The sort order is checked to avoid returning min/max stats that are not - // valid with the type's sort order. In previous releases, all stats were - // aggregated using a signed byte-wise ordering, which isn't valid for all the - // types (e.g. strings, decimals etc.). - if (!CorruptStatistics.shouldIgnoreStatistics(createdBy, type.getPrimitiveTypeName()) && - (sortOrdersMatch || maxEqualsMin)) { - if (isSet) { - statsBuilder.withMin(formatStats.min.array()); - statsBuilder.withMax(formatStats.max.array()); - } - } - } - - if (formatStats.isSetNull_count()) { - statsBuilder.withNumNulls(formatStats.null_count); - } - } - return statsBuilder.build(); - } - - public org.apache.parquet.column.statistics.Statistics fromParquetStatistics( - String createdBy, Statistics statistics, PrimitiveType type) { - SortOrder expectedOrder = overrideSortOrderToSigned(type) ? - SortOrder.SIGNED : sortOrder(type); - return fromParquetStatisticsInternal( - createdBy, statistics, type, expectedOrder); - } - - /** - * Sort order for page and column statistics. Types are associated with sort - * orders (e.g., UTF8 columns should use UNSIGNED) and column stats are - * aggregated using a sort order. As of parquet-format version 2.3.1, the - * order used to aggregate stats is always SIGNED and is not stored in the - * Parquet file. These stats are discarded for types that need unsigned. - * - * See PARQUET-686. - */ - enum SortOrder { - SIGNED, - UNSIGNED, - UNKNOWN - } - - private static final Set STRING_TYPES = Collections - .unmodifiableSet(new HashSet<>(Arrays.asList( - LogicalTypeAnnotation.StringLogicalTypeAnnotation.class, - LogicalTypeAnnotation.EnumLogicalTypeAnnotation.class, - LogicalTypeAnnotation.JsonLogicalTypeAnnotation.class - ))); - - /** - * Returns whether to use signed order min and max with a type. It is safe to - * use signed min and max when the type is a string type and contains only - * ASCII characters (where the sign bit was 0). 
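// Illustrative sketch of the guard fromParquetStatisticsInternal applies above (hypothetical helper
// name; SortOrder is the enum defined just above). Legacy min/max read from a footer are trusted only
// when the writing library is not on the known-corrupt list (CorruptStatistics) and the stats were
// aggregated in an order that is valid for this column's type.
static boolean trustLegacyMinMax(String createdBy, org.apache.parquet.schema.PrimitiveType type,
        byte[] min, byte[] max, SortOrder typeSortOrder) {
    boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder; // legacy stats were always aggregated signed
    boolean maxEqualsMin = java.util.Arrays.equals(min, max);    // equal bounds are order-independent
    return !org.apache.parquet.CorruptStatistics.shouldIgnoreStatistics(createdBy, type.getPrimitiveTypeName())
            && (sortOrdersMatch || maxEqualsMin);
}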
This checks whether the type - * is a string type and uses {@code useSignedStringMinMax} to determine if - * only ASCII characters were written. - * - * @param type a primitive type with a logical type annotation - * @return true if signed order min/max can be used with this type - */ - private boolean overrideSortOrderToSigned(PrimitiveType type) { - // even if the override is set, only return stats for string-ish types - // a null type annotation is considered string-ish because some writers - // failed to use the UTF8 annotation. - LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation(); - return useSignedStringMinMax && - PrimitiveTypeName.BINARY == type.getPrimitiveTypeName() && - (annotation == null || STRING_TYPES.contains(annotation.getClass())); - } - - /** - * @param primitive a primitive physical type - * @return the default sort order used when the logical type is not known - */ - private static SortOrder defaultSortOrder(PrimitiveTypeName primitive) { - switch (primitive) { - case BOOLEAN: - case INT32: - case INT64: - case FLOAT: - case DOUBLE: - return SortOrder.SIGNED; - case BINARY: - case FIXED_LEN_BYTE_ARRAY: - return SortOrder.UNSIGNED; - } - return SortOrder.UNKNOWN; - } - - /** - * @param primitive a primitive type with a logical type annotation - * @return the "correct" sort order of the type that applications assume - */ - private static SortOrder sortOrder(PrimitiveType primitive) { - LogicalTypeAnnotation annotation = primitive.getLogicalTypeAnnotation(); - if (annotation != null) { - return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor() { - @Override - public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { - return intLogicalType.isSigned() ? of(SortOrder.SIGNED) : of(SortOrder.UNSIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { - return of(SortOrder.UNKNOWN); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { - return of(SortOrder.SIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { - return of(SortOrder.UNSIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { - return of(SortOrder.UNSIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) { - return of(SortOrder.UNSIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { - return of(SortOrder.UNSIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return of(SortOrder.UNKNOWN); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) { - return of(SortOrder.UNKNOWN); - } - - @Override - public Optional visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) { - return of(SortOrder.UNKNOWN); - } - - @Override - public Optional visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) { - return of(SortOrder.UNKNOWN); - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - return of(SortOrder.SIGNED); - } - - @Override - public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { - return of(SortOrder.SIGNED); - } - 
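// Illustrative usage of the opt-in described in the javadoc above: signed min/max for string columns
// is only safe when every BINARY/UTF8 value is ASCII. The property name and the deprecated
// Configuration-based constructor appear earlier in this file; everything else here is a hypothetical
// caller.
static ParquetMetadataConverter signedStringMinMaxConverter() {
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.setBoolean("parquet.strings.signed-min-max.enabled", true);
    // The non-deprecated route is ParquetMetadataConverter(ParquetReadOptions), driven by
    // options.useSignedStringMinMax().
    return new ParquetMetadataConverter(conf);
}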
}).orElse(defaultSortOrder(primitive.getPrimitiveTypeName())); - } - - return defaultSortOrder(primitive.getPrimitiveTypeName()); - } - - public PrimitiveTypeName getPrimitive(Type type) { - switch (type) { - case BYTE_ARRAY: // TODO: rename BINARY and remove this switch - return PrimitiveTypeName.BINARY; - case INT64: - return PrimitiveTypeName.INT64; - case INT32: - return PrimitiveTypeName.INT32; - case BOOLEAN: - return PrimitiveTypeName.BOOLEAN; - case FLOAT: - return PrimitiveTypeName.FLOAT; - case DOUBLE: - return PrimitiveTypeName.DOUBLE; - case INT96: - return PrimitiveTypeName.INT96; - case FIXED_LEN_BYTE_ARRAY: - return PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; - default: - throw new RuntimeException("Unknown type " + type); - } - } - - // Visible for testing - Type getType(PrimitiveTypeName type) { - switch (type) { - case INT64: - return Type.INT64; - case INT32: - return Type.INT32; - case BOOLEAN: - return Type.BOOLEAN; - case BINARY: - return Type.BYTE_ARRAY; - case FLOAT: - return Type.FLOAT; - case DOUBLE: - return Type.DOUBLE; - case INT96: - return Type.INT96; - case FIXED_LEN_BYTE_ARRAY: - return Type.FIXED_LEN_BYTE_ARRAY; - default: - throw new RuntimeException("Unknown primitive type " + type); - } - } - - // Visible for testing - LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) { - switch (type) { - case UTF8: - return LogicalTypeAnnotation.stringType(); - case MAP: - return LogicalTypeAnnotation.mapType(); - case MAP_KEY_VALUE: - return LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(); - case LIST: - return LogicalTypeAnnotation.listType(); - case ENUM: - return LogicalTypeAnnotation.enumType(); - case DECIMAL: - int scale = (schemaElement == null ? 0 : schemaElement.scale); - int precision = (schemaElement == null ? 
0 : schemaElement.precision); - return LogicalTypeAnnotation.decimalType(scale, precision); - case DATE: - return LogicalTypeAnnotation.dateType(); - case TIME_MILLIS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); - case TIME_MICROS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS); - case TIMESTAMP_MILLIS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); - case TIMESTAMP_MICROS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); - case INTERVAL: - return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance(); - case INT_8: - return LogicalTypeAnnotation.intType(8, true); - case INT_16: - return LogicalTypeAnnotation.intType(16, true); - case INT_32: - return LogicalTypeAnnotation.intType(32, true); - case INT_64: - return LogicalTypeAnnotation.intType(64, true); - case UINT_8: - return LogicalTypeAnnotation.intType(8, false); - case UINT_16: - return LogicalTypeAnnotation.intType(16, false); - case UINT_32: - return LogicalTypeAnnotation.intType(32, false); - case UINT_64: - return LogicalTypeAnnotation.intType(64, false); - case JSON: - return LogicalTypeAnnotation.jsonType(); - case BSON: - return LogicalTypeAnnotation.bsonType(); - default: - throw new RuntimeException("Can't convert converted type to logical type, unknown converted type " + type); - } - } - - LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) { - switch (type.getSetField()) { - case MAP: - return LogicalTypeAnnotation.mapType(); - case BSON: - return LogicalTypeAnnotation.bsonType(); - case DATE: - return LogicalTypeAnnotation.dateType(); - case ENUM: - return LogicalTypeAnnotation.enumType(); - case JSON: - return LogicalTypeAnnotation.jsonType(); - case LIST: - return LogicalTypeAnnotation.listType(); - case TIME: - TimeType time = type.getTIME(); - return LogicalTypeAnnotation.timeType(time.isAdjustedToUTC, convertTimeUnit(time.unit)); - case STRING: - return LogicalTypeAnnotation.stringType(); - case DECIMAL: - DecimalType decimal = type.getDECIMAL(); - return LogicalTypeAnnotation.decimalType(decimal.scale, decimal.precision); - case INTEGER: - IntType integer = type.getINTEGER(); - return LogicalTypeAnnotation.intType(integer.bitWidth, integer.isSigned); - case UNKNOWN: - return null; - case TIMESTAMP: - TimestampType timestamp = type.getTIMESTAMP(); - return LogicalTypeAnnotation.timestampType(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); - default: - throw new RuntimeException("Unknown logical type " + type); - } - } - - private LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { - switch (unit.getSetField()) { - case MICROS: - return LogicalTypeAnnotation.TimeUnit.MICROS; - case MILLIS: - return LogicalTypeAnnotation.TimeUnit.MILLIS; - case NANOS: - return LogicalTypeAnnotation.TimeUnit.NANOS; - default: - throw new RuntimeException("Unknown time unit " + unit); - } - } - - private static void addKeyValue(FileMetaData fileMetaData, String key, String value) { - KeyValue keyValue = new KeyValue(key); - keyValue.value = value; - fileMetaData.addToKey_value_metadata(keyValue); - } - - private static interface MetadataFilterVisitor { - T visit(NoFilter filter) throws E; - T visit(SkipMetadataFilter filter) throws E; - T visit(RangeMetadataFilter filter) throws E; - T visit(OffsetMetadataFilter filter) throws E; - } - - public abstract static class MetadataFilter { - private MetadataFilter() {} - abstract T 
accept(MetadataFilterVisitor visitor) throws E; - } - - /** - * [ startOffset, endOffset ) - * @param startOffset a start offset (inclusive) - * @param endOffset an end offset (exclusive) - * @return a range filter from the offsets - */ - public static MetadataFilter range(long startOffset, long endOffset) { - return new RangeMetadataFilter(startOffset, endOffset); - } - - public static MetadataFilter offsets(long... offsets) { - Set set = new HashSet(); - for (long offset : offsets) { - set.add(offset); - } - return new OffsetMetadataFilter(set); - } - - private static final class NoFilter extends MetadataFilter { - private NoFilter() {} - @Override - T accept(MetadataFilterVisitor visitor) throws E { - return visitor.visit(this); - } - @Override - public String toString() { - return "NO_FILTER"; - } - } - private static final class SkipMetadataFilter extends MetadataFilter { - private SkipMetadataFilter() {} - @Override - T accept(MetadataFilterVisitor visitor) throws E { - return visitor.visit(this); - } - @Override - public String toString() { - return "SKIP_ROW_GROUPS"; - } - } - - /** - * [ startOffset, endOffset ) - */ - // Visible for testing - static final class RangeMetadataFilter extends MetadataFilter { - final long startOffset; - final long endOffset; - - RangeMetadataFilter(long startOffset, long endOffset) { - super(); - this.startOffset = startOffset; - this.endOffset = endOffset; - } - - @Override - T accept(MetadataFilterVisitor visitor) throws E { - return visitor.visit(this); - } - - public boolean contains(long offset) { - return offset >= this.startOffset && offset < this.endOffset; - } - - @Override - public String toString() { - return "range(s:" + startOffset + ", e:" + endOffset + ")"; - } - } - - static final class OffsetMetadataFilter extends MetadataFilter { - private final Set offsets; - - public OffsetMetadataFilter(Set offsets) { - this.offsets = offsets; - } - - public boolean contains(long offset) { - return offsets.contains(offset); - } - - @Override - T accept(MetadataFilterVisitor visitor) throws E { - return visitor.visit(this); - } - } - - @Deprecated - public ParquetMetadata readParquetMetadata(InputStream from) throws IOException { - return readParquetMetadata(from, NO_FILTER); - } - - // Visible for testing - static FileMetaData filterFileMetaDataByMidpoint(FileMetaData metaData, RangeMetadataFilter filter) { - List rowGroups = metaData.getRow_groups(); - List newRowGroups = new ArrayList(); - for (RowGroup rowGroup : rowGroups) { - long totalSize = 0; - long startIndex = getOffset(rowGroup.getColumns().get(0)); - for (ColumnChunk col : rowGroup.getColumns()) { - totalSize += col.getMeta_data().getTotal_compressed_size(); - } - long midPoint = startIndex + totalSize / 2; - if (filter.contains(midPoint)) { - newRowGroups.add(rowGroup); - } - } - metaData.setRow_groups(newRowGroups); - return metaData; - } - - // Visible for testing - static FileMetaData filterFileMetaDataByStart(FileMetaData metaData, OffsetMetadataFilter filter) { - List rowGroups = metaData.getRow_groups(); - List newRowGroups = new ArrayList(); - for (RowGroup rowGroup : rowGroups) { - long startIndex = getOffset(rowGroup.getColumns().get(0)); - if (filter.contains(startIndex)) { - newRowGroups.add(rowGroup); - } - } - metaData.setRow_groups(newRowGroups); - return metaData; - } - - static long getOffset(RowGroup rowGroup) { - return getOffset(rowGroup.getColumns().get(0)); - } - // Visible for testing - static long getOffset(ColumnChunk columnChunk) { - ColumnMetaData md = 
columnChunk.getMeta_data(); - long offset = md.getData_page_offset(); - if (md.isSetDictionary_page_offset() && offset > md.getDictionary_page_offset()) { - offset = md.getDictionary_page_offset(); - } - return offset; - } - - public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter) throws IOException { - FileMetaData fileMetaData = filter.accept(new MetadataFilterVisitor() { - @Override - public FileMetaData visit(NoFilter filter) throws IOException { - return readFileMetaData(from); - } - - @Override - public FileMetaData visit(SkipMetadataFilter filter) throws IOException { - return readFileMetaData(from, true); - } - - @Override - public FileMetaData visit(OffsetMetadataFilter filter) throws IOException { - return filterFileMetaDataByStart(readFileMetaData(from), filter); - } - - @Override - public FileMetaData visit(RangeMetadataFilter filter) throws IOException { - return filterFileMetaDataByMidpoint(readFileMetaData(from), filter); - } - }); - LOG.debug("{}", fileMetaData); - ParquetMetadata parquetMetadata = fromParquetMetadata(fileMetaData); - if (LOG.isDebugEnabled()) LOG.debug(ParquetMetadata.toPrettyJSON(parquetMetadata)); - return parquetMetadata; - } - - public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException { - MessageType messageType = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders()); - List blocks = new ArrayList(); - List row_groups = parquetMetadata.getRow_groups(); - if (row_groups != null) { - for (RowGroup rowGroup : row_groups) { - BlockMetaData blockMetaData = new BlockMetaData(); - blockMetaData.setRowCount(rowGroup.getNum_rows()); - blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size()); - List columns = rowGroup.getColumns(); - String filePath = columns.get(0).getFile_path(); - for (ColumnChunk columnChunk : columns) { - if ((filePath == null && columnChunk.getFile_path() != null) - || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) { - throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now"); - } - ColumnMetaData metaData = columnChunk.meta_data; - ColumnPath path = getPath(metaData); - ColumnChunkMetaData column = ColumnChunkMetaData.get( - path, - messageType.getType(path.toArray()).asPrimitiveType(), - fromFormatCodec(metaData.codec), - convertEncodingStats(metaData.getEncoding_stats()), - fromFormatEncodings(metaData.encodings), - fromParquetStatistics( - parquetMetadata.getCreated_by(), - metaData.statistics, - messageType.getType(path.toArray()).asPrimitiveType()), - metaData.data_page_offset, - metaData.dictionary_page_offset, - metaData.num_values, - metaData.total_compressed_size, - metaData.total_uncompressed_size); - column.setColumnIndexReference(toColumnIndexReference(columnChunk)); - column.setOffsetIndexReference(toOffsetIndexReference(columnChunk)); - // TODO - // index_page_offset - // key_value_metadata - blockMetaData.addColumn(column); - } - blockMetaData.setPath(filePath); - blocks.add(blockMetaData); - } - } - Map keyValueMetaData = new HashMap(); - List key_value_metadata = parquetMetadata.getKey_value_metadata(); - if (key_value_metadata != null) { - for (KeyValue keyValue : key_value_metadata) { - keyValueMetaData.put(keyValue.key, keyValue.value); - } - } - return new ParquetMetadata( - new org.apache.parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, parquetMetadata.getCreated_by()), - blocks); - } - - private static IndexReference 
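// Illustrative caller of the filtered footer read shown above (helper name and stream handling are
// hypothetical). range(start, end) keeps a row group when its midpoint (the first column's start
// offset plus half of the row group's total compressed size) falls inside [start, end), which is what
// filterFileMetaDataByMidpoint implements; offsets(...) instead matches row groups by exact start offset.
static org.apache.parquet.hadoop.metadata.ParquetMetadata footerForSplit(
        java.io.InputStream serializedFooter, long splitStart, long splitEnd,
        ParquetMetadataConverter converter) throws java.io.IOException {
    return converter.readParquetMetadata(serializedFooter,
            ParquetMetadataConverter.range(splitStart, splitEnd));
}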
toColumnIndexReference(ColumnChunk columnChunk) { - if (columnChunk.isSetColumn_index_offset() && columnChunk.isSetColumn_index_length()) { - return new IndexReference(columnChunk.getColumn_index_offset(), columnChunk.getColumn_index_length()); - } - return null; - } - - private static IndexReference toOffsetIndexReference(ColumnChunk columnChunk) { - if (columnChunk.isSetOffset_index_offset() && columnChunk.isSetOffset_index_length()) { - return new IndexReference(columnChunk.getOffset_index_offset(), columnChunk.getOffset_index_length()); - } - return null; - } - - private static ColumnPath getPath(ColumnMetaData metaData) { - String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]); - return ColumnPath.get(path); - } - - // Visible for testing - MessageType fromParquetSchema(List schema, List columnOrders) { - Iterator iterator = schema.iterator(); - SchemaElement root = iterator.next(); - Types.MessageTypeBuilder builder = Types.buildMessage(); - if (root.isSetField_id()) { - builder.id(root.field_id); - } - buildChildren(builder, iterator, root.getNum_children(), columnOrders, 0); - return builder.named(root.name); - } - - private void buildChildren(Types.GroupBuilder builder, - Iterator schema, - int childrenCount, - List columnOrders, - int columnCount) { - for (int i = 0; i < childrenCount; i++) { - SchemaElement schemaElement = schema.next(); - - // Create Parquet Type. - Types.Builder childBuilder; - if (schemaElement.type != null) { - Types.PrimitiveBuilder primitiveBuilder = builder.primitive( - getPrimitive(schemaElement.type), - fromParquetRepetition(schemaElement.repetition_type)); - if (schemaElement.isSetType_length()) { - primitiveBuilder.length(schemaElement.type_length); - } - if (schemaElement.isSetPrecision()) { - primitiveBuilder.precision(schemaElement.precision); - } - if (schemaElement.isSetScale()) { - primitiveBuilder.scale(schemaElement.scale); - } - if (columnOrders != null) { - org.apache.parquet.schema.ColumnOrder columnOrder = fromParquetColumnOrder(columnOrders.get(columnCount)); - // As per parquet format 2.4.0 no UNDEFINED order is supported. So, set undefined column order for the types - // where ordering is not supported. - if (columnOrder.getColumnOrderName() == ColumnOrderName.TYPE_DEFINED_ORDER - && (schemaElement.type == Type.INT96 || schemaElement.converted_type == ConvertedType.INTERVAL)) { - columnOrder = org.apache.parquet.schema.ColumnOrder.undefined(); - } - primitiveBuilder.columnOrder(columnOrder); - } - childBuilder = primitiveBuilder; - - } else { - childBuilder = builder.group(fromParquetRepetition(schemaElement.repetition_type)); - buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children, columnOrders, columnCount); - } - - if (schemaElement.isSetLogicalType()) { - childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType)); - } - if (schemaElement.isSetConverted_type()) { - OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType(); - OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? - getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null; - if (!originalType.equals(newOriginalType)) { - if (newOriginalType != null) { - LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). 
Using value in converted type.", - schemaElement.converted_type, schemaElement.logicalType); - } - childBuilder.as(originalType); - } - } - if (schemaElement.isSetField_id()) { - childBuilder.id(schemaElement.field_id); - } - - childBuilder.named(schemaElement.name); - ++columnCount; - } - } - - // Visible for testing - FieldRepetitionType toParquetRepetition(Repetition repetition) { - return FieldRepetitionType.valueOf(repetition.name()); - } - - // Visible for testing - Repetition fromParquetRepetition(FieldRepetitionType repetition) { - return Repetition.valueOf(repetition.name()); - } - - private static org.apache.parquet.schema.ColumnOrder fromParquetColumnOrder(ColumnOrder columnOrder) { - if (columnOrder.isSetTYPE_ORDER()) { - return org.apache.parquet.schema.ColumnOrder.typeDefined(); - } - // The column order is not yet supported by this API - return org.apache.parquet.schema.ColumnOrder.undefined(); - } - - @Deprecated - public void writeDataPageHeader( - int uncompressedSize, - int compressedSize, - int valueCount, - org.apache.parquet.column.Encoding rlEncoding, - org.apache.parquet.column.Encoding dlEncoding, - org.apache.parquet.column.Encoding valuesEncoding, - OutputStream to) throws IOException { - writePageHeader(newDataPageHeader(uncompressedSize, - compressedSize, - valueCount, - rlEncoding, - dlEncoding, - valuesEncoding), to); - } - - // Statistics are no longer saved in page headers - @Deprecated - public void writeDataPageHeader( - int uncompressedSize, - int compressedSize, - int valueCount, - org.apache.parquet.column.statistics.Statistics statistics, - org.apache.parquet.column.Encoding rlEncoding, - org.apache.parquet.column.Encoding dlEncoding, - org.apache.parquet.column.Encoding valuesEncoding, - OutputStream to) throws IOException { - writePageHeader( - newDataPageHeader(uncompressedSize, compressedSize, valueCount, - rlEncoding, dlEncoding, valuesEncoding), - to); - } - - private PageHeader newDataPageHeader( - int uncompressedSize, int compressedSize, - int valueCount, - org.apache.parquet.column.Encoding rlEncoding, - org.apache.parquet.column.Encoding dlEncoding, - org.apache.parquet.column.Encoding valuesEncoding) { - PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize); - // TODO: pageHeader.crc = ...; - pageHeader.setData_page_header(new DataPageHeader( - valueCount, - getEncoding(valuesEncoding), - getEncoding(dlEncoding), - getEncoding(rlEncoding))); - return pageHeader; - } - - // Statistics are no longer saved in page headers - @Deprecated - public void writeDataPageV2Header( - int uncompressedSize, int compressedSize, - int valueCount, int nullCount, int rowCount, - org.apache.parquet.column.statistics.Statistics statistics, - org.apache.parquet.column.Encoding dataEncoding, - int rlByteLength, int dlByteLength, - OutputStream to) throws IOException { - writePageHeader( - newDataPageV2Header( - uncompressedSize, compressedSize, - valueCount, nullCount, rowCount, - dataEncoding, - rlByteLength, dlByteLength), to); - } - - public void writeDataPageV1Header( - int uncompressedSize, - int compressedSize, - int valueCount, - org.apache.parquet.column.Encoding rlEncoding, - org.apache.parquet.column.Encoding dlEncoding, - org.apache.parquet.column.Encoding valuesEncoding, - OutputStream to) throws IOException { - writePageHeader(newDataPageHeader(uncompressedSize, - compressedSize, - valueCount, - rlEncoding, - dlEncoding, - valuesEncoding), to); - } - - public void writeDataPageV2Header( - int uncompressedSize, 
int compressedSize, - int valueCount, int nullCount, int rowCount, - org.apache.parquet.column.Encoding dataEncoding, - int rlByteLength, int dlByteLength, - OutputStream to) throws IOException { - writePageHeader( - newDataPageV2Header( - uncompressedSize, compressedSize, - valueCount, nullCount, rowCount, - dataEncoding, - rlByteLength, dlByteLength), to); - } - - private PageHeader newDataPageV2Header( - int uncompressedSize, int compressedSize, - int valueCount, int nullCount, int rowCount, - org.apache.parquet.column.Encoding dataEncoding, - int rlByteLength, int dlByteLength) { - // TODO: pageHeader.crc = ...; - DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2( - valueCount, nullCount, rowCount, - getEncoding(dataEncoding), - dlByteLength, rlByteLength); - PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); - pageHeader.setData_page_header_v2(dataPageHeaderV2); - return pageHeader; - } - - public void writeDictionaryPageHeader( - int uncompressedSize, int compressedSize, int valueCount, - org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException { - PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize); - pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding))); - writePageHeader(pageHeader, to); - } - - private static BoundaryOrder toParquetBoundaryOrder( - org.apache.parquet.internal.column.columnindex.BoundaryOrder boundaryOrder) { - switch (boundaryOrder) { - case ASCENDING: - return BoundaryOrder.ASCENDING; - case DESCENDING: - return BoundaryOrder.DESCENDING; - case UNORDERED: - return BoundaryOrder.UNORDERED; - default: - throw new IllegalArgumentException("Unsupported boundary order: " + boundaryOrder); - } - } - - private static org.apache.parquet.internal.column.columnindex.BoundaryOrder fromParquetBoundaryOrder( - BoundaryOrder boundaryOrder) { - switch (boundaryOrder) { - case ASCENDING: - return org.apache.parquet.internal.column.columnindex.BoundaryOrder.ASCENDING; - case DESCENDING: - return org.apache.parquet.internal.column.columnindex.BoundaryOrder.DESCENDING; - case UNORDERED: - return org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED; - default: - throw new IllegalArgumentException("Unsupported boundary order: " + boundaryOrder); - } - } - - public static ColumnIndex toParquetColumnIndex(PrimitiveType type, - org.apache.parquet.internal.column.columnindex.ColumnIndex columnIndex) { - if (!isMinMaxStatsSupported(type) || columnIndex == null) { - return null; - } - ColumnIndex parquetColumnIndex = new ColumnIndex( - columnIndex.getNullPages(), - columnIndex.getMinValues(), - columnIndex.getMaxValues(), - toParquetBoundaryOrder(columnIndex.getBoundaryOrder())); - parquetColumnIndex.setNull_counts(columnIndex.getNullCounts()); - return parquetColumnIndex; - } - - public static org.apache.parquet.internal.column.columnindex.ColumnIndex fromParquetColumnIndex(PrimitiveType type, - ColumnIndex parquetColumnIndex) { - if (!isMinMaxStatsSupported(type)) { - return null; - } - return ColumnIndexBuilder.build(type, - fromParquetBoundaryOrder(parquetColumnIndex.getBoundary_order()), - parquetColumnIndex.getNull_pages(), - parquetColumnIndex.getNull_counts(), - parquetColumnIndex.getMin_values(), - parquetColumnIndex.getMax_values()); - } - - public static OffsetIndex toParquetOffsetIndex(org.apache.parquet.internal.column.columnindex.OffsetIndex offsetIndex) { - List pageLocations = 
new ArrayList<>(offsetIndex.getPageCount()); - for (int i = 0, n = offsetIndex.getPageCount(); i < n; ++i) { - pageLocations.add(new PageLocation( - offsetIndex.getOffset(i), - offsetIndex.getCompressedPageSize(i), - offsetIndex.getFirstRowIndex(i))); - } - return new OffsetIndex(pageLocations); - } - - public static org.apache.parquet.internal.column.columnindex.OffsetIndex fromParquetOffsetIndex( - OffsetIndex parquetOffsetIndex) { - OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder(); - for (PageLocation pageLocation : parquetOffsetIndex.getPage_locations()) { - builder.add(pageLocation.getOffset(), pageLocation.getCompressed_page_size(), pageLocation.getFirst_row_index()); - } - return builder.build(); - } -} \ No newline at end of file diff --git a/buildSrc/src/main/groovy/Classpaths.groovy b/buildSrc/src/main/groovy/Classpaths.groovy index db7ce197d3d..f67907c5e40 100644 --- a/buildSrc/src/main/groovy/Classpaths.groovy +++ b/buildSrc/src/main/groovy/Classpaths.groovy @@ -287,4 +287,32 @@ class Classpaths { Configuration config = p.configurations.getByName(configName) addDependency(config, GUAVA_GROUP, GUAVA_NAME, GUAVA_VERSION) } + + static void inheritParquetHadoop(Project p, String configName = JavaPlugin.IMPLEMENTATION_CONFIGURATION_NAME) { + Configuration config = p.configurations.getByName(configName) + addDependency(config, 'org.apache.parquet', 'parquet-hadoop', '1.13.0') + } + + /** configName controls only the Configuration's classpath, all transitive dependencies are runtimeOnly */ + static void inheritParquetHadoopConfiguration(Project p, String configName = JavaPlugin.IMPLEMENTATION_CONFIGURATION_NAME) { + Configuration config = p.configurations.getByName(configName) + addDependency(config, 'org.apache.hadoop', 'hadoop-common', '3.3.3') { + it.setTransitive(false) + // Do not take any extra dependencies of this project transitively. We just want a few classes for + // configuration and compression codecs. For any additional required dependencies, add them separately, as + // done for woodstox, shaded-guava, etc. below. Or we can replace setTransitive(false) here with more + // exclusions (we want to avoid pulling in netty, loggers, jetty-util, guice and asm). 
+ } + + Configuration runtimeOnly = p.configurations.getByName(JavaPlugin.RUNTIME_ONLY_CONFIGURATION_NAME) + addDependency(runtimeOnly, 'com.fasterxml.woodstox', 'woodstox-core', '6.4.0') { + it.because('hadoop-common required dependency for Configuration') + } + addDependency(runtimeOnly, 'org.apache.hadoop.thirdparty', 'hadoop-shaded-guava', '1.1.1') { + it.because('hadoop-common required dependency for Configuration') + } + addDependency(runtimeOnly, 'commons-collections', 'commons-collections', '3.2.2') { + it.because('hadoop-common required dependency for Configuration') + } + } } diff --git a/extensions/parquet/base/build.gradle b/extensions/parquet/base/build.gradle index 82099cf0676..b6871736347 100644 --- a/extensions/parquet/base/build.gradle +++ b/extensions/parquet/base/build.gradle @@ -6,7 +6,7 @@ plugins { description 'Parquet Base: Libraries for working with Parquet files' dependencies { - api project(':ParquetHadoop') + Classpaths.inheritParquetHadoop(project) implementation project(':extensions-parquet-compression') implementation project(':Base') diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java index 41f84f3cffc..d06ffe1cf8d 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java @@ -3,7 +3,7 @@ */ package io.deephaven.parquet.base; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.parquet.compress.CompressorAdapter; import io.deephaven.util.QueryConstants; import org.apache.parquet.bytes.ByteBufferAllocator; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java index 5ff4f393fcb..b9f6a37b170 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java @@ -3,7 +3,7 @@ */ package io.deephaven.parquet.base; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.parquet.base.util.SeekableChannelsProvider; import io.deephaven.parquet.compress.CompressorAdapter; import io.deephaven.parquet.compress.DeephavenCompressorAdapterFactory; diff --git a/extensions/parquet/compression/build.gradle b/extensions/parquet/compression/build.gradle index 416f9e95591..b71871e5516 100644 --- a/extensions/parquet/compression/build.gradle +++ b/extensions/parquet/compression/build.gradle @@ -4,8 +4,11 @@ plugins { } dependencies { - api project(':ParquetHadoop'), - project(':Util') + api project(':Util') + + // TODO(deephaven-core#3148): LZ4_RAW parquet support + Classpaths.inheritParquetHadoop(project) + Classpaths.inheritParquetHadoopConfiguration(project) implementation project(':Configuration') diff --git a/extensions/parquet/table/build.gradle b/extensions/parquet/table/build.gradle index 68cef5f870b..98a40a10546 100644 --- a/extensions/parquet/table/build.gradle +++ b/extensions/parquet/table/build.gradle @@ -19,6 +19,8 @@ dependencies { api project(':engine-stringset') implementation project(':extensions-parquet-base') + 
Classpaths.inheritParquetHadoop(project) + implementation project(':engine-base') implementation project(':engine-table') implementation project(':extensions-csv') diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java index bd6cafb4ec5..cf9ac9d15e7 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java @@ -10,7 +10,7 @@ import io.deephaven.parquet.table.metadata.ColumnTypeInfo; import io.deephaven.parquet.table.metadata.TableInfo; import io.deephaven.parquet.base.ParquetFileReader; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.util.codec.SimpleByteArrayCodec; import io.deephaven.util.codec.UTF8StringAsByteArrayCodec; import org.apache.commons.lang3.mutable.MutableObject; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java index 33b81f54fa5..6bfe5ed8673 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java @@ -36,7 +36,7 @@ import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import io.deephaven.parquet.base.ParquetFileReader; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.parquet.base.util.CachedChannelProvider; import io.deephaven.util.annotations.VisibleForTesting; import org.apache.commons.lang3.mutable.MutableObject; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java index 95be66585ae..b1720cdbdce 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java @@ -15,7 +15,7 @@ import io.deephaven.parquet.table.location.ParquetTableLocationKey; import io.deephaven.parquet.table.ParquetInstructions; import io.deephaven.parquet.base.ParquetFileReader; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.util.type.TypeUtils; import org.apache.commons.lang3.mutable.MutableInt; import org.apache.parquet.format.RowGroup; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index 7b6aec79022..3ac34501aeb 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -26,7 +26,7 @@ import io.deephaven.parquet.base.ColumnChunkReader; import io.deephaven.parquet.base.ParquetFileReader; import io.deephaven.parquet.base.RowGroupReader; -import 
io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.parquet.table.*; import io.deephaven.parquet.table.metadata.CodecInfo; import io.deephaven.parquet.table.metadata.ColumnTypeInfo; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetTableLocationKey.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetTableLocationKey.java index 8e1292be3d6..1925250eb8e 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetTableLocationKey.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetTableLocationKey.java @@ -9,7 +9,7 @@ import io.deephaven.engine.table.impl.locations.local.FileTableLocationKey; import io.deephaven.parquet.table.ParquetTableWriter; import io.deephaven.parquet.base.ParquetFileReader; -import io.deephaven.parquet.base.tempfix.ParquetMetadataConverter; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.format.RowGroup; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.jetbrains.annotations.NotNull; diff --git a/settings.gradle b/settings.gradle index c1ad45d0946..3e16c0846e1 100644 --- a/settings.gradle +++ b/settings.gradle @@ -144,8 +144,6 @@ include(':Stats') include(':Container') -include(':ParquetHadoop') - include(':codegen') include(':cpp-client') From 0328e4ce19163106e5ec311e245deac8d79ba648 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Thu, 14 Sep 2023 12:04:06 -0400 Subject: [PATCH 06/15] Changes to build R client on Fedora. (#4492) --- R/rdeephaven/DESCRIPTION | 2 +- R/rdeephaven/src/Makevars | 2 +- R/rdeephaven/src/client.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index cd4ce38d6cb..79126c5f9c1 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -13,7 +13,7 @@ Description: The `rdeephaven` package provides an R API for communicating with t and bind it to a server-side variable so you can access it from any Deephaven client. Finally, you can run Python or Groovy scripts on the Deephaven server, so long as your server is equipped with that capability. 
License: Apache License (== 2.0) -Depends: R (>= 4.1.2), Rcpp (>= 1.0.10), arrow (>= 12.0.0), R6 (>= 2.5.0), dplyr (>= 1.1.0) +Depends: R (>= 3.5.3), Rcpp (>= 1.0.10), arrow (>= 12.0.0), R6 (>= 2.5.0), dplyr (>= 1.1.0) Imports: Rcpp (>= 1.0.10), R6 (>= 2.5.0), dplyr (>= 1.1.0) LinkingTo: Rcpp Suggests: testthat (>= 3.0.0) diff --git a/R/rdeephaven/src/Makevars b/R/rdeephaven/src/Makevars index f3f97454c6e..cbf3a2d43bd 100644 --- a/R/rdeephaven/src/Makevars +++ b/R/rdeephaven/src/Makevars @@ -22,7 +22,7 @@ PROTOBUF_LIB = `[ -f \${DHCPP}/local/lib/libprotobufd.so ] && echo -lprotobufd | DEPENDENCY_LIBS = \ $(PROTOBUF_LIB) \ - -larrow_flight -larrow -larrow_bundled_dependencies \ + -larrow \ `PKG_CONFIG_PATH=\${DHCPP}/local/lib/pkgconfig pkg-config --libs grpc++` # tells the compiler where to look for additional include directories diff --git a/R/rdeephaven/src/client.cpp b/R/rdeephaven/src/client.cpp index 150ff74e966..15116093c57 100644 --- a/R/rdeephaven/src/client.cpp +++ b/R/rdeephaven/src/client.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -256,7 +257,7 @@ class TableHandleWrapper { abs_sort = std::vector(cols.size(), abs_sort[0]); } - for(int i = 0; i < cols.size(); i++) { + for(std::size_t i = 0; i < cols.size(); i++) { if (!descending[i]) { sort_pairs.push_back(deephaven::client::SortPair::Ascending(cols[i], abs_sort[i])); } else { From 10e5eb4e74b0de475fe35f22166aa9ab9eb2c7e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Sep 2023 12:53:09 -0400 Subject: [PATCH 07/15] Bump actions/checkout from 3 to 4 (#4478) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build-ci.yml | 4 ++-- .github/workflows/check-ci.yml | 2 +- .github/workflows/create-docs-issues.yml | 2 +- .github/workflows/docs-ci.yml | 6 +++--- .github/workflows/label-check-ci.yml | 4 ++-- .github/workflows/nightly-check-ci.yml | 2 +- .github/workflows/nightly-image-check.yml | 2 +- .github/workflows/publish-ci.yml | 2 +- .github/workflows/quick-ci.yml | 2 +- .github/workflows/tag-base-images.yml | 2 +- .github/workflows/update-web.yml | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-ci.yml b/.github/workflows/build-ci.yml index 87306c3a68d..7699cea350c 100644 --- a/.github/workflows/build-ci.yml +++ b/.github/workflows/build-ci.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 @@ -129,7 +129,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get Semver id: semver diff --git a/.github/workflows/check-ci.yml b/.github/workflows/check-ci.yml index 9912d1cf2f6..14678aa511f 100644 --- a/.github/workflows/check-ci.yml +++ b/.github/workflows/check-ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/create-docs-issues.yml b/.github/workflows/create-docs-issues.yml index 6eee6eacaf8..8566752c2a0 100644 --- a/.github/workflows/create-docs-issues.yml +++ b/.github/workflows/create-docs-issues.yml @@ -13,7 +13,7 @@ jobs: if: github.event.pull_request.merged && contains(github.event.pull_request.labels.*.name, 'DocumentationNeeded') runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Create Issues id: create-issues diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 6d1f93061e2..c581a11037b 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -15,7 +15,7 @@ jobs: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 @@ -73,7 +73,7 @@ jobs: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 @@ -144,7 +144,7 @@ jobs: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/label-check-ci.yml b/.github/workflows/label-check-ci.yml index 4c4edf9e897..f8a80452364 100644 --- a/.github/workflows/label-check-ci.yml +++ b/.github/workflows/label-check-ci.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check Documentation Labels env: @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check Release Notes Labels env: diff --git a/.github/workflows/nightly-check-ci.yml b/.github/workflows/nightly-check-ci.yml index 3ec70af5e84..12184ffc72c 100644 --- a/.github/workflows/nightly-check-ci.yml +++ 
b/.github/workflows/nightly-check-ci.yml @@ -23,7 +23,7 @@ jobs: cancel-in-progress: true steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/nightly-image-check.yml b/.github/workflows/nightly-image-check.yml index 417c5bdb958..5b97bda947c 100644 --- a/.github/workflows/nightly-image-check.yml +++ b/.github/workflows/nightly-image-check.yml @@ -11,7 +11,7 @@ jobs: if: ${{ github.repository_owner == 'deephaven' }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/publish-ci.yml b/.github/workflows/publish-ci.yml index c4ffd03521b..debbeb3dde8 100644 --- a/.github/workflows/publish-ci.yml +++ b/.github/workflows/publish-ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/quick-ci.yml b/.github/workflows/quick-ci.yml index 0ef1d968de7..7bea9bbbb1c 100644 --- a/.github/workflows/quick-ci.yml +++ b/.github/workflows/quick-ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check gitignore rules run: .github/scripts/check-gitignore-rules.sh diff --git a/.github/workflows/tag-base-images.yml b/.github/workflows/tag-base-images.yml index 7bcd0280e33..fb982554310 100644 --- a/.github/workflows/tag-base-images.yml +++ b/.github/workflows/tag-base-images.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 11 id: setup-java-11 diff --git a/.github/workflows/update-web.yml b/.github/workflows/update-web.yml index 51728ad6dfd..c903623c6bc 100644 --- a/.github/workflows/update-web.yml +++ b/.github/workflows/update-web.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout latest - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: main - name: Setup Node From 2d9efa7347f042ca0bf6db549c084a97070f5a19 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 14 Sep 2023 13:38:58 -0500 Subject: [PATCH 08/15] Fix for crashing python parquet CI tests (#4494) --- py/server/tests/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index 417b24353d0..89d13ad0582 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -286,7 +286,7 @@ def round_trip_with_compression(self, compression_codec_name, dh_table, vector_c # Write the pandas dataframe back to parquet (via pyarraow) and read it back using deephaven.parquet to compare dataframe.to_parquet('data_from_pandas.parquet', - compression=None if compression_codec_name is 'UNCOMPRESSED' else compression_codec_name) + compression=None if compression_codec_name == 'UNCOMPRESSED' else compression_codec_name) result_table = read('data_from_pandas.parquet') self.assert_table_equals(dh_table, result_table) From deca28f1a6990d0c45cd0e755693662aa9be87f0 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Thu, 14 Sep 2023 17:45:14 -0400 Subject: [PATCH 09/15] Update base images for cpp-client build; update cpp-client's README. (#4496) * Update base images for cpp-client build; update cpp-client's README. 
* Followup to review comment: fixing typos. --- cpp-client/README.md | 39 +++++++++++++------ .../cpp-client-base/gradle.properties | 3 +- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/cpp-client/README.md b/cpp-client/README.md index 55e426d5271..87fa77a2053 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -1,10 +1,12 @@ # Building the C++ client from a base Ubuntu 20.04 or 22.04 image These instructions show how to install and run the Deephaven C++ client, its dependencies, -and its unit tests. We have tested these instructions in Ubuntu 20.04 and 22.04 with the default -C++ compiler and tool suite (cmake etc). +and its unit tests. We have tested these instructions in Ubuntu 22.04 with the default +C++ compiler and tool suite (cmake etc). We have used the instructions in the past to build +for older Ubuntu versions (20.04) and for some Fedora versions, but we don't regularly test +on them anymore so we do notguarantee they are current for those platforms. -1. Start with an Ubuntu 20.04 or 22.04 install +1. Start with an Ubuntu 22.04 install 2. Get Deephaven running by following the instructions here: https://deephaven.io/core/docs/how-to-guides/launch-build/ @@ -14,6 +16,8 @@ C++ compiler and tool suite (cmake etc). sudo apt install curl git g++ cmake make build-essential zlib1g-dev libbz2-dev libssl-dev pkg-config ``` + See the notes at the end of this document if you need the equivalent packages for Fedora. + 4. Make a new directory for the Deephaven source code and assign that directory to a temporary shell variable. This will make subsequent build steps easier. ``` @@ -33,7 +37,7 @@ C++ compiler and tool suite (cmake etc). Get the `build-dependencies.sh` script from Deephaven's base images repository at the correct version. You can download it directly from the link - https://raw.githubusercontent.com/deephaven/deephaven-base-images/166befad816acbc9dff55d2d8354d60612cd9a8a/cpp-client/build-dependencies.sh + https://raw.githubusercontent.com/deephaven/deephaven-base-images/72427ce29901bf0419dd05db8e4abf31b57253d9/cpp-client/build-dependencies.sh (this script is also used from our automated tools, to generate a docker image to support tests runs; that's why it lives in a separate repo). The script downloads, builds and installs the dependent libraries @@ -60,7 +64,7 @@ C++ compiler and tool suite (cmake etc). # If the directory already exists from a previous attempt, ensure is clean/empty mkdir -p $DHCPP cd $DHCPP - wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/166befad816acbc9dff55d2d8354d60612cd9a8a/cpp-client/build-dependencies.sh + wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/72427ce29901bf0419dd05db8e4abf31b57253d9/cpp-client/build-dependencies.sh chmod +x ./build-dependencies.sh # Maybe edit build-dependencies.sh to reflect choices of build tools and build target, if you # want anything different than defaults; defaults are tested to work, @@ -101,18 +105,25 @@ C++ compiler and tool suite (cmake etc). source $DHCPP/env.sh cd $DHSRC/deephaven-core/cpp-client/deephaven/ mkdir build && cd build - cmake -DCMAKE_INSTALL_PREFIX=${DHCPP}/local \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON .. && \ - make -j$NCPUS install + cmake \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_INSTALL_PREFIX=${DHCPP}/local \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DBUILD_SHARED_LIBS=ON \ + .. \ + && \ + make -j$NCPUS install ``` -8. 
Build and run the deephaven example which uses the installed client. - Note this assumes deephaven server is running (see step 2), +8. Run one Deephaven example which uses the installed client. + This is a smoke test that the basic functionality + is working end to end, and the build is properly configured. + Note this assumes Deephaven server is running (see step 2), and the build created on step 7 is available in the same directory. ``` cd $DHSRC/deephaven-core/cpp-client/deephaven/build/examples - make -j$NCPUS cd hello_world ./hello_world ``` @@ -159,6 +170,12 @@ Notes target using protobuf header files will not link against a `Debug` version of protobuf. To keep things simple, we suggest that you run a consistent setting for your code and all dependencies. + (2) In Fedora, the packages needed for building: + + ``` + dnf -y groupinstall 'Development Tools' + dnf -y install curl cmake gcc-c++ openssl-devel libcurl-devel + ``` + # Updating proto generated C++ stubs (intended for developers) 1. Ensure you have a local installation of the dependent libraries diff --git a/docker/registry/cpp-client-base/gradle.properties b/docker/registry/cpp-client-base/gradle.properties index 72382af8d7d..fa49da41ace 100644 --- a/docker/registry/cpp-client-base/gradle.properties +++ b/docker/registry/cpp-client-base/gradle.properties @@ -7,6 +7,7 @@ deephaven.registry.imageName=ghcr.io/deephaven/cpp-client-base:latest # It is in two different parts of the file (text and command examples). # If you have the image sha for this file, you can get the commit sha for the README using: # docker buildx imagetools inspect ghcr.io/deephaven/cpp-client-base@sha256:$IMAGESHA --format '{{ $metadata := index .Provenance.SLSA.metadata "https://mobyproject.org/buildkit@v1#metadata" }} {{ $metadata.vcs.revision }}' -deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:74034b2c26a26033258a8d0808f45d6a010300c07a9135260fcb0c1691de04dd +deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:061a3b87bd8cf7afa9bfacc3ebfbe904224cadc62b88e5232bf493c341bc4ed3 + # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base deephaven.registry.platform=linux/amd64 From f0fddebf865a1b3cac15d4ed27417a260637db2d Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 14 Sep 2023 17:33:37 -0500 Subject: [PATCH 10/15] Enforce page size limit when writing Parquet files (#4344) For string columns and columns which use codecs, we didn't enforce page size limits properly. This change fixes that.
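The core idea of the change is to keep buffering encoded values into the current page only until the accumulated byte size would pass the target page size, then start a new page. Below is a minimal, self-contained sketch of that approach with invented names; it is not the actual ParquetTableWriter code in this patch (which operates on chunks and Binary values rather than plain strings) and is included only to illustrate the paging logic.

```java
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch only: splits variable-width (string) values into pages so that
// each page stays near a target byte size. Names are illustrative, not Deephaven APIs.
public final class PageSplitSketch {

    static List<List<byte[]>> splitIntoPages(final List<String> values, final int targetPageSizeBytes) {
        final List<List<byte[]>> pages = new ArrayList<>();
        List<byte[]> currentPage = new ArrayList<>();
        int currentPageBytes = 0;
        for (final String value : values) {
            final byte[] encoded = value.getBytes(StandardCharsets.UTF_8);
            // Start a new page if adding this value would exceed the target, but always
            // keep at least one value per page so oversized values still get written.
            if (!currentPage.isEmpty() && currentPageBytes + encoded.length > targetPageSizeBytes) {
                pages.add(currentPage);
                currentPage = new ArrayList<>();
                currentPageBytes = 0;
            }
            currentPage.add(encoded);
            currentPageBytes += encoded.length;
        }
        if (!currentPage.isEmpty()) {
            pages.add(currentPage);
        }
        return pages;
    }

    public static void main(final String[] args) {
        final List<String> data = List.of("alpha", "beta", "gamma", "delta", "epsilon");
        // With a tiny 10-byte target, the five values land on several small pages.
        splitIntoPages(data, 10).forEach(page -> System.out.println("page with " + page.size() + " values"));
    }
}
```

As in the change below, the first value of a page is always accepted even when it alone exceeds the target, so a single oversized value still makes progress instead of stalling the writer.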
--- .../parquet/table/ParquetInstructions.java | 19 +- .../parquet/table/ParquetTableWriter.java | 510 +++++++++++------- .../table/ParquetTableReadWriteTest.java | 91 +++- 3 files changed, 426 insertions(+), 194 deletions(-) diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java index 217ac2e58d1..ee15e2cd399 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java @@ -4,6 +4,7 @@ package io.deephaven.parquet.table; import io.deephaven.base.verify.Require; +import io.deephaven.configuration.Configuration; import io.deephaven.engine.table.impl.ColumnToCodecMappings; import io.deephaven.hash.KeyedObjectHashMap; import io.deephaven.hash.KeyedObjectKey; @@ -81,20 +82,24 @@ public static int getDefaltMaximumDictionarySize() { return defaultMaximumDictionarySize; } - private static final int MIN_DEFAULT_PAGE_SIZE = 64 << 10; - private static volatile int defaultTargetPageSize = 1 << 20; + private static final int MIN_TARGET_PAGE_SIZE = + Configuration.getInstance().getIntegerWithDefault("Parquet.minTargetPageSize", 2 << 10); + private static final int DEFAULT_TARGET_PAGE_SIZE = + Configuration.getInstance().getIntegerWithDefault("Parquet.defaultTargetPageSize", 8 << 10); + private static volatile int defaultTargetPageSize = DEFAULT_TARGET_PAGE_SIZE; + private static final boolean DEFAULT_IS_REFRESHING = false; /** * Set the default target page size (in bytes) used to section rows of data into pages during column writing. This - * number should be no smaller than {@value #MIN_DEFAULT_PAGE_SIZE}. + * number should be no smaller than {@link #MIN_TARGET_PAGE_SIZE}. * * @param newDefaultSizeBytes the new default target page size. 
*/ public static void setDefaultTargetPageSize(final int newDefaultSizeBytes) { - if (newDefaultSizeBytes < MIN_DEFAULT_PAGE_SIZE) { + if (newDefaultSizeBytes < MIN_TARGET_PAGE_SIZE) { throw new IllegalArgumentException( - "Default target page size should be larger than " + MIN_DEFAULT_PAGE_SIZE + " bytes"); + "Default target page size should be larger than " + MIN_TARGET_PAGE_SIZE + " bytes"); } defaultTargetPageSize = newDefaultSizeBytes; } @@ -606,8 +611,8 @@ public Builder setIsLegacyParquet(final boolean isLegacyParquet) { } public Builder setTargetPageSize(final int targetPageSize) { - if (targetPageSize < MIN_DEFAULT_PAGE_SIZE) { - throw new IllegalArgumentException("Target page size should be >= " + MIN_DEFAULT_PAGE_SIZE); + if (targetPageSize < MIN_TARGET_PAGE_SIZE) { + throw new IllegalArgumentException("Target page size should be >= " + MIN_TARGET_PAGE_SIZE); } this.targetPageSize = targetPageSize; return this; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java index d19b767ca9c..6513a204d14 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java @@ -48,6 +48,7 @@ import org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.io.api.Binary; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.io.File; import java.io.IOException; @@ -83,22 +84,6 @@ enum CacheTags { DECIMAL_ARGS } - /** - * Classes that implement this interface are responsible for converting data from individual DH columns into buffers - * to be written out to the Parquet file. - * - * @param - */ - interface TransferObject extends SafeCloseable { - void propagateChunkData(); - - B getBuffer(); - - int rowCount(); - - void fetchData(RowSequence rs); - } - /** * Helper struct used to pass information about where to write the grouping files for each grouping column */ @@ -618,8 +603,6 @@ private static void encodePlain(@NotNull final ParquetInstructions w final RowSequence rs = valueRowSetIterator.getNextRowSequenceWithLength(valuePageSizeGetter.getAsInt()); transferObject.fetchData(rs); - transferObject.propagateChunkData(); - final Object bufferToWrite = transferObject.getBuffer(); if (vectorHelper != null) { final IntChunk lenChunk = vectorHelper.lengthSource.getChunk( lengthSourceContext, @@ -627,10 +610,20 @@ private static void encodePlain(@NotNull final ParquetInstructions w .asIntChunk(); lenChunk.copyToTypedBuffer(0, repeatCount, 0, lenChunk.size()); repeatCount.limit(lenChunk.size()); - columnWriter.addVectorPage(bufferToWrite, repeatCount, transferObject.rowCount(), statistics); + // TODO(deephaven-core:DH-4495): Add support for paginating vector data + // We do not paginate vector data, because our parquet reading code expects all elements from a + // single array or a vector to be on the same page (refer classes ToVectorPage and ToArrayPage + // for more details). 
+ int numValuesBuffered = transferObject.transferAllToBuffer(); + columnWriter.addVectorPage(transferObject.getBuffer(), repeatCount, numValuesBuffered, + statistics); repeatCount.clear(); } else { - columnWriter.addPage(bufferToWrite, transferObject.rowCount(), statistics); + // Split a single page into multiple if we are not able to fit all the entries in one page + do { + int numValuesBuffered = transferObject.transferOnePageToBuffer(); + columnWriter.addPage(transferObject.getBuffer(), numValuesBuffered, statistics); + } while (transferObject.hasMoreDataToBuffer()); } } } @@ -692,24 +685,23 @@ private static boolean tryEncodeDictionary( } else { if (keyCount == encodedKeys.length) { // Copy into an array of double the size with upper limit at maxKeys + if (keyCount == maxKeys) { + throw new DictionarySizeExceededException(String.format( + "Dictionary maximum keys exceeded for %s", columnDefinition.getName())); + } encodedKeys = Arrays.copyOf(encodedKeys, (int) Math.min(keyCount * 2L, maxKeys)); } final Binary encodedKey = Binary.fromString(key); - encodedKeys[keyCount] = encodedKey; - statistics.updateStats(encodedKey); - dictionaryPos = keyCount; - dictSize += encodedKeys[keyCount].length(); - keyCount++; - if ((keyCount >= maxKeys) || (dictSize >= maxDictSize)) { - // Reset the stats because we will re-encode these in PLAIN encoding. - columnWriter.resetStats(); - // We discard all the dictionary data accumulated so far and fall back to PLAIN - // encoding. We could have added a dictionary page first with data collected so far - // and then stored the remaining data via PLAIN encoding (TODO deephaven-core#946). + dictSize += encodedKey.length(); + if (dictSize > maxDictSize) { throw new DictionarySizeExceededException( String.format("Dictionary maximum size exceeded for %s", columnDefinition.getName())); } + encodedKeys[keyCount] = encodedKey; + statistics.updateStats(encodedKey); + dictionaryPos = keyCount; + keyCount++; } keyToPos.put(key, dictionaryPos); } @@ -768,6 +760,9 @@ private static boolean tryEncodeDictionary( } catch (final DictionarySizeExceededException ignored) { // Reset the stats because we will re-encode these in PLAIN encoding. columnWriter.resetStats(); + // We discard all the dictionary data accumulated so far and fall back to PLAIN encoding. We could have + // added a dictionary page first with data collected so far and then encoded the remaining data using PLAIN + // encoding (TODO deephaven-core#946). return false; } } @@ -790,7 +785,9 @@ private static int getTargetRowsPerPage(@NotNull final Class columnType, } if (columnType == String.class) { - return targetPageSize / Integer.BYTES; + // We don't know the length of strings until we read the actual data. Therefore, we take a relaxed estimate + // here and final calculation is done when writing the data. 
+ return targetPageSize; } try { @@ -813,23 +810,23 @@ private static TransferObject getDestinationBuffer( if (int.class.equals(columnType)) { int[] array = new int[maxValuesPerPage]; WritableIntChunk chunk = WritableIntChunk.writableChunkWrap(array); - return new PrimitiveTransfer<>(columnSource, chunk, IntBuffer.wrap(array), maxValuesPerPage); + return new IntTransfer(columnSource, chunk, IntBuffer.wrap(array), maxValuesPerPage); } else if (long.class.equals(columnType)) { long[] array = new long[maxValuesPerPage]; WritableLongChunk chunk = WritableLongChunk.writableChunkWrap(array); - return new PrimitiveTransfer<>(columnSource, chunk, LongBuffer.wrap(array), maxValuesPerPage); + return new LongTransfer(columnSource, chunk, LongBuffer.wrap(array), maxValuesPerPage); } else if (double.class.equals(columnType)) { double[] array = new double[maxValuesPerPage]; WritableDoubleChunk chunk = WritableDoubleChunk.writableChunkWrap(array); - return new PrimitiveTransfer<>(columnSource, chunk, DoubleBuffer.wrap(array), maxValuesPerPage); + return new DoubleTransfer(columnSource, chunk, DoubleBuffer.wrap(array), maxValuesPerPage); } else if (float.class.equals(columnType)) { float[] array = new float[maxValuesPerPage]; WritableFloatChunk chunk = WritableFloatChunk.writableChunkWrap(array); - return new PrimitiveTransfer<>(columnSource, chunk, FloatBuffer.wrap(array), maxValuesPerPage); + return new FloatTransfer(columnSource, chunk, FloatBuffer.wrap(array), maxValuesPerPage); } else if (Boolean.class.equals(columnType)) { byte[] array = new byte[maxValuesPerPage]; WritableByteChunk chunk = WritableByteChunk.writableChunkWrap(array); - return new PrimitiveTransfer<>(columnSource, chunk, ByteBuffer.wrap(array), maxValuesPerPage); + return new BooleanTransfer(columnSource, chunk, ByteBuffer.wrap(array), maxValuesPerPage); } else if (short.class.equals(columnType)) { return new ShortTransfer(columnSource, maxValuesPerPage); } else if (char.class.equals(columnType)) { @@ -837,7 +834,7 @@ private static TransferObject getDestinationBuffer( } else if (byte.class.equals(columnType)) { return new ByteTransfer(columnSource, maxValuesPerPage); } else if (String.class.equals(columnType)) { - return new StringTransfer(columnSource, maxValuesPerPage); + return new StringTransfer(columnSource, maxValuesPerPage, instructions.getTargetPageSize()); } // If there's an explicit codec, we should disregard the defaults for these CodecLookup#lookup() will properly @@ -850,24 +847,74 @@ private static TransferObject getDestinationBuffer( computedCache, columnDefinition.getName(), tableRowSet, () -> bigDecimalColumnSource); final ObjectCodec codec = new BigDecimalParquetBytesCodec( precisionAndScale.precision, precisionAndScale.scale, -1); - return new CodecTransfer<>(bigDecimalColumnSource, codec, maxValuesPerPage); + return new CodecTransfer<>(bigDecimalColumnSource, codec, maxValuesPerPage, + instructions.getTargetPageSize()); } else if (BigInteger.class.equals(columnType)) { // noinspection unchecked return new CodecTransfer<>((ColumnSource) columnSource, new BigIntegerParquetBytesCodec(-1), - maxValuesPerPage); + maxValuesPerPage, instructions.getTargetPageSize()); } } final ObjectCodec codec = CodecLookup.lookup(columnDefinition, instructions); - return new CodecTransfer<>(columnSource, codec, maxValuesPerPage); + return new CodecTransfer<>(columnSource, codec, maxValuesPerPage, instructions.getTargetPageSize()); } - static class PrimitiveTransfer, B extends Buffer> implements TransferObject { + /** + * Classes that 
implement this interface are responsible for converting data from individual DH columns into buffers + * to be written out to the Parquet file. + * + * @param + */ + interface TransferObject extends SafeCloseable { + /** + * Fetch all data corresponding to the provided row sequence. + */ + void fetchData(RowSequence rs); + + /** + * Transfer all the fetched data into an internal buffer, which can then be accessed using + * {@link TransferObject#getBuffer()}. This method should only be called after + * {@link TransferObject#fetchData(RowSequence)}}. This method should be used when writing unpaginated data, and + * should not be interleaved with calls to {@link TransferObject#transferOnePageToBuffer()}. Note that this + * method can lead to out-of-memory error for variable-width types (e.g. strings) if the fetched data is too big + * to fit in the available heap. + * + * @return The number of fetched data entries copied into the buffer. + */ + int transferAllToBuffer(); + + /** + * Transfer one page size worth of fetched data into an internal buffer, which can then be accessed using + * {@link TransferObject#getBuffer()}. The target page size is passed in the constructor. The method should only + * be called after {@link TransferObject#fetchData(RowSequence)}}. This method should be used when writing + * paginated data, and should not be interleaved with calls to {@link TransferObject#transferAllToBuffer()}. + * + * @return The number of fetched data entries copied into the buffer. This can be different from the total + * number of entries fetched in case of variable-width types (e.g. strings) when used with additional + * page size limits while copying. + */ + int transferOnePageToBuffer(); + + /** + * Check if there is any fetched data which can be copied into buffer + */ + boolean hasMoreDataToBuffer(); + + B getBuffer(); + } + + private static class PrimitiveTransfer, B extends Buffer> + implements TransferObject { private final C chunk; private final B buffer; private final ColumnSource columnSource; private final ChunkSource.FillContext context; + private boolean hasMoreDataToBuffer; + /** + * {@code chunk} and {@code buffer} must be backed by the same array. Assumption verified in the child classes. + */ PrimitiveTransfer(ColumnSource columnSource, C chunk, B buffer, int targetSize) { this.columnSource = columnSource; this.chunk = chunk; @@ -876,25 +923,36 @@ static class PrimitiveTransfer, B extends Buffer } @Override - public void propagateChunkData() { - buffer.position(0); - buffer.limit(chunk.size()); + public void fetchData(RowSequence rs) { + columnSource.fillChunk(context, chunk, rs); + hasMoreDataToBuffer = true; } @Override - public B getBuffer() { - return buffer; + public int transferAllToBuffer() { + return transferOnePageToBuffer(); } @Override - public int rowCount() { + public int transferOnePageToBuffer() { + if (!hasMoreDataToBuffer()) { + return 0; + } + // Assuming that buffer and chunk are backed by the same array. 
+ buffer.position(0); + buffer.limit(chunk.size()); + hasMoreDataToBuffer = false; return chunk.size(); } + @Override + public boolean hasMoreDataToBuffer() { + return hasMoreDataToBuffer; + } @Override - public void fetchData(RowSequence rs) { - columnSource.fillChunk(context, chunk, rs); + public B getBuffer() { + return buffer; } @Override @@ -903,225 +961,309 @@ public void close() { } } - static class ShortTransfer implements TransferObject { - - private ShortChunk chunk; - private final IntBuffer buffer; - private final ColumnSource columnSource; - private final ChunkSource.GetContext context; - - ShortTransfer(ColumnSource columnSource, int targetSize) { - - this.columnSource = columnSource; - this.buffer = IntBuffer.allocate(targetSize); - context = columnSource.makeGetContext(targetSize); - } - - - @Override - public void propagateChunkData() { - buffer.clear(); - for (int i = 0; i < chunk.size(); i++) { - buffer.put(chunk.get(i)); - } - buffer.flip(); + private static final class IntTransfer extends PrimitiveTransfer, IntBuffer> { + /** + * Check docs in {@link PrimitiveTransfer} for more details. + */ + IntTransfer(ColumnSource columnSource, WritableIntChunk chunk, IntBuffer buffer, int targetSize) { + super(columnSource, chunk, buffer, targetSize); + Assert.eq(chunk.array(), "chunk.array()", buffer.array(), "buffer.array()"); } + } - @Override - public IntBuffer getBuffer() { - return buffer; + private static final class LongTransfer extends PrimitiveTransfer, LongBuffer> { + /** + * Check docs in {@link PrimitiveTransfer} for more details. + */ + LongTransfer(ColumnSource columnSource, WritableLongChunk chunk, LongBuffer buffer, int targetSize) { + super(columnSource, chunk, buffer, targetSize); + Assert.eq(chunk.array(), "chunk.array()", buffer.array(), "buffer.array()"); } + } - @Override - public int rowCount() { - return chunk.size(); + private static final class DoubleTransfer extends PrimitiveTransfer, DoubleBuffer> { + /** + * Check docs in {@link PrimitiveTransfer} for more details. + */ + DoubleTransfer(ColumnSource columnSource, WritableDoubleChunk chunk, DoubleBuffer buffer, + int targetSize) { + super(columnSource, chunk, buffer, targetSize); + Assert.eq(chunk.array(), "chunk.array()", buffer.array(), "buffer.array()"); } + } - @Override - public void fetchData(RowSequence rs) { - // noinspection unchecked - chunk = (ShortChunk) columnSource.getChunk(context, rs); + private static final class FloatTransfer extends PrimitiveTransfer, FloatBuffer> { + /** + * Check docs in {@link PrimitiveTransfer} for more details. + */ + FloatTransfer(ColumnSource columnSource, WritableFloatChunk chunk, FloatBuffer buffer, + int targetSize) { + super(columnSource, chunk, buffer, targetSize); + Assert.eq(chunk.array(), "chunk.array()", buffer.array(), "buffer.array()"); } + } - @Override - public void close() { - context.close(); + private static final class BooleanTransfer extends PrimitiveTransfer, ByteBuffer> { + /** + * Check docs in {@link PrimitiveTransfer} for more details. + */ + BooleanTransfer(ColumnSource columnSource, WritableByteChunk chunk, ByteBuffer buffer, + int targetSize) { + super(columnSource, chunk, buffer, targetSize); + Assert.eq(chunk.array(), "chunk.array()", buffer.array(), "buffer.array()"); } } - static class CharTransfer implements TransferObject { - + /** + * Used as a base class of transfer objects for types like shorts which are castable to Ints without losing any + * precision and are therefore stored using IntBuffer. 
+ */ + private abstract static class IntCastablePrimitiveTransfer> + implements TransferObject { + protected T chunk; + protected final IntBuffer buffer; private final ColumnSource columnSource; private final ChunkSource.GetContext context; - private CharChunk chunk; - private final IntBuffer buffer; - CharTransfer(ColumnSource columnSource, int targetSize) { + IntCastablePrimitiveTransfer(ColumnSource columnSource, int targetSize) { this.columnSource = columnSource; this.buffer = IntBuffer.allocate(targetSize); - context = this.columnSource.makeGetContext(targetSize); + context = columnSource.makeGetContext(targetSize); } @Override - public void propagateChunkData() { - buffer.clear(); - for (int i = 0; i < chunk.size(); i++) { - buffer.put(chunk.get(i)); - } - buffer.flip(); + final public void fetchData(RowSequence rs) { + // noinspection unchecked + chunk = (T) columnSource.getChunk(context, rs); } @Override - public IntBuffer getBuffer() { - return buffer; + final public int transferAllToBuffer() { + return transferOnePageToBuffer(); } @Override - public int rowCount() { - return chunk.size(); + final public int transferOnePageToBuffer() { + if (!hasMoreDataToBuffer()) { + return 0; + } + buffer.clear(); + // Assuming that all the fetched data will fit in one page. This is because page count is accurately + // calculated for non variable-width types. Check ParquetTableWriter.getTargetRowsPerPage for more details. + copyAllFromChunkToBuffer(); + buffer.flip(); + int ret = chunk.size(); + chunk = null; + return ret; } + /** + * Helper method to copy all data from {@code this.chunk} to {@code this.buffer}. The buffer should be cleared + * before calling this method and is positioned for a {@link Buffer#flip()} after the call. + */ + abstract void copyAllFromChunkToBuffer(); + @Override - public void fetchData(RowSequence rs) { - // noinspection unchecked - chunk = (CharChunk) columnSource.getChunk(context, rs); + final public boolean hasMoreDataToBuffer() { + return (chunk != null); } @Override - public void close() { + final public IntBuffer getBuffer() { + return buffer; + } + + @Override + final public void close() { context.close(); } } - static class ByteTransfer implements TransferObject { - - private ByteChunk chunk; - private final IntBuffer buffer; - private final ColumnSource columnSource; - private final ChunkSource.GetContext context; - - ByteTransfer(ColumnSource columnSource, int targetSize) { - this.columnSource = columnSource; - this.buffer = IntBuffer.allocate(targetSize); - context = this.columnSource.makeGetContext(targetSize); + private static final class ShortTransfer extends IntCastablePrimitiveTransfer> { + ShortTransfer(ColumnSource columnSource, int targetSize) { + super(columnSource, targetSize); } @Override - public void propagateChunkData() { - buffer.clear(); - for (int i = 0; i < chunk.size(); i++) { - buffer.put(chunk.get(i)); + void copyAllFromChunkToBuffer() { + for (int chunkIdx = 0; chunkIdx < chunk.size(); chunkIdx++) { + buffer.put(chunk.get(chunkIdx)); } - buffer.flip(); } + } - @Override - public IntBuffer getBuffer() { - return buffer; + private static final class CharTransfer extends IntCastablePrimitiveTransfer> { + CharTransfer(ColumnSource columnSource, int targetSize) { + super(columnSource, targetSize); } @Override - public int rowCount() { - return chunk.size(); + void copyAllFromChunkToBuffer() { + for (int chunkIdx = 0; chunkIdx < chunk.size(); chunkIdx++) { + buffer.put(chunk.get(chunkIdx)); + } } + } - @Override - public void 
fetchData(RowSequence rs) { - // noinspection unchecked - chunk = (ByteChunk) columnSource.getChunk(context, rs); + private static final class ByteTransfer extends IntCastablePrimitiveTransfer> { + ByteTransfer(ColumnSource columnSource, int targetSize) { + super(columnSource, targetSize); } @Override - public void close() { - context.close(); + void copyAllFromChunkToBuffer() { + for (int chunkIdx = 0; chunkIdx < chunk.size(); chunkIdx++) { + buffer.put(chunk.get(chunkIdx)); + } } } - static class StringTransfer implements TransferObject { - + /** + * Used as a base class of transfer objects for types like strings or big integers that need specialized encoding, + * and thus we need to enforce page size limits while writing. + */ + private abstract static class EncodedTransfer implements TransferObject { private final ChunkSource.GetContext context; - private ObjectChunk chunk; - private final Binary[] buffer; + private ObjectChunk chunk; + private Binary[] buffer; + /** + * Number of objects buffered + */ + private int bufferedDataCount; + private final ColumnSource columnSource; - StringTransfer(ColumnSource columnSource, int targetSize) { + /** + * The target size of data to be stored in a single page. This is not a strictly enforced "maximum" page size. + */ + private final int targetPageSize; + + /** + * Index of next object from the chunk to be buffered + */ + private int currentChunkIdx; + + /** + * Encoded value which takes us beyond the page size limit. We cache it to avoid re-encoding. + */ + @Nullable + private Binary cachedEncodedValue; + + EncodedTransfer(ColumnSource columnSource, int maxValuesPerPage, int targetPageSize) { this.columnSource = columnSource; - this.buffer = new Binary[targetSize]; - context = this.columnSource.makeGetContext(targetSize); + this.buffer = new Binary[maxValuesPerPage]; + context = this.columnSource.makeGetContext(maxValuesPerPage); + this.targetPageSize = targetPageSize; + bufferedDataCount = 0; + cachedEncodedValue = null; } @Override - public void propagateChunkData() { - for (int i = 0; i < chunk.size(); i++) { - String value = chunk.get(i); - buffer[i] = value == null ? null : Binary.fromString(value); + final public void fetchData(RowSequence rs) { + // noinspection unchecked + chunk = (ObjectChunk) columnSource.getChunk(context, rs); + currentChunkIdx = 0; + bufferedDataCount = 0; + } + + @Override + final public int transferAllToBuffer() { + // Assuming this method is called after fetchData() and that the buffer is empty. + Assert.neqNull(chunk, "chunk"); + Assert.eqZero(currentChunkIdx, "currentChunkIdx"); + Assert.eqZero(bufferedDataCount, "bufferedDataCount"); + int chunkSize = chunk.size(); + while (currentChunkIdx < chunkSize) { + final T value = chunk.get(currentChunkIdx++); + buffer[bufferedDataCount++] = value == null ? 
null : encodeToBinary(value); } + chunk = null; + return bufferedDataCount; } @Override - public Binary[] getBuffer() { - return buffer; + final public int transferOnePageToBuffer() { + if (!hasMoreDataToBuffer()) { + return 0; + } + if (bufferedDataCount != 0) { + // Clear any old buffered data + Arrays.fill(buffer, 0, bufferedDataCount, null); + bufferedDataCount = 0; + } + int bufferedDataSize = 0; + int chunkSize = chunk.size(); + while (currentChunkIdx < chunkSize) { + final T value = chunk.get(currentChunkIdx); + if (value == null) { + currentChunkIdx++; + buffer[bufferedDataCount++] = null; + continue; + } + Binary binaryEncodedValue; + if (cachedEncodedValue == null) { + binaryEncodedValue = encodeToBinary(value); + } else { + binaryEncodedValue = cachedEncodedValue; + cachedEncodedValue = null; + } + + // Always buffer the first element, even if it exceeds the target page size. + if (bufferedDataSize != 0 && bufferedDataSize + binaryEncodedValue.length() > targetPageSize) { + cachedEncodedValue = binaryEncodedValue; + break; + } + currentChunkIdx++; + buffer[bufferedDataCount++] = binaryEncodedValue; + bufferedDataSize += binaryEncodedValue.length(); + } + if (currentChunkIdx == chunk.size()) { + chunk = null; + } + return bufferedDataCount; } + abstract Binary encodeToBinary(T value); + @Override - public int rowCount() { - return chunk.size(); + final public boolean hasMoreDataToBuffer() { + return ((chunk != null) && (currentChunkIdx < chunk.size())); } @Override - public void fetchData(RowSequence rs) { - // noinspection unchecked - chunk = (ObjectChunk) columnSource.getChunk(context, rs); + final public Binary[] getBuffer() { + return buffer; } @Override - public void close() { + final public void close() { context.close(); } } - static class CodecTransfer implements TransferObject { - - private final ChunkSource.GetContext context; - private final ObjectCodec codec; - private ObjectChunk chunk; - private final Binary[] buffer; - private final ColumnSource columnSource; - - CodecTransfer(ColumnSource columnSource, ObjectCodec codec, int targetSize) { - this.columnSource = columnSource; - this.buffer = new Binary[targetSize]; - context = this.columnSource.makeGetContext(targetSize); - this.codec = codec; - } - - @Override - public void propagateChunkData() { - for (int i = 0; i < chunk.size(); i++) { - T value = chunk.get(i); - buffer[i] = value == null ? 
null : Binary.fromConstantByteArray(codec.encode(value)); - } + private static final class StringTransfer extends EncodedTransfer { + StringTransfer(ColumnSource columnSource, int maxValuesPerPage, int targetPageSize) { + super(columnSource, maxValuesPerPage, targetPageSize); } @Override - public Binary[] getBuffer() { - return buffer; + Binary encodeToBinary(String value) { + return Binary.fromString(value); } + } - @Override - public int rowCount() { - return chunk.size(); - } + private static final class CodecTransfer extends EncodedTransfer { + private final ObjectCodec codec; - @Override - public void fetchData(RowSequence rs) { - // noinspection unchecked - chunk = (ObjectChunk) columnSource.getChunk(context, rs); + CodecTransfer(ColumnSource columnSource, ObjectCodec codec, int maxValuesPerPage, + int targetPageSize) { + super(columnSource, maxValuesPerPage, targetPageSize); + this.codec = codec; } @Override - public void close() { - context.close(); + Binary encodeToBinary(T value) { + return Binary.fromConstantByteArray(codec.encode(value)); } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 0ff7a7669f9..24a878da4ec 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -6,6 +6,7 @@ import io.deephaven.UncheckedDeephavenException; import io.deephaven.api.Selectable; import io.deephaven.base.FileUtils; +import io.deephaven.configuration.Configuration; import io.deephaven.datastructures.util.CollectionUtil; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.primitive.function.ByteConsumer; @@ -31,15 +32,17 @@ import io.deephaven.engine.util.TableTools; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.test.types.OutOfBandTest; +import io.deephaven.util.codec.SimpleByteArrayCodec; +import junit.framework.TestCase; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; import io.deephaven.time.DateTimeUtils; import io.deephaven.util.compare.DoubleComparisons; import io.deephaven.util.compare.FloatComparisons; import io.deephaven.vector.*; -import junit.framework.TestCase; -import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.commons.lang3.mutable.*; import org.apache.parquet.column.statistics.Statistics; -import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.io.api.Binary; import org.junit.After; import org.junit.Before; @@ -847,6 +850,88 @@ public void dictionaryEncodingTest() { assertTrue(thirdColumnMetadata.contains("someIntColumn") && !thirdColumnMetadata.contains("RLE_DICTIONARY")); } + @Test + public void overflowingStringsTest() { + // Test the behavior of writing parquet files if entries exceed the page size limit + final int pageSize = 2 << 10; + final char[] data = new char[pageSize / 4]; + String someString = new String(data); + Collection columns = new ArrayList<>(Arrays.asList( + "someStringColumn = `" + someString + "` + i%10")); + final long numRows = 10; + ColumnChunkMetaData columnMetadata = overflowingStringsTestHelper(columns, numRows, pageSize); + String metadataStr = columnMetadata.toString(); + 
assertTrue(metadataStr.contains("someStringColumn") && metadataStr.contains("PLAIN") + && !metadataStr.contains("RLE_DICTIONARY")); + + // We exceed page size on hitting 4 rows, and we have 10 total rows. + // Therefore, we should have total 4 pages containing 3, 3, 3, 1 rows respectively. + assertEquals(columnMetadata.getEncodingStats().getNumDataPagesEncodedAs(Encoding.PLAIN), 4); + + final char[] veryLongData = new char[pageSize]; + someString = new String(veryLongData); + columns = new ArrayList<>( + Arrays.asList("someStringColumn = ii % 2 == 0 ? Long.toString(ii) : `" + someString + "` + ii")); + columnMetadata = overflowingStringsTestHelper(columns, numRows, pageSize); + // We will have 10 pages each containing 1 row. + assertEquals(columnMetadata.getEncodingStats().getNumDataPagesEncodedAs(Encoding.PLAIN), 10); + + // Table with null rows + columns = new ArrayList<>(Arrays.asList("someStringColumn = ii % 2 == 0 ? null : `" + someString + "` + ii")); + columnMetadata = overflowingStringsTestHelper(columns, numRows, pageSize); + // We will have 5 pages containing 3, 2, 2, 2, 1 rows. + assertEquals(columnMetadata.getEncodingStats().getNumDataPagesEncodedAs(Encoding.PLAIN), 5); + } + + private static ColumnChunkMetaData overflowingStringsTestHelper(final Collection columns, + final long numRows, final int pageSize) { + final ParquetInstructions writeInstructions = new ParquetInstructions.Builder() + .setTargetPageSize(pageSize) // Force a small page size to cause splitting across pages + .setMaximumDictionarySize(50) // Force "someStringColumn" to use non-dictionary encoding + .build(); + Table stringTable = TableTools.emptyTable(numRows).select(Selectable.from(columns)); + final File dest = new File(rootFile + File.separator + "overflowingStringsTest.parquet"); + ParquetTools.writeTable(stringTable, dest, writeInstructions); + Table fromDisk = ParquetTools.readTable(dest).select(); + assertTableEquals(stringTable, fromDisk); + + ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); + ColumnChunkMetaData columnMetadata = metadata.getBlocks().get(0).getColumns().get(0); + return columnMetadata; + } + + @Test + public void overflowingCodecsTest() { + final int pageSize = 2 << 10; + final ParquetInstructions writeInstructions = new ParquetInstructions.Builder() + .setTargetPageSize(pageSize) // Force a small page size to cause splitting across pages + .addColumnCodec("VariableWidthByteArrayColumn", SimpleByteArrayCodec.class.getName()) + .build(); + + final ColumnDefinition columnDefinition = + ColumnDefinition.fromGenericType("VariableWidthByteArrayColumn", byte[].class, byte.class); + final TableDefinition tableDefinition = TableDefinition.of(columnDefinition); + final byte[] byteArray = new byte[pageSize / 2]; + final Table table = TableTools.newTable(tableDefinition, + TableTools.col("VariableWidthByteArrayColumn", byteArray, byteArray, byteArray)); + + final File dest = new File(rootFile + File.separator + "overflowingCodecsTest.parquet"); + ParquetTools.writeTable(table, dest, writeInstructions); + Table fromDisk = ParquetTools.readTable(dest).select(); + assertTableEquals(table, fromDisk); + + final ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); + final String metadataStr = metadata.getFileMetaData().getKeyValueMetaData().get("deephaven"); + assertTrue( + metadataStr.contains("VariableWidthByteArrayColumn") && metadataStr.contains("SimpleByteArrayCodec")); + final ColumnChunkMetaData columnMetadata = 
metadata.getBlocks().get(0).getColumns().get(0); + final String columnMetadataStr = columnMetadata.toString(); + assertTrue(columnMetadataStr.contains("VariableWidthByteArrayColumn") && columnMetadataStr.contains("PLAIN")); + // Each byte array is of half the page size. So we exceed page size on hitting 3 byteArrays. + // Therefore, we should have total 2 pages containing 2, 1 rows respectively. + assertEquals(columnMetadata.getEncodingStats().getNumDataPagesEncodedAs(Encoding.PLAIN), 2); + } + @Test public void readWriteStatisticsTest() { // Test simple structured table. From f2a8d7253a50ea165c68110352caa4465c8c7d68 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 08:58:20 -0400 Subject: [PATCH 11/15] Update web version 0.48.0 (#4484) Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.48.0 Co-authored-by: deephaven-internal --- web/client-ui/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile index 114f9eb1aff..3c91c43cc55 100644 --- a/web/client-ui/Dockerfile +++ b/web/client-ui/Dockerfile @@ -2,9 +2,9 @@ FROM deephaven/node:local-build WORKDIR /usr/src/app # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages -ARG WEB_VERSION=0.47.0 -ARG GRID_VERSION=0.47.0 -ARG CHART_VERSION=0.47.0 +ARG WEB_VERSION=0.48.0 +ARG GRID_VERSION=0.48.0 +ARG CHART_VERSION=0.48.0 # Pull in the published code-studio package from npmjs and extract is RUN set -eux; \ From 55c695d07718288a302903e2e03acc8a108a24cb Mon Sep 17 00:00:00 2001 From: "Charles P. Wright" Date: Fri, 15 Sep 2023 15:38:39 -0400 Subject: [PATCH 12/15] Add Adoptium to profiled vendors list. (#4498) * Add Adoptium to profiled vendors list. * Spotless apply. --- .../java/io/deephaven/util/profiling/ThreadProfiler.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Util/src/main/java/io/deephaven/util/profiling/ThreadProfiler.java b/Util/src/main/java/io/deephaven/util/profiling/ThreadProfiler.java index d398d122907..80eefa0d029 100644 --- a/Util/src/main/java/io/deephaven/util/profiling/ThreadProfiler.java +++ b/Util/src/main/java/io/deephaven/util/profiling/ThreadProfiler.java @@ -68,7 +68,10 @@ static ThreadProfiler make() { return NullThreadProfiler.INSTANCE; } final String vendor = System.getProperty("java.vendor"); - if (vendor.contains("Sun") || vendor.contains("Oracle") || vendor.contains("OpenJDK")) { + if (vendor.contains("Sun") + || vendor.contains("Oracle") + || vendor.contains("OpenJDK") + || vendor.contains("Adoptium")) { return new SunThreadMXBeanThreadProfiler(); } return new BaselineThreadMXBeanThreadProfiler(); From c8c9676f9d8443a6dadc76d32e86791e75057203 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Fri, 15 Sep 2023 19:28:19 -0400 Subject: [PATCH 13/15] Use new C++ base image that removes local from the directory layout. (#4500) * Use new C++ base image that removes local from the directory layout. * Followup to review comment: build-cpp-protos.sh. * Followup to review comment. 
--- R/rdeephaven/src/Makevars | 11 +++++------ cpp-client/README.md | 19 ++++++++++++++++--- .../cpp-client-base/gradle.properties | 2 +- .../src/main/proto/build-cpp-protos.sh | 11 ++++++++--- py/client-ticking/README.md | 2 +- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R/rdeephaven/src/Makevars b/R/rdeephaven/src/Makevars index cbf3a2d43bd..8bf86a45734 100644 --- a/R/rdeephaven/src/Makevars +++ b/R/rdeephaven/src/Makevars @@ -7,7 +7,7 @@ CXX_STD = CXX17 # and its dependent libraries was built, as per the instructions in # https://github.com/deephaven/deephaven-core/blob/main/cpp-client/README.md # -DEPENDENCY_DIRS = -L$(DHCPP)/local/lib +DEPENDENCY_DIRS = -L$(DHCPP)/lib # @@ -18,16 +18,16 @@ DEPENDENCY_DIRS = -L$(DHCPP)/local/lib # The line below tries to automatically guess whether to use # `-lprotbufd` or `-lprotobuf` depending on which file is available. # -PROTOBUF_LIB = `[ -f \${DHCPP}/local/lib/libprotobufd.so ] && echo -lprotobufd || echo -lprotobuf` +PROTOBUF_LIB = `[ -f \${DHCPP}/lib/libprotobufd.so ] && echo -lprotobufd || echo -lprotobuf` DEPENDENCY_LIBS = \ $(PROTOBUF_LIB) \ -larrow \ - `PKG_CONFIG_PATH=\${DHCPP}/local/lib/pkgconfig pkg-config --libs grpc++` + `PKG_CONFIG_PATH=\${DHCPP}/lib/pkgconfig pkg-config --libs grpc++` # tells the compiler where to look for additional include directories PKG_CXXFLAGS = \ - -I$(DHCPP)/local/include \ + -I$(DHCPP)/include \ -I/usr/share/R/include \ -I/usr/local/lib/R/site-library/Rcpp/include @@ -47,9 +47,8 @@ PKG_CXXFLAGS = \ PKG_LIBS = \ $(EXTRA_LD_FLAGS) \ -L/usr/lib/R/lib -lR \ - -L$(DHCPP)/local/lib \ - -ldhclient -ldhcore \ $(DEPENDENCY_DIRS) \ + -ldhclient -ldhcore \ $(DEPENDENCY_LIBS) \ # all src directory c++ source files diff --git a/cpp-client/README.md b/cpp-client/README.md index 87fa77a2053..a9fbcb4498a 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -37,7 +37,7 @@ on them anymore so we do notguarantee they are current for those platforms. Get the `build-dependencies.sh` script from Deephaven's base images repository at the correct version. You can download it directly from the link - https://raw.githubusercontent.com/deephaven/deephaven-base-images/72427ce29901bf0419dd05db8e4abf31b57253d9/cpp-client/build-dependencies.sh + https://raw.githubusercontent.com/deephaven/deephaven-base-images/4eab90690888fbd5abfcbaf5dbacc750e90d9c2f/cpp-client/build-dependencies.sh (this script is also used from our automated tools, to generate a docker image to support tests runs; that's why it lives in a separate repo). The script downloads, builds and installs the dependent libraries @@ -64,7 +64,7 @@ on them anymore so we do notguarantee they are current for those platforms. # If the directory already exists from a previous attempt, ensure is clean/empty mkdir -p $DHCPP cd $DHCPP - wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/72427ce29901bf0419dd05db8e4abf31b57253d9/cpp-client/build-dependencies.sh + wget https://raw.githubusercontent.com/deephaven/deephaven-base-images/4eab90690888fbd5abfcbaf5dbacc750e90d9c2f/cpp-client/build-dependencies.sh chmod +x ./build-dependencies.sh # Maybe edit build-dependencies.sh to reflect choices of build tools and build target, if you # want anything different than defaults; defaults are tested to work, @@ -108,7 +108,7 @@ on them anymore so we do notguarantee they are current for those platforms. 
cmake \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_INSTALL_PREFIX=${DHCPP}/local \ + -DCMAKE_INSTALL_PREFIX=${DHCPP} \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DBUILD_SHARED_LIBS=ON \ .. \ @@ -116,6 +116,19 @@ on them anymore so we do notguarantee they are current for those platforms. make -j$NCPUS install ``` + If you need `make` to generate detailed output of the commands it is running + (which may be helpful for debugging the paths being used etc), + you can set the environment variable `VERBOSE=1`. + This is true in general of cmake-generated Makefiles. + It is useful to keep the output of make for later reference + in case you need to be sure of the exact compiler flags + that were used to compile the library and its objects. + You can tweak the last command above as + + ``` + VERBOSE=1 make -j$NCPUS install 2>&1 | tee make-install.log + ``` + 8. Run one Deephaven example which uses the installed client. This is a smoke test that the basic functionality is working end to end, and the build is properly configured. diff --git a/docker/registry/cpp-client-base/gradle.properties b/docker/registry/cpp-client-base/gradle.properties index fa49da41ace..adce2bbcaf4 100644 --- a/docker/registry/cpp-client-base/gradle.properties +++ b/docker/registry/cpp-client-base/gradle.properties @@ -7,7 +7,7 @@ deephaven.registry.imageName=ghcr.io/deephaven/cpp-client-base:latest # It is in two different parts of the file (text and command examples). # If you have the image sha for this file, you can get the commit sha for the README using: # docker buildx imagetools inspect ghcr.io/deephaven/cpp-client-base@sha256:$IMAGESHA --format '{{ $metadata := index .Provenance.SLSA.metadata "https://mobyproject.org/buildkit@v1#metadata" }} {{ $metadata.vcs.revision }}' -deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:061a3b87bd8cf7afa9bfacc3ebfbe904224cadc62b88e5232bf493c341bc4ed3 +deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:a31ce4b5c0864a35cc8f358dce50950fd3f10413595e773803b9939498b4eb91 # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base deephaven.registry.platform=linux/amd64 diff --git a/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh b/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh index fb60f514fe8..9b36346a61d 100755 --- a/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh +++ b/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh @@ -2,14 +2,19 @@ set -euxo pipefail -: ${DHCPP_LOCAL:=$HOME/dhcpp/local} +if [ -z "$PROTOC_BIN" ] && [ -z "$DHCPP" ]; then + echo "$0: At least one of the environment variables 'PROTOC_BIN' and 'DHCPP' must be defined, aborting." 1>&2 + exit 1 +fi + +: ${PROTOC_BIN:=$DHCPP/bin} : ${CPP_PROTO_BUILD_DIR:=build} mkdir -p "${CPP_PROTO_BUILD_DIR}" -$DHCPP_LOCAL/protobuf/bin/protoc \ +$DHCPP/bin/protoc \ $(find . 
-name '*.proto') \ --cpp_out=${CPP_PROTO_BUILD_DIR} \ --grpc_out=${CPP_PROTO_BUILD_DIR} \ - --plugin=protoc-gen-grpc=$DHCPP_LOCAL/bin/grpc_cpp_plugin + --plugin=protoc-gen-grpc=${PROTOC_BIN}/grpc_cpp_plugin mv -f ${CPP_PROTO_BUILD_DIR}/deephaven/proto/*.{h,cc} \ ../../../../../cpp-client/deephaven/dhclient/proto/deephaven/proto diff --git a/py/client-ticking/README.md b/py/client-ticking/README.md index 876a6a35cef..8c0aa4b5a1a 100644 --- a/py/client-ticking/README.md +++ b/py/client-ticking/README.md @@ -51,7 +51,7 @@ cd ${DHROOT}/py/client-ticking ``` # Ensure the DHCPP environment variable is set per the instructions above rm -rf build # Ensure we clean the remnants of any pre-existing build. -CFLAGS="-I${DHCPP}/local/include" LDFLAGS="-L${DHCPP}/local/lib" python setup.py build_ext -i +CFLAGS="-I${DHCPP}/include" LDFLAGS="-L${DHCPP}/lib" python setup.py build_ext -i ``` ### Install pydeephaven-ticking From 2bafd80b2de8da06bf547e8f4838f44d55db5d08 Mon Sep 17 00:00:00 2001 From: rbasralian Date: Fri, 15 Sep 2023 19:40:57 -0400 Subject: [PATCH 14/15] Fix for inconsistent tests (#4483) * Fix for inconsistent test * Extra logging in the third suspicious test * Look up row keys properly instead of using `.getColumnSource().get(0)`. Include `function_generated_table` in `__all__`. * Remove redundant utility function --- ...=> TestFunctionGeneratedTableFactory.java} | 47 +--------- .../TestKeyedArrayBackedMutableTable.java | 41 ++++----- py/server/deephaven/__init__.py | 4 +- .../tests/test_function_generated_table.py | 88 +++++++++++-------- 4 files changed, 74 insertions(+), 106 deletions(-) rename engine/table/src/test/java/io/deephaven/engine/table/impl/util/{TestFunctionBackedTableFactory.java => TestFunctionGeneratedTableFactory.java} (65%) diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionBackedTableFactory.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionGeneratedTableFactory.java similarity index 65% rename from engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionBackedTableFactory.java rename to engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionGeneratedTableFactory.java index d4f5814127b..809180c7d45 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionBackedTableFactory.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestFunctionGeneratedTableFactory.java @@ -3,7 +3,6 @@ */ package io.deephaven.engine.table.impl.util; -import io.deephaven.UncheckedDeephavenException; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.Table; @@ -11,7 +10,6 @@ import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.QueryTableTest; import io.deephaven.engine.testutil.ColumnInfo; -import io.deephaven.engine.testutil.ControlledUpdateGraph; import io.deephaven.engine.testutil.EvalNugget; import io.deephaven.engine.testutil.EvalNuggetInterface; import io.deephaven.engine.testutil.generator.IntGenerator; @@ -19,17 +17,14 @@ import io.deephaven.engine.testutil.testcase.RefreshingTableTestCase; import io.deephaven.engine.util.TableDiff; import io.deephaven.qst.type.Type; -import io.deephaven.util.function.ThrowingRunnable; -import java.io.IOException; -import java.util.Arrays; import java.util.Random; -import java.util.concurrent.CountDownLatch; +import static 
io.deephaven.engine.table.impl.util.TestKeyedArrayBackedMutableTable.handleDelayedRefresh; import static io.deephaven.engine.testutil.TstUtils.*; import static io.deephaven.engine.util.TableTools.*; -public class TestFunctionBackedTableFactory extends RefreshingTableTestCase { +public class TestFunctionGeneratedTableFactory extends RefreshingTableTestCase { public void testIterative() { Random random = new Random(0); ColumnInfo[] columnInfo; @@ -98,42 +93,4 @@ public void testMultipleSources() throws Exception { stringCol("StringCol", "MyString"), intCol("IntCol", 12345)), functionBacked); } - - /** - * See {@link io.deephaven.engine.table.impl.util.TestKeyedArrayBackedMutableTable#handleDelayedRefresh} - */ - public static void handleDelayedRefresh(final ThrowingRunnable action, - final BaseArrayBackedMutableTable... tables) throws Exception { - final Thread refreshThread; - final CountDownLatch gate = new CountDownLatch(tables.length); - - Arrays.stream(tables).forEach(t -> t.setOnPendingChange(gate::countDown)); - try { - final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); - refreshThread = new Thread(() -> { - // If this unexpected interruption happens, the test thread may hang in action.run() - // indefinitely. Best to hope it's already queued the pending action and proceed with run. - updateGraph.runWithinUnitTestCycle(() -> { - try { - gate.await(); - } catch (InterruptedException ignored) { - // If this unexpected interruption happens, the test thread may hang in action.run() - // indefinitely. Best to hope it's already queued the pending action and proceed with run. - } - Arrays.stream(tables).forEach(BaseArrayBackedMutableTable::run); - }); - }); - - refreshThread.start(); - action.run(); - } finally { - Arrays.stream(tables).forEach(t -> t.setOnPendingChange(null)); - } - try { - refreshThread.join(); - } catch (InterruptedException e) { - throw new UncheckedDeephavenException( - "Interrupted unexpectedly while waiting for run cycle to complete", e); - } - } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestKeyedArrayBackedMutableTable.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestKeyedArrayBackedMutableTable.java index 14fcc5a3c69..a211071cbe5 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestKeyedArrayBackedMutableTable.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/util/TestKeyedArrayBackedMutableTable.java @@ -8,13 +8,13 @@ import io.deephaven.datastructures.util.CollectionUtil; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.FailureListener; +import io.deephaven.engine.table.impl.TableUpdateValidator; import io.deephaven.engine.testutil.ControlledUpdateGraph; +import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.engine.util.TableTools; import io.deephaven.engine.util.config.InputTableStatusListener; import io.deephaven.engine.util.config.MutableInputTable; -import io.deephaven.engine.table.impl.FailureListener; -import io.deephaven.engine.table.impl.TableUpdateValidator; -import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.util.function.ThrowingRunnable; import junit.framework.TestCase; import org.jetbrains.annotations.NotNull; @@ -23,12 +23,13 @@ import org.junit.Test; import java.io.IOException; +import java.util.Arrays; import java.util.Map; import java.util.concurrent.CountDownLatch; 
+import static io.deephaven.engine.testutil.TstUtils.assertTableEquals; import static io.deephaven.engine.util.TableTools.showWithRowSet; import static io.deephaven.engine.util.TableTools.stringCol; -import static io.deephaven.engine.testutil.TstUtils.assertTableEquals; public class TestKeyedArrayBackedMutableTable { @@ -53,23 +54,23 @@ public void testSimple() throws Exception { final Table input2 = TableTools.newTable(stringCol("Name", "Randy"), stringCol("Employer", "USGS")); - handleDelayedRefresh(kabut, () -> mutableInputTable.add(input2)); + handleDelayedRefresh(() -> mutableInputTable.add(input2), kabut); assertTableEquals(TableTools.merge(input, input2), kabut); final Table input3 = TableTools.newTable(stringCol("Name", "Randy"), stringCol("Employer", "Tegridy")); - handleDelayedRefresh(kabut, () -> mutableInputTable.add(input3)); + handleDelayedRefresh(() -> mutableInputTable.add(input3), kabut); assertTableEquals(TableTools.merge(input, input3), kabut); final Table input4 = TableTools.newTable(stringCol("Name", "George"), stringCol("Employer", "Cogswell")); - handleDelayedRefresh(kabut, () -> mutableInputTable.add(input4)); + handleDelayedRefresh(() -> mutableInputTable.add(input4), kabut); showWithRowSet(kabut); assertTableEquals(TableTools.merge(input, input3, input4).lastBy("Name"), kabut); final Table input5 = TableTools.newTable(stringCol("Name", "George"), stringCol("Employer", "Spacely Sprockets")); - handleDelayedRefresh(kabut, () -> mutableInputTable.add(input5)); + handleDelayedRefresh(() -> mutableInputTable.add(input5), kabut); showWithRowSet(kabut); assertTableEquals(TableTools.merge(input, input3, input4, input5).lastBy("Name"), kabut); @@ -77,7 +78,7 @@ public void testSimple() throws Exception { final long sizeBeforeDelete = kabut.size(); System.out.println("KABUT.rowSet before delete: " + kabut.getRowSet()); final Table delete1 = TableTools.newTable(stringCol("Name", "Earl")); - handleDelayedRefresh(kabut, () -> mutableInputTable.delete(delete1)); + handleDelayedRefresh(() -> mutableInputTable.delete(delete1), kabut); System.out.println("KABUT.rowSet after delete: " + kabut.getRowSet()); final long sizeAfterDelete = kabut.size(); TestCase.assertEquals(sizeBeforeDelete - 1, sizeAfterDelete); @@ -113,7 +114,7 @@ public void testAppendOnly() throws Exception { final Table input2 = TableTools.newTable(stringCol("Name", "Randy", "George"), stringCol("Employer", "USGS", "Cogswell")); - handleDelayedRefresh(aoabmt, () -> mutableInputTable.add(input2)); + handleDelayedRefresh(() -> mutableInputTable.add(input2), aoabmt); assertTableEquals(TableTools.merge(input, input2), aoabmt); } @@ -137,7 +138,7 @@ public void testFilteredAndSorted() throws Exception { final Table delete = TableTools.newTable(stringCol("Name", "Fred")); - handleDelayedRefresh(kabut, () -> mutableInputTable.delete(delete)); + handleDelayedRefresh(() -> mutableInputTable.delete(delete), kabut); assertTableEquals(input.where("Name != `Fred`"), kabut); } @@ -203,13 +204,13 @@ public void testAddBack() throws Exception { final Table input2 = TableTools.newTable(stringCol("Name", "George"), stringCol("Employer", "Spacely Sprockets")); - handleDelayedRefresh(kabut, () -> mutableInputTable.add(input2)); + handleDelayedRefresh(() -> mutableInputTable.add(input2), kabut); assertTableEquals(input2, kabut); - handleDelayedRefresh(kabut, () -> mutableInputTable.delete(input2.view("Name"))); + handleDelayedRefresh(() -> mutableInputTable.delete(input2.view("Name")), kabut); assertTableEquals(input, kabut); - 
handleDelayedRefresh(kabut, () -> mutableInputTable.add(input2)); + handleDelayedRefresh(() -> mutableInputTable.add(input2), kabut); assertTableEquals(input2, kabut); } @@ -295,12 +296,12 @@ private synchronized void assertFailure(@NotNull final Class action) throws Exception { + public static void handleDelayedRefresh(final ThrowingRunnable action, + final BaseArrayBackedMutableTable... tables) throws Exception { final Thread refreshThread; - final CountDownLatch gate = new CountDownLatch(1); + final CountDownLatch gate = new CountDownLatch(tables.length); - table.setOnPendingChange(gate::countDown); + Arrays.stream(tables).forEach(t -> t.setOnPendingChange(gate::countDown)); try { final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); refreshThread = new Thread(() -> { @@ -313,14 +314,14 @@ private void handleDelayedRefresh(final BaseArrayBackedMutableTable table, // If this unexpected interruption happens, the test thread may hang in action.run() // indefinitely. Best to hope it's already queued the pending action and proceed with run. } - table.run(); + Arrays.stream(tables).forEach(BaseArrayBackedMutableTable::run); }); }); refreshThread.start(); action.run(); } finally { - table.setOnPendingChange(null); + Arrays.stream(tables).forEach(t -> t.setOnPendingChange(null)); } try { refreshThread.join(); diff --git a/py/server/deephaven/__init__.py b/py/server/deephaven/__init__.py index a5ce6b412e4..d299242a7a3 100644 --- a/py/server/deephaven/__init__.py +++ b/py/server/deephaven/__init__.py @@ -29,5 +29,5 @@ from .dbc import read_sql __all__ = ["read_csv", "write_csv", "kafka_consumer", "kafka_producer", "empty_table", "time_table", "merge", - "merge_sorted", "new_table", "input_table", "ring_table", "DynamicTableWriter", "TableReplayer", - "garbage_collect", "read_sql", "DHError", "SortDirection"] + "merge_sorted", "new_table", "input_table", "ring_table", "function_generated_table", "DynamicTableWriter", + "TableReplayer", "garbage_collect", "read_sql", "DHError", "SortDirection"] diff --git a/py/server/tests/test_function_generated_table.py b/py/server/tests/test_function_generated_table.py index 82ae96b6bfa..3b82acf45cd 100644 --- a/py/server/tests/test_function_generated_table.py +++ b/py/server/tests/test_function_generated_table.py @@ -1,9 +1,11 @@ -from time import sleep +from datetime import datetime +from typing import Any import deephaven.dtypes as dht from deephaven import empty_table, input_table, new_table, update_graph, function_generated_table from deephaven.column import string_col, int_col from deephaven.execution_context import get_exec_ctx +from deephaven.table import Table from tests.testbase import BaseTestCase @@ -20,17 +22,22 @@ def test_generated_table_timed_refresh(self): def table_generator_function(): return empty_table(1).update("Timestamp = io.deephaven.base.clock.Clock.system().currentTimeMillis()") - result_table = function_generated_table(table_generator_function, refresh_interval_ms=2000) + with update_graph.exclusive_lock(self.test_update_graph): + result_table = function_generated_table(table_generator_function, refresh_interval_ms=2000) + self.assertEqual(result_table.size, 1) + first_row_key = get_row_key(0, result_table) + initial_time = result_table.j_table.getColumnSource("Timestamp").get(first_row_key) - self.assertEqual(result_table.size, 1) - initial_time = result_table.j_table.getColumnSource("Timestamp").get(0) - sleep(5) - later_time = result_table.j_table.getColumnSource("Timestamp").get(0) + if 
not result_table.await_update(5_000): + raise RuntimeError("Result table did not update within 5 seconds") + + first_row_key = get_row_key(0, result_table) + later_time = result_table.j_table.getColumnSource("Timestamp").get(first_row_key) # Make sure it ticked at least once within 5 seconds. It should have ticked twice, # but leaving a wider margin to ensure the test passes -- as long as it ticks at all # we can be confident it's working. - self.assertGreaterEqual(later_time, initial_time + 2000) + self.assertGreater(later_time, initial_time) def test_generated_table_1trigger(self): col_defs = { @@ -39,26 +46,36 @@ def test_generated_table_1trigger(self): append_only_input_table = input_table(col_defs=col_defs) def table_generator_function(): + print("Running table_generator_function() at time: " + str(datetime.now())) return append_only_input_table.last_by().update('ResultStr = MyStr') result_table = function_generated_table(table_generator_function, source_tables=append_only_input_table) self.assertEqual(result_table.size, 0) + print("Adding row at time: " + str(datetime.now())) append_only_input_table.add(new_table([string_col(name='MyStr', data=['test string'])])) + print("add() returned at time: " + str(datetime.now())) self.wait_ticking_table_update(result_table, row_count=1, timeout=30) - self.assertEqual(result_table.size, 1) + print("result_table has 1 row at time: " + str(datetime.now())) - result_str = result_table.j_table.getColumnSource("ResultStr").get(0) + first_row_key = get_row_key(0, result_table) + result_str = result_table.j_table.getColumnSource("ResultStr").get(first_row_key) self.assertEqual(result_str, 'test string') def test_generated_table_2triggers(self): + # NOTE: This tests that both trigger tables cause the refresh function to run. + # It does not test updating two source tables in the same cycle (that is covered by + # io.deephaven.engine.table.impl.util.TestFunctionGeneratedTableFactory.testMultipleSources). 
append_only_input_table1 = input_table(col_defs={"MyStr": dht.string}) append_only_input_table2 = input_table(col_defs={"MyInt": dht.int32}) def table_generator_function(): - my_str = append_only_input_table1.last_by().j_table.getColumnSource('MyStr').get(0) - my_int = append_only_input_table2.last_by().j_table.getColumnSource('MyInt').getInt(0) + t1 = append_only_input_table1.last_by() + t2 = append_only_input_table2.last_by() + + my_str = t1.j_table.getColumnSource('MyStr').get(get_row_key(0, t1)) + my_int = t2.j_table.getColumnSource('MyInt').getInt(get_row_key(0, t2)) return new_table([ string_col('ResultStr', [my_str]), @@ -66,42 +83,35 @@ def table_generator_function(): ]) result_table = function_generated_table(table_generator_function, - source_tables=[append_only_input_table1, append_only_input_table2]) + source_tables=[append_only_input_table1, append_only_input_table2]) self.assertEqual(result_table.size, 1) - result_str = result_table.j_table.getColumnSource("ResultStr").get(0) - result_int = result_table.j_table.getColumnSource("ResultInt").get(0) + first_row_key = get_row_key(0, result_table) + result_str = result_table.j_table.getColumnSource("ResultStr").get(first_row_key) + result_int = result_table.j_table.getColumnSource("ResultInt").get(first_row_key) self.assertEqual(result_str, None) self.assertEqual(result_int, None) - append_only_input_table1.add(new_table([string_col(name='MyStr', data=['test string'])])) - self.wait_ticking_table_update(append_only_input_table1, row_count=1, timeout=30) - + with update_graph.exclusive_lock(self.test_update_graph): + append_only_input_table1.add(new_table([string_col(name='MyStr', data=['test string'])])) - self.assertEqual(result_table.size, 1) - result_str = result_table.j_table.getColumnSource("ResultStr").get(0) - result_int = result_table.j_table.getColumnSource("ResultInt").get(0) - self.assertEqual(result_str, 'test string') - self.assertEqual(result_int, None) + self.assertEqual(result_table.size, 1) + first_row_key = get_row_key(0, result_table) + result_str = result_table.j_table.getColumnSource("ResultStr").get(first_row_key) + result_int = result_table.j_table.getColumnSource("ResultInt").get(first_row_key) + self.assertEqual(result_str, 'test string') + self.assertEqual(result_int, None) - append_only_input_table2.add(new_table([int_col(name='MyInt', data=[12345])])) - self.wait_ticking_table_update(append_only_input_table2, row_count=1, timeout=30) + append_only_input_table2.add(new_table([int_col(name='MyInt', data=[12345])])) - self.assertEqual(result_table.size, 1) - result_str = result_table.j_table.getColumnSource("ResultStr").get(0) - result_int = result_table.j_table.getColumnSource("ResultInt").get(0) - self.assertEqual(result_str, 'test string') - self.assertEqual(result_int, 12345) + self.assertEqual(result_table.size, 1) - with update_graph.exclusive_lock(self.test_update_graph): - append_only_input_table1.add(new_table([string_col(name='MyStr', data=['test string 2'])])) - append_only_input_table2.add(new_table([int_col(name='MyInt', data=[54321])])) + first_row_key = get_row_key(0, result_table) + result_str = result_table.j_table.getColumnSource("ResultStr").get(first_row_key) + result_int = result_table.j_table.getColumnSource("ResultInt").get(first_row_key) + self.assertEqual(result_str, 'test string') + self.assertEqual(result_int, 12345) - self.wait_ticking_table_update(append_only_input_table1, row_count=2, timeout=30) - self.wait_ticking_table_update(append_only_input_table2, row_count=2, 
timeout=30) - self.assertEqual(result_table.size, 1) - result_str = result_table.j_table.getColumnSource("ResultStr").get(0) - result_int = result_table.j_table.getColumnSource("ResultInt").get(0) - self.assertEqual(result_str, 'test string 2') - self.assertEqual(result_int, 54321) +def get_row_key(row_position: int, t: Table) -> Any: + return t.j_table.getRowSet().get(row_position) From 240f337189b5fcb3d111b0dabbdfff66d21ddb41 Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Fri, 15 Sep 2023 18:59:52 -0500 Subject: [PATCH 15/15] Add docstrings to R Client (#4363) * Add docstrings - .Rd not generated * Snake case in docs PR * Apply agg_all_by changes to docs * Update docs to be more like Python * Do not import all of dplyr and arrow * Remove RecordBatchStreamReader directive * Review suggestions * Should be up-to-date with main * Review sugestions * Update client_wrapper.R Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update client_wrapper.R Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update client_wrapper.R Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update client_wrapper.R Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update client_wrapper.R Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update docs according to review --------- Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> --- R/rdeephaven/NAMESPACE | 3 +- R/rdeephaven/R/aggregate_wrapper.R | 83 +++++++++ R/rdeephaven/R/client_wrapper.R | 77 +++++++++ R/rdeephaven/R/table_handle_wrapper.R | 233 +++++++++++++++++++++++++- 4 files changed, 392 insertions(+), 4 deletions(-) diff --git a/R/rdeephaven/NAMESPACE b/R/rdeephaven/NAMESPACE index b5c79e0e035..634a18faaf7 100644 --- a/R/rdeephaven/NAMESPACE +++ b/R/rdeephaven/NAMESPACE @@ -7,7 +7,6 @@ S3method(as_tibble,TableHandle) S3method(dim,TableHandle) S3method(head,TableHandle) S3method(tail,TableHandle) -export(Aggregation) export(Client) export(TableHandle) export(agg_abs_sum) @@ -33,4 +32,4 @@ importFrom(arrow,as_arrow_table) importFrom(arrow,as_record_batch_reader) importFrom(dplyr,as_data_frame) importFrom(dplyr,as_tibble) -useDynLib(rdeephaven, .registration = TRUE) +useDynLib(rdeephaven, .registration = TRUE) \ No newline at end of file diff --git a/R/rdeephaven/R/aggregate_wrapper.R b/R/rdeephaven/R/aggregate_wrapper.R index daafae48361..b5b3bbeadc3 100644 --- a/R/rdeephaven/R/aggregate_wrapper.R +++ b/R/rdeephaven/R/aggregate_wrapper.R @@ -1,3 +1,6 @@ +#' @description +#' An Aggregation represents an aggregation operation that can be passed to `agg_by()` or `agg_all_by()`. +#' Note that Aggregations should not be instantiated directly by user code, but rather by provided agg_* functions. Aggregation <- R6Class("Aggregation", cloneable = FALSE, public = list( @@ -17,68 +20,148 @@ Aggregation <- R6Class("Aggregation", ### All of the functions below return an instance of the above class +#' @description +#' Creates a First aggregation that computes the first value of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_first <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_first, "agg_first", cols=cols)) } +#' @description +#' Creates a Last aggregation that computes the last value of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_last <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_last, "agg_last", cols=cols)) } +#' @description +#' Creates a Minimum aggregation that computes the minimum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_min <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_min, "agg_min", cols=cols)) } +#' @description +#' Creates a Maximum aggregation that computes the maximum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_max <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_max, "agg_max", cols=cols)) } +#' @description +#' Creates a Sum aggregation that computes the sum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_sum, "agg_sum", cols=cols)) } +#' @description +#' Creates an Absolute Sum aggregation that computes the absolute sum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_abs_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols=cols)) } +#' @description +#' Creates an Average aggregation that computes the average of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. 
+#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_avg <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_avg, "agg_avg", cols=cols)) } +#' @description +#' Creates a Weighted Average aggregation that computes the weighted average of each column in `cols` for each aggregation group. +#' @param wcol String denoting the column to use for weights. This must be a numeric column. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_w_avg <- function(wcol, cols = character()) { verify_string("wcol", wcol, TRUE) verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol=wcol, cols=cols)) } +#' @description +#' Creates a Median aggregation that computes the median of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_median <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_median, "agg_median", cols=cols)) } +#' @description +#' Creates a Variance aggregation that computes the variance of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_var <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_var, "agg_var", cols=cols)) } +#' @description +#' Creates a Standard Deviation aggregation that computes the standard deviation of each column in `cols`, for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_std <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_std, "agg_std", cols=cols)) } +#' @description +#' Creates a Percentile aggregation that computes the given percentile of each column in `cols` for each aggregation group. +#' @param percentile Numeric scalar between 0 and 1 denoting the percentile to compute. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_percentile <- function(percentile, cols = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_percentile, "agg_percentile", percentile=percentile, cols=cols)) } +#' @description +#' Creates a Count aggregation that counts the number of rows in each aggregation group. +#' Note that this operation is not supported in `agg_all_by()`. +#' @param col String denoting the name of the new column to hold the counts of each aggregation group. +#' @return Aggregation function to be used in `agg_by()`. +#' @export agg_count <- function(col) { verify_string("col", col, TRUE) return(Aggregation$new(INTERNAL_agg_count, "agg_count", col=col)) diff --git a/R/rdeephaven/R/client_wrapper.R b/R/rdeephaven/R/client_wrapper.R index acd6e2a145c..6ce3dee5346 100644 --- a/R/rdeephaven/R/client_wrapper.R +++ b/R/rdeephaven/R/client_wrapper.R @@ -1,8 +1,17 @@ +#' @description +#' A Client is the entry point for interacting with the Deephaven server. It is used to create new tables, +#' import data to and export data from the server, and run queries on the server. #' @export Client <- R6Class("Client", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, + + #' @description + #' Calls `initialize_for_xptr` if the first argument is an external pointer, and `initialize_for_target` if the + #' first argument is a string. In the latter case, the remaining keyword arguments are passed to `initialize_for_target`. + #' @param ... Either an external pointer to an existing client connection, or a string denoting the address + #' of a running Deephaven server followed by keyword arguments to `initialize_from_target`. initialize = function(...) { args <- list(...) if (length(args) == 1) { @@ -19,10 +28,41 @@ Client <- R6Class("Client", } return(do.call(self$initialize_for_target, args)) }, + + #' @description + #' Initializes a Client object using a pointer to an existing client connection. + #' @param xptr External pointer to an existing client connection. initialize_for_xptr = function(xptr) { verify_type("xptr", xptr, "externalptr", "XPtr", TRUE) self$.internal_rcpp_object = new(INTERNAL_Client, xptr) }, + + #' @description + #' Initializes a Client object and connects to a Deephaven server. + #' @param target String denoting the address of a Deephaven server, formatted as `"ip:port"`. + #' @param auth_type String denoting the authentication type. Can be `"anonymous"`, `"basic"`, + #' or any custom-built authenticator supported by the server, such as `"io.deephaven.authentication.psk.PskAuthenticationHandler"`. + #' Default is `anonymous`. + #' @param username String denoting the username, which only applies if `auth_type` is `basic`. + #' Username and password should not be used in conjunction with `auth_token`. Defaults to an empty string. + #' @param password String denoting the password, which only applies if `auth_type` is `basic`. + #' Username and password should not be used in conjunction with `auth_token`. Defaults to an empty string. + #' @param auth_token String denoting the authentication token. When `auth_type` + #' is `anonymous`, it will be ignored; when `auth_type` is `basic`, it must be + #' `"user:password"` or left blank; when `auth_type` is a custom-built authenticator, it must + #' conform to the specific requirement of that authenticator. This should not be used + #' in conjunction with `username` and `password`. Defaults to an empty string. 
+ #' @param session_type String denoting the session type supported on the server. + #' Currently, `python` and `groovy` are supported. Defaults to `python`. + #' @param use_tls Whether or not to use a TLS connection. Defaults to `FALSE`. + #' @param tls_root_certs String denoting PEM encoded root certificates to use for TLS connection, + #' or `""` to use system defaults. Only used if `use_tls == TRUE`. Defaults to system defaults. + #' @param int_options List of name-value pairs for int-valued options to the underlying + #' grpc channel creation. Defaults to an empty list, which implies not using any channel options. + #' @param string_options List of name-value pairs for string-valued options to the underlying + #' grpc channel creation. Defaults to an empty list, which implies not using any channel options. + #' @param extra_headers List of name-value pairs for additional headers and values + #' to add to server requests. Defaults to an empty list, which implies not using any extra headers. initialize_for_target = function( target, auth_type = "anonymous", @@ -138,15 +178,32 @@ Client <- R6Class("Client", client_options = options ) }, + + #' @description + #' Creates an empty table on the server with 'size' rows and no columns. + #' @param size Non-negative integer specifying the number of rows for the new table. + #' @return TableHandle reference to the new table. empty_table = function(size) { verify_nonnegative_int("size", size, TRUE) return(TableHandle$new(self$.internal_rcpp_object$empty_table(size))) }, + + #' @description + #' Creates a ticking table on the server. + #' @param period ISO-8601-formatted string specifying the update frequency of the new table. + #' @param start_time Optional ISO-8601-formatted string specifying the start time of the table. + #' Defaults to now. + #' @return TableHandle reference to the new table. time_table = function(period, start_time = "now") { verify_string("period", period, TRUE) verify_string("start_time", start_time, TRUE) return(TableHandle$new(self$.internal_rcpp_object$time_table(period, start_time))) }, + + #' @description + #' Retrieves a reference to a named table on the server using its name. + #' @param name String denoting the name of the table to retrieve. + #' @return TableHandle reference to the named table. open_table = function(name) { verify_string("name", name, TRUE) if (!private$check_for_table(name)) { @@ -154,6 +211,13 @@ Client <- R6Class("Client", } return(TableHandle$new(self$.internal_rcpp_object$open_table(name))) }, + + #' @description + #' Imports a new table to the Deephaven server. Note that this new table is not automatically bound to + #' a variable name on the server. See `?TableHandle` for more information. + #' @param table_object R Data Frame, dplyr Tibble, Arrow Table, Arrow RecordBatchReader, or other supported table + #' containing the data to import to the server. + #' @return TableHandle reference to the new table. import_table = function(table_object) { table_object_class <- class(table_object) if (table_object_class[[1]] == "data.frame") { @@ -170,14 +234,27 @@ Client <- R6Class("Client", stop(paste0("'table_object' must be a single data frame, tibble, arrow table, or record batch reader. Got an object of class ", table_object_class[[1]], ".")) } }, + + #' @description + #' Retrieves a reference to a named table in the server using its Arrow Flight ticket. + #' @param ticket String denoting the Arrow Flight ticket. + #' @return TableHandle reference to the table. 
ticket_to_table = function(ticket) { verify_string("ticket", ticket, TRUE) return(TableHandle$new(self$.internal_rcpp_object$make_table_handle_from_ticket(ticket))) }, + + #' @description + #' Runs a script on the server. The script must be in the language that the server console was started with. + #' @param script String containing the code to be executed on the server. run_script = function(script) { verify_string("script", script, TRUE) self$.internal_rcpp_object$run_script(script) }, + + #' @description + #' Closes the client connection. After this method is called, any further server calls will + #' be undefined and will likely result in an error. close = function() { self$.internal_rcpp_object$close() } diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index 63c2b8071d9..d538665bfbc 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -1,8 +1,15 @@ -#' @export +#' @description +#' A TableHandle holds a reference to a Deephaven Table on the server, and provides methods for operating on that table. +#' Note that TableHandles should not be instantiated directly by user code, but rather by server calls accessible from +#' the `Client` class. See `?Client` for more information. TableHandle <- R6Class("TableHandle", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, + + #' @description + #' Initializes a new TableHandle from an internal Deephaven TableHandle. + #' @param table_handle Internal Deephaven TableHandle. initialize = function(table_handle) { if (class(table_handle)[[1]] != "Rcpp_INTERNAL_TableHandle") { stop("'table_handle' should be an internal Deephaven TableHandle. If you're seeing this, @@ -10,9 +17,17 @@ TableHandle <- R6Class("TableHandle", } self$.internal_rcpp_object <- table_handle }, + + #' @description + #' Determines whether the table referenced by this TableHandle is static or not. + #' @return TRUE if the table is static, or FALSE if the table is ticking. is_static = function() { return(self$.internal_rcpp_object$is_static()) }, + + #' @description + #' Binds the table referenced by this TableHandle to a variable on the server, so that it can be referenced by that name. + #' @param name Name for this table on the server. bind_to_variable = function(name) { verify_string("name", name, TRUE) self$.internal_rcpp_object$bind_to_variable(name) @@ -20,23 +35,51 @@ TableHandle <- R6Class("TableHandle", ### BASE R METHODS, ALSO IMPLEMENTED FUNCTIONALLY + #' @description + #' Creates a new table containing the first `n` rows of this table. + #' @param n Positive integer specifying the number of rows to return. + #' @return A TableHandle referencing the new table. head = function(n) { verify_positive_int("n", n, TRUE) return(TableHandle$new(self$.internal_rcpp_object$head(n))) }, + + #' @description + #' Creates a new table containing the last `n` rows of this table. + #' @param n Positive integer specifying the number of rows to return. + #' @return A TableHandle referencing the new table consisting of the last n rows of the parent table. tail = function(n) { verify_positive_int("n", n, TRUE) return(TableHandle$new(self$.internal_rcpp_object$tail(n))) }, + + #' @description + #' Gets the number of rows in the table referenced by this TableHandle. + #' @return The number of rows in the table. nrow = function() { return(self$.internal_rcpp_object$num_rows()) }, + + #' @description + #' Gets the number of columns in the table referenced by this TableHandle. 
+ #' @return The number of columns in the table. ncol = function() { return(self$.internal_rcpp_object$num_cols()) }, + + #' @description + #' Gets the dimensions of the table referenced by this TableHandle. Equivalent to c(nrow, ncol). + #' @return A vector of length 2, where the first element is the number of rows in the table and the second + #' element is the number of columns in the table. dim = function() { return(c(self$nrow(), self$ncol())) }, + + #' @description + #' Merges several tables into one table on the server. The tables must have the same schema as this table, and can + #' be supplied as a list of TableHandles, any number of TableHandles, or a mix of both. + #' @param ... Arbitrary number of TableHandles or vectors of TableHandles with a schema matching this table. + #' @return A TableHandle referencing the new table. merge = function(...) { table_list <- unlist(c(...)) if (length(table_list) == 0) { @@ -49,21 +92,36 @@ TableHandle <- R6Class("TableHandle", ### CONVERSION METHODS, ALSO IMPLEMENTED FUNCTIONALLY + #' @description + #' Converts the table referenced by this TableHandle to an Arrow RecordBatchStreamReader. + #' @return An Arrow RecordBatchStreamReader constructed from the data of this TableHandle. as_record_batch_reader = function() { ptr <- self$.internal_rcpp_object$get_arrow_array_stream_ptr() rbsr <- arrow::RecordBatchStreamReader$import_from_c(ptr) return(rbsr) }, + + #' @description + #' Converts the table referenced by this TableHandle to an Arrow Table. + #' @return An Arrow Table constructed from the data of this TableHandle. as_arrow_table = function() { rbsr <- self$as_record_batch_reader() arrow_tbl <- rbsr$read_table() return(arrow_tbl) }, + + #' @description + #' Converts the table referenced by this TableHandle to a dplyr tibble. + #' @return A dplyr tibble constructed from the data of this TableHandle. as_tibble = function() { rbsr <- self$as_record_batch_reader() arrow_tbl <- rbsr$read_table() return(as_tibble(arrow_tbl)) }, + + #' @description + #' Converts the table referenced by this TableHandle to an R data frame. + #' @return An R data frame constructed from the data of this TableHandle. as_data_frame = function() { arrow_tbl <- self$as_arrow_table() return(as.data.frame(as.data.frame(arrow_tbl))) # TODO: for some reason as.data.frame on arrow table returns a tibble, not a data frame @@ -71,38 +129,88 @@ TableHandle <- R6Class("TableHandle", ### DEEPHAVEN TABLE OPERATIONS + #' @description + #' Creates a new in-memory table that includes one column for each formula. + #' If no formula is specified, all columns will be included. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. select = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$select(formulas))) }, + + #' @description + #' Creates a new formula table that includes one column for each formula. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. view = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$view(formulas))) }, + + #' @description + #' Creates a new table containing a new, in-memory column for each formula. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. 
update = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$update(formulas))) }, + + #' @description + #' Creates a new table containing a new formula column for each formula. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. update_view = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$update_view(formulas))) }, + + #' @description + #' Creates a new table that has the same number of rows as this table, + #' but omits the columns specified in `cols`. + #' @param cols String or list of strings denoting the names of the columns to drop. + #' @return A TableHandle referencing the new table. drop_columns = function(cols = character()) { verify_string("cols", cols, FALSE) return(TableHandle$new(self$.internal_rcpp_object$drop_columns(cols))) }, + + #' @description + #' Creates a new table containing only the rows meeting the filter condition. + #' @param filter String denoting the filter condition. + #' @return A TableHandle referencing the new table. where = function(filter) { verify_string("filter", filter, TRUE) return(TableHandle$new(self$.internal_rcpp_object$where(filter))) }, + + #' @description + #' Creates a new table containing grouping columns and grouped data, with the column content grouped into arrays. + #' If no group-by column is given, the content of each column is grouped into its own array. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. group_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$group_by(by))) }, + + #' @description + #' Creates a new table in which array columns from the source table are unwrapped into separate rows. + #' The ungroup columns should be of array types. + #' @param by String or list of strings denoting the names of the columns to ungroup. + #' @return A TableHandle referencing the new table. ungroup = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$ungroup(by))) }, + + #' @description + #' Creates a new table containing grouping columns and grouped data. The resulting grouped data is defined by the + #' aggregation(s) specified. See `?Aggregations` for more information. + #' @param aggs Aggregation or list of Aggregations to perform on non-grouping columns. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. agg_by = function(aggs, by = character()) { verify_type("aggs", aggs, "Aggregation", "Deephaven Aggregation", FALSE) verify_string("by", by, FALSE)
+ #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. agg_all_by = function(agg, by = character()) { verify_type("agg", agg, "Aggregation", "Deephaven Aggregation", TRUE) - verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$agg_all_by(agg$.internal_rcpp_object, by))) }, + + #' @description + #' Creates a new table containing the first row of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. first_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$first_by(by))) }, + + #' @description + #' Creates a new table containing the last row of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. last_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$last_by(by))) }, + + #' @description + #' Creates a new table containing the first `num_rows` rows of each group. + #' @param num_rows Positive integer specifying the number of rows to return. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. head_by = function(num_rows, by = character()) { verify_positive_int("num_rows", num_rows, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$head_by(num_rows, by))) }, + + #' @description + #' Creates a new table containing the last `num_rows` rows of each group. + #' @param num_rows Positive integer specifying the number of rows to return. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. tail_by = function(num_rows, by = character()) { verify_positive_int("num_rows", num_rows, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$tail_by(num_rows, by))) }, + + #' @description + #' Creates a new table containing the column-wise minimum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. min_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$min_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise maximum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. max_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$max_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise sum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. sum_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$sum_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise absolute sum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
abs_sum_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$abs_sum_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise average of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. avg_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$avg_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise weighted average of each group. + #' @param wcol String denoting the name of the column to use as weights. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. w_avg_by = function(wcol, by = character()) { verify_string("wcol", wcol, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$w_avg_by(wcol, by))) }, + + #' @description + #' Creates a new table containing the column-wise median of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. median_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$median_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise variance of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. var_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$var_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise standard deviation of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. std_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$std_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise percentile of each group. + #' @param percentile Numeric scalar between 0 and 1 denoting the percentile to compute. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. percentile_by = function(percentile, by = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$percentile_by(percentile, by))) }, + + #' @description + #' Creates a new table containing the number of rows in each group. + #' @param col String denoting the name of the new column to hold the counts of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
count_by = function(col, by = character()) { verify_string("col", col, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$count_by(col, by))) }, + + #' @export cross_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -193,6 +391,15 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, plus additional columns containing data + #' from the right table. For columns appended to the left table (joins), row values equal the row values from the + #' right table where the key values in the left and right tables are equal. + #' If there is no matching key in the right table, appended row values are NULL. + #' @param table TableHandle referencing the table to join with. + #' @param on String or list of strings denoting the names of the columns to join on. + #' @param joins String or list of strings denoting the names of the columns to add from `table`. natural_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -201,6 +408,14 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, plus additional columns containing data + #' from the right table. For columns appended to the left table (joins), row values equal the row values from the + #' right table where the key values in the left and right tables are equal. + #' @param table TableHandle referencing the table to join with. + #' @param on String or list of strings denoting the names of the columns to join on. + #' @param joins String or list of strings denoting the names of the columns to add from `table`. exact_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -209,6 +424,15 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, sorted by the specified columns. + #' @param order_by String or list of strings denoting the names of the columns to sort by. + #' @param descending Boolean or list of booleans denoting whether to sort in descending order. + #' If a list is supplied, it must be the same length as `order_by`. + #' @param abs_sort Boolean or list of booleans denoting whether to sort by absolute value. + #' If a list is supplied, it must be the same length as `order_by`. + #' @return A TableHandle referencing the new table. sort = function(order_by, descending = FALSE, abs_sort = FALSE) { verify_string("order_by", order_by, FALSE) verify_bool("descending", descending, FALSE) @@ -249,6 +473,11 @@ dim.TableHandle <- function(x) { return(x$dim()) } +#' @description +#' Merges several tables into one table on the server. The tables must have the same schema, and can +#' be supplied as a list of TableHandles, any number of TableHandles, or a mix of both. +#' @param ... Arbitrary number of TableHandles or vectors of TableHandles with a uniform schema. +#' @return A TableHandle referencing the new table. #' @export merge_tables <- function(...) { table_list <- unlist(c(...))