From 643cc9a09db49d3c2b1e1a4add932ec5f8778366 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Mon, 17 Jun 2024 18:51:46 -0500 Subject: [PATCH] Optimizations in parquet page materialization (#5582) --- extensions/parquet/base/build.gradle | 2 + .../parquet/base/ColumnChunkReaderImpl.java | 8 +- .../parquet/base/ColumnPageReaderImpl.java | 4 +- .../parquet/base/PageMaterializer.java | 422 ++++-------------- .../parquet/base/PageMaterializerFactory.java | 2 +- .../parquet/base/ParquetTimeUtils.java} | 15 +- .../base/materializers/BlobMaterializer.java | 64 +++ .../base/materializers/BoolMaterializer.java | 63 +++ .../base/materializers/ByteMaterializer.java | 67 +++ .../base/materializers/CharMaterializer.java | 63 +++ .../materializers/DoubleMaterializer.java | 67 +++ .../base/materializers/FloatMaterializer.java | 63 +++ .../InstantFromInt96Materializer.java | 84 ++++ .../InstantNanosFromMicrosMaterializer.java | 42 ++ .../InstantNanosFromMillisMaterializer.java | 46 ++ .../base/materializers/IntMaterializer.java | 67 +++ .../materializers/LocalDateMaterializer.java | 65 +++ .../LocalDateTimeFromMicrosMaterializer.java | 49 ++ .../LocalDateTimeFromMillisMaterializer.java | 45 ++ .../LocalDateTimeFromNanosMaterializer.java | 49 ++ .../LocalDateTimeMaterializerBase.java | 40 ++ .../LocalTimeFromMicrosMaterializer.java | 44 ++ .../LocalTimeFromMillisMaterializer.java | 48 ++ .../LocalTimeFromNanosMaterializer.java | 48 ++ .../LocalTimeMaterializerBase.java | 36 ++ .../LongFromUnsignedIntMaterializer.java | 41 ++ .../base/materializers/LongMaterializer.java | 41 ++ .../materializers/LongMaterializerBase.java | 35 ++ .../base/materializers/ShortMaterializer.java | 67 +++ .../materializers/StringMaterializer.java | 67 +++ .../parquet/base/TestParquetTimeUtils.java} | 21 +- .../table/location/ParquetColumnLocation.java | 19 +- ...ToBytePageFromInt.java => ToBytePage.java} | 29 +- ...ToCharPageFromInt.java => ToCharPage.java} | 31 +- .../pagestore/topage/ToDatePageFromInt.java | 59 --- .../table/pagestore/topage/ToDoublePage.java | 3 +- .../table/pagestore/topage/ToFloatPage.java | 3 +- .../table/pagestore/topage/ToInstantPage.java | 98 ++-- .../topage/ToInstantPageFromInt96.java | 122 ----- .../table/pagestore/topage/ToIntPage.java | 3 +- .../pagestore/topage/ToLocalDatePage.java | 38 ++ .../pagestore/topage/ToLocalDateTimePage.java | 78 +--- .../pagestore/topage/ToLocalTimePage.java | 42 ++ .../table/pagestore/topage/ToLongPage.java | 3 +- .../topage/ToLongPageFromUnsignedInt.java | 56 --- ...ShortPageFromInt.java => ToShortPage.java} | 29 +- .../table/pagestore/topage/ToStringPage.java | 16 +- .../table/pagestore/topage/ToTimePage.java | 111 ----- .../transfer/LocalDateTimeArrayTransfer.java | 4 +- .../table/transfer/LocalDateTimeTransfer.java | 4 +- .../transfer/LocalDateTimeVectorTransfer.java | 4 +- py/server/tests/test_parquet.py | 52 +++ replication/static/build.gradle | 1 + .../ReplicatePageMaterializers.java | 134 ++++++ .../ReplicateParquetTransferObjects.java | 8 +- .../replicators/ReplicateToPage.java | 24 +- 56 files changed, 1793 insertions(+), 953 deletions(-) rename extensions/parquet/{table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java => base/src/main/java/io/deephaven/parquet/base/ParquetTimeUtils.java} (84%) create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java rename extensions/parquet/{table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java => base/src/test/java/io/deephaven/parquet/base/TestParquetTimeUtils.java} (63%) rename extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/{ToBytePageFromInt.java => ToBytePage.java} (51%) rename extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/{ToCharPageFromInt.java => ToCharPage.java} (52%) delete mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDatePageFromInt.java delete mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPageFromInt96.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java delete mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPageFromUnsignedInt.java rename extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/{ToShortPageFromInt.java => ToShortPage.java} (51%) delete mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java create mode 100644 replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java diff --git a/extensions/parquet/base/build.gradle b/extensions/parquet/base/build.gradle index f50b4bb61b3..db854924faf 100644 --- a/extensions/parquet/base/build.gradle +++ b/extensions/parquet/base/build.gradle @@ -13,6 +13,8 @@ dependencies { implementation project(':extensions-parquet-compression') implementation project(':Base') implementation project(':Util') + implementation project(':engine-time') + implementation project(':Configuration') implementation depCommonsIo compileOnly depAnnotations diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java index d716f97e16b..504b31c17c5 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java @@ -48,7 +48,7 @@ final class ColumnChunkReaderImpl implements ColumnChunkReader { private final OffsetIndexReader offsetIndexReader; private final List fieldTypes; private final Function dictionarySupplier; - private final PageMaterializerFactory nullMaterializerFactory; + private final PageMaterializerFactory pageMaterializerFactory; private final URI columnChunkURI; /** * Number of rows in the row group of this column chunk. @@ -81,7 +81,7 @@ final class ColumnChunkReaderImpl implements ColumnChunkReader { } this.fieldTypes = fieldTypes; this.dictionarySupplier = new SoftCachingFunction<>(this::getDictionary); - this.nullMaterializerFactory = PageMaterializer.factoryForType(path.getPrimitiveType().getPrimitiveTypeName()); + this.pageMaterializerFactory = PageMaterializer.factoryForType(path.getPrimitiveType()); this.numRows = numRows; this.version = version; if (columnChunk.isSetFile_path() && FILE_URI_SCHEME.equals(rootURI.getScheme())) { @@ -289,7 +289,7 @@ public ColumnPageReader next(@NotNull final SeekableChannelContext channelContex final Function pageDictionarySupplier = getPageDictionarySupplier(pageHeader); return new ColumnPageReaderImpl(columnName, channelsProvider, decompressor, pageDictionarySupplier, - nullMaterializerFactory, path, getURI(), fieldTypes, dataOffset, pageHeader, numValuesInPage); + pageMaterializerFactory, path, getURI(), fieldTypes, dataOffset, pageHeader, numValuesInPage); } catch (IOException e) { throw new UncheckedDeephavenException("Error reading page header at offset " + headerOffset + " for " + "column: " + columnName + ", uri: " + getURI(), e); @@ -364,7 +364,7 @@ public ColumnPageReader getPageReader(final int pageNum, final SeekableChannelCo final Function pageDictionarySupplier = getPageDictionarySupplier(pageHeader); return new ColumnPageReaderImpl(columnName, channelsProvider, decompressor, pageDictionarySupplier, - nullMaterializerFactory, path, getURI(), fieldTypes, dataOffset, pageHeader, + pageMaterializerFactory, path, getURI(), fieldTypes, dataOffset, pageHeader, getNumValues(pageHeader)); } catch (final IOException e) { throw new UncheckedDeephavenException("Error reading page header for page number " + pageNum + diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java index dd2cb0abc20..14c60369f01 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java @@ -6,6 +6,7 @@ import io.deephaven.UncheckedDeephavenException; import io.deephaven.base.Pair; import io.deephaven.base.verify.Require; +import io.deephaven.parquet.base.materializers.IntMaterializer; import io.deephaven.parquet.compress.CompressorAdapter; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.channel.SeekableChannelsProvider; @@ -344,8 +345,7 @@ private IntBuffer readKeysFromPageCommon( final RunLengthBitPackingHybridBufferDecoder rlDecoder, final RunLengthBitPackingHybridBufferDecoder dlDecoder, final ValuesReader dataReader) throws IOException { - final Object result = materialize(PageMaterializer.IntFactory, dlDecoder, rlDecoder, dataReader, - nullPlaceholder); + final Object result = materialize(IntMaterializer.Factory, dlDecoder, rlDecoder, dataReader, nullPlaceholder); if (result instanceof DataWithOffsets) { keyDest.put((int[]) ((DataWithOffsets) result).materializeResult); return ((DataWithOffsets) result).offsets; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java index d3c69138f47..c4adfe39069 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java @@ -3,103 +3,106 @@ // package io.deephaven.parquet.base; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.io.api.Binary; +import io.deephaven.parquet.base.materializers.*; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; +import org.jetbrains.annotations.NotNull; -import java.util.Arrays; +public interface PageMaterializer { -interface PageMaterializer { - - PageMaterializerFactory IntFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new IntMaterializer(dataReader, (int) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new IntMaterializer(dataReader, numValues); - } - }; - - PageMaterializerFactory LongFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new LongMaterializer(dataReader, (long) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new LongMaterializer(dataReader, numValues); - } - }; - - PageMaterializerFactory FloatFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new FloatMaterializer(dataReader, (float) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new FloatMaterializer(dataReader, numValues); - } - }; - - PageMaterializerFactory DoubleFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new DoubleMaterializer(dataReader, (double) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new DoubleMaterializer(dataReader, numValues); - } - }; - - PageMaterializerFactory BoolFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new BoolMaterializer(dataReader, (byte) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new BoolMaterializer(dataReader, numValues); - } - }; - - PageMaterializerFactory BlobFactory = new PageMaterializerFactory() { - @Override - public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new BlobMaterializer(dataReader, (Binary) nullValue, numValues); - } - - @Override - public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new BlobMaterializer(dataReader, numValues); - } - }; - - static PageMaterializerFactory factoryForType(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + static PageMaterializerFactory factoryForType(@NotNull final PrimitiveType primitiveType) { + final PrimitiveType.PrimitiveTypeName primitiveTypeName = primitiveType.getPrimitiveTypeName(); + final LogicalTypeAnnotation logicalTypeAnnotation = primitiveType.getLogicalTypeAnnotation(); switch (primitiveTypeName) { case INT32: - return IntFactory; + if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) { + final LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType = + (LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalTypeAnnotation; + if (intLogicalType.isSigned()) { + switch (intLogicalType.getBitWidth()) { + case 8: + return ByteMaterializer.Factory; + case 16: + return ShortMaterializer.Factory; + case 32: + return IntMaterializer.Factory; + } + } else { + switch (intLogicalType.getBitWidth()) { + case 8: + case 16: + return CharMaterializer.Factory; + case 32: + return LongFromUnsignedIntMaterializer.Factory; + } + } + } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) { + return LocalDateMaterializer.Factory; + } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) { + final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType = + (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalTypeAnnotation; + if (timeLogicalType.getUnit() != LogicalTypeAnnotation.TimeUnit.MILLIS) { + throw new IllegalArgumentException( + "Expected unit type to be MILLIS, found " + timeLogicalType.getUnit()); + } + // isAdjustedToUTC parameter is ignored while reading LocalTime from Parquet files + return LocalTimeFromMillisMaterializer.Factory; + } + return IntMaterializer.Factory; case INT64: - return LongFactory; + if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) { + final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType = + (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) logicalTypeAnnotation; + if (timestampLogicalType.isAdjustedToUTC()) { + // The column will store nanoseconds elapsed since epoch as long values + switch (timestampLogicalType.getUnit()) { + case MILLIS: + return InstantNanosFromMillisMaterializer.Factory; + case MICROS: + return InstantNanosFromMicrosMaterializer.Factory; + case NANOS: + return LongMaterializer.Factory; + } + } else { + // The column will be stored as LocalDateTime values + // Ref:https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#local-semantics-timestamps-not-normalized-to-utc + switch (timestampLogicalType.getUnit()) { + case MILLIS: + return LocalDateTimeFromMillisMaterializer.Factory; + case MICROS: + return LocalDateTimeFromMicrosMaterializer.Factory; + case NANOS: + return LocalDateTimeFromNanosMaterializer.Factory; + } + } + } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) { + final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType = + (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalTypeAnnotation; + // isAdjustedToUTC parameter is ignored while reading LocalTime from Parquet files + switch (timeLogicalType.getUnit()) { + case MICROS: + return LocalTimeFromMicrosMaterializer.Factory; + case NANOS: + return LocalTimeFromNanosMaterializer.Factory; + default: + throw new IllegalArgumentException("Unsupported unit=" + timeLogicalType.getUnit()); + } + } + return LongMaterializer.Factory; + case INT96: + return InstantFromInt96Materializer.Factory; case FLOAT: - return FloatFactory; + return FloatMaterializer.Factory; case DOUBLE: - return DoubleFactory; + return DoubleMaterializer.Factory; case BOOLEAN: - return BoolFactory; + return BoolMaterializer.Factory; case BINARY: - case FIXED_LEN_BYTE_ARRAY: - case INT96: { - return BlobFactory; - } + if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) { + return StringMaterializer.Factory; + } + case FIXED_LEN_BYTE_ARRAY: // fall through + return BlobMaterializer.Factory; default: throw new RuntimeException("Unexpected type name:" + primitiveTypeName); } @@ -112,249 +115,4 @@ static PageMaterializerFactory factoryForType(PrimitiveType.PrimitiveTypeName pr Object fillAll(); Object data(); - - class IntMaterializer implements PageMaterializer { - final ValuesReader dataReader; - - final int nullValue; - final int[] data; - - IntMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, 0, numValues); - } - - IntMaterializer(ValuesReader dataReader, int nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new int[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = dataReader.readInteger(); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } - - class LongMaterializer implements PageMaterializer { - - final ValuesReader dataReader; - - final long nullValue; - final long[] data; - - LongMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, 0, numValues); - } - - LongMaterializer(ValuesReader dataReader, long nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new long[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = dataReader.readLong(); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } - - class FloatMaterializer implements PageMaterializer { - - final ValuesReader dataReader; - - final float nullValue; - final float[] data; - - FloatMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, 0.0f, numValues); - } - - FloatMaterializer(ValuesReader dataReader, float nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new float[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = dataReader.readFloat(); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } - - class DoubleMaterializer implements PageMaterializer { - - final ValuesReader dataReader; - - final double nullValue; - final double[] data; - - DoubleMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, 0.0, numValues); - } - - DoubleMaterializer(ValuesReader dataReader, double nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new double[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = dataReader.readDouble(); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } - - class BoolMaterializer implements PageMaterializer { - - final ValuesReader dataReader; - - final byte nullValue; - final byte[] data; - - BoolMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, (byte) 0, numValues); - } - - BoolMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new byte[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = (byte) (dataReader.readBoolean() ? 1 : 0); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } - - class BlobMaterializer implements PageMaterializer { - - final ValuesReader dataReader; - - final Binary nullValue; - final Binary[] data; - - BlobMaterializer(ValuesReader dataReader, int numValues) { - this(dataReader, null, numValues); - } - - BlobMaterializer(ValuesReader dataReader, Binary nullValue, int numValues) { - this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new Binary[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); - } - - @Override - public void fillValues(int startIndex, int endIndex) { - for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = dataReader.readBytes(); - } - } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java index 04bbe4ed49a..a276ca24ff0 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java @@ -5,7 +5,7 @@ import org.apache.parquet.column.values.ValuesReader; -interface PageMaterializerFactory { +public interface PageMaterializerFactory { PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues); PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetTimeUtils.java similarity index 84% rename from extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java rename to extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetTimeUtils.java index 7bcb0af3525..d0483c777e6 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetTimeUtils.java @@ -1,7 +1,7 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -package io.deephaven.parquet.table.util; +package io.deephaven.parquet.base; import io.deephaven.time.DateTimeUtils; import io.deephaven.util.QueryConstants; @@ -11,9 +11,9 @@ import java.time.ZoneOffset; /** - * Internal library with utility methods for converting data between Deephaven and Parquet. + * Internal library with utility methods for converting time data between Deephaven and Parquet. */ -public class TransferUtils { +public class ParquetTimeUtils { /** * Returns nanoseconds from the Epoch for a {@link LocalDateTime} value in UTC timezone. * @@ -35,7 +35,8 @@ public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input nanoseconds from the * Epoch converted to a {@link LocalDateTime} in UTC timezone */ - public static @Nullable LocalDateTime epochNanosToLocalDateTimeUTC(final long nanos) { + @Nullable + public static LocalDateTime epochNanosToLocalDateTimeUTC(final long nanos) { return nanos == QueryConstants.NULL_LONG ? null : LocalDateTime.ofEpochSecond(nanos / 1_000_000_000L, (int) (nanos % 1_000_000_000L), ZoneOffset.UTC); } @@ -47,7 +48,8 @@ public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input microseconds from the * Epoch converted to a {@link LocalDateTime} in UTC timezone */ - public static @Nullable LocalDateTime epochMicrosToLocalDateTimeUTC(final long micros) { + @Nullable + public static LocalDateTime epochMicrosToLocalDateTimeUTC(final long micros) { return micros == QueryConstants.NULL_LONG ? null : LocalDateTime.ofEpochSecond(micros / 1_000_000L, (int) ((micros % 1_000_000L) * DateTimeUtils.MICRO), ZoneOffset.UTC); @@ -60,7 +62,8 @@ public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input milliseconds from the * Epoch converted to a {@link LocalDateTime} in UTC timezone */ - public static @Nullable LocalDateTime epochMillisToLocalDateTimeUTC(final long millis) { + @Nullable + public static LocalDateTime epochMillisToLocalDateTimeUTC(final long millis) { return millis == QueryConstants.NULL_LONG ? null : LocalDateTime.ofEpochSecond(millis / 1_000L, (int) ((millis % 1_000L) * DateTimeUtils.MILLI), ZoneOffset.UTC); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java new file mode 100644 index 00000000000..127ff034033 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java @@ -0,0 +1,64 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.io.api.Binary; + +import java.util.Arrays; + +public class BlobMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new BlobMaterializer(dataReader, (Binary) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new BlobMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final Binary nullValue; + final Binary[] data; + + private BlobMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private BlobMaterializer(ValuesReader dataReader, Binary nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new Binary[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBytes(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java new file mode 100644 index 00000000000..8852c0d4314 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java @@ -0,0 +1,63 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class BoolMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new BoolMaterializer(dataReader, (byte) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new BoolMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final byte nullValue; + final byte[] data; + + private BoolMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (byte) 0, numValues); + } + + private BoolMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new byte[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (byte) (dataReader.readBoolean() ? 1 : 0); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java new file mode 100644 index 00000000000..181b9bf9848 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit CharMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class ByteMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ByteMaterializer(dataReader, (byte) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ByteMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final byte nullValue; + final byte[] data; + + private ByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (byte) 0, numValues); + } + + private ByteMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new byte[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (byte) dataReader.readInteger(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java new file mode 100644 index 00000000000..43b80ccbcd2 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java @@ -0,0 +1,63 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class CharMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new CharMaterializer(dataReader, (char) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new CharMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final char nullValue; + final char[] data; + + private CharMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (char) 0, numValues); + } + + private CharMaterializer(ValuesReader dataReader, char nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new char[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (char) dataReader.readInteger(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java new file mode 100644 index 00000000000..8278e0f8695 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class DoubleMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new DoubleMaterializer(dataReader, (double) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new DoubleMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final double nullValue; + final double[] data; + + private DoubleMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private DoubleMaterializer(ValuesReader dataReader, double nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new double[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readDouble(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java new file mode 100644 index 00000000000..5b17ffbc316 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java @@ -0,0 +1,63 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class FloatMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new FloatMaterializer(dataReader, (float) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new FloatMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final float nullValue; + final float[] data; + + private FloatMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private FloatMaterializer(ValuesReader dataReader, float nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new float[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readFloat(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java new file mode 100644 index 00000000000..912910f232a --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java @@ -0,0 +1,84 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.configuration.Configuration; +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; +import org.jetbrains.annotations.NotNull; + +import java.nio.ByteBuffer; +import java.time.Instant; +import java.time.ZoneId; + +/** + * {@link PageMaterializer} implementation for {@link Instant Instants} stored as Int96s representing an Impala format + * Timestamp (nanoseconds of day and Julian date encoded as 8 bytes and 4 bytes, respectively) + */ +public class InstantFromInt96Materializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new InstantFromInt96Materializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new InstantFromInt96Materializer(dataReader, numValues); + } + }; + + /* + * Potential references/points of comparison for this algorithm: https://github.com/apache/iceberg/pull/1184/files + * https://github.com/apache/arrow/blob/master/cpp/src/parquet/types.h (last retrieved as + * https://github.com/apache/arrow/blob/d5a2aa2ffb1c2fc4f3ca48c829fcdba80ec67916/cpp/src/parquet/types.h) + */ + private static final long NANOS_PER_DAY = 86400L * 1000 * 1000 * 1000; + private static final int JULIAN_OFFSET_TO_UNIX_EPOCH_DAYS = 2_440_588; + private static long offset; + static { + final String referenceTimeZone = + Configuration.getInstance().getStringWithDefault("deephaven.parquet.referenceTimeZone", "UTC"); + setReferenceTimeZone(referenceTimeZone); + } + + final ValuesReader dataReader; + + private InstantFromInt96Materializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private InstantFromInt96Materializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + /** + * Allows overriding the time zone to be used when interpreting Int96 timestamp values. Default is UTC. Can be set + * globally with the parameter deephaven.parquet.referenceTimeZone. Valid values are time zone strings that would be + * used in {@link DateTimeUtils#parseInstant(String) parseInstant}, such as NY. + */ + private static void setReferenceTimeZone(@NotNull final String timeZone) { + offset = DateTimeUtils.nanosOfDay(DateTimeUtils.parseInstant("1970-01-01T00:00:00 " + timeZone), + ZoneId.of("UTC"), false); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = readInstantNanos(); + } + } + + long readInstantNanos() { + final ByteBuffer resultBuffer = ByteBuffer.wrap(dataReader.readBytes().getBytesUnsafe()); + resultBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN); + final long nanos = resultBuffer.getLong(); + final int julianDate = resultBuffer.getInt(); + return (julianDate - JULIAN_OFFSET_TO_UNIX_EPOCH_DAYS) * (NANOS_PER_DAY) + nanos + offset; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java new file mode 100644 index 00000000000..5c82c8f4a45 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java @@ -0,0 +1,42 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +public class InstantNanosFromMicrosMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new InstantNanosFromMicrosMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new InstantNanosFromMicrosMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private InstantNanosFromMicrosMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private InstantNanosFromMicrosMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.microsToNanos(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java new file mode 100644 index 00000000000..4f94a5dd9f8 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java @@ -0,0 +1,46 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit InstantNanosFromMicrosMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +public class InstantNanosFromMillisMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new InstantNanosFromMillisMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new InstantNanosFromMillisMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private InstantNanosFromMillisMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private InstantNanosFromMillisMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.millisToNanos(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java new file mode 100644 index 00000000000..f0a048ca53e --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class IntMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final int nullValue; + final int[] data; + + private IntMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new int[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readInteger(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java new file mode 100644 index 00000000000..18adfd60971 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java @@ -0,0 +1,65 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalDate; +import java.util.Arrays; + +public class LocalDateMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalDateMaterializer(dataReader, (LocalDate) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalDateMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final LocalDate nullValue; + final LocalDate[] data; + + private LocalDateMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalDateMaterializer(ValuesReader dataReader, LocalDate nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new LocalDate[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.epochDaysAsIntToLocalDate(dataReader.readInteger()); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java new file mode 100644 index 00000000000..212c2df3a81 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java @@ -0,0 +1,49 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LocalDateTimeFromMillisMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.ParquetTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalDateTime; + +public class LocalDateTimeFromMicrosMaterializer extends LocalDateTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalDateTimeFromMicrosMaterializer(dataReader, (LocalDateTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalDateTimeFromMicrosMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalDateTimeFromMicrosMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalDateTimeFromMicrosMaterializer(ValuesReader dataReader, LocalDateTime nullValue, + int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = ParquetTimeUtils.epochMicrosToLocalDateTimeUTC(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java new file mode 100644 index 00000000000..0bf6ad7fb89 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.ParquetTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalDateTime; + +public class LocalDateTimeFromMillisMaterializer extends LocalDateTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalDateTimeFromMillisMaterializer(dataReader, (LocalDateTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalDateTimeFromMillisMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalDateTimeFromMillisMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalDateTimeFromMillisMaterializer(ValuesReader dataReader, LocalDateTime nullValue, + int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = ParquetTimeUtils.epochMillisToLocalDateTimeUTC(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java new file mode 100644 index 00000000000..664887b2aa4 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java @@ -0,0 +1,49 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LocalDateTimeFromMillisMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.ParquetTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalDateTime; + +public class LocalDateTimeFromNanosMaterializer extends LocalDateTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalDateTimeFromNanosMaterializer(dataReader, (LocalDateTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalDateTimeFromNanosMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalDateTimeFromNanosMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalDateTimeFromNanosMaterializer(ValuesReader dataReader, LocalDateTime nullValue, + int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = ParquetTimeUtils.epochNanosToLocalDateTimeUTC(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeMaterializerBase.java new file mode 100644 index 00000000000..60118a41f68 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeMaterializerBase.java @@ -0,0 +1,40 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LocalTimeMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.time.LocalDateTime; +import java.util.Arrays; + +abstract class LocalDateTimeMaterializerBase implements PageMaterializer { + + final LocalDateTime nullValue; + final LocalDateTime[] data; + + LocalDateTimeMaterializerBase(LocalDateTime nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new LocalDateTime[numValues]; + } + + @Override + public final void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public final Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public final Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java new file mode 100644 index 00000000000..360fe334723 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java @@ -0,0 +1,44 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalTime; + +public class LocalTimeFromMicrosMaterializer extends LocalTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalTimeFromMicrosMaterializer(dataReader, (LocalTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalTimeFromMicrosMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalTimeFromMicrosMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalTimeFromMicrosMaterializer(ValuesReader dataReader, LocalTime nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.microsOfDayToLocalTime(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java new file mode 100644 index 00000000000..da3d799aac6 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java @@ -0,0 +1,48 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LocalTimeFromMicrosMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalTime; + +public class LocalTimeFromMillisMaterializer extends LocalTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalTimeFromMillisMaterializer(dataReader, (LocalTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalTimeFromMillisMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalTimeFromMillisMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalTimeFromMillisMaterializer(ValuesReader dataReader, LocalTime nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.millisOfDayToLocalTime(dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java new file mode 100644 index 00000000000..81b0c8996b2 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java @@ -0,0 +1,48 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LocalTimeFromMicrosMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.time.DateTimeUtils; +import org.apache.parquet.column.values.ValuesReader; + +import java.time.LocalTime; + +public class LocalTimeFromNanosMaterializer extends LocalTimeMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LocalTimeFromNanosMaterializer(dataReader, (LocalTime) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LocalTimeFromNanosMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LocalTimeFromNanosMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private LocalTimeFromNanosMaterializer(ValuesReader dataReader, LocalTime nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = DateTimeUtils.nanosOfDayToLocalTime(dataReader.readLong()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeMaterializerBase.java new file mode 100644 index 00000000000..320fed412b6 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeMaterializerBase.java @@ -0,0 +1,36 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.time.LocalTime; +import java.util.Arrays; + +abstract class LocalTimeMaterializerBase implements PageMaterializer { + + final LocalTime nullValue; + final LocalTime[] data; + + LocalTimeMaterializerBase(LocalTime nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new LocalTime[numValues]; + } + + @Override + public final void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public final Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public final Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java new file mode 100644 index 00000000000..6db570f83f6 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromUnsignedIntMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromUnsignedIntMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromUnsignedIntMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LongFromUnsignedIntMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromUnsignedIntMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Integer.toUnsignedLong(dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java new file mode 100644 index 00000000000..a8eb22eb0e4 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + private LongMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readLong(); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializerBase.java new file mode 100644 index 00000000000..7edfa3b8423 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializerBase.java @@ -0,0 +1,35 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class LongMaterializerBase implements PageMaterializer { + + final long nullValue; + final long[] data; + + LongMaterializerBase(long nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new long[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java new file mode 100644 index 00000000000..bbfc0613e77 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit CharMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class ShortMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ShortMaterializer(dataReader, (short) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ShortMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final short nullValue; + final short[] data; + + private ShortMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (short) 0, numValues); + } + + private ShortMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new short[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (short) dataReader.readInteger(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java new file mode 100644 index 00000000000..759f8ab10d3 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +import java.util.Arrays; + +public class StringMaterializer implements PageMaterializer { + + public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new StringMaterializer(dataReader, (String) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new StringMaterializer(dataReader, numValues); + } + }; + + final ValuesReader dataReader; + + final String nullValue; + final String[] data; + + private StringMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, null, numValues); + } + + private StringMaterializer(ValuesReader dataReader, String nullValue, int numValues) { + this.dataReader = dataReader; + this.nullValue = nullValue; + this.data = new String[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBytes().toStringUsingUTF8(); + } + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java b/extensions/parquet/base/src/test/java/io/deephaven/parquet/base/TestParquetTimeUtils.java similarity index 63% rename from extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java rename to extensions/parquet/base/src/test/java/io/deephaven/parquet/base/TestParquetTimeUtils.java index b48360c41c2..f32f9975cfe 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java +++ b/extensions/parquet/base/src/test/java/io/deephaven/parquet/base/TestParquetTimeUtils.java @@ -1,9 +1,8 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -package io.deephaven.parquet.table; +package io.deephaven.parquet.base; -import io.deephaven.parquet.table.util.TransferUtils; import io.deephaven.time.DateTimeUtils; import io.deephaven.util.QueryConstants; import junit.framework.TestCase; @@ -13,15 +12,15 @@ import java.time.LocalDateTime; import java.time.ZoneId; -final public class TestTransferUtils { +public class TestParquetTimeUtils { @Test public void testEpochNanosUTC() { final long nanos = 123456789123456789L; final Instant dt2 = Instant.ofEpochSecond(0, nanos); final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(nanos, TransferUtils.epochNanosUTC(ldt)); - TestCase.assertEquals(QueryConstants.NULL_LONG, TransferUtils.epochNanosUTC(null)); + TestCase.assertEquals(nanos, ParquetTimeUtils.epochNanosUTC(ldt)); + TestCase.assertEquals(QueryConstants.NULL_LONG, ParquetTimeUtils.epochNanosUTC(null)); } @Test @@ -29,8 +28,8 @@ public void testEpochNanosTo() { final long nanos = 123456789123456789L; final Instant dt2 = Instant.ofEpochSecond(0, nanos); final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, TransferUtils.epochNanosToLocalDateTimeUTC(nanos)); - TestCase.assertNull(TransferUtils.epochNanosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + TestCase.assertEquals(ldt, ParquetTimeUtils.epochNanosToLocalDateTimeUTC(nanos)); + TestCase.assertNull(ParquetTimeUtils.epochNanosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); } @Test @@ -40,8 +39,8 @@ public void testEpochMicrosTo() { nanos = DateTimeUtils.microsToNanos(micros); final Instant dt2 = Instant.ofEpochSecond(0, nanos); final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, TransferUtils.epochMicrosToLocalDateTimeUTC(micros)); - TestCase.assertNull(TransferUtils.epochMicrosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + TestCase.assertEquals(ldt, ParquetTimeUtils.epochMicrosToLocalDateTimeUTC(micros)); + TestCase.assertNull(ParquetTimeUtils.epochMicrosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); } @Test @@ -51,7 +50,7 @@ public void testEpochMillisTo() { nanos = DateTimeUtils.millisToNanos(millis); final Instant dt2 = Instant.ofEpochSecond(0, nanos); final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, TransferUtils.epochMillisToLocalDateTimeUTC(millis)); - TestCase.assertNull(TransferUtils.epochMillisToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + TestCase.assertEquals(ldt, ParquetTimeUtils.epochMillisToLocalDateTimeUTC(millis)); + TestCase.assertNull(ParquetTimeUtils.epochMillisToLocalDateTimeUTC(QueryConstants.NULL_LONG)); } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index 78045b4722f..bad7cad841e 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -376,7 +376,7 @@ private static ToPage makeToPage( toPage = ToLongPage.create(pageType); break; case INT96: - toPage = ToInstantPageFromInt96.create(pageType); + toPage = ToInstantPage.create(pageType); break; case DOUBLE: toPage = ToDoublePage.create(pageType); @@ -461,9 +461,9 @@ private static class LogicalTypeVisitor public Optional> visit( final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { if (timestampLogicalType.isAdjustedToUTC()) { - return Optional.of(ToInstantPage.create(componentType, timestampLogicalType.getUnit())); + return Optional.of(ToInstantPage.create(componentType)); } - return Optional.of(ToLocalDateTimePage.create(componentType, timestampLogicalType.getUnit())); + return Optional.of(ToLocalDateTimePage.create(componentType)); } @Override @@ -471,9 +471,9 @@ private static class LogicalTypeVisitor if (intLogicalType.isSigned()) { switch (intLogicalType.getBitWidth()) { case 8: - return Optional.of(ToBytePageFromInt.create(componentType)); + return Optional.of(ToBytePage.create(componentType)); case 16: - return Optional.of(ToShortPageFromInt.create(componentType)); + return Optional.of(ToShortPage.create(componentType)); case 32: return Optional.of(ToIntPage.create(componentType)); case 64: @@ -483,9 +483,9 @@ private static class LogicalTypeVisitor switch (intLogicalType.getBitWidth()) { case 8: case 16: - return Optional.of(ToCharPageFromInt.create(componentType)); + return Optional.of(ToCharPage.create(componentType)); case 32: - return Optional.of(ToLongPageFromUnsignedInt.create(componentType)); + return Optional.of(ToLongPage.create(componentType)); } } return Optional.empty(); @@ -493,13 +493,12 @@ private static class LogicalTypeVisitor @Override public Optional> visit(final LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { - return Optional.of(ToDatePageFromInt.create(componentType)); + return Optional.of(ToLocalDatePage.create(componentType)); } @Override public Optional> visit(final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - return Optional - .of(ToTimePage.create(componentType, timeLogicalType.getUnit(), timeLogicalType.isAdjustedToUTC())); + return Optional.of(ToLocalTimePage.create(componentType)); } @Override diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePageFromInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java similarity index 51% rename from extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePageFromInt.java rename to extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java index a8ce4677508..ff24ec4aeda 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePageFromInt.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToCharPageFromInt and run "./gradlew replicateToPage" to regenerate +// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate // // @formatter:off package io.deephaven.parquet.table.pagestore.topage; @@ -11,15 +11,13 @@ import io.deephaven.chunk.attributes.Any; import org.jetbrains.annotations.NotNull; -import static io.deephaven.util.QueryConstants.NULL_BYTE; +import static io.deephaven.util.QueryConstants.NULL_BYTE_BOXED; -public class ToBytePageFromInt implements ToPage { +public class ToBytePage implements ToPage { - private static final ToBytePageFromInt INSTANCE = new ToBytePageFromInt<>(); + private static final ToBytePage INSTANCE = new ToBytePage<>(); - private static final Integer NULL_BYTE_AS_INT = (int) NULL_BYTE; - - public static ToBytePageFromInt create(Class nativeType) { + public static ToBytePage create(Class nativeType) { if (nativeType == null || byte.class.equals(nativeType)) { // noinspection unchecked return INSTANCE; @@ -28,7 +26,7 @@ public static ToBytePageFromInt create(Class nativeT throw new IllegalArgumentException("The native type for a Byte column is " + nativeType.getCanonicalName()); } - private ToBytePageFromInt() {} + private ToBytePage() {} @Override @NotNull @@ -45,19 +43,6 @@ public final ChunkType getChunkType() { @Override @NotNull public final Object nullValue() { - return NULL_BYTE_AS_INT; - } - - @Override - @NotNull - public final byte[] convertResult(Object result) { - int[] from = (int[]) result; - byte[] to = new byte[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = (byte) from[i]; - } - - return to; + return NULL_BYTE_BOXED; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPageFromInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java similarity index 52% rename from extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPageFromInt.java rename to extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java index 42d820ac711..45f13abec94 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPageFromInt.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java @@ -1,21 +1,23 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate +// +// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import org.jetbrains.annotations.NotNull; -import static io.deephaven.util.QueryConstants.NULL_CHAR; - -public class ToCharPageFromInt implements ToPage { +import static io.deephaven.util.QueryConstants.NULL_CHAR_BOXED; - private static final ToCharPageFromInt INSTANCE = new ToCharPageFromInt<>(); +public class ToCharPage implements ToPage { - private static final Integer NULL_CHAR_AS_INT = (int) NULL_CHAR; + private static final ToCharPage INSTANCE = new ToCharPage<>(); - public static ToCharPageFromInt create(Class nativeType) { + public static ToCharPage create(Class nativeType) { if (nativeType == null || char.class.equals(nativeType)) { // noinspection unchecked return INSTANCE; @@ -24,7 +26,7 @@ public static ToCharPageFromInt create(Class nativeT throw new IllegalArgumentException("The native type for a Char column is " + nativeType.getCanonicalName()); } - private ToCharPageFromInt() {} + private ToCharPage() {} @Override @NotNull @@ -41,19 +43,6 @@ public final ChunkType getChunkType() { @Override @NotNull public final Object nullValue() { - return NULL_CHAR_AS_INT; - } - - @Override - @NotNull - public final char[] convertResult(Object result) { - int[] from = (int[]) result; - char[] to = new char[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = (char) from[i]; - } - - return to; + return NULL_CHAR_BOXED; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDatePageFromInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDatePageFromInt.java deleted file mode 100644 index 1f141cf46b9..00000000000 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDatePageFromInt.java +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.parquet.table.pagestore.topage; - -import io.deephaven.chunk.ChunkType; -import io.deephaven.chunk.attributes.Any; -import io.deephaven.time.DateTimeUtils; -import org.jetbrains.annotations.NotNull; - -import java.time.LocalDate; - -import static io.deephaven.util.QueryConstants.NULL_INT_BOXED; - -public class ToDatePageFromInt implements ToPage { - - private static final ToDatePageFromInt INSTANCE = new ToDatePageFromInt<>(); - - public static ToDatePageFromInt create(final Class nativeType) { - if (nativeType == null || LocalDate.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; - } - - throw new IllegalArgumentException("The native type for a Date column is " + nativeType.getCanonicalName()); - } - - private ToDatePageFromInt() {} - - @Override - @NotNull - public final Class getNativeType() { - return LocalDate.class; - } - - @Override - @NotNull - public final ChunkType getChunkType() { - return ChunkType.Object; - } - - @Override - @NotNull - public final Object nullValue() { - return NULL_INT_BOXED; - } - - @Override - @NotNull - public final LocalDate[] convertResult(final Object result) { - final int[] from = (int[]) result; - final LocalDate[] to = new LocalDate[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = DateTimeUtils.epochDaysAsIntToLocalDate(from[i]); - } - return to; - } -} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java index 9526766d522..3a68addb35e 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java @@ -26,8 +26,7 @@ public static ToDoublePage create(Class nativeType) throw new IllegalArgumentException("The native type for a Double column is " + nativeType.getCanonicalName()); } - @SuppressWarnings("WeakerAccess") - ToDoublePage() {} + private ToDoublePage() {} @Override @NotNull diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java index 944e32645c8..3801e9ae21d 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java @@ -26,8 +26,7 @@ public static ToFloatPage create(Class nativeType) { throw new IllegalArgumentException("The native type for a Float column is " + nativeType.getCanonicalName()); } - @SuppressWarnings("WeakerAccess") - ToFloatPage() {} + private ToFloatPage() {} @Override @NotNull diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java index 1df34a35b89..2a961f688fe 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java @@ -3,65 +3,38 @@ // package io.deephaven.parquet.table.pagestore.topage; +import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.time.DateTimeUtils; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.ObjectVectorDirect; -import org.apache.parquet.schema.LogicalTypeAnnotation; import org.jetbrains.annotations.NotNull; import java.time.Instant; -import java.util.function.LongFunction; -import java.util.function.LongUnaryOperator; -public abstract class ToInstantPage extends ToLongPage { +import static io.deephaven.util.QueryConstants.NULL_LONG_BOXED; - @SuppressWarnings("rawtypes") - private static final ToInstantPage MILLIS_INSTANCE = new ToInstantPageFromMillis(); - @SuppressWarnings("rawtypes") - private static final ToInstantPage MICROS_INSTANCE = new ToInstantPageFromMicros(); - @SuppressWarnings("rawtypes") - private static final ToInstantPage NANOS_INSTANCE = new ToInstantPageFromNanos(); +public class ToInstantPage implements ToPage { + + private static final ToInstantPage INSTANCE = new ToInstantPage<>(); @SuppressWarnings("unchecked") - public static ToPage create(@NotNull final Class nativeType, - final LogicalTypeAnnotation.TimeUnit unit) { - if (Instant.class.equals(nativeType)) { - switch (unit) { - case MILLIS: - return MILLIS_INSTANCE; - case MICROS: - return MICROS_INSTANCE; - case NANOS: - return NANOS_INSTANCE; - default: - throw new IllegalArgumentException("Unsupported unit=" + unit); - } + public static ToPage create(final Class nativeType) { + if (nativeType == null || Instant.class.equals(nativeType)) { + // noinspection unchecked + return INSTANCE; } throw new IllegalArgumentException( - "The native type foran Instant column is " + nativeType.getCanonicalName()); + "The native type for an Instant column is " + nativeType.getCanonicalName()); } - protected ToInstantPage() {} - - protected static ObjectVector makeVectorHelper(final long[] result, - final LongFunction unitToTime) { - Instant[] to = new Instant[result.length]; + private ToInstantPage() {} - for (int i = 0; i < result.length; ++i) { - to[i] = unitToTime.apply(result[i]); - } - return new ObjectVectorDirect<>(to); - } - - protected static long[] convertResultHelper(@NotNull final Object result, final LongUnaryOperator unitToNanos) { - final long[] resultLongs = (long[]) result; - final int resultLength = resultLongs.length; - for (int ri = 0; ri < resultLength; ++ri) { - resultLongs[ri] = unitToNanos.applyAsLong(resultLongs[ri]); - } - return resultLongs; + @Override + @NotNull + public final Class getNativeType() { + return long.class; } @Override @@ -70,37 +43,26 @@ public final Class getNativeComponentType() { return Instant.class; } - private static final class ToInstantPageFromNanos extends ToInstantPage { - @Override - @NotNull - public ObjectVector makeVector(long[] result) { - return makeVectorHelper(result, DateTimeUtils::epochNanosToInstant); - } + @Override + @NotNull + public final ChunkType getChunkType() { + return ChunkType.Long; } - private static final class ToInstantPageFromMicros extends ToInstantPage { - @Override - @NotNull - public ObjectVector makeVector(long[] result) { - return makeVectorHelper(result, DateTimeUtils::epochMicrosToInstant); - } - - @Override - public long[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::microsToNanos); - } + @Override + @NotNull + public final Object nullValue() { + return NULL_LONG_BOXED; } - private static final class ToInstantPageFromMillis extends ToInstantPage { - @Override - @NotNull - public ObjectVector makeVector(long[] result) { - return makeVectorHelper(result, DateTimeUtils::epochMillisToInstant); - } + @Override + @NotNull + public ObjectVector makeVector(long[] result) { + Instant[] to = new Instant[result.length]; - @Override - public long[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::millisToNanos); + for (int i = 0; i < result.length; ++i) { + to[i] = DateTimeUtils.epochNanosToInstant(result[i]); } + return new ObjectVectorDirect<>(to); } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPageFromInt96.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPageFromInt96.java deleted file mode 100644 index 5d42a8f922b..00000000000 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPageFromInt96.java +++ /dev/null @@ -1,122 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.parquet.table.pagestore.topage; - -import io.deephaven.chunk.attributes.Any; -import io.deephaven.time.DateTimeUtils; -import io.deephaven.vector.ObjectVector; -import io.deephaven.vector.ObjectVectorDirect; -import io.deephaven.configuration.Configuration; -import io.deephaven.chunk.ChunkType; -import org.apache.parquet.io.api.Binary; -import org.jetbrains.annotations.NotNull; - -import java.nio.ByteBuffer; -import java.time.Instant; -import java.time.ZoneId; - -import static io.deephaven.util.QueryConstants.NULL_LONG; - -/** - * Parquet {@link ToPage} implementation for {@link Instant}s stored as Int96s representing an Impala format Timestamp - * (nanoseconds of day and Julian date encoded as 8 bytes and 4 bytes, respectively) - * - */ -public class ToInstantPageFromInt96 implements ToPage { - /* - * Potential references/points of comparison for this algorithm: https://github.com/apache/iceberg/pull/1184/files - * https://github.com/apache/arrow/blob/master/cpp/src/parquet/types.h (last retrieved as - * https://github.com/apache/arrow/blob/d5a2aa2ffb1c2fc4f3ca48c829fcdba80ec67916/cpp/src/parquet/types.h) - */ - @SuppressWarnings("rawtypes") - private static final ToInstantPageFromInt96 INSTANCE = new ToInstantPageFromInt96<>(); - private static final long NANOS_PER_DAY = 86400L * 1000 * 1000 * 1000; - private static final int JULIAN_OFFSET_TO_UNIX_EPOCH_DAYS = 2_440_588; - private static long offset; - static { - final String referenceTimeZone = - Configuration.getInstance().getStringWithDefault("deephaven.parquet.referenceTimeZone", "UTC"); - setReferenceTimeZone(referenceTimeZone); - } - - public static ToInstantPageFromInt96 create(@NotNull Class nativeType) { - if (Instant.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; - } - - throw new IllegalArgumentException("The native type foran Instant column is " + nativeType.getCanonicalName()); - } - - private ToInstantPageFromInt96() {} - - /** - * Allows overriding the time zone to be used when interpreting Int96 timestamp values. Default is UTC. Can be set - * globally with the parameter deephaven.parquet.referenceTimeZone. Valid values are time zone Strings which would - * be used in {@link DateTimeUtils#parseInstant(String) parseInstant}, such as NY. - * - * @param timeZone - */ - public static void setReferenceTimeZone(@NotNull final String timeZone) { - offset = DateTimeUtils.nanosOfDay(DateTimeUtils.parseInstant("1970-01-01T00:00:00 " + timeZone), - ZoneId.of("UTC")); - } - - @Override - @NotNull - public final Class getNativeType() { - return long.class; - } - - @Override - @NotNull - public final ChunkType getChunkType() { - return ChunkType.Long; - } - - @Override - public Object nullValue() { - return null; - } - - @Override - @NotNull - public final Class getNativeComponentType() { - return Instant.class; - } - - @Override - public final long[] convertResult(@NotNull final Object result) { - // result is delivered as an array of Binary[12] - final Binary[] results = (Binary[]) result; - final int resultLength = results.length; - final long[] resultLongs = new long[resultLength]; - - for (int ri = 0; ri < resultLength; ++ri) { - if (results[ri] == null) { - resultLongs[ri] = NULL_LONG; - continue; - } - final ByteBuffer resultBuffer = ByteBuffer.wrap(results[ri].getBytesUnsafe()); - resultBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN); - final long nanos = resultBuffer.getLong(); - final int julianDate = resultBuffer.getInt(); - resultLongs[ri] = (julianDate - JULIAN_OFFSET_TO_UNIX_EPOCH_DAYS) * (NANOS_PER_DAY) + nanos + offset; - } - return resultLongs; - } - - @Override - @NotNull - public final ObjectVector makeVector(@NotNull final long[] result) { - final Instant[] to = new Instant[result.length]; - - final int resultLength = result.length; - for (int ri = 0; ri < resultLength; ++ri) { - to[ri] = DateTimeUtils.epochNanosToInstant(result[ri]); - } - - return new ObjectVectorDirect<>(to); - } -} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java index 68f4a96b8d2..845ea12ec9a 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java @@ -22,8 +22,7 @@ public static ToIntPage create(Class nativeType) { throw new IllegalArgumentException("The native type for a Int column is " + nativeType.getCanonicalName()); } - @SuppressWarnings("WeakerAccess") - ToIntPage() {} + private ToIntPage() {} @Override @NotNull diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java new file mode 100644 index 00000000000..a2747a8d78e --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java @@ -0,0 +1,38 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.table.pagestore.topage; + +import io.deephaven.chunk.ChunkType; +import io.deephaven.chunk.attributes.Any; +import org.jetbrains.annotations.NotNull; + +import java.time.LocalDate; + +public class ToLocalDatePage implements ToPage { + + private static final ToLocalDatePage INSTANCE = new ToLocalDatePage<>(); + + public static ToLocalDatePage create(final Class nativeType) { + if (nativeType == null || LocalDate.class.equals(nativeType)) { + // noinspection unchecked + return INSTANCE; + } + throw new IllegalArgumentException( + "The native type for a LocalDate column is " + nativeType.getCanonicalName()); + } + + private ToLocalDatePage() {} + + @Override + @NotNull + public final Class getNativeType() { + return LocalDate.class; + } + + @Override + @NotNull + public final ChunkType getChunkType() { + return ChunkType.Object; + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java index 1cf593e2354..97f8ef5842c 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java @@ -1,51 +1,32 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit ToLocalDatePage and run "./gradlew replicateToPage" to regenerate +// +// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; -import io.deephaven.parquet.table.util.TransferUtils; -import io.deephaven.util.QueryConstants; -import org.apache.parquet.schema.LogicalTypeAnnotation; import org.jetbrains.annotations.NotNull; import java.time.LocalDateTime; -import java.util.function.LongFunction; -/** - * Used to convert Parquet TIMESTAMP values with {@code isAdjustedToUTC=false} to {@link LocalDateTime}. Ref: ... - */ public class ToLocalDateTimePage implements ToPage { - @SuppressWarnings("rawtypes") - private static final ToPage MILLIS_INSTANCE = new ToLocalDateTimePageFromMillis(); - @SuppressWarnings("rawtypes") - private static final ToPage MICROS_INSTANCE = new ToLocalDateTimePageFromMicros(); - @SuppressWarnings("rawtypes") - private static final ToPage NANOS_INSTANCE = new ToLocalDateTimePageFromNanos(); + private static final ToLocalDateTimePage INSTANCE = new ToLocalDateTimePage<>(); - @SuppressWarnings("unchecked") - public static ToPage create(@NotNull final Class nativeType, - @NotNull final LogicalTypeAnnotation.TimeUnit unit) { - if (LocalDateTime.class.equals(nativeType)) { - switch (unit) { - case MILLIS: - return MILLIS_INSTANCE; - case MICROS: - return MICROS_INSTANCE; - case NANOS: - return NANOS_INSTANCE; - default: - throw new IllegalArgumentException("Unsupported unit=" + unit); - } + public static ToLocalDateTimePage create(final Class nativeType) { + if (nativeType == null || LocalDateTime.class.equals(nativeType)) { + // noinspection unchecked + return INSTANCE; } throw new IllegalArgumentException( "The native type for a LocalDateTime column is " + nativeType.getCanonicalName()); } - ToLocalDateTimePage() {} + private ToLocalDateTimePage() {} @Override @NotNull @@ -58,43 +39,4 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } - - @Override - @NotNull - public final Object nullValue() { - return QueryConstants.NULL_LONG_BOXED; - } - - private static LocalDateTime[] convertResultHelper(@NotNull final Object result, - @NotNull final LongFunction unitToLocalDateTime) { - final long[] from = (long[]) result; - final LocalDateTime[] to = new LocalDateTime[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = unitToLocalDateTime.apply(from[i]); - } - return to; - } - - private static final class ToLocalDateTimePageFromMillis extends ToLocalDateTimePage { - @Override - public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, TransferUtils::epochMillisToLocalDateTimeUTC); - } - } - - private static final class ToLocalDateTimePageFromMicros extends ToLocalDateTimePage { - @Override - public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, TransferUtils::epochMicrosToLocalDateTimeUTC); - } - } - - private static final class ToLocalDateTimePageFromNanos extends ToLocalDateTimePage { - @Override - public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, TransferUtils::epochNanosToLocalDateTimeUTC); - } - } - } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java new file mode 100644 index 00000000000..2c6fa645e27 --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java @@ -0,0 +1,42 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit ToLocalDatePage and run "./gradlew replicateToPage" to regenerate +// +// @formatter:off +package io.deephaven.parquet.table.pagestore.topage; + +import io.deephaven.chunk.ChunkType; +import io.deephaven.chunk.attributes.Any; +import org.jetbrains.annotations.NotNull; + +import java.time.LocalTime; + +public class ToLocalTimePage implements ToPage { + + private static final ToLocalTimePage INSTANCE = new ToLocalTimePage<>(); + + public static ToLocalTimePage create(final Class nativeType) { + if (nativeType == null || LocalTime.class.equals(nativeType)) { + // noinspection unchecked + return INSTANCE; + } + throw new IllegalArgumentException( + "The native type for a LocalTime column is " + nativeType.getCanonicalName()); + } + + private ToLocalTimePage() {} + + @Override + @NotNull + public final Class getNativeType() { + return LocalTime.class; + } + + @Override + @NotNull + public final ChunkType getChunkType() { + return ChunkType.Object; + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java index 0acaaab2d91..51f77f9ed24 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java @@ -26,8 +26,7 @@ public static ToLongPage create(Class nativeType) { throw new IllegalArgumentException("The native type for a Long column is " + nativeType.getCanonicalName()); } - @SuppressWarnings("WeakerAccess") - ToLongPage() {} + private ToLongPage() {} @Override @NotNull diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPageFromUnsignedInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPageFromUnsignedInt.java deleted file mode 100644 index 7106c28423b..00000000000 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPageFromUnsignedInt.java +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.parquet.table.pagestore.topage; - -import io.deephaven.chunk.ChunkType; -import io.deephaven.chunk.attributes.Any; -import org.jetbrains.annotations.NotNull; - -import static io.deephaven.util.QueryConstants.NULL_INT; -import static io.deephaven.util.QueryConstants.NULL_INT_BOXED; -import static io.deephaven.util.QueryConstants.NULL_LONG; - -public class ToLongPageFromUnsignedInt implements ToPage { - - private static final ToLongPageFromUnsignedInt INSTANCE = new ToLongPageFromUnsignedInt<>(); - - public static ToLongPageFromUnsignedInt create(final Class nativeType) { - if (nativeType == null || long.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; - } - throw new IllegalArgumentException("The native type for a Long column is " + nativeType.getCanonicalName()); - } - - private ToLongPageFromUnsignedInt() {} - - @Override - @NotNull - public final Class getNativeType() { - return long.class; - } - - @Override - @NotNull - public final ChunkType getChunkType() { - return ChunkType.Long; - } - - @Override - @NotNull - public final Object nullValue() { - return NULL_INT_BOXED; - } - - @Override - public final long[] convertResult(final Object result) { - final int[] from = (int[]) result; - final long[] to = new long[from.length]; - for (int i = 0; i < from.length; ++i) { - final int fromValue = from[i]; - to[i] = fromValue == NULL_INT ? NULL_LONG : Integer.toUnsignedLong(fromValue); - } - return to; - } -} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPageFromInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java similarity index 51% rename from extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPageFromInt.java rename to extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java index ff1e32286fc..23368baa1b8 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPageFromInt.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToCharPageFromInt and run "./gradlew replicateToPage" to regenerate +// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate // // @formatter:off package io.deephaven.parquet.table.pagestore.topage; @@ -11,15 +11,13 @@ import io.deephaven.chunk.attributes.Any; import org.jetbrains.annotations.NotNull; -import static io.deephaven.util.QueryConstants.NULL_SHORT; +import static io.deephaven.util.QueryConstants.NULL_SHORT_BOXED; -public class ToShortPageFromInt implements ToPage { +public class ToShortPage implements ToPage { - private static final ToShortPageFromInt INSTANCE = new ToShortPageFromInt<>(); + private static final ToShortPage INSTANCE = new ToShortPage<>(); - private static final Integer NULL_SHORT_AS_INT = (int) NULL_SHORT; - - public static ToShortPageFromInt create(Class nativeType) { + public static ToShortPage create(Class nativeType) { if (nativeType == null || short.class.equals(nativeType)) { // noinspection unchecked return INSTANCE; @@ -28,7 +26,7 @@ public static ToShortPageFromInt create(Class native throw new IllegalArgumentException("The native type for a Short column is " + nativeType.getCanonicalName()); } - private ToShortPageFromInt() {} + private ToShortPage() {} @Override @NotNull @@ -45,19 +43,6 @@ public final ChunkType getChunkType() { @Override @NotNull public final Object nullValue() { - return NULL_SHORT_AS_INT; - } - - @Override - @NotNull - public final short[] convertResult(Object result) { - int[] from = (int[]) result; - short[] to = new short[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = (short) from[i]; - } - - return to; + return NULL_SHORT_BOXED; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java index 56945287c82..83447e68e93 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java @@ -7,14 +7,13 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.util.channel.SeekableChannelContext; import org.apache.parquet.column.Dictionary; -import org.apache.parquet.io.api.Binary; import org.jetbrains.annotations.NotNull; import java.util.function.Function; public class ToStringPage implements ToPage { - static final ToStringPage INSTANCE = new ToStringPage<>(); + private static final ToStringPage INSTANCE = new ToStringPage<>(); public static ToPage create( final Class nativeType, @@ -47,17 +46,4 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } - - @Override - @NotNull - public final String[] convertResult(final Object result) { - final Binary[] from = (Binary[]) result; - final String[] to = new String[from.length]; - for (int ri = 0; ri < to.length; ++ri) { - if (from[ri] != null) { - to[ri] = from[ri].toStringUsingUTF8(); - } - } - return to; - } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java deleted file mode 100644 index d164557a358..00000000000 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java +++ /dev/null @@ -1,111 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.parquet.table.pagestore.topage; - -import io.deephaven.chunk.ChunkType; -import io.deephaven.chunk.attributes.Any; -import io.deephaven.time.DateTimeUtils; -import io.deephaven.util.QueryConstants; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.jetbrains.annotations.NotNull; - -import java.time.LocalTime; -import java.util.function.LongFunction; - -public class ToTimePage implements ToPage { - - @SuppressWarnings("rawtypes") - private static final ToPage MILLIS_INSTANCE = new ToTimePageFromMillis(); - @SuppressWarnings("rawtypes") - private static final ToPage MICROS_INSTANCE = new ToTimePageFromMicros(); - @SuppressWarnings("rawtypes") - private static final ToPage NANOS_INSTANCE = new ToTimePageFromNanos(); - - @SuppressWarnings("unchecked") - public static ToPage create( - @NotNull final Class nativeType, - @NotNull final LogicalTypeAnnotation.TimeUnit unit, - @SuppressWarnings("unused") final boolean isAdjustedToUTC) { - // isAdjustedToUTC parameter is ignored while reading from Parquet files - if (LocalTime.class.equals(nativeType)) { - switch (unit) { - case MILLIS: - return MILLIS_INSTANCE; - case MICROS: - return MICROS_INSTANCE; - case NANOS: - return NANOS_INSTANCE; - default: - throw new IllegalArgumentException("Unsupported unit=" + unit); - } - } - throw new IllegalArgumentException("The native type for a Time column is " + nativeType.getCanonicalName()); - } - - ToTimePage() {} - - @Override - @NotNull - public final Class getNativeType() { - return LocalTime.class; - } - - @Override - @NotNull - public final ChunkType getChunkType() { - return ChunkType.Object; - } - - private static final class ToTimePageFromMillis extends ToTimePage { - @Override - @NotNull - public Object nullValue() { - return QueryConstants.NULL_INT_BOXED; - } - - @Override - public LocalTime[] convertResult(@NotNull final Object result) { - final int[] from = (int[]) result; - final LocalTime[] to = new LocalTime[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = DateTimeUtils.millisOfDayToLocalTime(from[i]); - } - return to; - } - } - - private static class ToTimePageFromLong extends ToTimePage { - @Override - @NotNull - public final Object nullValue() { - return QueryConstants.NULL_LONG_BOXED; - } - - static LocalTime[] convertResultHelper(@NotNull final Object result, - @NotNull final LongFunction unitToLocalTime) { - final long[] from = (long[]) result; - final LocalTime[] to = new LocalTime[from.length]; - - for (int i = 0; i < from.length; ++i) { - to[i] = unitToLocalTime.apply(from[i]); - } - return to; - } - } - - private static final class ToTimePageFromMicros extends ToTimePageFromLong { - @Override - public LocalTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::microsOfDayToLocalTime); - } - } - - private static final class ToTimePageFromNanos extends ToTimePageFromLong { - @Override - public LocalTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::nanosOfDayToLocalTime); - } - } -} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java index 51f8d83d7f0..8bc8f55e6b5 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java @@ -9,7 +9,7 @@ import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.table.ColumnSource; -import io.deephaven.parquet.table.util.TransferUtils; +import io.deephaven.parquet.base.ParquetTimeUtils; import org.jetbrains.annotations.NotNull; import java.nio.LongBuffer; @@ -36,7 +36,7 @@ void resizeBuffer(final int length) { @Override void copyToBuffer(@NotNull final EncodedData data) { for (final LocalDateTime t : data.encodedValues) { - buffer.put(TransferUtils.epochNanosUTC(t)); + buffer.put(ParquetTimeUtils.epochNanosUTC(t)); } } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java index 618cb2e56cb..fd49df54f42 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java @@ -11,7 +11,7 @@ import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.table.ColumnSource; -import io.deephaven.parquet.table.util.TransferUtils; +import io.deephaven.parquet.base.ParquetTimeUtils; import org.jetbrains.annotations.NotNull; import java.nio.LongBuffer; @@ -30,7 +30,7 @@ final class LocalDateTimeTransfer extends GettingPrimitiveTransfer> data) { try (final CloseableIterator dataIterator = data.encodedValues.iterator()) { - dataIterator.forEachRemaining((LocalDateTime t) -> buffer.put(TransferUtils.epochNanosUTC(t))); + dataIterator.forEachRemaining((LocalDateTime t) -> buffer.put(ParquetTimeUtils.epochNanosUTC(t))); } } } diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index f9182ad6cda..612d9bbc759 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -455,6 +455,58 @@ def timestamp_test_helper(pa_table, new_schema, dest): schema_msec = table.schema.set(0, pyarrow.field('f', pyarrow.timestamp('ms'))) timestamp_test_helper(table, schema_msec, 'timestamp_test_msec.parquet') + def test_timestamp_with_different_units(self): + # Create a DataFrame with a Timestamp column + df = pandas.DataFrame({ + "time_ms": pandas.date_range("11:00:00", "11:00:01", freq="1ms"), + "time_us": pandas.date_range("11:00:01", "11:00:02", freq="1ms"), + "time_ns": pandas.date_range("11:00:02", "11:00:03", freq="1ms") + }) + + # Sprinkle some nulls + df["time_ms"][0] = df["time_ms"][5] = None + df["time_us"][0] = df["time_us"][5] = None + df["time_ns"][0] = df["time_ns"][5] = None + + # Set the appropriate unit and timezone + df['time_ms'] = df["time_ms"].astype("datetime64[ms]").dt.tz_localize('UTC') + df['time_us'] = df["time_us"].astype("datetime64[us]").dt.tz_localize('UTC') + df['time_ns'] = df["time_ns"].astype("datetime64[ns]").dt.tz_localize('UTC') + + dest = "timestamp_data_from_pd.parquet" + df.to_parquet(dest) + + metadata = pyarrow.parquet.read_metadata(dest) + ms_col_metadata = str(metadata.row_group(0).column(0)) + if "isAdjustedToUTC=true" not in ms_col_metadata: + self.fail("isAdjustedToUTC is not set to true") + if "timeUnit=milliseconds" not in ms_col_metadata: + self.fail("timeUnit is not milliseconds") + us_col_metadata = str(metadata.row_group(0).column(1)) + if "isAdjustedToUTC=true" not in us_col_metadata: + self.fail("isAdjustedToUTC is not set to true") + if "timeUnit=microseconds" not in us_col_metadata: + self.fail("timeUnit is not microseconds") + ns_col_metadata = str(metadata.row_group(0).column(2)) + if "isAdjustedToUTC=true" not in ns_col_metadata: + self.fail("isAdjustedToUTC is not set to true") + if "timeUnit=nanoseconds" not in ns_col_metadata: + self.fail("timeUnit is not nanoseconds") + + # Read the parquet file back using deephaven and write it back + dh_table_from_disk = read(dest).select() + dh_dest = "dh_" + dest + write(dh_table_from_disk, dh_dest) + + # Read the new parquet file using pyarrow and compare against original table + df_from_disk = pyarrow.parquet.read_table(dh_dest).to_pandas() + + # Deephaven writes timestamps as nsec, so need to convert them back + df_from_disk['time_ms'] = df_from_disk["time_ms"].dt.tz_localize(None).astype("datetime64[ms]").dt.tz_localize('UTC') + df_from_disk['time_us'] = df_from_disk["time_us"].dt.tz_localize(None).astype("datetime64[us]").dt.tz_localize('UTC') + self.assertTrue(df_from_disk.equals(df)) + + def test_read_single_file(self): table = empty_table(3).update( formulas=["x=i", "y=(double)(i/10.0)", "z=(double)(i*i)"] diff --git a/replication/static/build.gradle b/replication/static/build.gradle index 161f7017104..6382374ad23 100644 --- a/replication/static/build.gradle +++ b/replication/static/build.gradle @@ -67,6 +67,7 @@ task replicateAllSafe { dependsOn Tasks.registerMainExecTask(project, 'replicateRegionAndRegionedSourceTests', 'io.deephaven.replicators.ReplicateRegionAndRegionedSourceTests') dependsOn Tasks.registerMainExecTask(project, 'replicateToPage', 'io.deephaven.replicators.ReplicateToPage') + dependsOn Tasks.registerMainExecTask(project, 'replicatePageMaterializers', 'io.deephaven.replicators.ReplicatePageMaterializers') dependsOn Tasks.registerMainExecTask(project, 'replicateDownsamplingValueTrackers', 'io.deephaven.replicators.ReplicateDownsamplingValueTrackers') diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java new file mode 100644 index 00000000000..489ebe04e78 --- /dev/null +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java @@ -0,0 +1,134 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.replicators; + +import java.io.IOException; + +import static io.deephaven.replication.ReplicatePrimitiveCode.charToShortAndByte; +import static io.deephaven.replication.ReplicatePrimitiveCode.floatToAllFloatingPoints; +import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; + +/** + * Code generation for basic ToPage implementations. + */ +public class ReplicatePageMaterializers { + private static final String TASK = "replicatePageMaterializers"; + private static final String[] NO_EXCEPTIONS = new String[0]; + + private static final String MATERIALIZER_DIR = + "extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/"; + + private static final String CHAR_MATERIALIZER_PATH = MATERIALIZER_DIR + "CharMaterializer.java"; + private static final String FLOAT_MATERIALIZER_PATH = MATERIALIZER_DIR + "FloatMaterializer.java"; + private static final String INT_MATERIALIZER_PATH = MATERIALIZER_DIR + "IntMaterializer.java"; + private static final String STRING_MATERIALIZER_PATH = MATERIALIZER_DIR + "StringMaterializer.java"; + + private static final String LOCAL_TIME_MATERIALIZER_BASE_PATH = + MATERIALIZER_DIR + "LocalTimeMaterializerBase.java"; + private static final String LOCAL_TIME_FROM_MILLIS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalTimeFromMillisMaterializer.java"; + private static final String LOCAL_TIME_FROM_MICROS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalTimeFromMicrosMaterializer.java"; + private static final String LOCAL_TIME_FROM_NANOS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalTimeFromNanosMaterializer.java"; + + private static final String LOCAL_DATE_TIME_MATERIALIZER_BASE_PATH = + MATERIALIZER_DIR + "LocalDateTimeMaterializerBase.java"; + private static final String LOCAL_DATE_TIME_FROM_MILLIS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalDateTimeFromMillisMaterializer.java"; + private static final String LOCAL_DATE_TIME_FROM_MICROS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalDateTimeFromMicrosMaterializer.java"; + private static final String LOCAL_DATE_TIME_FROM_NANOS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LocalDateTimeFromNanosMaterializer.java"; + + private static final String INSTANT_NANOS_FROM_MILLIS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "InstantNanosFromMillisMaterializer.java"; + private static final String INSTANT_NANOS_FROM_MICROS_MATERIALIZER_PATH = + MATERIALIZER_DIR + "InstantNanosFromMicrosMaterializer.java"; + + public static void main(String... args) throws IOException { + charToShortAndByte(TASK, CHAR_MATERIALIZER_PATH, NO_EXCEPTIONS); + + // Float -> Double + floatToAllFloatingPoints(TASK, FLOAT_MATERIALIZER_PATH, NO_EXCEPTIONS); + + // Float -> Int + String[][] pairs = new String[][] { + {"readFloat", "readInteger"}, + {"Float", "Int"}, + {"float", "int"} + }; + replaceAll(TASK, FLOAT_MATERIALIZER_PATH, INT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Float -> String + pairs = new String[][] { + {"readFloat()", "readBytes().toStringUsingUTF8"}, + {"Float", "String"}, + {"float", "String"}, + {"dataReader, 0, numValues", "dataReader, null, numValues"} + }; + replaceAll(TASK, FLOAT_MATERIALIZER_PATH, STRING_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LocalTimeFromMicros -> LocalTimeFromMillis + // We change from Micros to Millis and not the other way since converting from Long to Integer has fewer + // exceptions than the other way around. + pairs = new String[][] { + {"Micros", "Millis"}, + {"micros", "millis"}, + {"readLong", "readInteger"}, + }; + replaceAll(TASK, + LOCAL_TIME_FROM_MICROS_MATERIALIZER_PATH, + LOCAL_TIME_FROM_MILLIS_MATERIALIZER_PATH, + null, NO_EXCEPTIONS, pairs); + + // LocalTimeFromMicros -> LocalTimeFromNanos + pairs = new String[][] { + {"Micros", "Nanos"}, + {"micros", "nanos"}, + }; + replaceAll(TASK, + LOCAL_TIME_FROM_MICROS_MATERIALIZER_PATH, + LOCAL_TIME_FROM_NANOS_MATERIALIZER_PATH, + null, NO_EXCEPTIONS, pairs); + + // LocalTimeBase -> LocalDateTimeBase + pairs = new String[][] { + {"LocalTime", "LocalDateTime"} + }; + replaceAll(TASK, + LOCAL_TIME_MATERIALIZER_BASE_PATH, + LOCAL_DATE_TIME_MATERIALIZER_BASE_PATH, + null, NO_EXCEPTIONS, pairs); + + // LocalDateTimeFromMillis -> LocalDateTimeFromMicros + pairs = new String[][] { + {"Millis", "Micros"} + }; + replaceAll(TASK, + LOCAL_DATE_TIME_FROM_MILLIS_MATERIALIZER_PATH, + LOCAL_DATE_TIME_FROM_MICROS_MATERIALIZER_PATH, + null, NO_EXCEPTIONS, pairs); + + // LocalDateTimeFromMillis -> LocalDateTimeFromNanos + pairs = new String[][] { + {"Millis", "Nanos"} + }; + replaceAll(TASK, + LOCAL_DATE_TIME_FROM_MILLIS_MATERIALIZER_PATH, + LOCAL_DATE_TIME_FROM_NANOS_MATERIALIZER_PATH, + null, NO_EXCEPTIONS, pairs); + + // InstantNanosFromMicros -> InstantNanosFromMillis + pairs = new String[][] { + {"Micros", "Millis"}, + {"micros", "millis"} + }; + replaceAll(TASK, + INSTANT_NANOS_FROM_MICROS_MATERIALIZER_PATH, + INSTANT_NANOS_FROM_MILLIS_MATERIALIZER_PATH, + null, NO_EXCEPTIONS, pairs); + } +} + diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java index efc182ad546..4902dcbb35d 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java @@ -79,10 +79,10 @@ public static void main(String[] args) throws IOException { pairs); pairs = new String[][] { - {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.table.util.TransferUtils"}, + {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.base.ParquetTimeUtils"}, {"InstantArrayTransfer", "LocalDateTimeArrayTransfer"}, {"InstantVectorTransfer", "LocalDateTimeVectorTransfer"}, - {"DateTimeUtils.epochNanos", "TransferUtils.epochNanosUTC"}, + {"DateTimeUtils.epochNanos", "ParquetTimeUtils.epochNanosUTC"}, {"Instant", "LocalDateTime"} }; replaceAll(TASK, PARQUET_INSTANT_ARRAY_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_ARRAY_TRANSFER_PATH, null, @@ -91,10 +91,10 @@ public static void main(String[] args) throws IOException { NO_EXCEPTIONS, pairs); pairs = new String[][] { - {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.table.util.TransferUtils"}, + {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.base.ParquetTimeUtils"}, {"TimeTransfer", "LocalDateTimeTransfer"}, {"LocalTime", "LocalDateTime"}, - {"DateTimeUtils.nanosOfDay", "TransferUtils.epochNanosUTC"} + {"DateTimeUtils.nanosOfDay", "ParquetTimeUtils.epochNanosUTC"} }; replaceAll(TASK, PARQUET_TIME_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java index 778618907cd..aa3e1a715be 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java @@ -5,19 +5,33 @@ import java.io.IOException; -import static io.deephaven.replication.ReplicatePrimitiveCode.charToShortAndByte; -import static io.deephaven.replication.ReplicatePrimitiveCode.intToLongAndFloatingPoints; +import static io.deephaven.replication.ReplicatePrimitiveCode.intToAllButBoolean; +import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** * Code generation for basic ToPage implementations. */ public class ReplicateToPage { + private static final String TASK = "replicateToPage"; + private static final String[] NO_EXCEPTIONS = new String[0]; public static void main(String... args) throws IOException { - intToLongAndFloatingPoints("replicateToPage", + intToAllButBoolean(TASK, "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java", "interface"); - charToShortAndByte("replicateToPage", - "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPageFromInt.java"); + + // LocalDate -> LocalDateTime + final String sourcePath = + "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java"; + String[][] pairs = new String[][] { + {"LocalDate", "LocalDateTime"} + }; + replaceAll(TASK, sourcePath, null, NO_EXCEPTIONS, pairs); + + // LocalDate -> LocalTime + pairs = new String[][] { + {"LocalDate", "LocalTime"} + }; + replaceAll(TASK, sourcePath, null, NO_EXCEPTIONS, pairs); } }