-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Parquet: Replicate TransferObject and Update Statistics
- Loading branch information
1 parent
bf579e0
commit 1838705
Showing
19 changed files
with
1,187 additions
and
489 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetCacheTags.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/** | ||
* Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending | ||
*/ | ||
package io.deephaven.parquet.table; | ||
|
||
/**
 * Tags used to label cached Parquet-related values.
 * <p>
 * NOTE(review): the consumers of these tags are not visible here; presumably they key entries in a
 * column-source or table level cache. Confirm against the call sites.
 */
public enum ParquetCacheTags {
    /** Tag for cached decimal arguments (presumably precision/scale; confirm against the call sites). */
    DECIMAL_ARGS
}
489 changes: 8 additions & 481 deletions
489
extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
...ions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/BooleanTransfer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/** | ||
* Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending | ||
*/ | ||
package io.deephaven.parquet.table.transfer; | ||
|
||
import io.deephaven.chunk.ByteChunk; | ||
import io.deephaven.chunk.attributes.Values; | ||
import io.deephaven.engine.rowset.RowSequence; | ||
import io.deephaven.engine.table.ChunkSource; | ||
import io.deephaven.engine.table.ColumnSource; | ||
import org.apache.parquet.column.statistics.Statistics; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.nio.ByteBuffer; | ||
|
||
public class BooleanTransfer implements TransferObject<ByteBuffer> { | ||
|
||
private final ColumnSource<?> columnSource; | ||
private final ChunkSource.GetContext context; | ||
private ByteChunk<? extends Values> chunk; | ||
private final ByteBuffer buffer; | ||
|
||
public BooleanTransfer( | ||
@NotNull ColumnSource<?> columnSource, | ||
int targetSize) { | ||
this.columnSource = columnSource; | ||
this.buffer = ByteBuffer.allocate(targetSize); | ||
this.context = columnSource.makeGetContext(targetSize); | ||
} | ||
|
||
@Override | ||
public void fetchData(@NotNull final RowSequence rs) { | ||
chunk = columnSource.getChunk(context, rs).asByteChunk(); | ||
} | ||
|
||
@Override | ||
public int transferAllToBuffer() { | ||
return transferOnePageToBuffer(); | ||
} | ||
|
||
@Override | ||
public int transferOnePageToBuffer() { | ||
if (!hasMoreDataToBuffer()) { | ||
return 0; | ||
} | ||
buffer.clear(); | ||
// Assuming that all the fetched data will fit in one page. This is because page count is accurately | ||
// calculated for non variable-width types. Check ParquetTableWriter.getTargetRowsPerPage for more details. | ||
copyAllFromChunkToBuffer(); | ||
buffer.flip(); | ||
int ret = chunk.size(); | ||
chunk = null; | ||
return ret; | ||
} | ||
|
||
private void copyAllFromChunkToBuffer() { | ||
for (int chunkIdx = 0; chunkIdx < chunk.size(); ++chunkIdx) { | ||
buffer.put(chunk.get(chunkIdx)); | ||
} | ||
} | ||
|
||
@Override | ||
public boolean hasMoreDataToBuffer() { | ||
return chunk != null; | ||
} | ||
|
||
@Override | ||
public ByteBuffer getBuffer() { | ||
return buffer; | ||
} | ||
|
||
@Override | ||
public void close() { | ||
context.close(); | ||
} | ||
|
||
public <T extends Comparable<T>> void updateStatistics(@NotNull final Statistics<T> stats) {} | ||
} |
107 changes: 107 additions & 0 deletions
107
extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/ByteTransfer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/** | ||
* Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending | ||
*/ | ||
/* | ||
* --------------------------------------------------------------------------------------------------------------------- | ||
* AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit IntTransfer and regenerate | ||
* --------------------------------------------------------------------------------------------------------------------- | ||
*/ | ||
package io.deephaven.parquet.table.transfer; | ||
|
||
import io.deephaven.chunk.ByteChunk; | ||
import io.deephaven.chunk.attributes.Values; | ||
import io.deephaven.engine.rowset.RowSequence; | ||
import io.deephaven.engine.table.ChunkSource; | ||
import io.deephaven.engine.table.ColumnSource; | ||
import io.deephaven.util.QueryConstants; | ||
import org.apache.parquet.column.statistics.IntStatistics; | ||
import org.apache.parquet.column.statistics.Statistics; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.nio.Buffer; | ||
import java.nio.IntBuffer; | ||
|
||
public class ByteTransfer implements TransferObject<IntBuffer> { | ||
|
||
private final ColumnSource<?> columnSource; | ||
private final ChunkSource.GetContext context; | ||
private final IntBuffer buffer; | ||
private ByteChunk<? extends Values> chunk; | ||
private byte minValue = QueryConstants.NULL_BYTE; | ||
private byte maxValue = QueryConstants.NULL_BYTE; | ||
|
||
public ByteTransfer( | ||
@NotNull final ColumnSource<?> columnSource, | ||
final int targetSize) { | ||
this.columnSource = columnSource; | ||
this.buffer = IntBuffer.allocate(targetSize); | ||
context = columnSource.makeGetContext(targetSize); | ||
} | ||
|
||
@Override | ||
final public void fetchData(@NotNull final RowSequence rs) { | ||
chunk = columnSource.getChunk(context, rs).asByteChunk(); | ||
} | ||
|
||
@Override | ||
final public int transferAllToBuffer() { | ||
return transferOnePageToBuffer(); | ||
} | ||
|
||
@Override | ||
final public int transferOnePageToBuffer() { | ||
if (!hasMoreDataToBuffer()) { | ||
return 0; | ||
} | ||
buffer.clear(); | ||
// Assuming that all the fetched data will fit in one page. This is because page count is accurately | ||
// calculated for non variable-width types. Check ParquetTableWriter.getTargetRowsPerPage for more details. | ||
copyAllFromChunkToBuffer(); | ||
buffer.flip(); | ||
int ret = chunk.size(); | ||
chunk = null; | ||
return ret; | ||
} | ||
|
||
/** | ||
* Helper method to copy all data from {@code this.chunk} to {@code this.buffer}. The buffer should be cleared | ||
* before calling this method and is positioned for a {@link Buffer#flip()} after the call. | ||
*/ | ||
private void copyAllFromChunkToBuffer() { | ||
for (int chunkIdx = 0; chunkIdx < chunk.size(); ++chunkIdx) { | ||
byte value = chunk.get(chunkIdx); | ||
if (value != QueryConstants.NULL_BYTE) { | ||
if (minValue == QueryConstants.NULL_BYTE) { | ||
minValue = maxValue = value; | ||
} else if (value < minValue) { | ||
minValue = value; | ||
} else if (value > maxValue) { | ||
maxValue = value; | ||
} | ||
} | ||
buffer.put(value); | ||
} | ||
} | ||
|
||
@Override | ||
final public boolean hasMoreDataToBuffer() { | ||
return (chunk != null); | ||
} | ||
|
||
@Override | ||
final public IntBuffer getBuffer() { | ||
return buffer; | ||
} | ||
|
||
@Override | ||
final public void close() { | ||
context.close(); | ||
} | ||
|
||
@Override | ||
public <T extends Comparable<T>> void updateStatistics(@NotNull final Statistics<T> stats) { | ||
if (minValue != QueryConstants.NULL_BYTE) { | ||
((IntStatistics) stats).setMinMax(minValue, maxValue); | ||
} | ||
} | ||
} |
107 changes: 107 additions & 0 deletions
107
extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/CharTransfer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/** | ||
* Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending | ||
*/ | ||
/* | ||
* --------------------------------------------------------------------------------------------------------------------- | ||
* AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit IntTransfer and regenerate | ||
* --------------------------------------------------------------------------------------------------------------------- | ||
*/ | ||
package io.deephaven.parquet.table.transfer; | ||
|
||
import io.deephaven.chunk.CharChunk; | ||
import io.deephaven.chunk.attributes.Values; | ||
import io.deephaven.engine.rowset.RowSequence; | ||
import io.deephaven.engine.table.ChunkSource; | ||
import io.deephaven.engine.table.ColumnSource; | ||
import io.deephaven.util.QueryConstants; | ||
import org.apache.parquet.column.statistics.IntStatistics; | ||
import org.apache.parquet.column.statistics.Statistics; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.nio.Buffer; | ||
import java.nio.IntBuffer; | ||
|
||
public class CharTransfer implements TransferObject<IntBuffer> { | ||
|
||
private final ColumnSource<?> columnSource; | ||
private final ChunkSource.GetContext context; | ||
private final IntBuffer buffer; | ||
private CharChunk<? extends Values> chunk; | ||
private char minValue = QueryConstants.NULL_CHAR; | ||
private char maxValue = QueryConstants.NULL_CHAR; | ||
|
||
public CharTransfer( | ||
@NotNull final ColumnSource<?> columnSource, | ||
final int targetSize) { | ||
this.columnSource = columnSource; | ||
this.buffer = IntBuffer.allocate(targetSize); | ||
context = columnSource.makeGetContext(targetSize); | ||
} | ||
|
||
@Override | ||
final public void fetchData(@NotNull final RowSequence rs) { | ||
chunk = columnSource.getChunk(context, rs).asCharChunk(); | ||
} | ||
|
||
@Override | ||
final public int transferAllToBuffer() { | ||
return transferOnePageToBuffer(); | ||
} | ||
|
||
@Override | ||
final public int transferOnePageToBuffer() { | ||
if (!hasMoreDataToBuffer()) { | ||
return 0; | ||
} | ||
buffer.clear(); | ||
// Assuming that all the fetched data will fit in one page. This is because page count is accurately | ||
// calculated for non variable-width types. Check ParquetTableWriter.getTargetRowsPerPage for more details. | ||
copyAllFromChunkToBuffer(); | ||
buffer.flip(); | ||
int ret = chunk.size(); | ||
chunk = null; | ||
return ret; | ||
} | ||
|
||
/** | ||
* Helper method to copy all data from {@code this.chunk} to {@code this.buffer}. The buffer should be cleared | ||
* before calling this method and is positioned for a {@link Buffer#flip()} after the call. | ||
*/ | ||
private void copyAllFromChunkToBuffer() { | ||
for (int chunkIdx = 0; chunkIdx < chunk.size(); ++chunkIdx) { | ||
char value = chunk.get(chunkIdx); | ||
if (value != QueryConstants.NULL_CHAR) { | ||
if (minValue == QueryConstants.NULL_CHAR) { | ||
minValue = maxValue = value; | ||
} else if (value < minValue) { | ||
minValue = value; | ||
} else if (value > maxValue) { | ||
maxValue = value; | ||
} | ||
} | ||
buffer.put(value); | ||
} | ||
} | ||
|
||
@Override | ||
final public boolean hasMoreDataToBuffer() { | ||
return (chunk != null); | ||
} | ||
|
||
@Override | ||
final public IntBuffer getBuffer() { | ||
return buffer; | ||
} | ||
|
||
@Override | ||
final public void close() { | ||
context.close(); | ||
} | ||
|
||
@Override | ||
public <T extends Comparable<T>> void updateStatistics(@NotNull final Statistics<T> stats) { | ||
if (minValue != QueryConstants.NULL_CHAR) { | ||
((IntStatistics) stats).setMinMax(minValue, maxValue); | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
...nsions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/CodecTransfer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/** | ||
* Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending | ||
*/ | ||
package io.deephaven.parquet.table.transfer; | ||
|
||
import io.deephaven.engine.table.ColumnSource; | ||
import io.deephaven.util.codec.ObjectCodec; | ||
import org.apache.parquet.io.api.Binary; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
public class CodecTransfer<T> extends EncodedTransfer<T> { | ||
private final ObjectCodec<? super T> codec; | ||
|
||
public CodecTransfer( | ||
@NotNull final ColumnSource<?> columnSource, | ||
@NotNull final ObjectCodec<? super T> codec, | ||
final int maxValuesPerPage, | ||
final int targetPageSize) { | ||
super(columnSource, maxValuesPerPage, targetPageSize); | ||
this.codec = codec; | ||
} | ||
|
||
@Override | ||
Binary encodeToBinary(T value) { | ||
return Binary.fromConstantByteArray(codec.encode(value)); | ||
} | ||
} |
Oops, something went wrong.