Skip to content

Commit

Permalink
Enforce page size limit when writing Parquet files (#4344)
Browse files Browse the repository at this point in the history
For string columns and columns which use codecs, we didn't enforce page size limits properly. This change fixes that.
  • Loading branch information
malhotrashivam authored Sep 14, 2023
1 parent deca28f commit f0fddeb
Show file tree
Hide file tree
Showing 3 changed files with 426 additions and 194 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package io.deephaven.parquet.table;

import io.deephaven.base.verify.Require;
import io.deephaven.configuration.Configuration;
import io.deephaven.engine.table.impl.ColumnToCodecMappings;
import io.deephaven.hash.KeyedObjectHashMap;
import io.deephaven.hash.KeyedObjectKey;
Expand Down Expand Up @@ -81,20 +82,24 @@ public static int getDefaltMaximumDictionarySize() {
return defaultMaximumDictionarySize;
}

private static final int MIN_DEFAULT_PAGE_SIZE = 64 << 10;
private static volatile int defaultTargetPageSize = 1 << 20;
private static final int MIN_TARGET_PAGE_SIZE =
Configuration.getInstance().getIntegerWithDefault("Parquet.minTargetPageSize", 2 << 10);
private static final int DEFAULT_TARGET_PAGE_SIZE =
Configuration.getInstance().getIntegerWithDefault("Parquet.defaultTargetPageSize", 8 << 10);
private static volatile int defaultTargetPageSize = DEFAULT_TARGET_PAGE_SIZE;

private static final boolean DEFAULT_IS_REFRESHING = false;

/**
* Set the default target page size (in bytes) used to section rows of data into pages during column writing. This
* number should be no smaller than {@value #MIN_DEFAULT_PAGE_SIZE}.
* number should be no smaller than {@link #MIN_TARGET_PAGE_SIZE}.
*
* @param newDefaultSizeBytes the new default target page size.
*/
public static void setDefaultTargetPageSize(final int newDefaultSizeBytes) {
if (newDefaultSizeBytes < MIN_DEFAULT_PAGE_SIZE) {
if (newDefaultSizeBytes < MIN_TARGET_PAGE_SIZE) {
throw new IllegalArgumentException(
"Default target page size should be larger than " + MIN_DEFAULT_PAGE_SIZE + " bytes");
"Default target page size should be larger than " + MIN_TARGET_PAGE_SIZE + " bytes");
}
defaultTargetPageSize = newDefaultSizeBytes;
}
Expand Down Expand Up @@ -606,8 +611,8 @@ public Builder setIsLegacyParquet(final boolean isLegacyParquet) {
}

public Builder setTargetPageSize(final int targetPageSize) {
if (targetPageSize < MIN_DEFAULT_PAGE_SIZE) {
throw new IllegalArgumentException("Target page size should be >= " + MIN_DEFAULT_PAGE_SIZE);
if (targetPageSize < MIN_TARGET_PAGE_SIZE) {
throw new IllegalArgumentException("Target page size should be >= " + MIN_TARGET_PAGE_SIZE);
}
this.targetPageSize = targetPageSize;
return this;
Expand Down
Loading

0 comments on commit f0fddeb

Please sign in to comment.