diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java
index da085646a41..87ce0690771 100644
--- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java
+++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java
@@ -25,8 +25,7 @@
 import static io.deephaven.base.FileUtils.convertToURI;
 
 /**
- * Top level accessor for a parquet file which can read both from a file path string or a CLI style file URI,
- * ex."s3://bucket/key".
+ * Top level accessor for a parquet file which can read from a CLI style file URI, ex."s3://bucket/key".
  */
 public class ParquetFileReader {
     private static final int FOOTER_LENGTH_SIZE = 4;
@@ -39,25 +38,7 @@ public class ParquetFileReader {
      * If reading a single parquet file, root URI is the URI of the file, else the parent directory for a metadata file
      */
     private final URI rootURI;
-    private final MessageType type;
-
-    /**
-     * Make a {@link ParquetFileReader} for the supplied {@link File}. Wraps {@link IOException} as
-     * {@link UncheckedIOException}.
-     *
-     * @param parquetFile The parquet file or the parquet metadata file
-     * @param channelsProvider The {@link SeekableChannelsProvider} to use for reading the file
-     * @return The new {@link ParquetFileReader}
-     */
-    public static ParquetFileReader create(
-            @NotNull final File parquetFile,
-            @NotNull final SeekableChannelsProvider channelsProvider) {
-        try {
-            return new ParquetFileReader(convertToURI(parquetFile, false), channelsProvider);
-        } catch (final IOException e) {
-            throw new UncheckedIOException("Failed to create Parquet file reader: " + parquetFile, e);
-        }
-    }
+    private final MessageType schema;
 
     /**
      * Make a {@link ParquetFileReader} for the supplied {@link URI}. Wraps {@link IOException} as
@@ -91,7 +72,6 @@ private ParquetFileReader(
             // Construct a new file URI for the parent directory
             rootURI = convertToURI(new File(parquetFileURI).getParentFile(), true);
         } else {
-            // TODO(deephaven-core#5066): Add support for reading metadata files from non-file URIs
             rootURI = parquetFileURI;
         }
         try (
@@ -102,7 +82,7 @@ private ParquetFileReader(
                 fileMetaData = Util.readFileMetaData(in);
             }
         }
-        type = fromParquetSchema(fileMetaData.schema, fileMetaData.column_orders);
+        schema = fromParquetSchema(fileMetaData.schema, fileMetaData.column_orders);
     }
 
     /**
@@ -148,87 +128,6 @@ public SeekableChannelsProvider getChannelsProvider() {
         return channelsProvider;
     }
 
-    private Set<String> columnsWithDictionaryUsedOnEveryDataPage = null;
-
-    /**
-     * Get the name of all columns that we can know for certain (a) have a dictionary, and (b) use the dictionary on all
-     * data pages.
-     *
-     * @return A set of parquet column names that satisfies the required condition.
-     */
-    @SuppressWarnings("unused")
-    public Set<String> getColumnsWithDictionaryUsedOnEveryDataPage() {
-        if (columnsWithDictionaryUsedOnEveryDataPage == null) {
-            columnsWithDictionaryUsedOnEveryDataPage =
-                    calculateColumnsWithDictionaryUsedOnEveryDataPage();
-        }
-        return columnsWithDictionaryUsedOnEveryDataPage;
-    }
-
-    /**
-     * True only if we are certain every data page in this column chunk uses dictionary encoding; note false also covers
-     * the "we can't tell" case.
-     */
-    private static boolean columnChunkUsesDictionaryOnEveryPage(final ColumnChunk columnChunk) {
-        final ColumnMetaData columnMeta = columnChunk.getMeta_data();
-        if (columnMeta.encoding_stats == null) {
-            return false; // this is false as "don't know".
-        }
-        for (PageEncodingStats encodingStat : columnMeta.encoding_stats) {
-            if (encodingStat.page_type != PageType.DATA_PAGE
-                    && encodingStat.page_type != PageType.DATA_PAGE_V2) {
-                // skip non-data pages.
-                continue;
-            }
-            // this is a data page.
-            if (encodingStat.encoding != Encoding.PLAIN_DICTIONARY
-                    && encodingStat.encoding != Encoding.RLE_DICTIONARY) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private Set<String> calculateColumnsWithDictionaryUsedOnEveryDataPage() {
-        final Set<String> result = new HashSet<>(fileMetaData.getSchemaSize());
-        final List<RowGroup> rowGroups = fileMetaData.getRow_groups();
-        final Iterator<RowGroup> riter = rowGroups.iterator();
-        if (!riter.hasNext()) {
-            // For an empty file we say all columns satisfy the property.
-            for (SchemaElement se : fileMetaData.getSchema()) {
-                if (!se.isSetNum_children()) { // We want only the leaves.
-                    result.add(se.getName());
-                }
-            }
-            return result;
-        }
-        // On the first pass, for row group zero, we are going to add all columns to the set
-        // that satisfy the restriction.
-        // On later passes after zero, we will remove any column that does not satisfy
-        // the restriction.
-        final RowGroup rg0 = riter.next();
-        for (ColumnChunk columnChunk : rg0.columns) {
-            if (columnChunkUsesDictionaryOnEveryPage(columnChunk)) {
-                final String parquetColumnName = columnChunk.getMeta_data().path_in_schema.get(0);
-                result.add(parquetColumnName);
-            }
-        }
-
-        while (riter.hasNext()) {
-            final RowGroup rowGroup = riter.next();
-            for (ColumnChunk columnChunk : rowGroup.columns) {
-                final String parquetColumnName = columnChunk.getMeta_data().path_in_schema.get(0);
-                if (!result.contains(parquetColumnName)) {
-                    continue;
-                }
-                if (!columnChunkUsesDictionaryOnEveryPage(columnChunk)) {
-                    result.remove(parquetColumnName);
-                }
-            }
-        }
-        return result;
-    }
-
     /**
      * Create a {@link RowGroupReader} object for provided row group number
      *
@@ -239,7 +138,7 @@ public RowGroupReader getRowGroup(final int groupNumber, final String version) {
                 fileMetaData.getRow_groups().get(groupNumber),
                 channelsProvider,
                 rootURI,
-                type,
+                schema,
                 getSchema(),
                 version);
     }
@@ -463,24 +362,7 @@ private static LogicalTypeAnnotation getLogicalTypeFromConvertedType(
         }
     }
 
-    /**
-     * Helper method to determine if a logical type is adjusted to UTC.
-     *
-     * @param logicalType the logical type to check
-     * @return true if the logical type is a timestamp adjusted to UTC, false otherwise
-     */
-    private static boolean isAdjustedToUTC(final LogicalType logicalType) {
-        if (logicalType.getSetField() == LogicalType._Fields.TIMESTAMP) {
-            return logicalType.getTIMESTAMP().isAdjustedToUTC;
-        }
-        return false;
-    }
-
     public MessageType getSchema() {
-        return type;
-    }
-
-    public int rowGroupCount() {
-        return fileMetaData.getRow_groups().size();
+        return schema;
     }
 }
diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java
index 3e609652629..19dbb52aabf 100644
--- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java
+++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java
@@ -973,10 +973,6 @@ private static Table readPartitionedTableWithMetadata(
             @NotNull final ParquetInstructions readInstructions,
             @Nullable final SeekableChannelsProvider channelsProvider) {
         verifyFileLayout(readInstructions, ParquetFileLayout.METADATA_PARTITIONED);
-        if (readInstructions.getTableDefinition().isPresent()) {
-            throw new UnsupportedOperationException("Detected table definition inside read instructions, reading " +
-                    "metadata files with custom table definition is currently not supported");
-        }
         final ParquetMetadataFileLayout layout =
                 ParquetMetadataFileLayout.create(sourceURI, readInstructions, channelsProvider);
         return readTable(layout,
diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java
index 4ec93d85a1b..84546de519e 100644
--- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java
+++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/layout/ParquetMetadataFileLayout.java
@@ -120,43 +120,53 @@ private ParquetMetadataFileLayout(
         final ParquetFileReader metadataFileReader = ParquetFileReader.create(metadataFileURI, channelsProvider);
         final ParquetMetadataConverter converter = new ParquetMetadataConverter();
         final ParquetMetadata metadataFileMetadata = convertMetadata(metadataFileURI, metadataFileReader, converter);
-        final Pair<List<ColumnDefinition<?>>, ParquetInstructions> leafSchemaInfo = ParquetSchemaReader.convertSchema(
-                metadataFileReader.getSchema(),
-                metadataFileMetadata.getFileMetaData().getKeyValueMetaData(),
-                inputInstructions);
-
-        if (channelsProvider.exists(commonMetadataFileURI)) {
-            final ParquetFileReader commonMetadataFileReader =
-                    ParquetFileReader.create(commonMetadataFileURI, channelsProvider);
-            final Pair<List<ColumnDefinition<?>>, ParquetInstructions> fullSchemaInfo =
+        if (inputInstructions.getTableDefinition().isEmpty()) {
+            // Infer the definition from the metadata file
+            final Pair<List<ColumnDefinition<?>>, ParquetInstructions> leafSchemaInfo =
                     ParquetSchemaReader.convertSchema(
-                            commonMetadataFileReader.getSchema(),
-                            convertMetadata(commonMetadataFileURI, commonMetadataFileReader, converter)
-                                    .getFileMetaData()
-                                    .getKeyValueMetaData(),
-                            leafSchemaInfo.getSecond());
-            final Collection<ColumnDefinition<?>> adjustedColumnDefinitions = new ArrayList<>();
-            final Map<String, ColumnDefinition<?>> leafDefinitionsMap =
-                    leafSchemaInfo.getFirst().stream().collect(toMap(ColumnDefinition::getName, Function.identity()));
-            for (final ColumnDefinition<?> fullDefinition : fullSchemaInfo.getFirst()) {
-                final ColumnDefinition<?> leafDefinition = leafDefinitionsMap.get(fullDefinition.getName());
-                if (leafDefinition == null) {
-                    adjustedColumnDefinitions.add(adjustPartitionDefinition(fullDefinition));
-                } else if (fullDefinition.equals(leafDefinition)) {
-                    adjustedColumnDefinitions.add(fullDefinition); // No adjustments to apply in this case
-                } else {
-                    final List<String> differences = new ArrayList<>();
-                    fullDefinition.describeDifferences(differences, leafDefinition, "full schema", "file schema",
-                            "", false);
-                    throw new TableDataException(String.format("Schema mismatch between %s and %s for column %s: %s",
-                            metadataFileURI, commonMetadataFileURI, fullDefinition.getName(), differences));
+                            metadataFileReader.getSchema(),
+                            metadataFileMetadata.getFileMetaData().getKeyValueMetaData(),
+                            inputInstructions);
+
+            if (channelsProvider.exists(commonMetadataFileURI)) {
+                // Infer the partitioning columns using the common metadata file
+                final ParquetFileReader commonMetadataFileReader =
+                        ParquetFileReader.create(commonMetadataFileURI, channelsProvider);
+                final Pair<List<ColumnDefinition<?>>, ParquetInstructions> fullSchemaInfo =
+                        ParquetSchemaReader.convertSchema(
+                                commonMetadataFileReader.getSchema(),
+                                convertMetadata(commonMetadataFileURI, commonMetadataFileReader, converter)
+                                        .getFileMetaData()
+                                        .getKeyValueMetaData(),
+                                leafSchemaInfo.getSecond());
+                final Collection<ColumnDefinition<?>> adjustedColumnDefinitions = new ArrayList<>();
+                final Map<String, ColumnDefinition<?>> leafDefinitionsMap =
+                        leafSchemaInfo.getFirst().stream()
+                                .collect(toMap(ColumnDefinition::getName, Function.identity()));
+                for (final ColumnDefinition<?> fullDefinition : fullSchemaInfo.getFirst()) {
+                    final ColumnDefinition<?> leafDefinition = leafDefinitionsMap.get(fullDefinition.getName());
+                    if (leafDefinition == null) {
+                        adjustedColumnDefinitions.add(adjustPartitionDefinition(fullDefinition));
+                    } else if (fullDefinition.equals(leafDefinition)) {
+                        adjustedColumnDefinitions.add(fullDefinition); // No adjustments to apply in this case
+                    } else {
+                        final List<String> differences = new ArrayList<>();
+                        fullDefinition.describeDifferences(differences, leafDefinition, "full schema", "file schema",
+                                "", false);
+                        throw new TableDataException(
+                                String.format("Schema mismatch between %s and %s for column %s: %s",
+                                        metadataFileURI, commonMetadataFileURI, fullDefinition.getName(), differences));
+                    }
                 }
+                definition = TableDefinition.of(adjustedColumnDefinitions);
+                instructions = fullSchemaInfo.getSecond();
+            } else {
+                definition = TableDefinition.of(leafSchemaInfo.getFirst());
+                instructions = leafSchemaInfo.getSecond();
             }
-            definition = TableDefinition.of(adjustedColumnDefinitions);
-            instructions = fullSchemaInfo.getSecond();
         } else {
-            definition = TableDefinition.of(leafSchemaInfo.getFirst());
-            instructions = leafSchemaInfo.getSecond();
+            definition = inputInstructions.getTableDefinition().get();
+            instructions = inputInstructions;
         }
 
         final List<ColumnDefinition<?>> partitioningColumns = definition.getPartitioningColumns();
@@ -187,8 +197,10 @@ private ParquetMetadataFileLayout(
             final int numPartitions = filePath.getNameCount() - 1;
             if (numPartitions != partitioningColumns.size()) {
                 throw new TableDataException(String.format(
-                        "Unexpected number of path elements in %s for partitions %s",
-                        relativePathString, partitions.keySet()));
+                        "Unexpected number of path elements in %s for partitions %s, found %d elements, expected " +
+                                "%d based on definition %s",
+                        relativePathString, partitions.keySet(), numPartitions, partitioningColumns.size(),
+                        definition));
             }
             final boolean useHiveStyle = filePath.getName(0).toString().contains("=");
             for (int pi = 0; pi < numPartitions; ++pi) {
diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetDeephavenExamplesTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetDeephavenExamplesTest.java
new file mode 100644
index 00000000000..996745ac6e5
--- /dev/null
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetDeephavenExamplesTest.java
@@ -0,0 +1,77 @@
+//
+// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
+//
+package io.deephaven.parquet.table;
+
+import io.deephaven.engine.table.Table;
+import io.deephaven.engine.testutil.junit4.EngineCleanup;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+
+import java.nio.file.Path;
+
+import static io.deephaven.engine.testutil.TstUtils.assertTableEquals;
+import static org.junit.Assume.assumeTrue;
+
+/**
+ * Assumes that there is already a checkout of deephaven-examples.
+ * This is currently meant to be run locally.
+ */
+public class ParquetDeephavenExamplesTest {
+
+    private static final Path CHECKOUT_ROOT = null; // Path.of("/path/to/deephaven-examples");
+
+    @Rule
+    public final EngineCleanup framework = new EngineCleanup();
+
+    @BeforeClass
+    public static void beforeClass() {
+        assumeTrue(CHECKOUT_ROOT != null);
+    }
+
+    @Test
+    public void pems() {
+        read("Pems/parquet/pems");
+    }
+
+    @Test
+    public void crypto() {
+        read("CryptoCurrencyHistory/Parquet/crypto.parquet");
+        read("CryptoCurrencyHistory/Parquet/crypto_sept7.parquet");
+        read("CryptoCurrencyHistory/Parquet/crypto_sept8.parquet");
+        read("CryptoCurrencyHistory/Parquet/CryptoTrades_20210922.parquet");
+        read("CryptoCurrencyHistory/Parquet/FakeCryptoTrades_20230209.parquet");
+    }
+
+    @Test
+    public void taxi() {
+        read("Taxi/parquet/taxi.parquet");
+    }
+
+    @Test
+    public void sensorData() {
+        read("SensorData/parquet/SensorData_gzip.parquet");
+    }
+
+    @Test
+    public void grades() {
+        assertTableEquals(
+                read("ParquetExamples/grades"),
+                read("ParquetExamples/grades_meta"));
+        assertTableEquals(
+                read("ParquetExamples/grades_flat").sort("Name", "Class"),
+                read("ParquetExamples/grades_flat_meta").sort("Name", "Class"));
+        assertTableEquals(
+                read("ParquetExamples/grades_kv").sort("Name", "Class"),
+                read("ParquetExamples/grades_kv_meta").sort("Name", "Class"));
+    }
+
+    private static Table read(String name) {
+        final String path = CHECKOUT_ROOT
+                .resolve(name)
+                .toUri()
+                .toString();
+        return ParquetTools.readTable(path).select();
+    }
+}
diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index 065f708e5b3..ef553a17ca2 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -1823,6 +1823,202 @@ public void decimalLogicalTypeTest() {
         }
     }
 
+    @Test
+    public void metadataPartitionedDataWithCustomTableDefinition() throws IOException {
+        final Table inputData = TableTools.emptyTable(100).update(
+                "PC1 = (ii%2 == 0) ? `Apple` : `Ball`",
+                "PC2 = (ii%3 == 0) ? `Pen` : `Pencil`",
+                "numbers = ii",
+                "characters = (char) (65 + (ii % 23))");
+        final TableDefinition tableDefinition = TableDefinition.of(
+                ColumnDefinition.ofString("PC1").withPartitioning(),
+                ColumnDefinition.ofString("PC2").withPartitioning(),
+                ColumnDefinition.ofLong("numbers"),
+                ColumnDefinition.ofChar("characters"));
+        final File parentDir = new File(rootFile, "metadataPartitionedDataWithCustomTableDefinition");
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .setTableDefinition(tableDefinition)
+                .build();
+        writeKeyValuePartitionedTable(inputData, parentDir.getAbsolutePath(), writeInstructions);
+        final Table fromDisk = readTable(parentDir.getPath(),
+                EMPTY.withLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED));
+        assertEquals(fromDisk.getDefinition(), tableDefinition);
+        assertTableEquals(inputData.sort("PC1", "PC2"), fromDisk.sort("PC1", "PC2"));
+
+        // Now set different table definitions and read the data
+        {
+            // Remove a column
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PC1").withPartitioning(),
+                    ColumnDefinition.ofString("PC2").withPartitioning(),
+                    ColumnDefinition.ofLong("numbers"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            final Table fromDiskWithNewDefinition = readTable(parentDir.getPath(), readInstructions);
+            assertEquals(newTableDefinition, fromDiskWithNewDefinition.getDefinition());
+            assertTableEquals(inputData.select("PC1", "PC2", "numbers").sort("PC1", "PC2"),
+                    fromDiskWithNewDefinition.sort("PC1", "PC2"));
+        }
+
+        {
+            // Remove another column
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PC1").withPartitioning(),
+                    ColumnDefinition.ofString("PC2").withPartitioning(),
+                    ColumnDefinition.ofChar("characters"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            final Table fromDiskWithNewDefinition = readTable(parentDir.getPath(), readInstructions);
+            assertEquals(newTableDefinition, fromDiskWithNewDefinition.getDefinition());
+            assertTableEquals(inputData.select("PC1", "PC2", "characters").sort("PC1", "PC2"),
+                    fromDiskWithNewDefinition.sort("PC1", "PC2"));
+        }
+
+        {
+            // Remove a partitioning column
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PC1").withPartitioning(),
+                    ColumnDefinition.ofChar("characters"),
+                    ColumnDefinition.ofLong("numbers"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            try {
+                readTable(parentDir.getPath(), readInstructions);
+                fail("Exception expected because of missing partitioning column in table definition");
+            } catch (final RuntimeException expected) {
+            }
+        }
+
+        {
+            // Add an extra column
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PC1").withPartitioning(),
+                    ColumnDefinition.ofString("PC2").withPartitioning(),
+                    ColumnDefinition.ofLong("numbers"),
+                    ColumnDefinition.ofChar("characters"),
+                    ColumnDefinition.ofInt("extraColumn"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            final Table fromDiskWithNewDefinition = readTable(parentDir.getPath(), readInstructions);
+            assertEquals(newTableDefinition, fromDiskWithNewDefinition.getDefinition());
+            assertTableEquals(inputData.update("extraColumn = (int) null").sort("PC1", "PC2"),
+                    fromDiskWithNewDefinition.sort("PC1", "PC2"));
+        }
+
+        {
+            // Reorder partitioning and non-partitioning columns
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PC2").withPartitioning(),
+                    ColumnDefinition.ofString("PC1").withPartitioning(),
+                    ColumnDefinition.ofChar("characters"),
+                    ColumnDefinition.ofLong("numbers"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            final Table fromDiskWithNewDefinition = readTable(parentDir.getPath(), readInstructions);
+            assertEquals(newTableDefinition, fromDiskWithNewDefinition.getDefinition());
+            assertTableEquals(inputData.select("PC2", "PC1", "characters", "numbers").sort("PC2", "PC1"),
+                    fromDiskWithNewDefinition.sort("PC2", "PC1"));
+        }
+
+        {
+            // Rename partitioning and non-partitioning columns
+            final TableDefinition newTableDefinition = TableDefinition.of(
+                    ColumnDefinition.ofString("PartCol1").withPartitioning(),
+                    ColumnDefinition.ofString("PartCol2").withPartitioning(),
+                    ColumnDefinition.ofLong("nums"),
+                    ColumnDefinition.ofChar("chars"));
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setTableDefinition(newTableDefinition)
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .addColumnNameMapping("PC1", "PartCol1")
+                    .addColumnNameMapping("PC2", "PartCol2")
+                    .addColumnNameMapping("numbers", "nums")
+                    .addColumnNameMapping("characters", "chars")
+                    .build();
+            final Table fromDiskWithNewDefinition = readTable(parentDir.getPath(), readInstructions);
+            assertEquals(newTableDefinition, fromDiskWithNewDefinition.getDefinition());
+            assertTableEquals(
+                    inputData.sort("PC1", "PC2"),
+                    fromDiskWithNewDefinition
+                            .renameColumns("PC1 = PartCol1", "PC2 = PartCol2", "numbers = nums", "characters = chars")
+                            .sort("PC1", "PC2"));
+        }
+    }
+
+    @Test
+    public void metadataPartitionedDataWithCustomTableDefinition2() throws IOException {
+        final Table inputData = TableTools.emptyTable(100)
+                .update("PC1 = (ii%2 == 0) ? `Apple` : `Ball`",
+                        "PC2 = (ii%3 == 0) ? `Pen` : `Pencil`",
+                        "numbers = ii",
+                        "characters = (char) (65 + (ii % 23))");
+        final TableDefinition tableDefinition = TableDefinition.of(
+                ColumnDefinition.ofString("PC1").withPartitioning(),
+                ColumnDefinition.ofString("PC2").withPartitioning(),
+                ColumnDefinition.ofLong("numbers"),
+                ColumnDefinition.ofChar("characters"));
+        final File parentDir = new File(rootFile, "metadataPartitionedDataWithCustomTableDefinition");
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .setTableDefinition(tableDefinition)
+                .build();
+        writeKeyValuePartitionedTable(inputData, parentDir.getAbsolutePath(), writeInstructions);
+
+        // Replace files in directory with empty files and read the data
+        final File dir = new File(parentDir, "PC1=Apple" + File.separator + "PC2=Pen");
+        final String[] dataFileList = dir.list();
+        assertNotNull(dataFileList);
+        for (final String dataFile : dataFileList) {
+            final File file = new File(dir, dataFile);
+            assertTrue(file.delete());
+            assertTrue(file.createNewFile());
+        }
+
+        {
+            // Read as metadata partitioned
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
+                    .build();
+            final Table fromDiskAfterDelete = readTable(parentDir.getPath(), readInstructions);
+
+            // Make sure the definition is correct, and we can read the size as well as data for the remaining
+            // partitions
+            assertEquals(tableDefinition, fromDiskAfterDelete.getDefinition());
+            assertEquals(inputData.size(), fromDiskAfterDelete.size());
+            assertTableEquals(
+                    inputData.where("PC1 == `Ball` && PC2 == `Pencil`").sort("PC1", "PC2"),
+                    fromDiskAfterDelete.where("PC1 == `Ball` && PC2 == `Pencil`").sort("PC1", "PC2"));
+        }
+
+        {
+            // Read as key-value partitioned
+            final ParquetInstructions readInstructions = ParquetInstructions.builder()
+                    .setFileLayout(ParquetInstructions.ParquetFileLayout.KV_PARTITIONED)
+                    .build();
+            final Table fromDiskAfterDelete = readTable(parentDir.getPath(), readInstructions);
+
+            // The definition should be the same as the original table definition, but it won't be able to compute the
+            // size
+            assertEquals(tableDefinition, fromDiskAfterDelete.getDefinition());
+            try {
+                fromDiskAfterDelete.size();
+                fail("Exception expected because one of the partitions is not a valid parquet file");
+            } catch (final RuntimeException expected) {
+            }
+        }
+    }
+
     @Test
     public void testVectorColumns() {
         final Table table = getTableFlat(20000, true, false);
diff --git a/extensions/s3/src/test/java/io/deephaven/extensions/s3/CredentialsTest.java b/extensions/s3/src/test/java/io/deephaven/extensions/s3/CredentialsTest.java
index 07e9b825563..5e30478d8aa 100644
--- a/extensions/s3/src/test/java/io/deephaven/extensions/s3/CredentialsTest.java
+++ b/extensions/s3/src/test/java/io/deephaven/extensions/s3/CredentialsTest.java
@@ -3,12 +3,11 @@
 //
 package io.deephaven.extensions.s3;
 
-
 import org.junit.jupiter.api.Test;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
-public class CredentialsTest {
+class CredentialsTest {
 
     @Test
     void defaultCredentials() {
diff --git a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java
index f2f524faae8..36d20b85102 100644
--- a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java
+++ b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java
@@ -12,10 +12,10 @@
 import java.time.Duration;
 import java.util.Optional;
 
+import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown;
 import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
-import static org.assertj.core.api.AssertionsForClassTypes.fail;
 
-public class S3InstructionsTest {
+class S3InstructionsTest {
 
     @Test
     void defaults() {
@@ -63,7 +63,7 @@ void testMinMaxConcurrentRequests() {
                     .regionName("some-region")
                     .maxConcurrentRequests(-1)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("maxConcurrentRequests");
         }
@@ -76,7 +76,7 @@ void tooSmallMaxConcurrentRequests() {
                     .regionName("some-region")
                     .maxConcurrentRequests(0)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("maxConcurrentRequests");
         }
@@ -99,7 +99,7 @@ void tooSmallReadAheadCount() {
                     .regionName("some-region")
                     .readAheadCount(-1)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("readAheadCount");
         }
@@ -122,7 +122,7 @@ void tooSmallFragmentSize() {
                     .regionName("some-region")
                     .fragmentSize(8 * (1 << 10) - 1)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("fragmentSize");
         }
@@ -145,7 +145,7 @@ void badCredentials() {
                     .regionName("some-region")
                     .credentials(new Credentials() {})
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("credentials");
         }
@@ -158,7 +158,7 @@ void tooSmallWritePartSize() {
                     .regionName("some-region")
                     .writePartSize(1024)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("writePartSize");
         }
@@ -171,7 +171,7 @@ void tooSmallNumConcurrentWriteParts() {
                     .regionName("some-region")
                     .numConcurrentWriteParts(0)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("numConcurrentWriteParts");
         }
@@ -185,7 +185,7 @@ void tooLargeNumConcurrentWriteParts() {
                     .numConcurrentWriteParts(1001)
                     .maxConcurrentRequests(1000)
                     .build();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
         } catch (IllegalArgumentException e) {
             assertThat(e).hasMessageContaining("numConcurrentWriteParts");
         }
@@ -239,7 +239,7 @@ void testBadConfigFilePath() {
                     .configFilePath("/some/random/path")
                     .build()
                     .aggregatedProfileFile();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalStateException.class);
         } catch (IllegalStateException e) {
             assertThat(e).hasMessageContaining("/some/random/path");
         }
@@ -252,7 +252,7 @@ void testBadCredentialsFilePath() {
                     .credentialsFilePath("/some/random/path")
                     .build()
                     .aggregatedProfileFile();
-            fail("Expected exception");
+            failBecauseExceptionWasNotThrown(IllegalStateException.class);
         } catch (IllegalStateException e) {
             assertThat(e).hasMessageContaining("/some/random/path");
         }
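
Usage sketch (editor's note, not part of the diff): the tests above exercise the new ability to read a metadata-partitioned Parquet directory with a caller-supplied table definition. Below is a minimal, hypothetical sketch of the same pattern using only the APIs exercised in the diff (ParquetInstructions.builder(), setTableDefinition, setFileLayout, ParquetTools.readTable); the directory path and the column names/types are placeholders, not part of this change.

    import io.deephaven.engine.table.ColumnDefinition;
    import io.deephaven.engine.table.Table;
    import io.deephaven.engine.table.TableDefinition;
    import io.deephaven.parquet.table.ParquetInstructions;
    import io.deephaven.parquet.table.ParquetTools;

    public class ReadMetadataPartitionedSketch {
        public static void main(final String[] args) {
            // Hypothetical definition: partitioning columns must match the on-disk layout,
            // while data columns may be a subset or reordering of what was written.
            final TableDefinition definition = TableDefinition.of(
                    ColumnDefinition.ofString("PC1").withPartitioning(),
                    ColumnDefinition.ofString("PC2").withPartitioning(),
                    ColumnDefinition.ofLong("numbers"));
            final ParquetInstructions instructions = ParquetInstructions.builder()
                    .setTableDefinition(definition)
                    .setFileLayout(ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)
                    .build();
            // "/path/to/table" is a placeholder for a directory containing _metadata/_common_metadata files.
            final Table table = ParquetTools.readTable("/path/to/table", instructions);
            System.out.println("Read " + table.size() + " rows");
        }
    }

As in the tests, columns present in the supplied definition but absent from the files come back as null columns, and column renames can be layered on with addColumnNameMapping.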