Skip to content

Commit

Permalink
Better failure messages related to Git LFS (#4565)
Browse files Browse the repository at this point in the history
Also updated the README to explain how to install and fetch from Git LFS
  • Loading branch information
malhotrashivam authored Oct 11, 2023
1 parent e12d8cb commit a55a2ab
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 35 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ docker run hello-world
If any dependencies are missing or unsupported versions are installed, see [Launch Deephaven from pre-built images](https://deephaven.io/core/docs/tutorials/quickstart#prerequisites) for installation instructions.


To run the unit tests, you will also need to install [Git LFS](https://git-lfs.com/) and fetch all the required files. Run the following commands from inside the cloned repository directory:
```
git lfs install
git lfs pull
```

### Create deployment

A directory must be created to store files and mount points for your deployment. Here, we are using the `deephaven-deployment` directory.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package io.deephaven.parquet.base;

/**
 * Exception thrown when trying to read an invalid Parquet file.
 */
public class InvalidParquetFileException extends ParquetFileReaderException {
    /**
     * Package-private on purpose: only the Parquet reading code in this package should raise it.
     *
     * @param message description of why the file is not a valid Parquet file
     */
    InvalidParquetFileException(String message) {
        super(message);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public ParquetFileReader(final String filePath, final SeekableChannelsProvider c
final long fileLen = readChannel.size();
if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer +
// footerIndex + MAGIC
throw new ParquetFileReaderException(
throw new InvalidParquetFileException(
filePath + " is not a Parquet file (too small length: " + fileLen + ")");
}

Expand All @@ -57,13 +57,13 @@ public ParquetFileReader(final String filePath, final SeekableChannelsProvider c
final byte[] magic = new byte[MAGIC.length];
Helpers.readBytes(readChannel, magic);
if (!Arrays.equals(MAGIC, magic)) {
throw new ParquetFileReaderException(
throw new InvalidParquetFileException(
filePath + " is not a Parquet file. expected magic number at tail "
+ Arrays.toString(MAGIC) + " but found " + Arrays.toString(magic));
}
final long footerIndex = footerLengthIndex - footerLength;
if (footerIndex < MAGIC.length || footerIndex >= footerLengthIndex) {
throw new ParquetFileReaderException(
throw new InvalidParquetFileException(
"corrupted file: the footer index is not within the file: " + footerIndex);
}
readChannel.position(footerIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io.deephaven.engine.testutil.junit4.EngineCleanup;
import io.deephaven.engine.util.BigDecimalUtils;
import io.deephaven.engine.util.file.TrackedFileHandleFactory;
import io.deephaven.parquet.base.InvalidParquetFileException;
import io.deephaven.parquet.table.location.ParquetTableLocationKey;
import io.deephaven.stringset.ArrayStringSet;
import io.deephaven.engine.table.Table;
Expand Down Expand Up @@ -1038,8 +1039,18 @@ public void readWriteStatisticsTest() {
public void verifyPyArrowStatistics() {
final String path = ParquetTableReadWriteTest.class.getResource("/e0/pyarrow_stats.parquet").getFile();
final File pyarrowDest = new File(path);
final Table pyarrowFromDisk = ParquetTools.readTable(pyarrowDest);

final Table pyarrowFromDisk;
try {
pyarrowFromDisk = ParquetTools.readTable(pyarrowDest);
} catch (RuntimeException e) {
if (e.getCause() instanceof InvalidParquetFileException) {
final String InvalidParquetFileErrorMsgString = "Invalid parquet file detected, please ensure the " +
"file is fetched properly from Git LFS. Run commands 'git lfs install; git lfs pull' inside " +
"the repo to pull the files from LFS. Check cause of exception for more details.";
throw new UncheckedDeephavenException(InvalidParquetFileErrorMsgString, e.getCause());
}
throw e;
}
// Verify that our verification code works for a pyarrow generated table.
assertTableStatistics(pyarrowFromDisk, pyarrowDest);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import io.deephaven.engine.table.impl.locations.TableDataException;
import io.deephaven.engine.testutil.junit4.EngineCleanup;
import io.deephaven.engine.util.TableTools;
import io.deephaven.parquet.base.InvalidParquetFileException;
import io.deephaven.parquet.table.layout.ParquetKeyValuePartitionedLayout;
import io.deephaven.stringset.HashStringSet;
import io.deephaven.stringset.StringSet;
Expand Down Expand Up @@ -436,58 +437,83 @@ public void testMultipleRenamesWithSameOuterName() {
t -> t.updateView("Y = Z", "Y = X").where("Y % 2 == 0"));
}

// Guidance surfaced when a test reads a Parquet resource that turns out to be invalid — presumably
// a Git LFS pointer stub that was never fetched (TODO confirm: that is what the message instructs
// the user to fix via 'git lfs install; git lfs pull').
private static final String InvalidParquetFileErrorMsgString = "Invalid parquet file detected, please ensure " +
        "the file is fetched properly from Git LFS. Run commands 'git lfs install; git lfs pull' inside the repo " +
        "to pull the files from LFS. Check cause of exception for more details.";

@Test
public void e0() {
    // Reads reference Parquet files (stored via Git LFS) in several compression codecs and
    // verifies they all decode to the same table as the uncompressed variant.
    try {
        final Table uncompressed =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e0/uncompressed.parquet").getFile());

        final Table gzip = ParquetTools.readTable(TestParquetTools.class.getResource("/e0/gzip.parquet").getFile());
        assertTableEquals(uncompressed, gzip);

        final Table lz4 = ParquetTools.readTable(TestParquetTools.class.getResource("/e0/lz4.parquet").getFile());
        assertTableEquals(uncompressed, lz4);

        final Table snappy =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e0/snappy.parquet").getFile());
        assertTableEquals(uncompressed, snappy);

        final Table zstd = ParquetTools.readTable(TestParquetTools.class.getResource("/e0/zstd.parquet").getFile());
        assertTableEquals(uncompressed, zstd);
    } catch (RuntimeException e) {
        if (e.getCause() instanceof InvalidParquetFileException) {
            // Translate into an actionable message pointing the developer at Git LFS setup.
            throw new UncheckedDeephavenException(InvalidParquetFileErrorMsgString, e.getCause());
        }
        // Bug fix: rethrow unrelated runtime failures instead of silently swallowing them,
        // which would otherwise let this test pass despite a real error.
        throw e;
    }
}

@Test
public void e1() {
    // Reads reference Parquet files (stored via Git LFS) in several compression codecs and
    // verifies they all decode to the same table as the uncompressed variant.
    try {
        final Table uncompressed =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e1/uncompressed.parquet").getFile());

        final Table gzip = ParquetTools.readTable(TestParquetTools.class.getResource("/e1/gzip.parquet").getFile());
        assertTableEquals(uncompressed, gzip);

        final Table lz4 = ParquetTools.readTable(TestParquetTools.class.getResource("/e1/lz4.parquet").getFile());
        assertTableEquals(uncompressed, lz4);

        final Table snappy =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e1/snappy.parquet").getFile());
        assertTableEquals(uncompressed, snappy);

        final Table zstd = ParquetTools.readTable(TestParquetTools.class.getResource("/e1/zstd.parquet").getFile());
        assertTableEquals(uncompressed, zstd);
    } catch (RuntimeException e) {
        if (e.getCause() instanceof InvalidParquetFileException) {
            // Translate into an actionable message pointing the developer at Git LFS setup.
            throw new UncheckedDeephavenException(InvalidParquetFileErrorMsgString, e.getCause());
        }
        // Bug fix: rethrow unrelated runtime failures instead of silently swallowing them,
        // which would otherwise let this test pass despite a real error.
        throw e;
    }
}

@Test
public void e2() {
    // Reads reference Parquet files (stored via Git LFS) in several compression codecs and
    // verifies they all decode to the same table as the uncompressed variant.
    try {
        final Table uncompressed =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e2/uncompressed.parquet").getFile());

        final Table gzip = ParquetTools.readTable(TestParquetTools.class.getResource("/e2/gzip.parquet").getFile());
        assertTableEquals(uncompressed, gzip);

        final Table lz4 = ParquetTools.readTable(TestParquetTools.class.getResource("/e2/lz4.parquet").getFile());
        assertTableEquals(uncompressed, lz4);

        final Table snappy =
                ParquetTools.readTable(TestParquetTools.class.getResource("/e2/snappy.parquet").getFile());
        assertTableEquals(uncompressed, snappy);

        final Table zstd = ParquetTools.readTable(TestParquetTools.class.getResource("/e2/zstd.parquet").getFile());
        assertTableEquals(uncompressed, zstd);
    } catch (RuntimeException e) {
        if (e.getCause() instanceof InvalidParquetFileException) {
            // Translate into an actionable message pointing the developer at Git LFS setup.
            throw new UncheckedDeephavenException(InvalidParquetFileErrorMsgString, e.getCause());
        }
        // Bug fix: rethrow unrelated runtime failures instead of silently swallowing them,
        // which would otherwise let this test pass despite a real error.
        throw e;
    }
}

private void testWriteRead(Table source, Function<Table, Table> transform) {
Expand Down

0 comments on commit a55a2ab

Please sign in to comment.