Skip to content

Commit

Permalink
[Kernel] Add integration test for V2 checkpoints (delta-io#2992)
Browse files Browse the repository at this point in the history
  • Loading branch information
chirag-s-db authored Apr 30, 2024
1 parent 2f599e6 commit 7714e81
Show file tree
Hide file tree
Showing 46 changed files with 63 additions and 0 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1714496114594,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointInterval\":\"2\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"f6282e54-afc6-4669-939b-0f8ba73062a0"}}
{"metaData":{"id":"8a390218-e4ee-4341-b6de-4920e27d3f78","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1714496114564}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1714496114748,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.checkpointPolicy\":\"v2\"}"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"fddb3112-ca9b-48af-bf19-be23f1c36c22"}}
{"metaData":{"id":"8a390218-e4ee-4341-b6de-4920e27d3f78","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2","delta.checkpointPolicy":"v2"},"createdTime":1714496114564}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint","appendOnly","invariants"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"checkpointMetadata":{"version":2}}
{"sidecar":{"path":"00000000000000000002.checkpoint.0000000001.0000000002.bd1885fd-6ec0-4370-b0f5-43b5162fd4de.parquet","sizeInBytes":9367,"modificationTime":1714496115780}}
{"sidecar":{"path":"00000000000000000002.checkpoint.0000000002.0000000002.0a8d73ee-aa83-49d0-9583-c99db75b89b2.parquet","sizeInBytes":9296,"modificationTime":1714496115788}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint","appendOnly","invariants"]}}
{"metaData":{"id":"8a390218-e4ee-4341-b6de-4920e27d3f78","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2","delta.checkpointPolicy":"v2"},"createdTime":1714496114564}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"commitInfo":{"timestamp":1714496115090,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputRows":"10","numOutputBytes":"1952"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"a76e8fca-8bab-42cc-9618-77f8c536968c"}}
{"add":{"path":"part-00000-240b5dd6-323b-4f74-b6bc-ab9fdcacc630-c000.snappy.parquet","partitionValues":{},"size":485,"modificationTime":1714496115046,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00001-534ea355-2edd-4046-8d49-d932469170c7-c000.snappy.parquet","partitionValues":{},"size":496,"modificationTime":1714496115048,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00002-4438bc9d-9c60-4dd2-9343-574743ea4ca8-c000.snappy.parquet","partitionValues":{},"size":486,"modificationTime":1714496115087,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00003-ae431d66-23d5-4dc7-b961-136ce33e63da-c000.snappy.parquet","partitionValues":{},"size":485,"modificationTime":1714496115087,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":2,"size":9,"sizeInBytes":19554,"numOfAddFiles":4,"v2Checkpoint":{"path":"00000000000000000002.checkpoint.6374b053-df23-479b-b2cf-c9c550132b49.json","sizeInBytes":891,"modificationTime":1714496115810,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint","appendOnly","invariants"]}},{"metaData":{"id":"8a390218-e4ee-4341-b6de-4920e27d3f78","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2","delta.checkpointPolicy":"v2"},"createdTime":1714496114564}},{"checkpointMetadata":{"version":2}}],"sidecarFiles":[{"path":"00000000000000000002.checkpoint.0000000001.0000000002.bd1885fd-6ec0-4370-b0f5-43b5162fd4de.parquet","sizeInBytes":9367,"modificationTime":1714496115780},{"path":"00000000000000000002.checkpoint.0000000002.0000000002.0a8d73ee-aa83-49d0-9583-c99db75b89b2.parquet","sizeInBytes":9296,"modificationTime":1714496115788}]},"checksum":"d09f95a326aab562c60d415a32ddd216"}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1714496109365,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointInterval\":\"2\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"7517176e-cff7-46ac-b133-3cf096e2620d"}}
{"metaData":{"id":"7e2a1106-198b-4653-a612-2aa44685cb27","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1714496109258}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1714496110834,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.checkpointPolicy\":\"v2\"}"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"12ea26b9-c620-4104-95f6-654bcaabdda6"}}
{"metaData":{"id":"7e2a1106-198b-4653-a612-2aa44685cb27","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2","delta.checkpointPolicy":"v2"},"createdTime":1714496109258}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint","appendOnly","invariants"]}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"commitInfo":{"timestamp":1714496112086,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputRows":"10","numOutputBytes":"1952"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0-SNAPSHOT","txnId":"c9f86c17-1b30-44e7-873d-1e2102f54b0f"}}
{"add":{"path":"part-00000-485b0fff-1c7b-4f14-92e9-a72300fcdf88-c000.snappy.parquet","partitionValues":{},"size":485,"modificationTime":1714496111974,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00001-f7a80035-0622-431e-832e-a756c65cb2a5-c000.snappy.parquet","partitionValues":{},"size":496,"modificationTime":1714496111974,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00002-5754df9c-5a25-43a6-947b-f27840fddb1a-c000.snappy.parquet","partitionValues":{},"size":486,"modificationTime":1714496112068,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}}
{"add":{"path":"part-00003-6ab7bbbb-e14d-4fa3-8767-06b509e0a666-c000.snappy.parquet","partitionValues":{},"size":485,"modificationTime":1714496112071,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":2,"size":9,"sizeInBytes":37269,"numOfAddFiles":4,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":
"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":
"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"checkpointMetadata","type":{"type":"struct","fields":[{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"sidecar","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"v2Checkpoint":{"path":"00000000000000000002.checkpoint.e8fa2696-9728-4e9c-b285-634743fdd4fb.parquet","sizeInBytes":18634,"modificationTime":1714496114276,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint","appendOnly","invariants"]}},{"metaData":{"id":"7e2
a1106-198b-4653-a612-2aa44685cb27","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2","delta.checkpointPolicy":"v2"},"createdTime":1714496109258}},{"checkpointMetadata":{"version":2}}],"sidecarFiles":[{"path":"00000000000000000002.checkpoint.0000000001.0000000002.055454d8-329c-4e0e-864d-7f867075af33.parquet","sizeInBytes":9268,"modificationTime":1714496113961},{"path":"00000000000000000002.checkpoint.0000000002.0000000002.33321cc1-9c55-4d1f-8511-fafe6d2e1133.parquet","sizeInBytes":9367,"modificationTime":1714496113961}]},"checksum":"f81aaf268542b71bb3fc9b63f754f9df"}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,24 @@ class GoldenTables extends QueryTest with SharedSparkSession {
}
}

// Generates one golden table per V2 checkpoint top-level file format:
// "v2-checkpoint-parquet" and "v2-checkpoint-json".
//
// Each table is created at the golden table path with a single LONG `id` column,
// switched to the v2 checkpoint policy, and then appended with 10 rows spread over
// 4 partitions. The default checkpoint interval is set to 2 for the duration of the
// block, and the checkpoint part size is set to 2.
Seq("parquet", "json").foreach { ckptFormat =>
  val tbl = "tbl"
  generateGoldenTable(s"v2-checkpoint-$ckptFormat") { tablePath =>
    withTable(tbl) {
      // All session confs are set through withSQLConf only. The top-level file format
      // is already part of this list, so no additional spark.conf.set call is needed
      // (a direct set would duplicate the entry below).
      withSQLConf(
        (DeltaSQLConf.DELTA_CHECKPOINT_PART_SIZE.key, "2"),
        ("spark.databricks.delta.properties.defaults.checkpointInterval", "2"),
        (DeltaSQLConf.CHECKPOINT_V2_TOP_LEVEL_FILE_FORMAT.key, ckptFormat)) {
        // Version 0: create the external table at the golden table location.
        sql(s"CREATE TABLE $tbl (id LONG) USING delta LOCATION '$tablePath'")
        // Version 1: enable the v2 checkpoint policy on the table.
        sql(s"ALTER TABLE $tbl SET TBLPROPERTIES('delta.checkpointPolicy' = 'v2')")
        // Version 2: append ids 0..9, repartitioned into 4 partitions before writing.
        spark.range(10).repartition(4)
          .write.format("delta").mode("append").saveAsTable(tbl)
      }
    }
  }
}

// Golden table "no-delta-log-folder": 20 rows (ids 0..19) saved with the plain
// parquet writer rather than the delta format.
// NOTE(review): presumably this leaves the directory without a _delta_log folder,
// as the table name suggests — confirm against the consuming test.
generateGoldenTable("no-delta-log-folder") { tablePath =>
  val rows = spark.range(20)
  rows.write.format("parquet").save(tablePath)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,22 @@ public void runTests() throws Exception {
Optional.empty(), /* predicate */
6 /* expected row count */);

// Basic read: table with JSON V2 checkpoint
runAndVerifyRowCount(
"basic_read_table_v2_checkpoint_json",
"v2-checkpoint-json",
Optional.empty(), /* read schema - read all columns */
Optional.empty(), /* predicate */
10 /* expected row count */);

// Basic read: table with Parquet V2 checkpoint
runAndVerifyRowCount(
"basic_read_table_v2_checkpoint_parquet",
"v2-checkpoint-parquet",
Optional.empty(), /* read schema - read all columns */
Optional.empty(), /* predicate */
10 /* expected row count */);

// Partition pruning: simple expression
runAndVerifyRowCount(
"partition_pruning_simple_filter",
Expand Down

0 comments on commit 7714e81

Please sign in to comment.