Skip to content

Commit

Permalink
Use Delta table stats if time travel is the latest version
Browse files Browse the repository at this point in the history
  • Loading branch information
minyyy committed Jul 11, 2024
1 parent 9dad86a commit c58f08a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,14 @@ case class DeltaTableV2(
/**
 * Creates a [[LogicalRelation]] that represents this table.
 *
 * The relation is built from `toBaseRelation`. The catalog table passed to the
 * [[LogicalRelation]] is `catalogTable` when the time travel spec carries a version equal to
 * `initialSnapshot.version`, and `ttSafeCatalogTable` in every other case.
 */
lazy val toLogicalRelation: LogicalRelation = {
val relation = this.toBaseRelation
// this.toBaseRelation calls initialSnapshot which forces its initialization.
// NOTE(review): per the commit title, time travel to the latest version should be able to use
// the table stats -- presumably those are carried by `catalogTable` and not by
// `ttSafeCatalogTable`; confirm against their definitions.
val catalogTableOpt = if (timeTravelSpec.flatMap(_.version).contains(initialSnapshot.version)) {
catalogTable
} else {
ttSafeCatalogTable
}
LogicalRelation(
// NOTE(review): diff rendering -- the next line appears to be the removed (pre-change) argument
// list and the line after it its replacement; only one of the two belongs in the real file.
relation, toAttributes(relation.schema), ttSafeCatalogTable, isStreaming = false)
relation, toAttributes(relation.schema), catalogTableOpt, isStreaming = false)
}

/** Creates a [[DataFrame]] that uses the requested spark session to read from this table */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,22 @@ class DeltaTimeTravelSuite extends QueryTest
}
}

// Checks that a time-travel read of version 0 exposes column statistics in the optimized plan
// once ANALYZE TABLE has computed them, and exposes none before that.
test("Time travel of latest version is able to use stats") {
  withTable("test") {
    Seq(("1", 1), ("2", 1))
      .toDF("part", "a")
      .coalesce(1)
      .write
      .format("delta")
      .saveAsTable("test")

    // The table has received a single write, so version 0 is presumably its latest version.
    val timeTravelQuery = "select * from test version as of 0"

    val planBeforeAnalyze = spark.sql(timeTravelQuery).queryExecution.optimizedPlan
    assert(
      planBeforeAnalyze.stats.attributeStats.isEmpty,
      "expected no column stats before ANALYZE TABLE has run")

    spark.sql("ANALYZE TABLE test COMPUTE STATISTICS FOR ALL COLUMNS")

    // Plan the same query again so the optimizer can pick up the freshly computed stats.
    val planAfterAnalyze = spark.sql(timeTravelQuery).queryExecution.optimizedPlan
    assert(
      planAfterAnalyze.stats.attributeStats.nonEmpty,
      "expected column stats on a time-travel read of the latest version after ANALYZE TABLE")
  }
}


test("SPARK-41154: Correct relation caching for queries with time travel spec") {
val tblName = "tab"
Expand Down

0 comments on commit c58f08a

Please sign in to comment.