Skip to content

Commit

Permalink
Spark: Avoid attempting to parse partition path into different types i…
Browse files Browse the repository at this point in the history
…f type inference is disabled
  • Loading branch information
amogh-jahagirdar committed Jul 14, 2024
1 parent 573a57f commit 60d2292
Showing 1 changed file with 37 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -528,46 +528,46 @@ private[delta] object PartitionUtils {
timeZone: TimeZone,
dateFormatter: DateFormatter,
timestampFormatter: TimestampFormatter): Literal = {
val decimalTry = Try {
// `BigDecimal` conversion can fail when the `field` is not a form of number.
val bigDecimal = new JBigDecimal(raw)
// It reduces the cases for decimals by disallowing values having scale (eg. `1.1`).
require(bigDecimal.scale <= 0)
// `DecimalType` conversion can fail when
// 1. The precision is bigger than 38.
// 2. scale is bigger than precision.
Literal(bigDecimal)
}
if (typeInference) {
val decimalTry = Try {
// `BigDecimal` conversion can fail when the `field` is not a form of number.
val bigDecimal = new JBigDecimal(raw)
// It reduces the cases for decimals by disallowing values having scale (eg. `1.1`).
require(bigDecimal.scale <= 0)
// `DecimalType` conversion can fail when
// 1. The precision is bigger than 38.
// 2. scale is bigger than precision.
Literal(bigDecimal)
}

val dateTry = Try {
// try and parse the date, if no exception occurs this is a candidate to be resolved as
// DateType
dateFormatter.parse(raw)
// SPARK-23436: Casting the string to date may still return null if a bad Date is provided.
// This can happen since DateFormat.parse may not use the entire text of the given string:
// so if there are extra-characters after the date, it returns correctly.
// We need to check that we can cast the raw string since we later can use Cast to get
// the partition values with the right DataType (see
// org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning)
val dateValue = Cast(Literal(raw), DateType).eval()
// Disallow DateType if the cast returned null
require(dateValue != null)
Literal.create(dateValue, DateType)
}
val dateTry = Try {
// try and parse the date, if no exception occurs this is a candidate to be resolved as
// DateType
dateFormatter.parse(raw)
// SPARK-23436: Casting the string to date may still return null if a bad Date is provided.
// This can happen since DateFormat.parse may not use the entire text of the given string:
// so if there are extra-characters after the date, it returns correctly.
// We need to check that we can cast the raw string since we later can use Cast to get
// the partition values with the right DataType (see
// org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning)
val dateValue = Cast(Literal(raw), DateType).eval()
// Disallow DateType if the cast returned null
require(dateValue != null)
Literal.create(dateValue, DateType)
}

val timestampTry = Try {
val unescapedRaw = unescapePathName(raw)
// try and parse the date, if no exception occurs this is a candidate to be resolved as
// TimestampType
timestampFormatter.parse(unescapedRaw)
// SPARK-23436: see comment for date
val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval()
// Disallow TimestampType if the cast returned null
require(timestampValue != null)
Literal.create(timestampValue, TimestampType)
}
val timestampTry = Try {
val unescapedRaw = unescapePathName(raw)
// try and parse the date, if no exception occurs this is a candidate to be resolved as
// TimestampType
timestampFormatter.parse(unescapedRaw)
// SPARK-23436: see comment for date
val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval()
// Disallow TimestampType if the cast returned null
require(timestampValue != null)
Literal.create(timestampValue, TimestampType)
}

if (typeInference) {
// First tries integral types
Try(Literal.create(Integer.parseInt(raw), IntegerType))
.orElse(Try(Literal.create(JLong.parseLong(raw), LongType)))
Expand Down

0 comments on commit 60d2292

Please sign in to comment.