From 60d22920e32acf3ca6f880b171c8b34979c1723e Mon Sep 17 00:00:00 2001 From: Amogh Jahagirdar Date: Sun, 14 Jul 2024 11:37:26 -0600 Subject: [PATCH] Spark: Avoid attempting to parse partition path into different types if type inference is disabled --- .../spark/sql/delta/util/PartitionUtils.scala | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala index 1dc56b8173b..633e155f6d7 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala @@ -528,46 +528,46 @@ private[delta] object PartitionUtils { timeZone: TimeZone, dateFormatter: DateFormatter, timestampFormatter: TimestampFormatter): Literal = { - val decimalTry = Try { - // `BigDecimal` conversion can fail when the `field` is not a form of number. - val bigDecimal = new JBigDecimal(raw) - // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`). - require(bigDecimal.scale <= 0) - // `DecimalType` conversion can fail when - // 1. The precision is bigger than 38. - // 2. scale is bigger than precision. - Literal(bigDecimal) - } + if (typeInference) { + val decimalTry = Try { + // `BigDecimal` conversion can fail when the `field` is not a form of number. + val bigDecimal = new JBigDecimal(raw) + // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`). + require(bigDecimal.scale <= 0) + // `DecimalType` conversion can fail when + // 1. The precision is bigger than 38. + // 2. scale is bigger than precision. + Literal(bigDecimal) + } - val dateTry = Try { - // try and parse the date, if no exception occurs this is a candidate to be resolved as - // DateType - dateFormatter.parse(raw) - // SPARK-23436: Casting the string to date may still return null if a bad Date is provided. 
- // This can happen since DateFormat.parse may not use the entire text of the given string: - // so if there are extra-characters after the date, it returns correctly. - // We need to check that we can cast the raw string since we later can use Cast to get - // the partition values with the right DataType (see - // org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning) - val dateValue = Cast(Literal(raw), DateType).eval() - // Disallow DateType if the cast returned null - require(dateValue != null) - Literal.create(dateValue, DateType) - } + val dateTry = Try { + // try and parse the date, if no exception occurs this is a candidate to be resolved as + // DateType + dateFormatter.parse(raw) + // SPARK-23436: Casting the string to date may still return null if a bad Date is provided. + // This can happen since DateFormat.parse may not use the entire text of the given string: + // so if there are extra-characters after the date, it returns correctly. + // We need to check that we can cast the raw string since we later can use Cast to get + // the partition values with the right DataType (see + // org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning) + val dateValue = Cast(Literal(raw), DateType).eval() + // Disallow DateType if the cast returned null + require(dateValue != null) + Literal.create(dateValue, DateType) + } - val timestampTry = Try { - val unescapedRaw = unescapePathName(raw) - // try and parse the date, if no exception occurs this is a candidate to be resolved as - // TimestampType - timestampFormatter.parse(unescapedRaw) - // SPARK-23436: see comment for date - val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval() - // Disallow TimestampType if the cast returned null - require(timestampValue != null) - Literal.create(timestampValue, TimestampType) - } + val timestampTry = Try { + val unescapedRaw = unescapePathName(raw) + // try and parse the 
date, if no exception occurs this is a candidate to be resolved as + // TimestampType + timestampFormatter.parse(unescapedRaw) + // SPARK-23436: see comment for date + val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval() + // Disallow TimestampType if the cast returned null + require(timestampValue != null) + Literal.create(timestampValue, TimestampType) + } - if (typeInference) { // First tries integral types Try(Literal.create(Integer.parseInt(raw), IntegerType)) .orElse(Try(Literal.create(JLong.parseLong(raw), LongType)))