
Commit d183fdb

Merge pull request #133 from RedisLabs/fix-issue-132

fix issue #132: scala.MatchError on dataframe read

gkorland authored Feb 15, 2019
2 parents: 6aaaba2 + 68c8f2a
Showing 2 changed files with 46 additions and 12 deletions.
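Background for the fix: buildScan receives its requiredColumns from Spark's column-pruning machinery, and Spark may request the columns in any order. The rows a relation returns must follow that requested order, not the order of the relation's declared schema; when the two drift apart, row values stop lining up with their field types, which is what surfaced as the scala.MatchError. A minimal sketch of the contract involved (that spark-redis mixes in PrunedFilteredScan specifically is an assumption; this page only shows the requiredColumns parameter):

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.sources.Filter

// Spark SQL data source contract (org.apache.spark.sql.sources.PrunedFilteredScan):
// the rows produced by buildScan must follow the order of `requiredColumns`,
// which need not match the schema order of the relation.
trait PrunedFilteredScan {
  def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row]
}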
src/main/scala/org/apache/spark/sql/redis/RedisSourceRelation.scala (20 changes: 10 additions & 10 deletions)
@@ -157,13 +157,13 @@ class RedisSourceRelation(override val sqlContext: SQLContext,
         new GenericRow(Array[Any]())
       }
     } else {
-      val filteredSchema = {
-        val requiredColumnsSet = Set(requiredColumns: _*)
-        val filteredFields = schema.fields
-          .filter { f =>
-            requiredColumnsSet.contains(f.name)
-          }
-        StructType(filteredFields)
+      // filter schema columns, it should be in the same order as given 'requiredColumns'
+      val requiredSchema = {
+        val fieldsMap = schema.fields.map(f => (f.name, f)).toMap
+        val requiredFields = requiredColumns.map { c =>
+          fieldsMap(c)
+        }
+        StructType(requiredFields)
       }
       val keyType =
         if (persistenceModel == SqlOptionModelBinary) {
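The effect of this hunk in isolation: the old code filtered schema.fields, which keeps fields in schema order even when requiredColumns asks for a different one; the new code looks each required column up by name, so the resulting StructType follows the requested order. A standalone illustration (the column names here are made up for the example):

import org.apache.spark.sql.types._

val schema = StructType(Seq(
  StructField("id", StringType),
  StructField("int", IntegerType)))
val requiredColumns = Array("int", "id")

// Old approach: filtering keeps schema order ("id", "int"), which no
// longer matches requiredColumns ("int", "id").
val filteredSchema =
  StructType(schema.fields.filter(f => requiredColumns.contains(f.name)))

// Fixed approach: look fields up by name, preserving the requested order.
val fieldsMap = schema.fields.map(f => (f.name, f)).toMap
val requiredSchema = StructType(requiredColumns.map(fieldsMap))

assert(filteredSchema.fieldNames.sameElements(Array("id", "int")))
assert(requiredSchema.fieldNames.sameElements(requiredColumns))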
@@ -173,12 +173,12 @@ class RedisSourceRelation(override val sqlContext: SQLContext,
         }
       keysRdd.mapPartitions { partition =>
         // grouped iterator to only allocate memory for a portion of rows
-        partition.grouped(iteratorGroupingSize).map { batch =>
+        partition.grouped(iteratorGroupingSize).flatMap { batch =>
           groupKeysByNode(redisConfig.hosts, batch.iterator)
             .flatMap { case (node, keys) =>
-              scanRows(node, keys, keyType, filteredSchema, requiredColumns)
+              scanRows(node, keys, keyType, requiredSchema, requiredColumns)
             }
-        }.flatten
+        }
       }
     }
   }
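The second hunk is a smaller cleanup riding along: on an iterator, map { ... }.flatten and flatMap { ... } yield the same elements, with flatMap doing it in one pass and without the intermediate nested collection per batch. A quick standalone check:

// map + flatten vs flatMap over grouped batches: same elements either way.
val viaMapFlatten = (1 to 6).iterator.grouped(2).map(batch => batch.map(_ * 10)).flatten
val viaFlatMap = (1 to 6).iterator.grouped(2).flatMap(batch => batch.map(_ * 10))

assert(viaMapFlatten.toList == viaFlatMap.toList) // List(10, 20, 30, 40, 50, 60)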
src/test/scala/com/redislabs/provider/redis/df/HashDataframeSuite.scala
@@ -1,18 +1,21 @@
 package com.redislabs.provider.redis.df
 
 import java.sql.{Date, Timestamp}
+import java.util.UUID
 
 import com.redislabs.provider.redis.toRedisContext
 import com.redislabs.provider.redis.util.Person.{data, _}
 import com.redislabs.provider.redis.util.TestUtils._
 import com.redislabs.provider.redis.util.{EntityId, Person}
 import org.apache.spark.SparkException
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.redis.RedisSourceRelation.tableDataKeyPattern
 import org.apache.spark.sql.redis._
-import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.{StructField, _}
 import org.scalatest.Matchers
 
+import scala.util.Random
+
 /**
   * @author The Viet Nguyen
   */
@@ -295,6 +298,37 @@ trait HashDataframeSuite extends RedisDataframeSuite with Matchers {
     }
   }
 
+  /**
+    * A test case for https://github.com/RedisLabs/spark-redis/issues/132
+    */
+  test("RedisSourceRelation.buildScan columns ordering") {
+    val schema = {
+      StructType(Array(
+        StructField("id", StringType),
+        StructField("int", IntegerType),
+        StructField("float", FloatType),
+        StructField("double", DoubleType),
+        StructField("str", StringType)))
+    }
+
+    val rowsNum = 8
+    val rdd = spark.sparkContext.parallelize(1 to rowsNum, 2).map { _ =>
+      def genStr = UUID.randomUUID().toString
+      def genInt = Random.nextInt()
+      def genDouble = Random.nextDouble()
+      def genFloat = Random.nextFloat()
+      Row.fromSeq(Seq(genStr, genInt, genFloat, genDouble, genStr))
+    }
+
+    val df = spark.createDataFrame(rdd, schema)
+    val tableName = generateTableName("cols-ordering")
+    df.write.format(RedisFormat).option(SqlOptionTableName, tableName).save()
+    val loadedDf = spark.read.format(RedisFormat).option(SqlOptionTableName, tableName).load()
+    loadedDf.schema shouldBe schema
+    loadedDf.collect().length shouldBe rowsNum
+    loadedDf.show()
+  }
+
   def saveMap(tableName: String): Unit = {
     Person.dataMaps.foreach { person =>
       saveMap(tableName, person("name"), person)
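The new test verifies a round trip of the full frame; an explicit reordered projection would push a non-schema-order requiredColumns into buildScan even more directly. A hypothetical extra check one could append inside the test body above (not part of the commit):

// Hypothetical addition to the test: force column pruning in an
// order that differs from the schema ("str", "double", "id").
val projected = loadedDf.select("str", "double", "id")
projected.schema.fieldNames shouldBe Array("str", "double", "id")
projected.collect().length shouldBe rowsNum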
