I have a table in Hive. I query it with Spark and do some processing, and then, based on certain conditions, I want to store some values in a table that is newly created in the code. Below is the code I tried, along with a few other variants, but every time I get an exception.
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark: SparkSession = SparkSession.builder.appName("Spark Preprocess").enableHiveSupport().getOrCreate()
import spark._
import spark.implicits._
val dataFrame = sql("select field1, field2, field3 from some_table").as[SomeTable]
val tableName = "ResultsTable_" + new SimpleDateFormat("yyMMdd_HHmmss").format(new Date())
sql("create table " + tableName + " (x bigint, y bigint)")
dataFrame.mapPartitions(partitionIterator => {
  var rows = List[Result]()
  partitionIterator.foreach(row => {
    // some calculation based on the row computes x and y (happens only in some cases)
    rows = rows :+ Result(x, y)
  })
  Iterator(rows)
}).foreach(result => result.toDF().write.mode(SaveMode.Append).insertInto(tableName))
case class Result(x: Long, quantity: Long)
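
One restructuring I was also considering, which avoids calling toDF() inside foreach on the executors, is to have mapPartitions emit the Result rows directly (so the result is a Dataset[Result]) and do a single write from the driver. A rough sketch, where someCondition, x and y are placeholders for my actual logic:

// Sketch: emit Result rows straight from mapPartitions instead of building
// per-partition Lists, then write the whole Dataset once from the driver.
// someCondition, x and y stand in for the real conditional calculation.
val results = dataFrame.mapPartitions { partitionIterator =>
  partitionIterator.flatMap { row =>
    if (someCondition(row)) Iterator(Result(x, y)) else Iterator.empty
  }
}
results.write.mode(SaveMode.Append).insertInto(tableName)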
Exception:
java.lang.ArrayStoreException: scala.collection.mutable.WrappedArray$ofRef
at scala.collection.mutable.ArrayBuilder$ofRef.$plus$eq(ArrayBuilder.scala:87)
at scala.collection.mutable.ArrayBuilder$ofRef.$plus$eq(ArrayBuilder.scala:56)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2183)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2532)
Other things I have tried:
foreach(result => result.toDS().write.mode(SaveMode.Append).insertInto(tableName))
foreach(result => sqlContext.createDataset(result).write.mode(SaveMode.Append).insertInto(tableName))
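
For clarity, the createDataset variant I was attempting, spelled out on the driver with dummy values, is essentially the following; in my real code the `result` list is built on the executors, which may be part of the problem:

// Driver-side equivalent of the createDataset attempt, with dummy values.
val ds = spark.createDataset(Seq(Result(1L, 2L)))
ds.write.mode(SaveMode.Append).insertInto(tableName)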