我正在尝试编写一个updateStateWithKey来获取输入的第一个值。当我尝试在案例类中使用它时,我收到错误
import org.apache.log4j.{Level, Logger}
import org.apache.spark._
import org.apache.spark.streaming._
case class Persons(name : String, school : String)
object StatefulNetworkWordCount {
def getPerson (str : String) : Persons = {
val splitArray = str.split(",")
val name = splitArray(0)
val school = splitArray(1)
Persons(name, school)
}
//Now, newValues is the new set of values
//runningCount is the existing values for each key
def updateFunction(newValues: Seq[Int], runningCount: Option[Int]): Option[Int] = {
val newCount = runningCount.getOrElse(0) + newValues.sum
Some(newCount)
}
def updateFunctionFrist(newValues: Seq[String], runningCount: Option[String]): Option[String] =
{
val newWord = if (runningCount.getOrElse("") == "")
{
val str = newValues.head.toString //Use existing values
Some(str)
}
else
{
val str = runningCount.getOrElse(newValues.head.toString)
Some(str)
}
newWord
}
def updateFunctionFirstPerson(newValues: Seq[Person], state: Option[Person]): Option[Person] =
{
val newWord = if (state.getOrElse("") == "") //If running count is empty
{
val str = newValues.head.asInstanceOf[Person]
Some(str)
}
else
{
val str = state.getOrElse(newValues.head.asInstanceOf[Person])
Some(str)
}
newWord
}
def main(args: Array[String]) {
Logger.getLogger("org").setLevel(Level.ERROR)
val conf = new SparkConf().setMaster("local[8]").setAppName("StatefulNetworkWordCount")
val ssc = new StreamingContext(conf, Seconds(10))
// Set checkpoint directory
ssc.checkpoint(".")
// Create a DStream that will connect to hostname:port, like localhost:9999
val lines = ssc.socketTextStream("localhost", 9999)
// Split each line into words
val words = lines.flatMap(_.split(" "))
// Count each word in each batch
val pairs = words.map(word => (word.hashCode, word))
val runningCounts = pairs.updateStateByKey[Persons](updateFunctionFirstPerson _)
runningCounts.print()
ssc.start() // Start the computation
ssc.awaitTermination() // Wait for the computation to terminate
}
}
该行
val runningCounts = pairs.updateStateByKey[Persons](updateFunctionFirstPerson _)
抛出错误,但如果我使用
val runningCounts = pairs.updateStateByKey[String](updateFunctionFirst _)
获取密钥的第一个值,它可以正常工作。我们可以在updateStateBykey中使用自定义类吗?我怎么用呢?