我正在尝试使用 spark-testing-base 编写单元测试用例,但由于聚合没有发生,测试始终无法通过。我不知道如何把整个 DStream 传递给被测方法。欢迎任何建议。
import com.holdenkarau.spark.testing.StreamingSuiteBase
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.streaming.dstream.DStream
import org.scalatest.FunSuite
/**
 * Input record for the streaming test: a named entry carrying a single mark.
 *
 * Case classes are serializable by default, so no explicit `Serializable`
 * parent is needed.
 *
 * @param name key the marks are grouped by
 * @param mark individual mark value
 */
case class Person(name: String, mark: Int)
/**
 * Aggregated result record: the summed marks for one name.
 *
 * Case classes are serializable by default, so no explicit `Serializable`
 * parent is needed.
 *
 * @param name  key the marks were grouped by
 * @param total sum of the marks for `name`
 */
case class Total(name: String, total: Int)
/**
 * Streaming test for `Test.convertPersonToMarks`.
 *
 * `testOperation` feeds each inner list of `input` to the stream as a separate
 * micro-batch, and `convertPersonToMarks` aggregates inside `transform`, i.e.
 * once per batch. Records that must be summed together therefore have to be
 * placed in the same inner list.
 */
class SampleStreamingTest3 extends FunSuite with StreamingSuiteBase {

  test("simple test") {
    // A single batch holding all three records, so the per-batch groupBy/sum
    // sees them together. Splitting them into three inner lists would yield
    // three separate totals (200, 300, 400) instead of 900.
    val input = List(
      List(Person("Mark", 200), Person("Mark", 300), Person("Mark", 400))
    )

    // One input batch produces exactly one output batch.
    val expected = List(List(Total("Mark", 900)))

    testOperation[Person, Total](input, Test.convertPersonToMarks _, expected, ordered = false)
  }
}
/** Streaming transformations exercised by the tests in this file. */
object Test extends Serializable {

  /**
   * Sums `mark` per `name` within each batch of `input`.
   *
   * The aggregation runs inside `transform`, so it is applied to every batch
   * RDD independently; totals are NOT carried over from one batch to the next.
   *
   * @param input stream of [[Person]] records
   * @return stream with one [[Total]] per distinct name per batch; the stream
   *         also has a `print()` output operation registered on it
   */
  def convertPersonToMarks(input: DStream[Person]): DStream[Total] = {
    val totals = input.transform { rdd =>
      // Obtain the session from the RDD's own SparkContext configuration rather
      // than hard-coding a master/app name: the transformation then runs on
      // whatever context the caller (or the test harness) already created, and
      // no session object is captured by the closure.
      val spark = SparkSession.builder().config(rdd.sparkContext.getConf).getOrCreate()
      import spark.implicits._

      rdd.toDF()
        .groupBy("name")
        // The sum is cast back to Int to match Total.total.
        // NOTE(review): totals beyond the Int range would not fit — confirm acceptable.
        .agg(sum("mark").cast("Int").alias("total"))
        .as[Total]
        .rdd
    }

    // Kept from the original: registers a print() output operation on the result.
    totals.print()
    totals
  }
}