使用Spark Streaming进行单元测试DStream转换

时间:2018-09-21 14:23:52

标签: apache-spark-sql spark-streaming spark-structured-streaming

我正在尝试spark-testing-base中的单元测试用例,但是正在尝试该测试用例,但是由于某种原因,由于聚合没有发生,因此我无法使其正常工作。不知道如何将整个DStream传递给测试方法。欢迎任何建议。

import com.holdenkarau.spark.testing.StreamingSuiteBase
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.streaming.dstream.DStream
import org.scalatest.FunSuite

case class Person(name: String, mark: Int) extends Serializable

case class Total(name: String, total: Int) extends Serializable

class SampleStreamingTest3 extends FunSuite with StreamingSuiteBase  {


  test("simple test") {
    val input = List(List(Person("Mark", 200)),
                     List(Person("Mark", 300)),
                     List(Person("Mark", 400)))
    val expected = Array(List(Total("Mark",900)))

    testOperation[Person, Total](input, Test.convertPersonToMarks _, expected, ordered = false)

  }
}

object Test extends Serializable {
  def convertPersonToMarks(input: DStream[Person]): DStream[Total] = {
    val sparkSession = SparkSession.builder().appName("udf testings")
      .master("local[*]")
      .getOrCreate()

    import sparkSession.implicits._

    val output = input.transform { rdd =>
      val df = rdd.toDF()

      //df.select($"name",$"mark".as("total")).as[Total].rdd
      val output = df.groupBy("name").agg(sum("mark").cast("Int").alias("total")).as[Total]

      output.rdd
    }

   output.print()

    output

  }
}

0 个答案:

没有答案