I have developed a Spark Streaming application that reads several file streams and then unions them. The application uses checkpointing, and I restart it from the checkpoint data; after 4 or 5 restarts from the checkpoint it throws an exception. My code is as follows:
def creatingFunc(): StreamingContext = {
  //System.setProperty("hadoop.home.dir", "C:\\hadoop")
  val conf = new SparkConf()
    .setAppName("FileStreaming")
    .set("spark.streaming.fileStream.minRememberDuration", "2000000h")
    .set("spark.executor.instances", "2") /*.set("SPARK_CONF_DIR","src/main/resources")*/
    .registerKryoClasses(Array(classOf[org.apache.hadoop.io.LongWritable])) //.setMaster("local[7]")

  val sc = new SparkContext(conf)

  // Verify that the attached Spark cluster is 1.6.0+
  require(sc.version.replace(".", "").substring(0, 3).toInt >= 160,
    "Spark 1.6.0+ is required to run this code. Please attach it to a Spark 1.6.0+ cluster.")

  // Create a StreamingContext
  val ssc = new StreamingContext(sc, Seconds(batchIntervalSeconds))
  ssc.checkpoint("/mapr/cellos-mapr/user/mbazarganigilani/SparkStreaming1/src/main/checkpoints")

  val funcGSSNFilterHeader = (x: String) => !x.contains("servedMSISDN")
  val funcCCNFilterHeader = (x: String) => !x.contains("resultCode")

  val unionProbeStreams = (1 to 2).map {
    case 1 =>
      val ggsnArray = ssc.fileStream[LongWritable, Text, TextInputFormat](
          "/mapr/cellos-mapr/user/mbazarganigilani/SparkStreaming1/src/main/GGSN", filterF, false)
        .map(x => x._2.toString())
        .filter(funcGSSNFilterHeader)
        .map(_.split(","))
        .map(x => x.length match {
          case 25 => new SIMPLE_GGSN(x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11), x(12), x(13), x(14),
            new SIMPLE_GGSN_2(x(15), x(16), x(17), x(18), x(19), x(20), x(21), x(22), x(23), x(24)))
          case _ => new SIMPLE_GGSN("Invalid GGSN CDR", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
            new SIMPLE_GGSN_2("", "", "", "", "", "", "", "", "", ""))
        })
        .map(x => (new SIMPLE_KEY_JOINS(x.IMSI, x.CAHRGING_ID), x))
        .map(x => (x._1, x._2.IMSI))
      ggsnArray
    case 2 =>
      val ccnArray = ssc.fileStream[LongWritable, Text, TextInputFormat](
          "/mapr/cellos-mapr/user/mbazarganigilani/SparkStreaming1/src/main/CCN", filterF, false)
        .map(x => x._2.toString())
        .filter(funcCCNFilterHeader)
        .map(_.split(","))
        .map(x => x.length match {
          case 43 => new SIMPLE_CCN(x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9), x(10), x(11), x(12), x(13), x(14), x(15), x(16), x(17), x(18), x(19), x(20),
            new SIMPLE_CCN_2(x(21), x(22), x(23), x(24), x(25), x(26), x(27), x(28), x(29), x(30), x(31), x(32), x(33), x(34), x(35), x(36), x(37), x(38), x(39), x(40), x(41), x(42)))
          case _ => new SIMPLE_CCN("Invalid CCN CDR", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
            new SIMPLE_CCN_2("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""))
        })
        .map(x => (new SIMPLE_KEY_JOINS(x.IMSI, x.ccn_2.CHARGINGCONTEXT_16778226), x))
        .map(x => (x._1, x._2.IMSI))
      ccnArray
  }

  val joined = ssc.union(unionProbeStreams)
  joined.checkpoint(Duration(batchIntervalSeconds * 1000 * 5))
  //joined.foreachRDD(_.count())
  joined.foreachRDD(y => {
    println("this count for joint is " + y.count())
    //y.foreach(x => println(x))
  })

  ssc.remember(Minutes(1)) // To make sure data is not deleted by the time we query it interactively
  println("Creating function called to create new StreamingContext")
  newContextCreated = true
  ssc
}
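For reference, the StreamingContext is obtained through the usual checkpoint-recovery pattern, roughly like the sketch below (this is a simplified sketch, not my exact main; ssc.start()/awaitTermination() stand in for the rest of the driver code):

// Simplified sketch of the driver wiring: recover the StreamingContext from
// the checkpoint directory if one exists, otherwise build a fresh one via creatingFunc.
val checkpointDir = "/mapr/cellos-mapr/user/mbazarganigilani/SparkStreaming1/src/main/checkpoints"
val ssc = StreamingContext.getOrCreate(checkpointDir, creatingFunc _)
ssc.start()
ssc.awaitTermination()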
When I run the application after it has been restarted from the checkpoint data a few times, I get the following exception:
java.lang.IllegalStateException: SparkContext has been shutdown
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
at org.apache.spark.rdd.RDD.count(RDD.scala:1157)
at UnionStream$$anonfun$creatingFunc$5.apply(UnionStreaming.scala:453)
at UnionStream$$anonfun$creatingFunc$5.apply(UnionStreaming.scala:451)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:661)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:661)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:426)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
at scala.util.Try$.apply(Try.scala:161)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:224)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:223)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2016-09-27 12:58:15,025 ERROR [JobScheduler] scheduler.JobScheduler: Error running job streaming job 1474943750000 ms.4
I am using Spark Streaming 1.6.1.