转换后ConstantInputDStream不返回任何数据

时间:2016-10-21 13:57:24

标签: scala spark-streaming spark-cassandra-connector

我使用Cassandra数据库来读取数据,我创建了一个ConstantInputDStream,但是在执行我的代码后它没有返回任何数据。执行以下代码后,想要将结果保存在新的cassandra表中,但代码不返回任何结果。我不知道自己做错了什么。

@transient val conf = new SparkConf(true)
   .set("spark.cassandra.connection.host", "127.0.0.1").setAppName("CasteDate").setMaster("local[*]")
   .set("spark.cassandra.connection.port", "9042")
   .set("spark.driver.allowMultipleContexts", "true")
   .set("spark.streaming.receiver.writeAheadLog.enable", "true")

   Executors.newScheduledThreadPool(30)
   @transient val sc: SparkContext = new SparkContext(conf)
   @transient val ssc: StreamingContext = new StreamingContext(sc, Seconds(10))

   val checkpointDirectory = "/root/Desktop/checkpoint"
       ssc.checkpoint(checkpointDirectory)

   val sqlContext = new org.apache.spark.sql.SQLContext(sc)

   var input: SimpleDateFormat   = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
   input.setTimeZone(TimeZone.getTimeZone("GMT"))
   var dia: SimpleDateFormat  = new SimpleDateFormat("dd")
   var mes: SimpleDateFormat = new SimpleDateFormat("MM")
   var ano: SimpleDateFormat = new SimpleDateFormat("yyyy")
   var horas: SimpleDateFormat = new SimpleDateFormat("HH")
   var minutos: SimpleDateFormat  = new SimpleDateFormat("mm")


    val dataRdd = ssc.cassandraTable("smartgrids","analyzer_temp").select("timecol").collect()
    val inputRdd =  ssc.sparkContext.parallelize(dataRdd)
    val dstream = new ConstantInputDStream(ssc, inputRdd)


        sc.stop()

      val result = dstream.map{rdd =>
                val day = dia.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
                val hour = horas.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
                val minute = minutos.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
                val month = mes.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
                val year = ano.format(input.parse(rdd.toString().replace("CassandraRow{timecol: " , "").replace("}", "").replace("-0800", "").replace("-0700", "")))

                Seq(day, hour, minute, month, year)                

            }

                result.foreachRDD{rdd => if (!rdd.isEmpty)
                        rdd.saveToCassandra("features", "datepart", SomeColumns("day", "month", "year", "hour", "minute"))
                }


    ssc.start()
    //ssc.awaitTermination()

输出后执行代码:

   dataRdd: Array[com.datastax.spark.connector.CassandraRow] =   Array(CassandraRow{timecol:
   2015-08-20 21:01:00-0700}, CassandraRow{timecol:
   2014-11-07 12:22:00-0800}, CassandraRow{timecol: 
   2014-11-07 07:24:00-0800}, CassandraRow{timecol: 
   2014-11-07 17:15:00-0800}, CassandraRow{timecol:
   2014-11-12 15:25:00-0800}, CassandraRow{timecol: 
   2014-11-08 06:27:00-0800}, CassandraRow{timecol: 
   2014-11-17 12:20:00-0800}, CassandraRow{timecol:
   2014-11-07 21:38:00-0800}, CassandraRow{timecol:
   2014-10-29 17:07:00-070... 

   dstream: org.apache.spark.streaming.dstream.ConstantInputDStream[com.datastax.spark.connector.CassandraRow] = org.apache.spark.streaming.dstream.ConstantInputDStream@7ae0e35f 

  result: org.apache.spark.streaming.dstream.DStream[Seq[String]] = org.apache.spark.streaming.dstream.MappedDStream@27d60b43

0 个答案:

没有答案