我使用Cassandra数据库来读取数据,我创建了一个ConstantInputDStream,但是在执行我的代码后它没有返回任何数据。执行以下代码后,想要将结果保存在新的cassandra表中,但代码不返回任何结果。我不知道自己做错了什么。
@transient val conf = new SparkConf(true)
.set("spark.cassandra.connection.host", "127.0.0.1").setAppName("CasteDate").setMaster("local[*]")
.set("spark.cassandra.connection.port", "9042")
.set("spark.driver.allowMultipleContexts", "true")
.set("spark.streaming.receiver.writeAheadLog.enable", "true")
Executors.newScheduledThreadPool(30)
@transient val sc: SparkContext = new SparkContext(conf)
@transient val ssc: StreamingContext = new StreamingContext(sc, Seconds(10))
val checkpointDirectory = "/root/Desktop/checkpoint"
ssc.checkpoint(checkpointDirectory)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
var input: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
input.setTimeZone(TimeZone.getTimeZone("GMT"))
var dia: SimpleDateFormat = new SimpleDateFormat("dd")
var mes: SimpleDateFormat = new SimpleDateFormat("MM")
var ano: SimpleDateFormat = new SimpleDateFormat("yyyy")
var horas: SimpleDateFormat = new SimpleDateFormat("HH")
var minutos: SimpleDateFormat = new SimpleDateFormat("mm")
val dataRdd = ssc.cassandraTable("smartgrids","analyzer_temp").select("timecol").collect()
val inputRdd = ssc.sparkContext.parallelize(dataRdd)
val dstream = new ConstantInputDStream(ssc, inputRdd)
sc.stop()
val result = dstream.map{rdd =>
val day = dia.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
val hour = horas.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
val minute = minutos.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
val month = mes.format(input.parse(rdd.toString().replace("CassandraRow{timecol: ", "").replace("}", "").replace("-0800", "").replace("-0700", "")))
val year = ano.format(input.parse(rdd.toString().replace("CassandraRow{timecol: " , "").replace("}", "").replace("-0800", "").replace("-0700", "")))
Seq(day, hour, minute, month, year)
}
result.foreachRDD{rdd => if (!rdd.isEmpty)
rdd.saveToCassandra("features", "datepart", SomeColumns("day", "month", "year", "hour", "minute"))
}
ssc.start()
//ssc.awaitTermination()
输出后执行代码:
dataRdd: Array[com.datastax.spark.connector.CassandraRow] = Array(CassandraRow{timecol:
2015-08-20 21:01:00-0700}, CassandraRow{timecol:
2014-11-07 12:22:00-0800}, CassandraRow{timecol:
2014-11-07 07:24:00-0800}, CassandraRow{timecol:
2014-11-07 17:15:00-0800}, CassandraRow{timecol:
2014-11-12 15:25:00-0800}, CassandraRow{timecol:
2014-11-08 06:27:00-0800}, CassandraRow{timecol:
2014-11-17 12:20:00-0800}, CassandraRow{timecol:
2014-11-07 21:38:00-0800}, CassandraRow{timecol:
2014-10-29 17:07:00-070...
dstream: org.apache.spark.streaming.dstream.ConstantInputDStream[com.datastax.spark.connector.CassandraRow] = org.apache.spark.streaming.dstream.ConstantInputDStream@7ae0e35f
result: org.apache.spark.streaming.dstream.DStream[Seq[String]] = org.apache.spark.streaming.dstream.MappedDStream@27d60b43