我尝试在toTF TempTable中保存一些分析,但是收到以下错误":215:错误:值toDF不是Double"的成员。 我正在阅读Cassandra表的数据,并且我正在做一些计算。我想将这些结果保存在临时表中。 我是斯卡拉的新人,有人可以帮助我吗? 我的代码
case class Consumo(consumo:Double, consumo_mensal: Double, mes: org.joda.time.DateTime,ano: org.joda.time.DateTime, soma_pf: Double,empo_gasto: Double);
object Analysegridata{
val conf = new SparkConf(true)
.set("spark.cassandra.connection.host","127.0.0.1").setAppName("LiniarRegression")
.set("spark.cassandra.connection.port", "9042")
.set("spark.driver.allowMultipleContexts", "true")
.set("spark.streaming.receiver.writeAheadLog.enable", "true");
val sc = new SparkContext(conf);
val ssc = new StreamingContext(sc, Seconds(1))
val sqlContext = new org.apache.spark.sql.SQLContext(sc);
val checkpointDirectory = "/var/lib/cassandra/data"
ssc.checkpoint(checkpointDirectory) // set checkpoint directory
// val context = StreamingContext.getOrCreate(checkpointDirectory)
import sqlContext.implicits._
JavaSparkContext.fromSparkContext(sc);
def rddconsumo(rddData: Double): Double = {
val rddData: Double = {
implicit val data = conf
val grid = sc.cassandraTable("smartgrids", "analyzer").as((r:Double) => (r)).collect
def goto(cs: Array[Double]): Double = {
var consumo = 0.0;
var totaldias = 0;
var soma_pf = 0.0;
var somamc = 0.0;
var tempo_gasto = 0.0;
var consumo_mensal = 0.0;
var i=0
for (i <- 0 until grid.length) {
val minutos = sc.cassandraTable("smartgrids","analyzer_temp").select("timecol", "MINUTE");
val horas = sc.cassandraTable("smartgrids","analyzer_temp").select("timecol","HOUR_OF_DAY");
val dia = sc.cassandraTable("smartgrids","analyzer_temp").select("timecol", "DAY_OF_MONTH");
val ano = sc.cassandraTable("smartgrids","analyzer_temp").select("timecol", "YEAR");
val mes = sc.cassandraTable("smartgrids","analyzer_temp").select("timecol", "MONTH");
val potencia = sc.cassandraTable("smartgrids","analyzer_temp").select("n_pf1", "n_pf2", "n_pf3")
def convert_minutos (minuto : Int) : Double ={
minuto/60
}
dia.foreach (i => {
def adSum(potencia: Array[Double]) = {
var i=0;
while (i < potencia.length) {
soma_pf += potencia(i);
i += 1;
soma_pf;
println("Potemcia =" + soma_pf)
}
}
def tempo(minutos: Array[Int]) = {
var i=0;
while (i < minutos.length) {
somamc += convert_minutos(minutos(i))
i += 1;
somamc
}
}
def tempogasto(horas: Array[Int]) = {
var i=0;
while (i < horas.length) {
tempo_gasto = horas(i) + somamc;
i += 1;
tempo_gasto;
println("Temo que o aparelho esteve ligado =" + tempo_gasto)
}
}
def consumof(dia: Array[Int]) = {
var i=0;
while (i < dia.length) {
consumo = soma_pf * tempo_gasto;
i += 1;
consumo;
println("Consumo diario =" + consumo)
}
}
})
mes.foreach (i => {
def totaltempo(dia: Array[Int]) = {
var i = 0;
while(i < dia.length){
totaldias += dia(i);
i += 1;
totaldias;
println("Numero total de dias =" + totaldias)
}
}
def consumomensal(mes: Array[Int]) = {
var i = 0;
while(i < mes.length){
consumo_mensal = consumo * totaldias;
i += 1;
consumo_mensal;
println("Consumo Mensal =" + consumo_mensal);
}
}
})
}
consumo;
totaldias;
consumo_mensal;
soma_pf;
tempo_gasto;
somamc
}
rddData
}
rddData.toDF().registerTempTable("rddData")
}
ssc.start()
ssc.awaitTermination()
error: value toDF is not a member of Double"
答案 0 :(得分:0)
相当不清楚你正在尝试做什么(代码太多,尝试提供 minimal 示例),但有一些明显的问题:
rddData
类型为Double
:好像应该是RDD[Double]
(这是Double值的分布式集合)。尝试将单个Double
值保存为表并没有多大意义,确实 - 不起作用(toDF
可以在RDD
上调用,而不是任何类型,特别是不在Double
上,如编译器警告的那样。collect()
数据:如果您要加载RDD,使用某些操作对其进行转换,然后将其另存为表格 - collect()
应该不是呼吁RDD
。 collect()
将所有数据(分布在群集中)发送到单个&#34;驱动程序&#34;机器(运行此代码的机器) - 之后您没有利用群集,并且再次不使用RDD
数据结构,因此您无法使用{将数据转换为DataFrame {1}}。