I tried to get some pointers from this question, but it didn't seem to help in my case.
I am getting "Unable to generate an encoder for inner class" in my main class. I moved the DataFrame construction into a separate class, and in my main class I assign its output to a val of type DataFrame. It compiles fine, but throws this error at runtime.
Also, when I keep all of the FTDataParser code inside main, it works, so there is some subtlety here that I am missing. I am new to Spark, so please forgive me if the question is too silly.
Exception in thread "main" org.apache.spark.sql.AnalysisException: Unable to generate an encoder for inner class `com.DC.FTDataProject.exchangeParameters$Scrip` without access to the scope that this class was defined in.
Try moving this class out of its parent class.;
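For context, Scrip is a case class defined inside my exchangeParameters trait, which is where the exchangeParameters$Scrip in the error comes from. The layout is roughly this (field names here are abbreviated and illustrative; the real case class has around 40 fields):

trait exchangeParameters {
  // currency maps, multipliers, helpers, ...

  case class Scrip(
      Symbol: String,
      Exchange: String,
      Close: Double
      // ... many more fields ...
  )
}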
My main class is:
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.col
import scala.util.{Failure, Success}
import scala.xml.XML

object MainApp extends App with Serializable {

  @transient lazy val log = org.apache.log4j.LogManager.getLogger("ScalaSparkLogger")

  val spark: SparkSession = SparkSession.builder
    .appName("FTDataProject")
    .config("spark.some.config.option", "some-value")
    .master("local")
    .getOrCreate()

  log.info("Start of Main Programme")

  val currency = new YahooCurrencyLoader() with CurrencyParameters
  val ccy = currency.getXML(currency.ccyUrl) match {
    case Success(v)  => XML.save("PreviousRun.xml", v); log.info("XML has been saved for use")
    case Failure(ex) => log.error("XML extraction failed. Look at Yahoo extraction class. " + ex.getMessage)
  }

  val dp = new FTDataParser() with exchangeParameters
  val checkDF: DataFrame = dp.sparkDfCreation("Europe", "22022017", "Annual", spark)
  checkDF.filter(col("Symbol").equalTo("HSBA:LSE")).show()
}
My FTDataParser class is:
def sparkDfCreation(d: String, p: String, a: String, s: org.apache.spark.sql.SparkSession): org.apache.spark.sql.DataFrame = {

  val currency = new YahooCurrencyLoader() with CurrencyParameters
  val xmllocation: String = "./PreviousRun.xml"
  val loadxml: Elem = XML.loadFile(xmllocation)
  //print(loadxml)
  //print(currency.findCurrency(loadxml,"GBP"))
  log.info("USD CAD Cross is " + currency.findCurrency(loadxml, "CAD").head)

  val fn = fileHandler(d, p, a)
  log.info("The filename is " + fn)

  val df = s.read.option("delimiter", "|").option("header", "true").option("inferSchema", "true").csv(fn)
  log.info("The schema is " + df.schema.treeString) // printSchema() returns Unit, so log the tree string instead

  import s.implicits._
  //val newdf = df.withColumn("TradeCCY", parseString($"PriceCCY"))
  //val newDF = df.withColumn("CAvgVolume",udfStringtoNumber($"AvgVolume"))
  //val a = df.columns.toList

  val checkDF = df.rdd.map { x =>
    val Close: Double = parseDouble(x(2)).getOrElse(999999)
    val Open: Double = parseDouble(x(5)).getOrElse(999999)
    val High: Double = parseDouble(x(6)).getOrElse(999999)
    val Low: Double = parseDouble(x(7)).getOrElse(999999)
    val PreviousClose: Double = parseDouble(x(8)).getOrElse(999999)
    val priceccy: String = spc(x(3))
    val realpriceccymul: Int = 1 / currency_multiplier_map(exchange_to_real_ccy_map.getOrElse(priceccy, priceccy))
    val currencyCon: Double = currency.currencyCon(loadxml, exchange_to_real_ccy_map.getOrElse(priceccy, priceccy), "GBP")
    val repccy: String = repccyFinder(spc(x(30)), spc(x(71)), spc(x(120)))
    val reppriceccymul: Int = 1 / currency_multiplier_map(exchange_to_real_ccy_map.getOrElse(repccy, repccy))
    val repcurrencyCon: Double = currency.currencyCon(loadxml, exchange_to_real_ccy_map.getOrElse(priceccy, priceccy), repccy)
    Scrip(
      stringParser(x(0)) + ":" + d + ":" + p,
      d,
      stringParser(x(0)),
      stringParser(x(1)),
      priceccy,
      stringParser(x(28)),
      stringParser(x(29)),
      parseDouble(x(4)).getOrElse(999999), // 999999 means Beta is missing.
      Close,
      Open,
      High,
      Low,
      PreviousClose,
      Close * realpriceccymul * currencyCon,
      Open * realpriceccymul * currencyCon,
      High * realpriceccymul * currencyCon,
      Low * realpriceccymul * currencyCon,
      PreviousClose * reppriceccymul * currencyCon,
      Close * reppriceccymul * repcurrencyCon,
      Open * reppriceccymul * repcurrencyCon,
      High * reppriceccymul * repcurrencyCon,
      Low * reppriceccymul * repcurrencyCon,
      PreviousClose * realpriceccymul * repcurrencyCon,
      currency.findCurrency(loadxml, "GBP").head,
      currency.findCurrency(loadxml, "AUD").head,
      currency.findCurrency(loadxml, "EUR").head,
      currency.findCurrency(loadxml, "INR").head,
      currency.findCurrency(loadxml, "JPY").head,
      currency.findCurrency(loadxml, "CAD").head,
      parseDouble(x(9)).getOrElse(999999),
      x(11).toString,
      parseDouble(x(10)).getOrElse(999999),
      x(12).toString,
      stringConvertor(x(13)),
      stringConvertor(x(14)),
      stringConvertor(x(15)),
      stringConvertor(x(17)),
      x(18).toString.trim,
      parseDouble(x(19)).getOrElse(999999), // 999999 means EPS is missing.
      x(20).toString.trim,
      parseDouble(x(21)).getOrElse(999999), // 999999 means Annual Divi is missing.
      parseDouble(stringConvertor(x(23))).getOrElse(999999), // 999999 means Annual Divi yield is missing.
      x(22).toString
    )
  }.toDF()

  checkDF // the explicit return keyword is unnecessary in Scala
}
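As far as I can tell, the failure boils down to calling .toDF() on an RDD of a case class that is nested inside a trait. Here is a stripped-down, hypothetical example (names invented, nothing to do with my real classes) that I would expect to fail with the same exception:

import org.apache.spark.sql.{DataFrame, SparkSession}

trait Params {
  case class Rec(id: Int, name: String) // nested case class, like my Scrip
}

class Builder extends Params {
  def build(s: SparkSession): DataFrame = {
    import s.implicits._
    // Rec is an inner class of Params, so Spark cannot generate its
    // encoder without access to the enclosing instance's scope.
    s.sparkContext.parallelize(Seq(Rec(1, "a"), Rec(2, "b"))).toDF()
  }
}

object Repro extends App {
  val spark = SparkSession.builder.master("local").appName("repro").getOrCreate()
  new Builder().build(spark).show()
}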
However, when I change the signature of sparkDfCreation to def sparkDfCreation(a : String, d : String, p : String, s : org.apache.spark.sql.SparkSession) : RDD[Scrip] (returning the RDD instead of calling .toDF() inside the method) and then add these two lines in main, it seems to work:
  val checkRDD = dp.sparkDfCreation("Europe", "22022017", "Annual", spark)
  //checkDF.filter(col("Symbol").equalTo("HSBA:LSE")).show()
  val checkDF = spark.sqlContext.createDataFrame(checkRDD)
  checkDF.filter(col("Symbol").equalTo("HSBA:LSE")).show()
}
But this is more verbose than just calling .toDF() in the helper class. Isn't this a bug?
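The exception message itself says "Try moving this class out of its parent class", so presumably the intended fix is to promote Scrip to a top-level case class rather than round-tripping through an RDD. A sketch of that restructuring (assuming Scrip does not depend on anything inside exchangeParameters):

package com.DC.FTDataProject

// Top-level case class: Spark can derive an encoder from the type alone,
// without needing an outer instance in scope.
case class Scrip(
    Symbol: String,
    Exchange: String,
    Close: Double
    // ... rest of the fields ...
)

trait exchangeParameters {
  // currency maps, multipliers and helpers stay here; Scrip is no longer nested
}

Moving the class out means the encoder no longer needs access to the scope of an enclosing exchangeParameters instance, which is exactly what the error says it is missing.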