我有一个小代码片段,它将从spark驱动程序参数中读取数据,因此将循环并处理其逻辑。寻求你的帮助。 例如,火花驱动程序的输入是2012-01-01和6.下面是代码片段。在代码中,iDate和iMonths是上面的火花驱动器中的输入。
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.cassandra
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType, TimestampType}
import org.apache.spark.sql._
import org.apache.spark.sql.Row
import _root_.filodb.spark._
import _root_.filodb.core.DatasetRef
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql._
import _root_.filodb.core.metadata.RichProjection
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions.Window
import Array._
import java.util.Calendar
import java.text.SimpleDateFormat
import scala.collection.mutable.ListBuffer
import hc.implicits._
val args = sc.getConf.get("spark.driver.args").split("\\s+")
val iDate = args(0) + " 00:00:00"
val iMonths = args(1).toInt
val vDateFormatDate = new SimpleDateFormat("yyyy-MM-dd")
val vDateFormatYearMonth = new SimpleDateFormat("yyyyMM")
val vDateFormatDateTime = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss aa")
val vDateFrom = Calendar.getInstance()
val vDateFrom1 = Calendar.getInstance()
val vETLInsertDate = vDateFormatDateTime.format(Calendar.getInstance().getTime())
vDateFrom1.add(Calendar.MONTH,-1)
val vMonthCheck = vDateFormatYearMonth.format(vDateFrom.getTime())+","+vDateFormatYearMonth.format(vDateFrom1.getTime())
try
{
val vBillMaxDate = if (iDate == "0001-01-01 00:00:00") {
hc.read.format("filodb.spark").option("database","*****").option("dataset","*****").load().filter(s"""yr_mon in ( ${vMonthCheck} )""").agg(max("*****").alias("max_date")).map { case Row (value) => value.asInstanceOf[java.sql.Timestamp]; }.first()
} else {
new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(iDate)
}
val vMaxDate = vDateFormatDate.format(vBillMaxDate)
val vMaxYearMonth = vDateFormatYearMonth.format(vBillMaxDate).toString
vDateFrom.setTime(vDateFormatYearMonth.parse(vMaxYearMonth))
var vYearMonList = new ListBuffer[String]()
var vCount = 0
for (i <- 1 to iMonths)
{
val vInputYearMonth = vDateFormatYearMonth.format( vDateFrom.getTime() ).toString
println("Starting aggregation for : " + vInputYearMonth)
vYearMonList += vInputYearMonth
vDateFrom.add(Calendar.MONTH,-1)
vCount +=1
}
}catch{
case exp: Exception=>{
println("METADATA~error:"+ exp)
}
}finally{
FiloDriver.shutdown()
sc.stop()
sys.exit(0)
}
获得以下错误:
vMonthCheck: String = 201801,201712
Starting aggregation for : 201201
Starting aggregation for : 201112
Starting aggregation for : 201111
Starting aggregation for : 201110
Starting aggregation for : 201109
Starting aggregation for : 201108
java.util.NoSuchElementException: None.get
at scala.None$.get(Option.scala:313)
at scala.None$.get(Option.scala:311)
at filodb.spark.FiloSetup$class.config(FiloSetup.scala:31)
at filodb.spark.FiloDriver$.config(FiloSetup.scala:64)
at filodb.spark.FiloSetup$class.columnStore(FiloSetup.scala:42)
at filodb.spark.FiloDriver$.columnStore$lzycompute(FiloSetup.scala:64)
at filodb.spark.FiloDriver$.columnStore(FiloSetup.scala:64)
at filodb.spark.FiloDriver$.columnStore(FiloSetup.scala:64)
at filodb.coordinator.CoordinatorSetup$class.shutdown(CoordinatorSetup.scala:83)
at filodb.spark.FiloDriver$.shutdown(FiloSetup.scala:64)
更新了vMonthCheck变量的代码以处理该方案:
val vMonthCheck = if (iDate == "0001-01-01 00:00:00") {vDateFormatYearMonth.format(vDateFrom.getTime())+","+vDateFormatYearMonth.format(vDateFrom1.getTime())} else {iDate.substring(0,4).concat(iDate.substring(5,7))}