I am trying to read a file in which each record is a "Temperature" object. I have created a case class:
import com.fasterxml.jackson.annotation.JsonProperty
case class Temparature(@JsonProperty YEAR: String,
                       @JsonProperty MONTH: String,
                       @JsonProperty DAY: String,
                       @JsonProperty MAX_TEMP: String,
                       @JsonProperty MIN_TEMP: String)
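As far as I understand, Spark's Dataset encoders are derived from the case class constructor by reflection and do not consult the Jackson annotations, so the class could presumably be reduced to a plain case class (this simplification is my own assumption, not something I have verified):

// Assumption: @JsonProperty is ignored by Spark's Encoders, which only look at
// the constructor fields, so this plain version should behave identically for .as[Temparature]
case class Temparature(YEAR: String,
                       MONTH: String,
                       DAY: String,
                       MAX_TEMP: String,
                       MIN_TEMP: String)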
Now I am trying to read the file and store its contents in an RDD as Temparature objects:
import examples.partnerModels.Temparature
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import com.databricks.spark.avro._
class RampGen extends IPartnerModelGen
{
override def getHeaderFields():List[String] =
{
Ramp.apply().getHeaderFields()
}
override def generateMatchFiles(sc: SparkContext, sqlContext: SparkSession, intPeriodId: Integer, inputDir: String, outputDir: String, partnerName: String, delimeter: String) =
{
println("input dir : " + inputDir)
println("output dir : " + outputDir)
val allFilteredDataRecords = sqlContext.read.csv( inputDir ).as[ Temparature ].rdd
allFilteredDataRecords.foreach{println}
}
}
object RampGen {
def create: IPartnerModelGen = new RampGen()
}
I invoke this method via RampGen.create.generateMatchFiles:
package examples
import examples.PartnerModelGenerator.RampGen
import examples.util.ReferenceFileUtil
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
object MatchTableGenerator {
def main( args: Array[ String ] ): Unit =
{
val periodId = args( 0 )
val outputDir = args( 1 )
val inputDir = args( 2 )
val partnerName = args( 3 )
val delimeter = args( 4 )
val islocalServer = args.length == 6
val intPeriodId = periodId.toInt
val sc =
if (islocalServer)
{
println("String context in local mode..................")
System.setProperty("hadoop.home.dir", "C:\\Hadoop")
new SparkContext(new SparkConf().setMaster("local[2]").setAppName("Create Table Files for " + partnerName)) // Windows
}
else
{
println("String context in server mode..................")
new SparkContext( new SparkConf().setAppName( "Create Table Files for " + partnerName ) ) // Unix
}
val sqlContext = SparkSession.builder().getOrCreate()
partnerName.toUpperCase() match
{
case "RAMP" => RampGen.create.generateMatchFiles( sc, sqlContext, intPeriodId, inputDir, outputDir, partnerName, delimeter )
}
}
}
I am getting this error:
error: Unable to find encoder for type stored in a Dataset. Primitive types (Int, String, etc) and Product types (case classes) are supported by importing spark.implicits._ Support for serializing other types will be added in future releases.
[ERROR] val allFilteredDataRecords = sqlContext.read.csv( inputDir ).as[ Temparature ].rdd
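The message itself points at importing spark.implicits._, which is what brings the implicit Encoder for case classes into scope. A minimal sketch of the fix as I understand it (the option("header", "true") call is my assumption: .as[Temparature] matches columns by name, so the CSV header row has to supply YEAR, MONTH, DAY, MAX_TEMP and MIN_TEMP):

// Sketch of the fix: the implicits of the SparkSession parameter provide the
// Encoder[Temparature] that .as[] needs
import sqlContext.implicits._

// Assumption: read the header row so that columns are named after the case
// class fields instead of the default _c0, _c1, ...
val allFilteredDataRecords = sqlContext.read
  .option("header", "true")
  .csv(inputDir)
  .as[Temparature]
  .rdd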
For reference, here is the trait:
package examples.PartnerModelGenerator
import examples.partnerModels.DataRecord
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
trait IPartnerModelGen extends Serializable
{
def getHeaderFields():List[String]
def generateMatchFiles(sc: SparkContext, sqlContext: SparkSession, intPeriodId: Integer, inputDir: String, outputDir: String, partnerName:String,delimeter:String)
}
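If the encoder route turns out not to work here, my fallback idea (an untested sketch; the schema field names are simply copied from the case class) would be to read with an explicit schema, which the StructType imports in RampGen already cover, and map each Row to a Temparature by hand:

// Untested fallback sketch: an explicit schema plus a manual Row -> case class
// mapping, which avoids the need for an Encoder entirely
val schema = StructType(Seq(
  StructField("YEAR", StringType),
  StructField("MONTH", StringType),
  StructField("DAY", StringType),
  StructField("MAX_TEMP", StringType),
  StructField("MIN_TEMP", StringType)))

val records: RDD[Temparature] = sqlContext.read
  .schema(schema)
  .csv(inputDir)
  .rdd
  .map(row => Temparature(row.getString(0), row.getString(1), row.getString(2),
                          row.getString(3), row.getString(4)))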
Can anyone help me with this?