Scala / Spark error: Unable to find encoder for type stored in a Dataset

Asked: 2018-08-22 21:05:37

Tags: scala apache-spark

I am trying to read a file in which every record is a "Temperature" object. I have created a case class:

import com.fasterxml.jackson.annotation.JsonProperty
case class Temparature(@JsonProperty YEAR: String,
                       @JsonProperty MONTH: String,
                       @JsonProperty DAY: String,
                       @JsonProperty MAX_TEMP: String,
                       @JsonProperty MIN_TEMP: String)
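
For context: Spark derives an Encoder for any case class, since case classes are Product types, but that derivation has to be in implicit scope at the .as[...] call site. As an illustration (not part of the original code), the same encoder can also be materialized explicitly:

import org.apache.spark.sql.{Encoder, Encoders}
import examples.partnerModels.Temparature

// Explicitly derive the encoder that `import spark.implicits._` would
// otherwise supply implicitly; this works because Temparature is a
// top-level case class (a Product type).
val tempEncoder: Encoder[Temparature] = Encoders.product[Temparature]
println(tempEncoder.schema) // five StringType fields: YEAR, MONTH, DAY, MAX_TEMP, MIN_TEMP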

Now I am trying to read the file and store the records in an RDD as Temparature objects:

import examples.partnerModels.Temparature
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import com.databricks.spark.avro._

class RampGen extends IPartnerModelGen
{
  override def getHeaderFields():List[String] =
  {
    Ramp.apply().getHeaderFields()
  }

  override def generateMatchFiles(sc: SparkContext, sqlContext: SparkSession, intPeriodId: Integer, inputDir: String, outputDir: String, partnerName:String,delimeter:String) =
  {

    println("input dir : " + inputDir)
    println("output dir : " + outputDir)
    val allFilteredDataRecords = sqlContext.read.csv( inputDir ).as[ Temparature ].rdd

    allFilteredDataRecords.foreach{println}

  }

}

object RampGen {
  def create: IPartnerModelGen = new RampGen()
}

I am calling this method as RampGen.create.generateMatchFiles:

package examples

import examples.PartnerModelGenerator.RampGen
import examples.util.ReferenceFileUtil
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}


object MatchTableGenerator {

  def main( args: Array[ String ] ): Unit =
  {

    val periodId = args( 0 )
    val outputDir = args( 1 )
    val inputDir = args( 2 )
    val partnerName = args( 3 )
    val delimeter = args( 4 )
    val islocalServer = args.length == 6

    val intPeriodId = periodId.toInt

    val sc =
      if (islocalServer)
      {
        println("Starting context in local mode..................")
        System.setProperty("hadoop.home.dir", "C:\\Hadoop")
        new SparkContext(new SparkConf().setMaster("local[2]").setAppName("Create Table Files for " + partnerName)) // Windows
      }
      else
      {
        println("Starting context in server mode..................")
        new SparkContext( new SparkConf().setAppName( "Create Table Files for " + partnerName ) ) // Unix
      }

    val sqlContext = SparkSession.builder().getOrCreate()


    partnerName.toUpperCase() match
    {
      case "RAMP" => RampGen.create.generateMatchFiles( sc, sqlContext, intPeriodId, inputDir, outputDir, partnerName, delimeter )
    }


  }

}
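
(A side note, unrelated to the error: in Spark 2.x the SparkContext and the SparkSession do not have to be built separately. A single builder call yields both handles, because getOrCreate() reuses any already-active context; a minimal sketch of the equivalent setup:)

import org.apache.spark.sql.SparkSession

// One builder call covers both handles; getOrCreate() reuses an
// already-running SparkContext if one exists.
val spark = SparkSession.builder()
  .appName("Create Table Files for " + partnerName) // partnerName as parsed from args above
  .getOrCreate()
val sc = spark.sparkContext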

I am getting this error:

error: Unable to find encoder for type stored in a Dataset.  Primitive types (Int, String, etc) and Product types (case classes) are supported by importing spark.implicits._  Support for serializing other types will be added in future releases.

[ERROR]     val allFilteredDataRecords = sqlContext.read.csv( inputDir ).as[ Temparature ].rdd
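
As the message itself hints, the likely fix is to bring the session's implicit encoders into scope before calling .as[Temparature]. A minimal sketch, using a hypothetical helper readTemperatures and assuming the CSV files carry a header row whose column names match the case-class fields (without a header Spark names the columns _c0, _c1, ..., and the cast fails at runtime instead):

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import examples.partnerModels.Temparature

def readTemperatures(sqlContext: SparkSession, inputDir: String): RDD[Temparature] = {
  // The import is on the SparkSession *instance*; it brings the implicit
  // Encoder derivation for case classes (Product types) into scope.
  import sqlContext.implicits._
  sqlContext.read
    .option("header", "true") // map CSV columns to case-class fields by name
    .csv(inputDir)
    .as[Temparature]
    .rdd
}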

For reference:

package examples.PartnerModelGenerator

import examples.partnerModels.DataRecord
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}

trait IPartnerModelGen extends Serializable
{
  def getHeaderFields():List[String]
  def generateMatchFiles(sc: SparkContext, sqlContext: SparkSession, intPeriodId: Integer, inputDir: String, outputDir: String, partnerName:String,delimeter:String)
}

Can I get any help here?

0 Answers