Getting "object not serializable" error (class: org.apache.kafka.clients.consumer.ConsumerRecord, value: ConsumerRecord(topic =

Date: 2018-10-17 02:11:15

Tags: scala apache-spark apache-kafka spark-streaming kafka-consumer-api

I am getting this error:

object not serializable (class: org.apache.kafka.clients.consumer.ConsumerRecord, value: ConsumerRecord(topic = b24_tx_financial_formatted_clean, partition = 0, offset = 200, CreateTime = 1538416446690, checksum = 3419013820, serialized key size = -1, serialized value size = 113, key = null, value = [B@4e41c53e)) - element of array (index: 0).

This consumer code reads from a producer whose message keys are Strings and whose values are Avro records serialized by Java-based code. Even createDirectStream does not seem to work properly; no data comes through. At some point I have also seen an ArrayIndexOutOfBounds error. The exact same code works fine for a friend of mine. I am using kafka010 because I need the SSL protocol, so I cannot use an older Kafka version. I have tried every deserializer option shown in the comments below, but nothing worked, and the stream variable produces no output either.
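
From the error text, the problem appears to be that Spark is trying to Java-serialize the ConsumerRecord objects themselves: print() pulls elements back to the driver, which requires serializing them, and ConsumerRecord is not serializable. A minimal sketch of the two workarounds I have seen suggested, assuming the stream and sparkConf from the code below:

// Sketch 1: map each ConsumerRecord to its serializable parts *before* any
// output action, so Spark never has to serialize the record itself.
val keyValues = stream.map(record => (record.key, record.value))
keyValues.print()

// Sketch 2: switch to Kryo and register ConsumerRecord with it.
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
sparkConf.registerKryoClasses(Array(classOf[org.apache.kafka.clients.consumer.ConsumerRecord[_, _]]))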

KafkaConsumerProperties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumerProperties.put("value.deserializer", "io.confluent.kafka.serializers.KafkaAvroDeserializer");
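
As far as I understand (this is an assumption on my part), switching to KafkaAvroDeserializer alone would not avoid the serialization error: it hands back GenericRecord values, and GenericRecord is not java.io.Serializable either, so the value still has to be converted to something serializable on the executors before any output action:

// Hedged sketch, assuming value.deserializer = KafkaAvroDeserializer, so that
// record.value arrives as an Avro GenericRecord instead of Array[Byte]:
val jsonStream = stream.map(record => record.value.toString) // GenericRecord.toString renders JSON
jsonStream.print()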

Here is my code:

import org.json4s.jackson.JsonMethods._
import org.json4s.{DefaultFormats, JField, JObject, JString, JValue}
import java.text.SimpleDateFormat
import java.util.{Calendar, Date}
import io.confluent.kafka.schemaregistry.client.rest.RestService

import org.apache.log4j.{Level, LogManager, Logger}
import com.databricks.spark.avro._
//import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.SparkConf
import org.apache.avro.Schema
import org.apache.avro.io._
import org.apache.spark.streaming._
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericRecord}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.codehaus.jettison.json.JSONObject
//import org.json.simple.JSONObject

object ScalaJsonTest extends java.io.Serializable {

  val runNum = 789
  val hdfsPath = s"HDFS PATH put here"

//  val schemaStr =
//    """{
//  "type": "record",
//  "name": "b24_ptlf_financial_formatted",
//  "fields": [
//  {
//    "name": "AuthorizationTransactionSource",
//    "type": "string"
//  },
//  {
//    "name": "AuthorizationTransactionSourceID",
//    "type": "string"
//  },
//  {
//    "name": "TransactionTimestamp",
//    "type": {
//      "type": "long",
//      "logicalType": "timestamp-millis"
//    }
//  },
//  {
//    "name": "TerminalDate",
//    "type": {
//      "type": "int",
//      "logicalType": "date"
//    }
//  },
//  {
//    "name": "TerminalTime",
//    "type": {
//      "type": "int",
//      "logicalType": "time-millis"
//    }
//  },
//  {
//    "name": "MonerisCustomerNumber",
//    "type": "string"
//  },
//  {
//    "name": "DeviceNumber",
//    "type": [
//    "null",
//    "string"
//    ],
//    "default": null
//  },
//  {
//    "name": "PrimaryAccountNumberPrefix",
//    "type": "string"
//  },
//  {
//    "name": "PrimaryAccountNumberSuffix",
//    "type": "string"
//  },
//  {
//    "name": "TransactionApprovedFlag",
//    "type": "boolean"
//  },
//  {
//    "name": "TransactionAmount",
//    "type": "long"
//  },
//  {
//    "name": "TransactionCurrency",
//    "type": [
//    "null",
//    "string"
//    ],
//    "default": null
//  },
//  {
//    "name": "MerchantSettlementAmount",
//    "type": [
//    "null",
//    "long"
//    ],
//    "default": null
//  },
//  {
//    "name": "MerchantSettlementCurrency",
//    "type": [
//    "null",
//    "string"
//    ],
//    "default": null
//  },
//  {
//    "name": "TransactionTypeCode",
//    "type": {
//      "type": "enum",
//      "name": "TransactionTypeCode",
//      "symbols": [
//      "PURC",
//      "AUTH",
//      "COMP",
//      "RFND"
//      ]
//    }
//  },
//  {
//    "name": "PointOfSaleEntryMode",
//    "type": [
//    "null",
//    "string"
//    ],
//    "default": null
//  },
//  {
//    "name": "ElectronicCommerceIndicator",
//    "type": [
//    "null",
//    "string"
//    ],
//    "default": null
//  },
//  {
//    "name": "InternalCardFingerprint",
//    "type": "string"
//  }
//  ]
//}""""
val schemaRegistryURL = "http://vtorppsdv01.corp.moneris.com:8081"
  val topics = "b24_tx_financial_formatted_clean"
  val subjectValueName = topics + "-value"

  val restService = new RestService(schemaRegistryURL)

  val valueRestResponseSchema = restService.getLatestVersion(subjectValueName)

  //Use Avro parsing classes to get Avro Schema
  val parser = new Schema.Parser
  val topicValueAvroSchema: Schema = parser.parse(valueRestResponseSchema.getSchema)
 // val schema = new Schema.Parser().parse(schemaStr)



  def main(args: Array[String]) {
     {
      Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
      val log = LogManager.getRootLogger
      log.setLevel(Level.WARN)
      val props = Map[String, Object](
        "bootstrap.servers" -> "<redacted>",   // value removed from the original post
        "security.protocol" -> "SSL",
        "schema.registry.url" -> "<redacted>", // value removed from the original post
        "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
//      "value.deserializer" -> "io.confluent.kafka.serializers.KafkaAvroDeserializer",
        "value.deserializer" -> "org.apache.kafka.common.serialization.ByteArrayDeserializer",
//      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
//      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
        "enable.auto.commit" -> "false",
        "auto.offset.reset" -> "earliest",
        "group.id" -> "b24_ptlf_eim_processing",
        "ssl.keystore.location" -> "C:\\Users\\pawan.likhi\\Desktop\\spark code\\old keys\\SimpleKafkaConsumer\\kafka-eim-dev.jks",
        "ssl.keystore.password" -> "<redacted>", // value removed from the original post
        "ssl.key.password" -> "<redacted>",      // value removed from the original post
        "ssl.truststore.location" -> "C:\\Users\\pawan.likhi\\Desktop\\spark code\\old keys\\SimpleKafkaConsumer\\cpbp-ca-dev.jks",
        "ssl.truststore.password" -> "iB>3v$6m@9",
        "ssl.keystore.type" -> "JCEKS",
        "ssl.truststore.type" -> "JCEKS"
      )

      val sparkConf = new SparkConf().setAppName("KafkaConsumer").setMaster("local[*]")
      val sc = SparkSession.builder().config(sparkConf).getOrCreate
      val ssc = new StreamingContext(sc.sparkContext, Seconds(10))
      val subscribeStrategy = ConsumerStrategies.Subscribe[String, Array[Byte]](Array("b24_tx_financial_formatted_clean"), props)
      val stream = KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent, subscribeStrategy)
      print(topicValueAvroSchema)
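      // NOTE: printing the raw stream forces Spark to serialize the ConsumerRecord
      // objects to bring a sample of them to the driver; ConsumerRecord is not
      // serializable, so this call is the likely trigger of the error quoted above.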
      stream.print()
      val result = stream.map(record => {
        val outVal = parseRecord(record.value)
        /*if(outVal != null) {

            outVal
          }*/

        outVal
      })

      result.print()

//      result.foreachRDD((rdd, batchtime) => {
//        val outPath = hdfsPath + "/" + batchtime.milliseconds
//        rdd.saveAsTextFile(outPath)
//        val df = sc.createDataFrame(rdd, getSchema())
//       df.write.mode(SaveMode.Append).format("csv").save(outPath)
//      })
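//      NOTE (if the block above is re-enabled): rdd is an RDD[String] here, but
//      createDataFrame with a StructType expects an RDD[Row], and getSchema()
//      does not match the method actually defined below, getSchema1().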

      ssc.start()

      Thread.sleep(60 * 1000)

      //    ssc.awaitTermination()
      ssc.stop()
      sc.stop()
      sc.close()
    }

  }


  def parseRecord(message: Array[Byte]): String = {
    // Decode the raw Avro bytes against the schema fetched from the registry
    val reader = new GenericDatumReader[GenericData.Record](topicValueAvroSchema)
    val binaryDecoder = DecoderFactory.get.binaryDecoder(message, null)
    reader.read(null, binaryDecoder).toString
  }

  def getSchema1(): StructType = {
    StructType(List(
      StructField("table", StringType, true),
      StructField("Field1", StringType, true),
      StructField("Field2", StringType, true),
      StructField("Field3", StringType, true),
      StructField("Field4", StringType, true),
      StructField("Field5", StringType, true),
      StructField("Field6", StringType, true),
      StructField("Field7", StringType, true),
      StructField("Field8", StringType, true),
      StructField("Field9", StringType, true),
      StructField("Field10", StringType, true),
      StructField("Field11", StringType, true),
      StructField("Field12", StringType, true),
      StructField("Field13", StringType, true),
      StructField("Field14", StringType, true),
      StructField("Field15", StringType, true),
      StructField("Field16", StringType, true),
      StructField("Field17", StringType, true),
      StructField("Field18", StringType, true)
    ))
  }

}
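
One more possibility, stated as an assumption rather than something confirmed here: if the producer used Confluent's KafkaAvroSerializer, every message starts with a 5-byte wire-format header (one magic byte plus a 4-byte schema ID) ahead of the Avro payload. Feeding such bytes straight into a GenericDatumReader, as parseRecord does, tends to fail partway through the payload, which could also explain the ArrayIndexOutOfBounds errors mentioned above. A sketch of a header-aware variant, reusing topicValueAvroSchema from the object above:

import java.nio.ByteBuffer

// Hypothetical variant of parseRecord that strips the Confluent wire-format
// header before handing the remaining bytes to Avro.
def parseConfluentRecord(message: Array[Byte]): String = {
  val buffer = ByteBuffer.wrap(message)
  require(buffer.get() == 0, "not Confluent wire format (missing magic byte)")
  val schemaId = buffer.getInt() // writer schema ID in the registry; unused in this sketch
  val payload = new Array[Byte](buffer.remaining())
  buffer.get(payload)
  val reader = new GenericDatumReader[GenericData.Record](topicValueAvroSchema)
  reader.read(null, DecoderFactory.get.binaryDecoder(payload, null)).toString
}

If the subject has more than one registered schema version, the writer schema should really be looked up by schemaId; the sketch assumes the latest registered schema matches the payload.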

0 Answers:

No answers yet.