This works when I use Scala 2.11, but after downgrading to Scala 2.10 it no longer works, even though I use the json4s build for Scala 2.10.
I tried adding the native formats but could not get it to work. Can someone give me some pointers on how to fix this?
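For reference, by "native formats" I mean the json4s-native module; in build.sbt that is roughly the snippet below (the version number here is only an assumption, not necessarily the one my project uses):

scalaVersion := "2.10.6"

libraryDependencies ++= Seq(
  // json4s-native cross-built for Scala 2.10; the version is an assumption
  "org.json4s" %% "json4s-native" % "3.2.11"
)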
package com.golfbreaks.spark.streaming
import java.text.SimpleDateFormat
import com.golfbreaks.quote.Quote
import com.google.gson.{Gson, GsonBuilder}
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.json4s._
import org.json4s.native.JsonParser
object Test {
  def main(args: Array[String]): Unit = {
    implicit val formats = DefaultFormats
    //DefaultFormats
    val zkQuorum = "quickstart.cloudera"
    val topics = "sf_quotes"
    val group = "group1"
    val numThreads = 2
    val tableName: TableName = TableName.valueOf("sf_quotes")
    //val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("GolfBreaksStreamingQuotes")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    val gson: Gson = new GsonBuilder().serializeNulls().create()
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_ONLY)
      .filter(_._2 != null)
      .map {
        //case (_, json) => Quote.parseQuote(json)
        //case (_, json) => json
        //read[Quote](json)
        case (_, json) => JsonParser.parse(json).extract[Quote]
      }
    //.reduceByKeyAndWindow(_ + _, _ - _, Minutes(1), Seconds(2), 2)
    //lines.foreachRDD( rdd => rdd.foreach(println))
    //println(json);
    /*val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "instance-26765.bigstep.io,instance-26766.bigstep.io,instance-26767.bigstep.io")
    conf.set("hbase.master", "instance-26765.bigstep.io:60000")
    conf.setInt("timeout", 120000)*/
    //lines.print()

    lines.transform(rdd => {
      val hbaseTableName = "sf_quotes"
      // Creates the HBase confs
      val hconf = HBaseConfiguration.create()
      hconf.set("hbase.zookeeper.quorum", "quickstart.cloudera")
      //hconf.set("hbase.zookeeper.quorum", "instance-26765.bigstep.io,instance-26766.bigstep.io,instance-26767.bigstep.io,instance-26768.bigstep.io")
      hconf.set("hbase.zookeeper.property.clientPort", "2181")
      hconf.set("hbase.defaults.for.version.skip", "true")
      val job = Job.getInstance(hconf)
      val jobConf = job.getConfiguration
      jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName.getNameAsString)
      job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
      //hconf.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName)
      //hconf.setClass("mapreduce.job.outputformat.class", classOf[TableOutputFormat[String]], classOf[OutputFormat[String, Mutation]])
      /*rdd.map {
        jsonStr => gson.fromJson(jsonStr, classOf[Quote])
      }*/
      rdd.map(quote => (new ImmutableBytesWritable, {
        val quoteColumnFamily = Bytes.toBytes("quote")
        val putRecord = new Put(Bytes.toBytes(quote.Id))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Id"), Bytes.toBytes(quote.Id))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Name"), Bytes.toBytes(quote.Name))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Opportunity__c"), Bytes.toBytes(quote.Opportunity__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Account__c"), Bytes.toBytes(quote.Account__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalesChannel__c"), Bytes.toBytes(quote.SalesChannel__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("PrimaryVenue__c"), Bytes.toBytes(quote.PrimaryVenue__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalesRegion__c"), Bytes.toBytes(quote.SalesRegion__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalePriceGross__c"), Bytes.toBytes(quote.SalePriceGross__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("CostPriceGross__c"), Bytes.toBytes(quote.CostPriceGross__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("GrossProfit__c"), Bytes.toBytes(quote.GrossProfit__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("GrossProfitPercent__c"), Bytes.toBytes(quote.GrossProfitPercent__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("NumberOfGuest__c"), Bytes.toBytes(quote.NumberOfGuest__c))
        putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Gross_profit_per_person__c"), Bytes.toBytes(quote.Gross_profit_per_person__c))
        putRecord
      })).saveAsNewAPIHadoopDataset(jobConf)
      rdd
    }).print()

    println("**************************************************************")
    ssc.start()
    println("start Streaming")
    ssc.awaitTermination()
  }
}
package com.golfbreaks.quote
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.Row
/**
* Created by rapolu on 07/03/2017.
*/
case class Quote(Id: String,
                 Name: String = "",
                 Opportunity__c: String = "",
                 Account__c: String = "",
                 SalesChannel__c: String = "",
                 PrimaryVenue__c: String = "",
                 SalesRegion__c: String = "",
                 SalePriceGross__c: String = "0.0",
                 CostPriceGross__c: String = "0.0",
                 GrossProfit__c: String = "0.0",
                 GrossProfitPercent__c: String = "0.0",
                 NumberOfGuest__c: String = "0",
                 Gross_profit_per_person__c: String = "0.0") extends Serializable

object Quote extends Serializable {

  def parseQuote(str: String): Unit = {
    def apply(r: Row): Quote =
      Quote(r.getString(0), r.getString(1), r.getString(2), r.getString(3), r.getString(4), r.getString(5), r.getString(6),
        r.getString(7), r.getString(8), r.getString(9), r.getString(10), r.getString(11), r.getString(12)
        //, r.getString(13),r.getString(13)
      )
  }

  def convertToPut(quote: Quote): (ImmutableBytesWritable, Put) = {
    val quoteColumnFamily = Bytes.toBytes("quote")
    val putRecord = new Put(Bytes.toBytes(quote.Id))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Id"), Bytes.toBytes(quote.Id))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Name"), Bytes.toBytes(quote.Name))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Opportunity__c"), Bytes.toBytes(quote.Opportunity__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Account__c"), Bytes.toBytes(quote.Account__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalesChannel__c"), Bytes.toBytes(quote.SalesChannel__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("PrimaryVenue__c"), Bytes.toBytes(quote.PrimaryVenue__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalesRegion__c"), Bytes.toBytes(quote.SalesRegion__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("SalePriceGross__c"), Bytes.toBytes(quote.SalePriceGross__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("CostPriceGross__c"), Bytes.toBytes(quote.CostPriceGross__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("GrossProfit__c"), Bytes.toBytes(quote.GrossProfit__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("GrossProfitPercent__c"), Bytes.toBytes(quote.GrossProfitPercent__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("NumberOfGuest__c"), Bytes.toBytes(quote.NumberOfGuest__c))
    putRecord.addColumn(quoteColumnFamily, Bytes.toBytes("Gross_profit_per_person__c"), Bytes.toBytes(quote.Gross_profit_per_person__c))
    //addPutToList(putRecord)
    (new ImmutableBytesWritable(Bytes.toBytes(quote.Id)), putRecord)
  }

  //, @JsonProperty("CreatedDate") CreateDate: String, @JsonProperty("LastModifiedDate") LastModifiedDate: String) extends Quote
}
Exception in thread "main" org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2067)
at org.apache.spark.streaming.dstream.DStream$$anonfun$map$1.apply(DStream.scala:558)
at org.apache.spark.streaming.dstream.DStream$$anonfun$map$1.apply(DStream.scala:558)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:726)
at org.apache.spark.streaming.StreamingContext.withScope(StreamingContext.scala:260)
at org.apache.spark.streaming.dstream.DStream.map(DStream.scala:557)
at com.golfbreaks.spark.streaming.Test$.main(Test.scala:48)
at com.golfbreaks.spark.streaming.Test.main(Test.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
Caused by: java.io.NotSerializableException: org.json4s.DefaultFormats$$anon$4
Serialization stack:
- object not serializable (class: org.json4s.DefaultFormats$$anon$4, value: org.json4s.DefaultFormats$$anon$4@3b954661)
- field (class: com.golfbreaks.spark.streaming.Test$$anon$1, name: dateFormat, type: interface org.json4s.DateFormat)
- object (class com.golfbreaks.spark.streaming.Test$$anon$1, com.golfbreaks.spark.streaming.Test$$anon$1@3e8de7fd)
- field (class: com.golfbreaks.spark.streaming.Test$$anonfun$3, name: formats$1, type: interface org.json4s.DefaultFormats)
- object (class com.golfbreaks.spark.streaming.Test$$anonfun$3, <function1>)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301)
... 17 more
Answer 0 (score: 0)
Following the serialization stack, it looks like the problem is that formats cannot be serialized. You can pass it into the closure explicitly, or define it as an implicit val inside the closure, instead of declaring it implicitly at the beginning of main.
I am not sure which method requests the implicit parameter; I will assume it is extract[T]. If that is the case, you could try something like the following:
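A minimal sketch of that idea, reusing the identifiers from the question (ssc, zkQuorum, group, topicMap, Quote) and only moving where the Formats value is declared (the implicit val formats = DefaultFormats at the top of main would then be removed):

val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_ONLY)
  .filter(_._2 != null)
  .map { case (_, json) =>
    // Declared inside the closure, so each task builds its own Formats
    // instead of capturing the non-serializable one from main.
    implicit val formats: Formats = DefaultFormats
    JsonParser.parse(json).extract[Quote]
  }

The stack trace shows that the anonymous DateFormat held by DefaultFormats (org.json4s.DefaultFormats$$anon$4) is what fails to serialize once it is captured by the map closure; declaring the Formats inside the closure keeps it out of the serialized task.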