我正在使用GSON
转换RDD[String, String]
,其中值(第二个字符串)可以是包含Maps
和Lists
的案例类。当我使用Gson.toJson
时,它会像这样给空jsons:
{"itemIds":{},"itemsNames":{}}
但我想:
{"itemIds":{"ABC" : 3,"123" : 33 },"itemsNames":{"ABC" : 3, "A" : 4}}
我试过GSON:
val gson: Gson = new GsonBuilder().serializeNulls().create()
def toJson(value: Any): String = {
gson.toJson(value)
}
def getPersonRDD(result: DataFrame): RDD[(String, String)] = {
val finalValue = result.rdd.map({
r =>
val customerId = r.getAs[String](CUSTOMER_ID)
val itemId = r.getAs[Map[String, Int]](ITEM_ID)
val itemName = r.getAs[Map[String, Int]](ITEM_NAME)
val person = Person(itemId, itemName)
val jsonString = toJson(person)
(customerId, jsonString)
})
return finalValue
}
case class Person(itemId: Map[String, Int], itemNames: Map[String, Int]) extends Serializable
这给了我:
{"itemIds":{},"itemsNames":{}}
尝试了playJson,但收到了错误:
trait JsonParser {
def toJsonString(scenario: Person): String
}
@SerialVersionUID(114L)
class JsonParserImpl() extends JsonParser with Serializable {
implicit val implicitPersonsWrites = new Writes[Person] {
def writes(person: Person): JsValue = {
Json.obj(
"itemId" -> person.itemId,
"itemNames" -> person.itemNames
)
}
}
def toJsonString(testDocument: Person): String = Json.toJson(testDocument).toString
}
def getPersonRDD(result: DataFrame): RDD[(String, String)] = {
val jsonParserImpl = new JsonParserImpl
val finalValue = result.rdd.map({
r =>
val customerId = r.getAs[String](CUSTOMER_ID)
val itemId = r.getAs[Map[String, Int]](ITEM_ID)
val itemName = r.getAs[Map[String, Int]](ITEM_NAME)
val person = Person(itemId, itemName)
val jsonString = jsonParserImpl.toJsonString(person)
(customerId, jsonString)
})
return finalValue
}
这给了我:
org.apache.spark.SparkException: Task not serializable
[junit] java.lang.RuntimeException: org.apache.spark.SparkException: Task not serializable
[junit] at
[
有人能告诉我这里做错了什么吗?我是scala / spark的新手。请告诉我如何实现这一目标。