(这个问题已有很多重复的提问;我尝试了其中所有的解决方案,但都没有解决我的问题,所以才提出一个新问题。)
我正在尝试从数据库中获取记录,并基于获取到的值使用 curl 命令调用一个 URL,然后将返回的输出再次保存到数据库。当我在 Spark 中执行同样的操作时,遇到了以下错误:
Exception in thread "main" org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2287)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:925)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:924)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:924)
at com.inndata.services.ipa_th$.main(ipa_th.scala:83)
at com.inndata.services.ipa_th.main(ipa_th.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:755)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
这是我的例子:
// Driver-side enrichment loop: for every application number read from Hive,
// POST a search request with curl, parse the JSON response with Spark, and
// append the extracted transaction events to `initialDF1`.
//
// The rows are collected first, so everything below runs on the driver.
// NOTE(review): `context` / `sqlCotext` are used inside the loop body; moving
// this body into an RDD closure (e.g. `foreachPartition`) would presumably
// require serializing them — confirm against the "Task not serializable" trace.
val list = hiveContext.sql("select application_number from t").collect()

/** Renders a column value the way a single-column `Row.toString` would
  * (`[value]`) and then strips the bracket / `WrappedArray(...)` decoration.
  *
  * Brackets are replaced by a space (so the result is space-padded), while
  * `WrappedArray(` and `)` are removed outright.
  */
def cleanField(value: Any): String =
  s"[$value]"
    .replace("[", " ")
    .replace("]", " ")
    .replace("WrappedArray(", "")
    .replace(")", "")

// `collect()` yields Rows, not Ints: read the column from the Row instead of
// casting the Row itself (which throws ClassCastException at runtime).
list.foreach { row =>
  print(s"************************** + $row")

  // Skip rows whose application number is NULL.
  Option(row.get(0)).map(_.toString).foreach { applicationNumber =>
    // Search for the application number of the current row rather than a
    // fixed literal, so each iteration issues its own query.
    val payload = s"""{"searchText":"$applicationNumber","qf":"applId"}"""
    val cmd = Seq(
      "curl", "-X", "POST", "--insecure",
      "--header", "Content-Type: application/json",
      "--header", "Accept: application/json",
      "-d", payload,
      "https:link"
    )
    // `!!` blocks until curl exits and throws on a non-zero exit code.
    val response = cmd.!!

    val rdd = context.parallelize(Seq(response))
    val dff = sqlCotext.read.schema(schema).json(rdd.toDS)
    val dfContent = dff
      .select(explode(dff("queryResults.searchResponse.response.docs.transactions")))
      .toDF("transaction")

    // Select the three fields together so that code, description and record
    // date stay aligned per exploded row; collecting them separately and
    // nesting the loops would produce an N^3 cross product of unrelated values.
    // NOTE(review): 100 is the placeholder application_number kept from the
    // original; it likely should be the current application number, but the
    // column type of `initialDF1` is not visible here — confirm.
    val events = dfContent
      .select("transaction.code", "transaction.description", "transaction.recordDate")
      .collect()
      .map(r => (100, cleanField(r.get(0)), cleanField(r.get(1)), cleanField(r.get(2))))

    events.foreach { case (_, eventCode, eventDescription, eventRecordedDate) =>
      println(eventCode)
      println(eventDescription)
      println(eventRecordedDate)
    }

    // One union (and one `show`) per response instead of one per row keeps the
    // query plan of `initialDF1` from growing with every single event.
    if (events.nonEmpty) {
      initialDF1 = initialDF1.union(
        events.toList.toDF("application_number", "event_code", "event_description", "event_recorded_date")
      )
      initialDF1.show()
    }
  }
}