获取java.io.NotSerializableException:org.apache.spark.SparkContext异常

时间:2018-04-12 05:13:38

标签: scala apache-spark dataframe serialization rdd

(这个问题有很多重复;我尝试了所有这些,但没有为我解决。这就是我问一个新问题的原因。)

我正在尝试从数据库中获取记录并基于该值,我使用curl命令调用一个URL并再次将输出保存到数据库。当我在Spark中尝试同样的事情时,我遇到了以下错误:

Exception in thread "main" org.apache.spark.SparkException: Task not serializable
    at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298)
    at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
    at org.apache.spark.SparkContext.clean(SparkContext.scala:2287)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:925)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:924)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:924)
    at com.inndata.services.ipa_th$.main(ipa_th.scala:83)
    at com.inndata.services.ipa_th.main(ipa_th.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:755)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

这是我的例子:

val list = hiveContext.sql("select application_number from t").collect()

    list.map(x => x.asInstanceOf[Int]).map(x =>
        {
          print("************************** + "+ x)
          val z = x.toString().replace("[","").replace("]","")
          if(z!= null)
          {
            val cmd = Seq("curl", "-X", "POST", "--insecure", "--header", "Content-Type: application/json", "--header", "Accept: application/json", "-d", "{\"searchText\":\"11477949\",\"qf\":\"applId\"}", "https:link")      //cmd.!
          val r = cmd.!!
            val r1 = r.toString()
            val rdd = context.parallelize(Seq(r1))
            val dff = sqlCotext.read.schema(schema).json(rdd.toDS)
            val dfContent = dff.select(explode(dff("queryResults.searchResponse.response.docs.transactions"))).toDF("transaction")
            val a1 = dfContent.select("transaction.code").collect()
            val a2 = dfContent.select("transaction.description").collect()
            val a3 = dfContent.select("transaction.recordDate").collect()
            /* val a1 = dff.select(expr("queryResults.searchResponse.response.docs.transactions[0].code").as("event_code")).collect()
             val a2=dff.select(expr("queryResults.searchResponse.response.docs.transactions[0].description").as("event_description")).collect()
             val a3=dff.select(expr("queryResults.searchResponse.response.docs.transactions[0].recordDate").as("event_recorded_date")).collect()*/
            for (mmm1 <- a1; mm2 <- a2; mm3 <- a3)
            {
              val ress1 = mmm1.toString().replace("[", " ").replace("]", " ").replace("]", " ").replace("WrappedArray(","").replace(")","")
              val res2 = mm2.toString().replace("[", " ").replace("]", " ").replace("]", " ").replace("WrappedArray(","").replace(")","")
              val res3 = mm3.toString().replace("[", " ").replace("]", " ").replace("]", " ").replace("WrappedArray(","").replace(")","")
              println(ress1)
              println(res2)
              println(res3)
              initialDF1 = initialDF1.union(Seq((100, ress1, res2, res3)).toDF("application_number", "event_code", "event_description", "event_recorded_date"))
            initialDF1.show()
            }
          }

        })

0 个答案:

没有答案