Error while parsing data from a DStream into Elasticsearch

Date: 2016-10-17 04:06:54

Tags: scala elasticsearch apache-spark spark-streaming

I have been trying to parse data from a DStream coming from Spark Streaming (TCP) and send it to Elasticsearch. I am getting the error org.elasticsearch.hadoop.rest.EsHadoopInvalidRequest: Found unrecoverable error [127.0.0.1:9200] returned Bad Request(400) - failed to parse; Bailing out..

Here is my code:

import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.TaskContext
import org.apache.spark.serializer.KryoSerializer
import org.elasticsearch.spark._
import org.elasticsearch.spark.rdd.EsSpark
import org.elasticsearch.common.transport.InetSocketTransportAddress
import com.fasterxml.jackson.databind.ObjectMapper

object Test {
  case class createRdd(Message: String, user: String)

  def main(args: Array[String]) {

    val mapper = new ObjectMapper()

    // Spark configuration, including the Elasticsearch connector settings
    val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[*]")
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sparkConf.set("es.nodes", "localhost:9200")
    sparkConf.set("es.index.auto.create", "true")

    // Create a local StreamingContext with a batch interval of 10 seconds
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Create a DStream that connects to a hostname and port, e.g. localhost:9998.
    // As stated earlier, the DStream is created from the StreamingContext,
    // which in turn is created from the SparkContext.
    val lines = ssc.socketTextStream("localhost", 9998)

    // Using this DStream (lines) we perform a transformation, then an output operation
    val words = lines.map(_.split(" "))
    words.foreachRDD(_.saveToEs("spark/test"))

    ssc.start()             // Start the computation
    ssc.awaitTermination()  // Wait for the computation to terminate
  }
}
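
For context, `lines.map(_.split(" "))` produces a `DStream[Array[String]]`, so each element handed to `saveToEs` is a raw string array rather than a JSON object, which I believe is what Elasticsearch expects for a document. Below is a minimal sketch of what a well-formed document might look like, reusing the `createRdd` case class defined above; the field split is a hypothetical example, not my real payload:

    val docs = lines.map { line =>
      // Hypothetical mapping: first token as the user, the rest as the message
      val parts = line.split(" ", 2)
      createRdd(Message = if (parts.length > 1) parts(1) else "", user = parts(0))
    }
    // Each createRdd instance serializes to a JSON object like
    // {"Message": "...", "user": "..."}
    docs.foreachRDD(_.saveToEs("spark/test"))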

Here is the error:

16/10/17 11:02:30 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
16/10/17 11:02:30 INFO BlockManager: Found block input-0-1476682349200 locally
16/10/17 11:02:30 INFO Version: Elasticsearch Hadoop v5.0.0.BUILD.SNAPSHOT [4282a0194a]
16/10/17 11:02:30 INFO EsRDDWriter: Writing to [spark/test]
16/10/17 11:02:30 ERROR TaskContextImpl: Error in TaskCompletionListener
org.elasticsearch.hadoop.rest.EsHadoopInvalidRequest: Found unrecoverable error [127.0.0.1:9200] returned Bad Request(400) - failed to parse; Bailing out..
    at org.elasticsearch.hadoop.rest.RestClient.processBulkResponse(RestClient.java:250)
    at org.elasticsearch.hadoop.rest.RestClient.bulk(RestClient.java:202)
    at org.elasticsearch.hadoop.rest.RestRepository.tryFlush(RestRepository.java:220)
    at org.elasticsearch.hadoop.rest.RestRepository.flush(RestRepository.java:242)
    at org.elasticsearch.hadoop.rest.RestRepository.close(RestRepository.java:267)
    at org.elasticsearch.hadoop.rest.RestService$PartitionWriter.close(RestService.java:120)
    at org.elasticsearch.spark.rdd.EsRDDWriter$$anonfun$write$1.apply(EsRDDWriter.scala:42)
    at org.elasticsearch.spark.rdd.EsRDDWriter$$anonfun$write$1.apply(EsRDDWriter.scala:42)
    at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
    at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
    at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

I am coding in Scala, and I cannot figure out the cause of the error from the exception. Please help me out.

Thanks.

0 answers:

No answers yet