我一直在尝试解析来自Dstream的数据来自火花流(TCP)并将其发送到弹性搜索。我收到错误org.elasticsearch.hadoop.rest.EsHadoopInvalidRequest: Found unrecoverable error [127.0.0.1:9200] returned Bad Request(400) - failed to parse; Bailing out..
以下是我的代码:
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.SparkContext
import org.apache.spark.serializer.KryoSerializer;
import org.apache.spark.SparkContext._
import org.elasticsearch.spark._
import org.elasticsearch.spark.rdd.EsSpark
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.TaskContext
import org.elasticsearch.common.transport.InetSocketTransportAddress;
object Test {
case class createRdd(Message: String, user: String)
def main(args:Array[String]) {
val mapper=new ObjectMapper()
val SparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[*]")
SparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
SparkConf.set("es.nodes","localhost:9200")
SparkConf.set("es.index.auto.create", "true")
// Create a local StreamingContext with batch interval of 10 second
val ssc = new StreamingContext(SparkConf, Seconds(10))
/* Create a DStream that will connect to hostname and port, like localhost 9999. As stated earlier, DStream will get created from StreamContext, which in return is created from SparkContext. */
val lines = ssc.socketTextStream("localhost",9998)
// Using this DStream (lines) we will perform transformation or output operation.
val words = lines.map(_.split(" "))
words.foreachRDD(_.saveToEs("spark/test"))
ssc.start() // Start the computation
ssc.awaitTermination() // Wait for the computation to terminate
}
}
以下是错误:
16/10/17 11:02:30 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
16/10/17 11:02:30 INFO BlockManager: Found block input-0-1476682349200 locally
16/10/17 11:02:30 INFO Version: Elasticsearch Hadoop v5.0.0.BUILD.SNAPSHOT [4282a0194a]
16/10/17 11:02:30 INFO EsRDDWriter: Writing to [spark/test]
16/10/17 11:02:30 ERROR TaskContextImpl: Error in TaskCompletionListener
org.elasticsearch.hadoop.rest.EsHadoopInvalidRequest: Found unrecoverable error [127.0.0.1:9200] returned Bad Request(400) - failed to parse; Bailing out..
at org.elasticsearch.hadoop.rest.RestClient.processBulkResponse(RestClient.java:250)
at org.elasticsearch.hadoop.rest.RestClient.bulk(RestClient.java:202)
at org.elasticsearch.hadoop.rest.RestRepository.tryFlush(RestRepository.java:220)
at org.elasticsearch.hadoop.rest.RestRepository.flush(RestRepository.java:242)
at org.elasticsearch.hadoop.rest.RestRepository.close(RestRepository.java:267)
at org.elasticsearch.hadoop.rest.RestService$PartitionWriter.close(RestService.java:120)
at org.elasticsearch.spark.rdd.EsRDDWriter$$anonfun$write$1.apply(EsRDDWriter.scala:42)
at org.elasticsearch.spark.rdd.EsRDDWriter$$anonfun$write$1.apply(EsRDDWriter.scala:42)
at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
我在scala上编码。我无法找到错误的原因。除了例外,请帮助我。
谢谢。