我正在尝试连接到在docker shell中运行的kafka服务器。当我运行程序时,kafka服务器通过打印来响应
kafkamanager | [error] p.c.s.n.PlayDefaultUpstreamHandler - Exception caught in Netty
kafkamanager | java.lang.IllegalArgumentException: empty text
kafkamanager | at org.jboss.netty.handler.codec.http.HttpVersion.<init>(HttpVersion.java:89) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.http.HttpVersion.valueOf(HttpVersion.java:62) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.http.HttpRequestDecoder.createMessage(HttpRequestDecoder.java:75) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.http.HttpMessageDecoder.decode(HttpMessageDecoder.java:191) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.http.HttpMessageDecoder.decode(HttpMessageDecoder.java:102) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:500) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.replay.ReplayingDecoder.cleanup(ReplayingDecoder.java:554) ~[io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.handler.codec.frame.FrameDecoder.channelDisconnected(FrameDecoder.java:365) [io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:102) [io.netty.netty-3.10.4.Final.jar:na]
kafkamanager | at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [io.netty.netty-3.10.4.Final.jar:na]
一遍又一遍,即使程序本身由于超时异常而崩溃:
18/06/22 10:10:51 ERROR Utils: Aborting task org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR Utils: Aborting task org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR Utils: Aborting taskorg.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 2 is aborting.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 0 is aborting.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 1 is aborting.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 2 aborted.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 1 aborted.
18/06/22 10:10:51 ERROR DataWritingSparkTask: Writer for partition 0 aborted.
18/06/22 10:10:51 ERROR Executor: Exception in task 2.0 in stage 0.0 (TID 2)
org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR Executor: Exception in task 1.0 in stage 0.0 (TID 1)
org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
18/06/22 10:10:51 ERROR TaskSetManager: Task 2 in stage 0.0 failed 1 times; aborting job
18/06/22 10:10:51 ERROR WriteToDataSourceV2Exec: Data source writer org.apache.spark.sql.execution.streaming.sources.InternalRowMicroBatchWriter@240a98a7 is aborting.
18/06/22 10:10:51 ERROR WriteToDataSourceV2Exec: Data source writer org.apache.spark.sql.execution.streaming.sources.InternalRowMicroBatchWriter@240a98a7 aborted.
18/06/22 10:10:51 ERROR MicroBatchExecution: Query [id = 6b4e22ba-596e-4a9f-b14d-43b669008f36, runId = cdcda6fa-ec8b-4f3a-9c7c-388a4812e0d5] terminated with error
org.apache.spark.SparkException: Writing job aborted.
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:112)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:294)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3272)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2722)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2722)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3253)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3252)
at org.apache.spark.sql.Dataset.collect(Dataset.scala:2722)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$3$$anonfun$apply$16.apply(MicroBatchExecution.scala:480)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$3.apply(MicroBatchExecution.scala:475)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:271)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch(MicroBatchExecution.scala:474)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply$mcV$sp(MicroBatchExecution.scala:133)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:271)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1.apply$mcZ$sp(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:56)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:117)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:279)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:189)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 0.0 failed 1 times, most recent failure: Lost task 2.0 in stage 0.0 (TID 2, localhost, executor driver): org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:82)
... 31 more
Caused by: org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
我的docker-compose.yml看起来像这样:
version: "2"
services:
kafkaserver:
image: "spotify/kafka:latest"
container_name: kafka
hostname: kafkaserver
networks:
- kafkanet
ports:
- 2181:2181
- 9092:9092
environment:
ADVERTISED_HOST: kafkaserver
ADVERTISED_PORT: 9092
kafka_manager:
image: "mzagar/kafka-manager-docker:1.3.3.4"
container_name: kafkamanager
networks:
- kafkanet
ports:
- 9000:9000
links:
- kafkaserver
environment:
ZK_HOSTS: "kafkaserver:2181"
networks:
kafkanet:
driver: bridge
我的实际代码如下:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.execution.streaming.FileStreamSource.Timestamp
import org.apache.spark.sql.types._
object SpeedTester {
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder.master("local[4]").appName("SpeedTester").config("spark.driver.memory", "8g").getOrCreate()
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
import spark.implicits._
val mySchema = StructType(Array(
StructField("incident_id", StringType),
StructField("date", StringType),
StructField("state", StringType),
StructField("city_or_county", StringType),
StructField("n_killed", IntegerType),
StructField("n_injured", IntegerType)
))
val streamingDataFrame = spark.readStream.schema(mySchema).csv("C:/Users/zoldham/IdeaProjects/flinkpoc/Data/test")
streamingDataFrame.selectExpr("CAST(incident_id AS STRING) AS key", "to_json(struct(*)) AS value").writeStream
.format("kafka")
.option("topic", "testTopic")
.option("kafka.bootstrap.servers", "localhost:9000")
.option("checkpointLocation", "C:/Users/zoldham/IdeaProjects/flinkpoc/Data")
.start()
val df = spark.readStream.format("kafka").option("kafka.bootstrap.servers", "localhost:9000")
.option("subscribe", "testTopic").load()
val df1 = df.selectExpr("CAST(value AS STRING)", "CAST(timestamp AS TIMESTAMP)").as[(String, Timestamp)]
.select(from_json(col("value"), mySchema).as("data"), col("timestamp"))
.select("data.*", "timestamp")
df1.writeStream
.format("console")
.option("truncate","false")
.start()
.awaitTermination()
}
}
为什么连接不正确?我会搞乱某种授权吗?感谢您提供任何建议,我一直在努力寻找其他已发现此问题的人。