I'm trying to embed Kafka/ZooKeeper in my test suite to test Spark as a Kafka consumer. I create a topic and produce a message like this:
val kafkaTestUtils = new KafkaTestUtils
val topic = "test"
kafkaTestUtils.createTopic(topic)
kafkaTestUtils.produceMessage(topic, "test")
and consume it like this:
// throws org.apache.spark.SparkException: org.apache.spark.SparkException: Couldn't find leader offsets for Set()
val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder](sc, kafkaParams, offsetRanges)
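For reference, kafkaParams and offsetRanges are built along these lines (the broker address and offsets here are illustrative assumptions, not the exact values from my suite):

import org.apache.spark.streaming.kafka.OffsetRange

// Assumed setup: the embedded broker listens on `port`, and the topic
// has a single partition read from offset 0 up to (but not including) 1.
val kafkaParams = Map("metadata.broker.list" -> s"localhost:$port")
val offsetRanges = Array(OffsetRange(topic, 0, 0L, 1L))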
Interestingly, after the embedded Kafka server starts, no broker appears to be registered in ZooKeeper:
// returns an empty Seq
val zk = new ZkClient(zkQuorum, Integer.MAX_VALUE, 10000, ZKStringSerializer)
ZkUtils.getAllBrokersInCluster(zk).map(_.connectionString)
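To double-check that this isn't a serializer or helper-method issue, the registration path can also be read directly (a diagnostic sketch; in Kafka 0.8 brokers register themselves under /brokers/ids):

import scala.collection.JavaConverters._

// Diagnostic sketch: list raw broker ids straight out of ZooKeeper.
val brokerIds =
  if (zk.exists("/brokers/ids")) zk.getChildren("/brokers/ids").asScala
  else Nil
println(s"registered broker ids: $brokerIds")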
Below is my implementation of the embedded Kafka (KafkaServerStartable) and ZooKeeper:
package com.c2fo.atlas.kafka

import java.util.Properties

import kafka.admin.AdminUtils
import kafka.producer.{KeyedMessage, Producer, ProducerConfig}
import kafka.server.{KafkaConfig, KafkaServerStartable}
import kafka.utils.{TestUtils, ZKStringSerializer}
import org.I0Itec.zkclient.ZkClient
import org.apache.curator.test.TestingServer

/**
 * Helper class for Kafka test suites. Sets up and tears down a local
 * ZooKeeper and Kafka server, and pushes data using a Kafka producer.
 */
class KafkaTestUtils {

  // Embedded ZooKeeper on port 2181
  val zkServer: TestingServer = new TestingServer(2181)
  zkServer.start()

  // Embedded Kafka broker on a free port, pointed at the test ZooKeeper
  val port = TestUtils.choosePort
  val zkClient = new ZkClient(zkServer.getConnectString, 3000, 3000, ZKStringSerializer)
  val props = TestUtils.createBrokerConfig(0, port, true)
  props.put("zookeeper.connect", zkServer.getConnectString)
  val config = new KafkaConfig(props)
  val kafkaServer: KafkaServerStartable = new KafkaServerStartable(config)
  kafkaServer.startup()

  // Producer pointed at the embedded broker
  val producerProps = TestUtils.getProducerConfig(s"localhost:$port")
  val producerConfig = new ProducerConfig(producerProps)
  val producer = new Producer[String, Array[Byte]](producerConfig)

  def createTopic(topic: String) = {
    AdminUtils.createTopic(zkClient, topic, 1, 1)
  }

  def fetchTopicMetadata(topic: String): Properties = {
    AdminUtils.fetchTopicConfig(zkClient, topic)
  }

  def produceMessage(topic: String, message: String): Unit = {
    val data = new KeyedMessage[String, Array[Byte]](topic, message.getBytes)
    producer.send(data)
  }

  def teardown() = {
    kafkaServer.shutdown()
    zkClient.close()
    zkServer.close()
  }
}
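In case this is a startup race (the broker registers itself in ZooKeeper asynchronously after startup() returns), I could add a wait like the following inside the class before producing or building the RDD. This is only a sketch; the helper name and polling interval are mine:

import kafka.utils.ZkUtils

// Hypothetical helper: poll until the broker shows up under /brokers/ids,
// to rule out a startup race before producing or building the RDD.
def waitForBrokerRegistration(timeoutMs: Long = 10000): Unit = {
  val deadline = System.currentTimeMillis() + timeoutMs
  while (ZkUtils.getAllBrokersInCluster(zkClient).isEmpty) {
    if (System.currentTimeMillis() > deadline)
      throw new IllegalStateException("Broker never registered in ZooKeeper")
    Thread.sleep(100)
  }
}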
EDIT: full stack trace
Testing started at 4:59 PM ...
2016-01-18 16:59:42 INFO ConfigUtil$:31 - Loading config overrides for jenkins
2016-01-18 16:59:44 INFO ConfigUtil$:31 - Loading config overrides for jenkins
2016-01-18 16:59:45 ERROR DefaultEventHandler:97 - Failed to collate messages by topic, partition due to: Failed to fetch topic metadata for topic: test
2016-01-18 16:59:46 INFO ConfigUtil$:31 - Loading config overrides for jenkins
2016-01-18 16:59:46 INFO ConfigUtil$:31 - Loading config overrides for jenkins
2016-01-18 16:59:46 INFO ConfigUtil$:31 - Loading config overrides for jenkins
org.apache.spark.SparkException: Couldn't find leader offsets for Set()
org.apache.spark.SparkException: org.apache.spark.SparkException: Couldn't find leader offsets for Set()
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$checkErrors$1.apply(KafkaCluster.scala:366)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$checkErrors$1.apply(KafkaCluster.scala:366)
at scala.util.Either.fold(Either.scala:97)
at org.apache.spark.streaming.kafka.KafkaCluster$.checkErrors(KafkaCluster.scala:365)
at org.apache.spark.streaming.kafka.KafkaUtils$.org$apache$spark$streaming$kafka$KafkaUtils$$checkOffsets(KafkaUtils.scala:178)
at org.apache.spark.streaming.kafka.KafkaUtils$$anonfun$createRDD$1.apply(KafkaUtils.scala:207)
at org.apache.spark.streaming.kafka.KafkaUtils$$anonfun$createRDD$1.apply(KafkaUtils.scala:203)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:709)
at org.apache.spark.streaming.kafka.KafkaUtils$.createRDD(KafkaUtils.scala:203)
at com.c2fo.atlas.jobs.batch.reprocess.KafkaIntervalJob$.consume(KafkaIntervalJob.scala:40)
at com.c2fo.atlas.jobs.Job$$anonfun$delayedInit$1.apply$mcV$sp(Job.scala:22)