I am new to Spark and Scala, and while trying to run this simple piece of code that reads from a Kafka topic, I get an error when creating the direct stream: the compiler reports the wrong number of type parameters for the overloaded function createDirectStream. Below is the line that raises the error:

val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
  streamingContext, kafkaParams, topicsSet)

Here is the complete code:
package com.test.spark

import java.util.Properties
import org.apache.spark
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka010._
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object KafkaAirDRsProcess {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("AirDR Kafka to Spark")
    val sc = new SparkContext(sparkConf)
    val streamingContext = new StreamingContext(sc, Seconds(10))

    // Create direct kafka stream with brokers and topics
    val brokers = "10.21.165.145:6667 "
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val topics = "AIRMAIN , dummy"
    val topicsSet = topics.split(",").toSet
    //val topicsSet=topics.map(_.toString).toSet

    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      streamingContext, kafkaParams, topicsSet)

    val LinesDStream = messages.map(_._2)
    val AirDRStream = LinesDStream.map(AirDRFilter.parseAirDR)

    AirDRStream.foreachRDD(foreachFunc = rdd => {
      System.out.println("--- New RDD with " + rdd.count() + " records")
      if (rdd.count() > 0) {
        rdd.toDF().registerTempTable("AirDRTemp")
        val FilteredCDR = sqlContext.sql("select * from AirDRTemp")
        println("======================print result =================")
        FilteredCDR.show()
      }
    })

    //streamingContext.checkpoint("/tmp/mytest/ckpt/")
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
Below is a snapshot of the IntelliJ error (the screenshot shows the same "wrong number of type parameters" message on the createDirectStream line).
Answer (score: 0)
The four-type-parameter signature createDirectStream[K, V, KD, VD] belongs to the older spark-streaming-kafka-0-8 API, which is also where kafka.serializer.StringDecoder comes from. The org.apache.spark.streaming.kafka010 package you are importing instead exposes createDirectStream[K, V] taking a LocationStrategy and a ConsumerStrategy, which is why the compiler complains about the number of type parameters. Since you are using kafka-0-10, you can create the InputDStream as follows:
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

val kafkaParams = Map[String, Object](
  // host:port of your broker (the original ":6667:9092" had two ports run together)
  "bootstrap.servers" -> "10.21.165.145:6667",
  "key.deserializer" -> classOf[StringDeserializer],
  "value.deserializer" -> classOf[StringDeserializer],
  // the 0-10 consumer API requires a group id; the name is arbitrary
  "group.id" -> "AirDRGroup",
  "auto.offset.reset" -> "earliest",
  "enable.auto.commit" -> (true: java.lang.Boolean)
)

// the topic names from your code, without the stray whitespace
val topics = Array("AIRMAIN", "dummy")

val stream = KafkaUtils.createDirectStream[String, String](
  streamingContext,
  PreferConsistent,
  Subscribe[String, String](topics, kafkaParams)
)
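
One more adjustment you will need: with kafka-0-10, each element of the stream is a ConsumerRecord rather than a (key, value) tuple, so messages.map(_._2) from your code will not compile against the new stream. A minimal sketch of the change (the variable names just follow your code):

// each element is an org.apache.kafka.clients.consumer.ConsumerRecord[String, String],
// so take the message payload with .value() instead of ._2
val linesDStream = stream.map(record => record.value())
val airDRStream = linesDStream.map(AirDRFilter.parseAirDR)

Finally, make sure the build pulls in the 0-10 connector rather than the 0-8 one; mixing kafka.serializer.StringDecoder (a 0-8 class) with the kafka010 imports is what makes the overload resolution fail. With sbt that would be something like the line below (match the version to your Spark version):

libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.1.0"

Also note that rdd.toDF() needs an SQLContext (or SparkSession) with its implicits in scope; your snippet references sqlContext without ever defining it.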
Hope this helps!