我正在使用 Spark Streaming 配合 Kafka 进行流处理。以下是我的代码,文件名为 TwitterSparkStream_1.py:
import sys
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
def stream(ssc, topic, brokers="localhost:9092"):
    """Print per-batch word counts for messages read from a Kafka topic.

    Creates a direct Kafka stream on ``ssc``, takes the second element of
    each record, drops non-ASCII characters, splits the text on single
    spaces and counts the occurrences of each word.

    Args:
        ssc: The StreamingContext the Kafka stream is attached to.
        topic: Name of the Kafka topic to read from.
        brokers: Value passed as ``metadata.broker.list``. Defaults to
            ``localhost:9092``, the address that was previously hard-coded.
    """
    kstream = KafkaUtils.createDirectStream(
        ssc,
        topics=[topic],
        kafkaParams={"metadata.broker.list": brokers},
    )
    # encode(..., "ignore") strips non-ASCII characters. On Python 3 it
    # returns bytes, and bytes.split(" ") with a str separator raises
    # TypeError, so decode back to text before splitting.
    tweets = kstream.map(
        lambda x: x[1].encode("ascii", "ignore").decode("ascii")
    )
    counts = (
        tweets.flatMap(lambda line: line.split(" "))
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
    )
    counts.pprint()
if __name__ == "__main__":
    # Script entry point: the Kafka topic name is the first CLI argument.
    # Validate it before creating the SparkContext so a missing argument
    # fails fast with a usage message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <topic>".format(sys.argv[0]))
    topic = sys.argv[1]

    sc = SparkContext(appName="PythonStreamingDirectKafkaTwtStream")
    ssc = StreamingContext(sc, 2)  # batch duration of 2 (seconds in PySpark)
    print("topic is {}".format(topic))
    stream(ssc, topic)
    try:
        ssc.start()
        ssc.awaitTermination()
    finally:
        # Stop gracefully even when awaitTermination() is interrupted or
        # raises; previously stop() was skipped in that case.
        ssc.stop(stopGraceFully=True)
当我使用 spark-submit 运行该脚本时:
# NOTE(review): the NoClassDefFoundError for kafka/common/TopicAndPartition
# reported for this command suggests the jar passed here does not bundle the
# Kafka client classes; presumably the spark-streaming-kafka-0-8-assembly jar
# (or --packages with the same coordinates) is what is needed -- confirm.
spark-submit --jars spark-streaming-kafka-0-8_2.11-2.3.0.jar
TwitterSparkStream_1.py topic
我收到以下错误:
Exception in thread "Thread-5" java.lang.NoClassDefFoundError: kafka/common/TopicAndPartition
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.privateGetPublicMethods(Class.java:2902)
at java.lang.Class.getMethods(Class.java:1615)
at py4j.reflection.ReflectionEngine.getMethodsByNameAndLength(ReflectionEngine.java:345)
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:305)
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
at py4j.Gateway.invoke(Gateway.java:274)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: kafka.common.TopicAndPartition
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 12 more
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1062, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 908, in send_command
response = connection.send_command(command)
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1067, in send_command
"Error while receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while receiving
Traceback (most recent call last):
File "/Users/KaviAnu/Documents/hadoop-code/spark/twitter-streaming-spark/TwitterSparkStream_1.py", line 27, in <module>
stream(ssc,topic)
File "/Users/KaviAnu/Documents/hadoop-code/spark/twitter-streaming-spark/TwitterSparkStream_1.py", line 14, in stream
kstream = KafkaUtils.createDirectStream(ssc, topics = [topic], kafkaParams = {"metadata.broker.list": 'localhost:9092'})
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/streaming/kafka.py", line 145, in createDirectStream
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1160, in __call__
File "/usr/local/Cellar/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JError: An error occurred while calling o25.createDirectStreamWithoutMessageHandler
我的 Kafka 生产者、broker 和消费者都在后台运行,我也打开了 spark-shell 来查看 Spark UI,但没有看到任何能帮助我定位问题的信息。
感谢您的帮助。
预先感谢