from pyspark import SparkConf, SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
import operator
import numpy as np
import matplotlib.pyplot as plt
def main():
    """Entry point: set up Spark streaming, run the sentiment stream, plot results."""
    config = SparkConf().setMaster("local[2]").setAppName("Streamer")
    spark_ctx = SparkContext(conf=config)
    # Streaming context with a batch interval of 10 seconds.
    streaming_ctx = StreamingContext(spark_ctx, 10)
    streaming_ctx.checkpoint("checkpoint")
    positives = load_wordlist("positive.txt")
    negatives = load_wordlist("negative.txt")
    batch_results = stream(streaming_ctx, positives, negatives, 100)
    make_plot(batch_results)
def make_plot(counts):
    """Plot the positive and negative word counts for each batch timestep.

    Parameters
    ----------
    counts : list
        One entry per batch; each entry is a list like
        [('Positive', n), ('Negative', m)] as produced by stream()
        (order assumed fixed — positive first; TODO confirm against stream()).
    """
    # Skip empty batches: an RDD that collected nothing yields [], and
    # indexing val[0]/val[1] on it would raise IndexError in the original.
    data = [val for val in counts if val]
    if not data:
        # Nothing was collected; avoid max() on an empty sequence.
        return
    positive_counts = [val[0][1] for val in data]
    negative_counts = [val[1][1] for val in data]
    # One x-axis tick per batch; no need for a second counting loop.
    timesteps = list(range(len(data)))
    plt.plot(timesteps, positive_counts, 'bo-', label='Positive')
    plt.plot(timesteps, negative_counts, 'go-', label='Negative')
    plt.axis([0, len(data), 0,
              max(max(positive_counts), max(negative_counts)) + 50])
    plt.xlabel('Time step')
    plt.ylabel('Word count')
    plt.legend(loc='upper left')
    plt.show()
def load_wordlist(filename):
    """Load a sentiment word list from *filename*, one word per line.

    Returns a set of whitespace-stripped, non-empty words. Callers only use
    the result for `word in words` membership tests, so a set is a drop-in
    replacement for the dict-with-dummy-values the original returned (which
    also wrongly included '' as a key for blank lines).
    """
    # 'with' guarantees the file is closed even if reading raises;
    # the original's 'rU' mode was removed in Python 3.11.
    with open(filename) as f:
        return {line.strip() for line in f if line.strip()}
def wordSentiment(word, pwords, nwords):
    """Classify *word*: ('positive', 1) if in pwords, ('negative', 1) if in
    nwords, otherwise None. Positive takes precedence when both match."""
    for label, wordlist in (('positive', pwords), ('negative', nwords)):
        if word in wordlist:
            return (label, 1)
    return None
def updateFunction(newValues, runningCount):
    """updateStateByKey callback: add this batch's values to the running total.

    runningCount is None on the first batch for a key; treat that as 0.
    """
    previous = 0 if runningCount is None else runningCount
    return previous + sum(newValues)
def sendRecord(record):
    """Send a single record over a freshly created connection.

    NOTE(review): createNewConnection is not defined anywhere in this file —
    presumably supplied elsewhere (this mirrors the Spark Streaming docs'
    foreachRDD design-pattern snippet); verify before use.
    """
    connection = createNewConnection()
    try:
        connection.send(record)
    finally:
        # Close even if send() raises, so connections are not leaked.
        connection.close()
def stream(ssc, pwords, nwords, duration):
    """Consume tweets from Kafka and count positive/negative words per batch.

    Runs the streaming context for up to *duration* seconds, prints the
    running totals each batch, and returns a list with one entry per batch:
    the collected [('Positive', n), ('Negative', m)] pairs for that batch.
    """
    kstream = KafkaUtils.createDirectStream(
        ssc, topics=['twitterstream'],
        kafkaParams={"metadata.broker.list": 'localhost:9092'})
    # Kafka records arrive as (key, value) pairs; the tweet text is the value.
    tweets = kstream.map(lambda record: record[1].encode("ascii", "ignore"))
    tokens = tweets.flatMap(lambda text: text.split(" "))
    # Tag every token under both sentiment keys (0 when it doesn't match),
    # so reduceByKey always produces both a 'Positive' and a 'Negative' total.
    pos_pairs = tokens.map(lambda w: ('Positive', 1 if w in pwords else 0))
    neg_pairs = tokens.map(lambda w: ('Negative', 1 if w in nwords else 0))
    tagged = pos_pairs.union(neg_pairs)
    batch_counts = tagged.reduceByKey(operator.add)
    # Running totals across all batches, printed every time step.
    running_counts = batch_counts.updateStateByKey(updateFunction)
    running_counts.pprint()
    # Collect each batch's per-batch totals on the driver for plotting later.
    counts = []
    batch_counts.foreachRDD(lambda t, rdd: counts.append(rdd.collect()))
    ssc.start()
    ssc.awaitTerminationOrTimeout(duration)
    ssc.stop(stopGraceFully=True)
    return counts
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
运行时报出的错误信息如下：
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_gtk3.py", line 343, in idle_draw
self.draw()
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_gtk3.py", line 336, in draw
self.get_property("window").process_updates (False)
TypeError: Couldn't find foreign struct converter for 'cairo.Context'
答案 0（得分：6）：
见Github discussion。例如,尝试:
MATCH path = (a)-[r]-(b)
RETURN count(path) AS nPaths, count(distinct r) AS nRels;
答案 1（得分：1）：
这里的问题出在 matplotlib 的 GTK3 后端。我建议将其更改为适合您环境的后端，参见 https://matplotlib.org/faq/usage_faq.html
我经常做
import matplotlib
matplotlib.use('Agg')
（放在任何与 matplotlib 相关的导入之前）。这样您将无法直接弹窗查看该图，但可以用 savefig() 将其保存，再用图像查看器打开。
答案 2（得分：0）：
在3年前回复以上评论(我正在使用WSL 2和VSCode,以及用于GUI应用程序的X服务器),
sudo apt install python3-gi-cairo
您将省去更改后端的麻烦。
答案 3（得分：0）：
就我而言,python3-gi-cairo 已安装但可能已损坏。因此,如果有人遇到这种情况,您只需在 apt 安装中添加 --reinstall
。
sudo apt install --reinstall python3-gi-cairo
在此处使用重新安装不会破坏您的系统；而如果先删除该包（其所有依赖项也会被一并删除）再重新安装，则可能会影响系统。