我尝试将数据发送到某个端口,并用 Spark Streaming 读取这些数据。Spark 已经成功连接到该端口,我也可以发送数据,但 Spark 没有打印出任何内容。
import socket
# Minimal TCP server: every client that connects is sent one line of test text.
# It is meant to feed a Spark Streaming socketTextStream client on localhost:8000.
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('localhost', 8000))
serversocket.listen(5)  # become a server socket, maximum 5 queued connections
while True:
    # Block until a client connects.
    connection, address = serversocket.accept()
    print("connected")
    # Spark's socketTextStream reads newline-delimited records; without the
    # trailing "\n" the receiver never sees a complete line and prints nothing.
    # A bytes literal keeps the payload valid on both Python 2 and Python 3.
    data = b"osman tamer\n"
    # sendall() keeps writing until the whole buffer is sent; send() may stop early.
    connection.sendall(data)
Spark 端:
# Spark Streaming word count over text lines read from a TCP socket.
import findspark
# Raw string: "\s" is not a valid escape sequence, so spell the Windows path literally.
# findspark.init must run before pyspark is imported so the package can be located.
findspark.init(r'C:\spark')
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Master "local[2]", application name "test".
sc = SparkContext("local[2]", "test")
# Batch interval of 3 seconds.
ssc = StreamingContext(sc, 3)
# Each record of the stream is one newline-terminated line sent by the server.
lines = ssc.socketTextStream("localhost", 8000)
# Split every line on single spaces and pair each word with a count of 1.
words = lines.flatMap(lambda line: line.split(" ")).map(lambda word: (word, 1))
# Sum the counts per word within each batch.
word_count = words.reduceByKey(lambda x, y: x + y)
# Print the first elements of every batch to stdout.
word_count.pprint()
ssc.start()
# Keep the driver alive until the streaming job is stopped; without this a
# standalone script can exit right after start() and never print a batch.
ssc.awaitTermination()