我想从套接字流(socket stream)中逐条获取记录,并希望每条记录都是由一行文本得到的字符串类型。请问在 Python 中应该怎么写?谢谢!
# Load a saved pipeline model from model_path.
# NOTE(review): `pipeline` is presumably pyspark.ml.pipeline -- confirm against the imports.
model = pipeline.PipelineModel.read().load(model_path)

# Reuse the SparkContext owned by the existing `spark` session.
sc = spark.sparkContext
# The second argument is the batch interval in seconds.
ssc = StreamingContext(sc, 1)

# Host and port of the text source come from the command line.
lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))

# socketTextStream either returns a DStream or raises, so no None guard is
# needed here.  The lambda resolves `processRecord` only when it is called
# for a batch, so that name must be defined before the streaming context is
# started.
lines.foreachRDD(lambda rdd: rdd.foreach(processRecord))
def processRecord(record):
    """Handle a single record of the stream.

    Passed to ``rdd.foreach`` and therefore invoked once per element of
    every batch RDD.

    Args:
        record: One element of the RDD.  For a ``socketTextStream`` source
            this is presumably one line of text as a string -- confirm
            against the PySpark documentation.

    Returns:
        None.
    """
    # NOTE(review): rdd.foreach presumably runs this on the executors, so on
    # a cluster the output would land in executor logs rather than on the
    # driver console -- confirm.
    print("test")
    # Placeholder kept from the original snippet: remaining logic was elided.
    ...
答案 0 :(得分:0)
from __future__ import print_function
import sys
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
if __name__ == "__main__":
    # Fail early with a usage hint instead of an IndexError after the
    # SparkContext has already been created.
    if len(sys.argv) < 3:
        print(
            "Usage: {} <hostname> <port>".format(sys.argv[0]),
            file=sys.stderr,
        )
        sys.exit(1)

    sc = SparkContext(appName="Demo")
    # The second argument is the batch interval in seconds.
    ssc = StreamingContext(sc, 1)

    # Host and port come from the command line; for a quick local test this
    # would be equivalent to ssc.socketTextStream("localhost", 9999).
    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))

    # Split every received line on single spaces and print the leading
    # elements of each batch (pprint shows a limited number per batch).
    lines.flatMap(lambda line: line.split(" ")).pprint()

    # Start receiving and processing data.
    ssc.start()
    # Block until the streaming context is stopped or a streaming job
    # fails; a closed socket on its own does not end this call.
    ssc.awaitTermination()
感谢。