我正在学习MongoDB,同时做一个简单的练习,但是我使用pymongo_spark将数据从Kafka消费者存储到MongoDB时遇到了问题。确实,保存到MongoDB中的数据与Kafka消费者产生的数据不同。
这是Kafka消费者的代码:
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
import math
import time
from pyspark import SparkConf
import pymongo_spark
# Important: activate pymongo_spark.
pymongo_spark.activate()
startInformation = {}
oldX = ''
oldY = ''
# Create a local StreamingContext with two working thread and batch interval of 3 second
sc = SparkContext("local[2]", "OdometryConsumer")
ssc = StreamingContext(sc, 3)
kafkaStream = KafkaUtils.createDirectStream(ssc, ['odometry'], {'metadata.broker.list': 'localhost:9092'})
def getPositionSpeed(line):
fr = open('/Users/en1gma/Desktop/info.txt', 'r')
for l in fr.readlines():
oldX = float(l.split(' ')[0])
oldY = float(l.split(' ')[1])
try:
oldTs = int(l.split(' ')[2])
except:
oldTs = int(time.time())
fr.close()
fields = line[1].split(" ")
robotId = fields[0].split(":")[1]
deltaSpace = float(fields[1].split(":")[1])
thetaTwist = float(fields[2].split(":")[1])
ts = int(fields[3].split(":")[1])
newX = oldX + deltaSpace*(math.cos(thetaTwist))
newY = oldY + deltaSpace*(math.sin(thetaTwist))
try:
speed = deltaSpace/(ts - oldTs)
except:
speed = float(0)
fw = open('/Users/en1gma/Desktop/info.txt', 'w')
fw.write(str(newX) + " " + str(newY) + " " + str(ts))
fw.close()
startInformation["robotId"] = str(robotId)
startInformation["x_coordinate"] = str(newX)
startInformation["y_coordinate"] = str(newY)
startInformation["speed"] = str(speed)
startInformation["deltaSpace"] = str(deltaSpace)
startInformation["thetaTwist"] = str(thetaTwist)
startInformation["timeStamp"] = str(ts)
return startInformation
elaborate = kafkaStream.map(getPositionSpeed)
def sendRecord(rdd):
try:
rdd.saveToMongoDB('mongodb://localhost:27017/db.test')
except:
pass
elaborate.foreachRDD(sendRecord)
ssc.start() # Start the computation
ssc.awaitTermination() # Wait for the computation to terminate
你知道为什么会这样吗?
谢谢。