I have a Python function that uses PyMongo to process a large amount of JSON data (tens of thousands of measurements/timestamps) and then builds a MongoDB bulk write operation. The code works, but it is very slow, and it gets slower the longer it runs. Any suggestions on how to optimize this would be greatly appreciated!
from datetime import datetime

from bson.objectid import ObjectId
from pymongo.errors import BulkWriteError


def import_metrics(user, data):
    i = 0
    bulkop = db.metrics.initialize_unordered_bulk_op()
    # data is a JSON object containing tens of thousands of measurements:
    # data['metrics'] = [
    #     {
    #         'value': int,
    #         'ts': timestamp
    #     },
    # ]
    for entry in data['metrics']:
        utc_dt = datetime.utcfromtimestamp(entry['ts'])
        # skip the entry if its value is null / not numeric
        if isinstance(entry['value'], float):
            # upsert data
            metric = {
                'userId': ObjectId(user['_id']),
                'ts': utc_dt,
                'value': int(entry['value']),
            }
            key = {
                'userId': ObjectId(user['_id']),
                'ts': utc_dt
            }
            # add query to bulk operation
            bulkop.find(key).upsert().update_one({'$set': metric})
            i = i + 1
    if i > 0:
        # we have some data to insert
        try:
            result = bulkop.execute()
        except BulkWriteError as bwe:
            print('ERROR: ' + str(bwe.details))
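For context, here is roughly how I understand the same upsert logic would look with the newer bulk_write()/UpdateOne API. This is only a sketch based on my reading of the PyMongo docs (it assumes the same module-level db handle as above, and the function name import_metrics_bulk_write is mine); I have not benchmarked it.

from datetime import datetime

from bson.objectid import ObjectId
from pymongo import UpdateOne
from pymongo.errors import BulkWriteError


def import_metrics_bulk_write(user, data):
    # Same upsert logic as above, collected into a list of UpdateOne
    # requests and submitted in a single unordered bulk_write() call.
    requests = []
    for entry in data['metrics']:
        if isinstance(entry['value'], float):
            utc_dt = datetime.utcfromtimestamp(entry['ts'])
            key = {'userId': ObjectId(user['_id']), 'ts': utc_dt}
            # the stored document is the key plus the measured value
            metric = dict(key, value=int(entry['value']))
            requests.append(UpdateOne(key, {'$set': metric}, upsert=True))
    if requests:
        try:
            db.metrics.bulk_write(requests, ordered=False)
        except BulkWriteError as bwe:
            print('ERROR: ' + str(bwe.details))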
Note that the collection has only one index:
> db.metrics.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "mydb.metrics"
}
]
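I have been wondering whether the upsert key {userId, ts} needs a compound index of its own. If that is part of the problem, I believe creating one would look roughly like the sketch below (untested; the field order and the unique flag are my guesses).

import pymongo

# Sketch: a compound index matching the upsert key {'userId': ..., 'ts': ...}.
# unique=True is my assumption, since each (userId, ts) pair should occur once.
db.metrics.create_index(
    [('userId', pymongo.ASCENDING), ('ts', pymongo.ASCENDING)],
    unique=True,
)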
Thanks in advance!