我有一段代码简单地循环了两个数组,对于第一个数组的每个元素,它在第二个数组中找到相关的元素,仅更改第一个出现的元素并删除其余的元素。
/**
 * The aggregation data structure:
 * "_id": {
 *   "geometry": geometry,
 *   "dups": [
 *     "5b3b25b4e54029249c459bfc",  // keep only the first element in allDocs
 *     "5b3b25b4e54029249c459e65",  // delete it from allDocs
 *     "5b3b25b4e54029249c459d7d"   // delete it from allDocs
 *   ],
 *   "dupsProp": [ ],  // array of all properties of duplicatePoints
 *   "count": 3
 * }
 */

/**
 * Deduplicates documents according to the aggregation result: for each
 * aggregation entry the document matching the FIRST id in `dups` is kept
 * (and receives the merged `dupsProp`), every other id in `dups` is removed.
 *
 * The original version ran Array#findIndex (O(allDocs)) inside nested loops
 * and spliced repeatedly — O(aggregationRes × allDocs), hours of runtime for
 * 46k × 345k. Indexing the documents by id once makes every lookup O(1), and
 * a single filter pass replaces the repeated splices.
 *
 * @param {Array<{dups: string[], dupsProp: Object[]}>} aggregationRes
 * @param {Array<{_id: {toString(): string}, properties?: Object[]}>} allDocs
 * @returns {Array} a new array with the surviving documents (input order kept)
 */
function dedupeDocs(aggregationRes, allDocs) {
  // Build the id -> document index once: O(allDocs).
  var docsById = new Map();
  allDocs.forEach(function (doc) {
    docsById.set(doc._id.toString(), doc);
  });

  var idsToDelete = new Set();
  aggregationRes.forEach(function (resElem) {
    // Keep the first duplicate and assign it the merged properties.
    var keeper = docsById.get(String(resElem.dups[0]));
    if (keeper) {
      keeper.properties = resElem.dupsProp;
    }
    // Mark every remaining duplicate id for removal.
    for (var i = 1; i < resElem.dups.length; i++) {
      idsToDelete.add(String(resElem.dups[i]));
    }
  });

  // One O(allDocs) pass instead of one O(allDocs) splice per duplicate.
  return allDocs.filter(function (doc) {
    return !idsToDelete.has(doc._id.toString());
  });
}

var aggregationRes = [/* 46,000 aggregation objects */];
var allDocs = [/* 345,000 documents */];
allDocs = dedupeDocs(aggregationRes, allDocs);
此脚本运行将近4个小时。如您所见,计算确实很简单,但是由于allDocs数组很大,因此需要花费很长时间。如果有人给我提示如何减少计算时间,那就太好了。 预先感谢
答案 0（得分：1）
根据Bergi的想法,我们通过id为文档编制索引,以避免不得不查找昂贵的索引:
/**
 * Following Bergi's idea: index the documents by id once so that each lookup
 * is O(1) instead of an O(n) Array#findIndex scan.
 *
 * Bug fixes relative to the posted answer:
 *  - the delete loop iterated `dupsProp` (the merged properties array);
 *    it must iterate `dups` (the duplicate ids), otherwise nothing is removed;
 *  - `doc_id` in the final filter was a typo for `doc._id` (ReferenceError).
 *
 * @param {Array<{dups: string[], dupsProp: Object[]}>} aggregationRes
 * @param {Array<{_id: {toString(): string}, properties?: Object[]}>} allDocs
 * @returns {Array} the documents that were not deleted, in input order
 */
function removeDuplicateDocs(aggregationRes, allDocs) {
  var allDocsIndexed = {};
  allDocs.forEach(function (doc) {
    allDocsIndexed[doc._id.toString()] = doc;
  });

  aggregationRes.forEach(function (resElem) {
    // The first id in `dups` keeps its document and gets the merged props.
    var keeper = allDocsIndexed[resElem.dups[0]];
    if (keeper) {
      keeper.properties = resElem.dupsProp;
    }
    // Iterate `dups` (NOT `dupsProp`) and drop every id after the first.
    for (var i = 1; i < resElem.dups.length; i++) {
      delete allDocsIndexed[resElem.dups[i]];
    }
  });

  // A document survives iff its id is still present in the index.
  return allDocs.filter(function (doc) {
    return Object.prototype.hasOwnProperty.call(allDocsIndexed, doc._id.toString());
  });
}

var allDocs = [/* 345,000 documents */];
var aggregationRes = [/* 46,000 aggregation objects */];
var allUndeletedDocs = removeDuplicateDocs(aggregationRes, allDocs);
请注意，就 JavaScript 而言这是一种有效的解决方案；但如果能提供更多细节，使用 MongoDB 自身的聚合功能可能会有更好的解决方案。