Mongo 聚合:删除每个 ID 的所有记录,仅保留最旧的一条记录

时间:2020-01-10 08:56:24

标签: mongodb mongodb-query aggregation-framework aggregate-functions

有一个集合,其中包含如下文档

_id:ObjectId('......')
ton_id :ObjectId('abcd')
value:587900
date:2019-12-13T07:09:40.075+00:00


_id:ObjectId('......')
ton_id :ObjectId('abcd')
value:50540
date:2018-1-13T07:09:40.075+00:00

_id:ObjectId('......')
ton_id :ObjectId('abcd1')
value:55400
date:2019-5-13T07:09:40.075+00:00


_id:ObjectId('......')
ton_id :ObjectId('abcd1')
value:22500
date:2018-12-13T07:09:40.075+00:00


对于 ton_id 为 abcd 和 abcd1 的文档,删除除最旧记录以外的所有记录。

必需的输出

_id:ObjectId('......')
ton_id :ObjectId('abcd')
value:50540
date:2018-1-13T07:09:40.075+00:00



_id:ObjectId('......')
ton_id :ObjectId('abcd1')
value:22500
date:2018-12-13T07:09:40.075+00:00

4 个答案:

答案 0 :(得分:1)

类似下面的做法应该可行。先使用聚合选出每个 ton_id 要保留的 _id,然后通过批量操作删除其余文档。

// Deletes every document of the listed ton_ids except the oldest one
// (smallest `date`) per ton_id. Written for the mongo shell.
// NOTE: "abcd" / "abcd1" are the placeholder ids used in the question;
// substitute the real ObjectId values before running.
var tonIds = [ObjectId("abcd"), ObjectId("abcd1")];
var bulk = db.getCollection(colname).initializeUnorderedBulkOp();
var queuedOps = 0;

db.getCollection(colname).aggregate([
    {$match:{"ton_id":{"$in":tonIds}}},
    // Ascending sort puts the oldest document first within each group.
    {$sort:{"date":1}},
    {$group:{
        "_id":"$ton_id",
        "keep_id":{"$first":"$_id"}
    }},
    {$project:{"_id":0, "ton_id":"$_id", "keep_id":1}}
]).forEach(function(doc){
    // Queue removal of every document of this ton_id except the one to keep.
    bulk.find({"_id":{"$ne":doc.keep_id},"ton_id":doc.ton_id}).remove();
    queuedOps += 1;
});

// Executing a bulk with no queued operations raises an error,
// so skip it when the aggregation matched nothing.
if (queuedOps > 0) {
    bulk.execute();
}

答案 1 :(得分:0)

很遗憾,无法通过1次操作完成此操作。

所以我们必须分两个阶段进行操作:首先找到要保留(即排除在删除之外)的文档,然后删除其余的文档。

// For each ton_id: find its oldest document, then delete all its other documents.
// NOTE: "abcd" / "abcd1" are the placeholder ids used in the question.
let tonIds = [ObjectId("abcd"), ObjectId("abcd1")];
for (const tonId of tonIds) {
    // findOne() returns no cursor and cannot be chained with sort();
    // use find().sort().limit(1) instead. Ascending date => oldest first.
    const docToExclude = await db.collection
        .find({ton_id: tonId})
        .sort({date: 1})
        .limit(1)
        .next();

    // No document for this ton_id: nothing to delete.
    if (docToExclude === null) {
        continue;
    }

    // Delete everything for this ton_id except the oldest document.
    await db.collection.deleteMany({_id: {$ne: docToExclude._id}, ton_id: tonId});
}

答案 2 :(得分:0)

我认为您想要显示集合中每个唯一ton_id的最早数据

您可以使用类似这样的聚合查询来实现

// Returns, for every distinct ton_id, its oldest document (smallest `date`).
// This only reads data; nothing is deleted.
db.getCollection(colname).aggregate([
    {
        // Ascending order so that $first below picks the oldest document.
        $sort:{
            date:1
        }
    },
    {
        $group:{
            _id:'$ton_id',
            id:{$first:'$_id'},
            value:{$first:'$value'},
            date:{$first:'$date'}
        }
    },{
        // Restore the original document shape: _id is the document id,
        // ton_id is the group key.
        $project:{
            _id:'$id',
            ton_id:'$_id',
            value:1,
            date:1
        }
    }
])

答案 3 :(得分:0)

    // Keeps only the oldest document (smallest `date`) for each listed ton_id
    // and removes the others through an unordered bulk operation.
    // `Collection` and `callback` are expected to come from the enclosing scope.
    var bulk = Collection.collection.initializeUnorderedBulkOp();
    // NOTE(review): placeholder ids from the question. If ton_id is stored as an
    // ObjectId, these presumably need to be ObjectId instances — confirm.
    let removableTonIds = ["abcd", "abcd1"];
    let pipeline = [
        {
            $match: {
                "ton_id": {
                    "$in": removableTonIds
                }
            }
        },
        {
            // Ascending order so that $first below picks the OLDEST document.
            $sort: {
                date: 1
            }
        },
        {
            // One entry per ton_id holding the _id of the document to keep.
            $group: {
                _id: '$ton_id',
                id: {
                    $first: '$_id'
                },
            }
        },
        {
            // Collapse all kept ids into a single result document.
            $group: {
                _id: null,
                keep_ids: {
                    $addToSet: '$id'
                }
            }
        },
        {
            $project: {
                keep_ids: 1
            }
        }
    ];

    var cursor = Collection.aggregate(pipeline).cursor({batchSize: 1000}).exec();

    cursor.on( 'data', function( data ) {
        // Queue removal of every matching document that is not in the keep list.
        bulk.find( { "_id" : { $nin: data.keep_ids }, "ton_id": {$in: removableTonIds } } ).remove();
    });

    // Report aggregation failures to the caller instead of leaving the
    // 'error' event unhandled.
    cursor.on( 'error', function( error ) {
        callback(error);
    });

    cursor.on( 'end', function() {
        // Nothing was queued (no matching documents): skip execute().
        if ( bulk.length === 0 ) {
            callback();
        } else {
            bulk.execute(function (error) {
                if (error) {
                    callback(error);
                } else {
                    callback();
                }
            });
        }
    });