查询连续值范围mongo

时间:2017-09-28 11:11:53

标签: mongodb mongodb-query

我的文档都带有一个 number 字段。有一个进程会向集合中添加 number 值尚不存在的文档,但在添加之前会先检查具有该 number 的文档是否已经存在。

假设集合中已经有 number 为 0 到 234、653 到 667 以及 10543 到 22000 的文档;而 number 为 235 到 652 以及 668 到 10542 的文档仍然需要导入。

是否可以构建一个返回集合中存在的连续值范围的查询? (即0到234和653到667和10543到22000)

根据这些信息,我就能立即知道需要补上 235 到 652 以及 668 到 10542 之间缺失的文档,然后从 22001 继续导入……

2 个答案:

答案 0 :(得分:2)

如果您可以接受得到的是所有缺失的单个编号(而不是范围),那么可以使用下面的查询:

// Lists every number from the candidate range that has no document in the
// collection. The stages are passed as ONE array, which is the pipeline form
// documented for aggregate(); passing them as separate arguments only works
// in shells that still accept the legacy variadic call.
collection.aggregate([{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // collect every "number" value into one array
        }
    }
}, {
    $project: {
        "_id": 0, // drop the "_id" field - not needed in the result
        "numbers": {
            $setDifference: [ { // keep the values of the first array that are absent from the second:
                $range: [ 0, 10 ] // the candidate integers, starting at 0 and stopping before 10 - adjust this to your needs...
            }, "$numbers" ] // ...minus the values that already exist for "number"
        }
    }
}])

有很多方法可以根据这些信息计算范围,但我觉得在您的情况下可能甚至不需要这些。

更新(基于你的评论):下面是一个更长的版本,增加了几个额外的阶段,用来把离散的数字合并成范围。代码不太漂亮,速度可能也不是特别快,但至少应该可以正常工作……

// Computes the missing numbers and then collapses them into ranges: each
// output document is either a single missing number, the start of a missing
// range, or the end of a missing range.
// The stages are passed as ONE array, which is the pipeline form documented
// for aggregate().
collection.aggregate([{
    $sort: {
        "number": 1 // ascending by "number"
    }
},
{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // collect every "number" value into one array
        }
    }
}, {
    $project: {
        "_id": 0, // drop the "_id" field - not needed in the result
        "numbers": {
            // NOTE(review): the stages below treat this array as sorted ascending;
            // confirm that $setDifference returns it in that order on your server version.
            $setDifference: [ { // keep the values of the first array that are absent from the second:
                $range: [ 0, 10 ] // the candidate integers, starting at 0 and stopping before 10 - adjust this to your needs...
            }, "$numbers" ] // ...minus the values that already exist for "number"
        }
    }
},
{
    $project: {
        "numbers": "$numbers", // the missing-numbers array, which gets unwound next...
        "numbers2": "$numbers" // ...and an identical copy that stays intact for neighbour lookups
    }
},
{
    $unwind: "$numbers" // one document per missing number; "numbers2" still holds the whole array
},
{
    $project: {
        "number": "$numbers",
        "precedingNumber": {
            $arrayElemAt: [
                "$numbers2", // look the previous missing number up in the intact copy...
                { $max: [0, { $add: [ { $indexOfArray: [ "$numbers2", "$numbers" ] }, -1 ] } ] } // ...at (own position - 1), clamped to 0 so the first element resolves to itself
            ]
        },
        "followingNumber": {
            $arrayElemAt: [
                "$numbers2", // look the next missing number up in the intact copy...
                { $add: [ { $indexOfArray: [ "$numbers2", "$numbers" ] }, 1 ] } // ...at (own position + 1); past the end of the array nothing is found
            ]
        }
    }
}, {
    $project: {
        "number": 1, // keep the number itself
        "precedingInRange": { $cond: [ { $eq: [ { $add: [ "$number", -1 ] }, "$precedingNumber" ] }, true, false ] }, // true when number - 1 is missing as well
        "followingInRange": { $cond: [ { $eq: [ { $add: [ "$number", 1 ] }, "$followingNumber" ] }, true, false ] } // true when number + 1 is missing as well
    }
}, {
    $match: {
        $or: [ // drop the items strictly inside a range, i.e. keep only the outer items of each range
            { "precedingInRange": false },
            { "followingInRange": false }
        ]
    }
}, {
    $project: { // shape the output so the application can tell the three cases apart
        "singleNumber": { $cond: [ { $not: { $or: [ "$precedingInRange", "$followingInRange" ] } }, "$number", null ] }, // isolated missing number
        "startOfRange": { $cond: [ "$followingInRange", "$number", null ] }, // first number of a missing range
        "endOfRange": { $cond: [ "$precedingInRange", "$number", null ] } // last number of a missing range
    }
}])

更新2:

我有一种感觉,我找到了一种更好的方法来很好地获得范围,而不会涉及太多魔法:

// Returns the ranges of consecutive "number" values that EXIST in the
// collection, as an array of [start, end] pairs in the "ranges" field.
// The stages are passed as ONE array, which is the pipeline form documented
// for aggregate().
// NOTE(review): neighbours are located with $indexOfArray, which finds the
// first match only, so this presumably requires every "number" to be unique -
// confirm against your data.
collection.aggregate([{
    $sort: {
        "number": 1 // ascending order is required so that neighbours in the array are neighbours in value
    }
}, {
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // collect every "number" value into one (sorted) array
        }
    }
}, {
    $project: {
        "numbers": {
            $reduce: {
                input: "$numbers",
                // Start with the same { start, end } shape that "in" produces, so that
                // "$$value.start" / "$$value.end" are real arrays from the first step on.
                initialValue: { "start": [], "end": [] },
                in: {
                    "start": {
                        $concatArrays: [
                            "$$value.start",
                            {
                                $cond: { // the current element opens a range when its predecessor in the array is not "current element - 1"
                                    // for the first element the looked-up index is -1, which $arrayElemAt resolves from the end of the array
                                    if: { $ne: [ { $add: [ "$$this", -1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, -1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    },
                    "end": {
                        $concatArrays: [
                            "$$value.end",
                            {
                                $cond: { // the current element closes a range when its successor in the array is not "current element + 1"
                                    if: { $ne: [ { $add: [ "$$this", 1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, 1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    }
                }
            }
        }
    }
}, {
    $project: {
        "ranges": {
            $zip: { // pair the k-th range start with the k-th range end
                inputs: [ "$numbers.start", "$numbers.end" ]
            }
        }
    }
}])

答案 1 :(得分:0)

一种可行的思路是:预先定义好您想要检查的范围,然后运行一次聚合操作,统计落在每个范围内的 number 数量。

例如,给定预定义的范围

// Each entry is one [lower, upper] pair; both bounds are inclusive.
var ranges = [[0, 99], [100, 199], [200, 299]];

和只有3个数字的测试集合:

// Seed the "test" collection with three sample documents.
// insertMany() is the non-deprecated way to insert an array of documents.
db.test.insertMany([
    { number: 1 },
    { number: 87 },
    { number: 200 }
])

要执行的管道如下

// Counts, in a single $group over the whole collection, how many documents
// have a "number" inside each of the three predefined ranges. Every counter
// adds 1 for a document inside its range and 0 otherwise.
db.test.aggregate([
    {
        $group: {
            _id: null, // one bucket for every document
            range0Count: { // numbers in 0..99
                $sum: {
                    $cond: {
                        if: { $and: [ { $gte: [ "$number", 0 ] }, { $lte: [ "$number", 99 ] } ] },
                        then: 1,
                        else: 0
                    }
                }
            },
            range1Count: { // numbers in 100..199
                $sum: {
                    $cond: {
                        if: { $and: [ { $gte: [ "$number", 100 ] }, { $lte: [ "$number", 199 ] } ] },
                        then: 1,
                        else: 0
                    }
                }
            },
            range2Count: { // numbers in 200..299
                $sum: {
                    $cond: {
                        if: { $and: [ { $gte: [ "$number", 200 ] }, { $lte: [ "$number", 299 ] } ] },
                        then: 1,
                        else: 0
                    }
                }
            }
        }
    }
])

会产生以下结果

{
    "_id" : null,
    "range0Count" : 2.0,
    "range1Count" : 0.0,
    "range2Count" : 1.0
}

您可以在 ranges 数组上调用 reduce 方法来进一步重构该管道,自动生成 $group 管道阶段对象,如下所示:

// Each entry is one [lower, upper] pair; both bounds are inclusive.
var ranges = [[0, 99], [100, 199], [200, 299]];

// Build the $group stage: reduce() adds one "range<index>Count" accumulator
// per entry of `ranges` on top of the fixed "_id": null grouping key.
var group = {
    "$group": ranges.reduce(function(fields, bounds, position) {
        var isInsideRange = {
            "$and": [
                { "$gte": ["$number", bounds[0] ] },
                { "$lte": ["$number", bounds[1] ] }
            ]
        };
        // Adds 1 for every document inside the range, 0 otherwise.
        fields["range" + position + "Count"] = {
            "$sum": { "$cond": [ isInsideRange, 1, 0 ] }
        };
        return fields;
    }, { "_id": null })
};

// Run the generated $group stage as a one-stage pipeline on the "test" collection.
db.test.aggregate([group])

使用上面的模板,您可以根据需要自定义范围,然后从结果中找出计数为 0 的范围,即集合中完全缺失的范围。