我有一批带有 number 字段的文档。有一个进程会把 number 值尚不在集合中的文档添加进来,但在添加之前会先检查具有该 number 的文档是否已经存在。
假设集合中已有的文档,其 number 为 0 到 234、653 到 667 以及 10543 到 22000;而 number 为 235 到 652 以及 668 到 10542 的文档尚不存在,需要导入。
是否可以构建一个返回集合中存在的连续值范围的查询? (即0到234和653到667和10543到22000)
根据这些信息,我就能立即知道需要补上 235 到 652 以及 668 到 10542 之间缺失的文档,并从 22001 开始继续......
答案 0 :(得分:2)
如果您可以接受得到所有缺失的单个编号(而不是范围),那么可以使用下面的查询:
// Lists every individual "number" value that is missing from the collection.
// Output: a single document { numbers: [ ...missing values... ] }.
// The stages are passed as ONE pipeline array, which is the documented form of
// aggregate(); with separate arguments a driver treats the second one as options.
collection.aggregate([{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "_id": 0, // get rid of the "_id" field - not really needed
        "numbers": {
            $setDifference: [ { // compute the difference between...
                $range: [ 0, 10 ] // ...the integers 0..9 ($range excludes its end value) - adjust this to your needs...
            }, "$numbers" ] // ...and the available values for "number"
        }
    }
}])
有很多方法可以根据这些信息计算范围,但我觉得在您的情况下可能甚至不需要这些。
UPDATE(基于你的评论):这是一个更长的版本,增加了一些额外的阶段,用来把离散的数字转换成范围。代码不算漂亮,也可能不是特别快,但至少应该能用......
// Reports the MISSING "number" values as ranges.
// Output: one document per range boundary (in ascending order of the number), with
//   singleNumber - a missing number whose neighbours both exist, else null
//   startOfRange - first number of a run of missing numbers, else null
//   endOfRange   - last number of a run of missing numbers, else null
//
// The stages are passed as ONE pipeline array (the documented form of aggregate()).
// Neighbours are detected with membership tests ($in) rather than by array position,
// because $setDifference does not guarantee the order of its output array; for the
// same reason no initial $sort of the documents is needed.
collection.aggregate([{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "_id": 0, // get rid of the "_id" field - not really needed
        "numbers": {
            $setDifference: [ { // compute the difference between...
                $range: [ 0, 10 ] // ...the integers 0..9 ($range excludes its end value) - adjust this to your needs...
            }, "$numbers" ] // ...and the available values for "number"
        }
    }
}, {
    $project: {
        "numbers": "$numbers", // ...we create two identical arrays
        "numbers2": "$numbers" // ...by duplicating our missing numbers array
    }
}, {
    $unwind: "$numbers" // flatten one copy: one document per missing number, each still carrying the full "numbers2" array
}, {
    $project: {
        "number": "$numbers",
        // true when the directly preceding integer is missing as well
        "precedingInRange": { $in: [ { $add: [ "$numbers", -1 ] }, "$numbers2" ] },
        // true when the directly following integer is missing as well
        "followingInRange": { $in: [ { $add: [ "$numbers", 1 ] }, "$numbers2" ] }
    }
}, {
    $match: {
        $or: [ // filter out all items that are inside a range (or rather: include only the outer items of each range)
            { "precedingInRange": false },
            { "followingInRange": false }
        ]
    }
}, {
    $sort: {
        "number": 1 // deterministic output: every startOfRange is followed by its endOfRange
    }
}, {
    $project: { // some beautification of the output to help deal with the data in your application
        "singleNumber": { $cond: [ { $not: [ { $or: [ "$precedingInRange", "$followingInRange" ] } ] }, "$number", null ] },
        "startOfRange": { $cond: [ "$followingInRange", "$number", null ] },
        "endOfRange": { $cond: [ "$precedingInRange", "$number", null ] }
    }
}])
更新2:
我有一种感觉,我找到了一种更好的方法来很好地获得范围,而不会涉及太多魔法:
// Reports the contiguous ranges of "number" values that EXIST in the collection.
// Output: a single document { ranges: [ [start, end], ... ] }.
// NOTE(review): $indexOfArray returns the first match, so this presumably relies on
// "number" values being unique - confirm for your data.
//
// The stages are passed as ONE pipeline array (the documented form of aggregate()).
collection.aggregate([{
    $sort: {
        "number": 1 // we need to sort by numbers in order to be able to do the range magic later
    }
}, {
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "numbers": {
            $reduce: {
                input: "$numbers",
                // the accumulator has the same shape as the value produced by "in" below
                initialValue: { "start": [], "end": [] },
                in: {
                    "start": {
                        $concatArrays: [
                            "$$value.start",
                            {
                                $cond: { // if preceding element in array of numbers is not "current element - 1" then add it, otherwise skip
                                    // for the first element the computed index is -1, which $arrayElemAt resolves to the LAST
                                    // element; in an ascending array that can never equal "first - 1", so the first element always starts a range
                                    if: { $ne: [ { $add: [ "$$this", -1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, -1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    },
                    "end": {
                        $concatArrays: [
                            "$$value.end",
                            {
                                $cond: { // if following element in array of numbers is not "current element + 1" then add it, otherwise skip
                                    // for the last element the index is out of bounds and $arrayElemAt yields no value, so the last element always ends a range
                                    if: { $ne: [ { $add: [ "$$this", 1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, 1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    }
                }
            }
        }
    }
}, {
    $project: {
        "ranges": {
            $zip: { // pair the i-th start with the i-th end
                inputs: [ "$numbers.start", "$numbers.end" ]
            }
        }
    }
}])
答案 1 :(得分:0)
一种可行的思路是:预先定义好您要检查的范围,然后运行一个聚合操作,统计落在每个范围内的数字个数。
例如,给定预定义的范围
// Predefined ranges to check, each given as inclusive [lower, upper] bounds.
var ranges = [ [0, 99], [100, 199], [200, 299] ];
和只有3个数字的测试集合:
// Seed the test collection with three sample documents.
// insertMany() is the documented method for inserting an array of documents;
// insert() is deprecated.
db.test.insertMany([
    { number: 1 },
    { number: 87 },
    { number: 200 }
])
要执行的管道如下
// Counts how many documents fall into each predefined range.
// Every accumulator adds 1 for a document whose "number" lies within the
// inclusive bounds of its range, and 0 otherwise.
db.test.aggregate([
    {
        $group: {
            _id: null, // a single bucket for the whole collection
            range0Count: { $sum: { $cond: [ { $and: [ { $gte: [ "$number", 0 ] }, { $lte: [ "$number", 99 ] } ] }, 1, 0 ] } },
            range1Count: { $sum: { $cond: [ { $and: [ { $gte: [ "$number", 100 ] }, { $lte: [ "$number", 199 ] } ] }, 1, 0 ] } },
            range2Count: { $sum: { $cond: [ { $and: [ { $gte: [ "$number", 200 ] }, { $lte: [ "$number", 299 ] } ] }, 1, 0 ] } }
        }
    }
])
会产生以下结果
{
"_id" : null,
"range0Count" : 2.0,
"range1Count" : 0.0,
"range2Count" : 1.0
}
您可以对 ranges 数组使用 reduce 方法进一步重构管道,动态生成 $group 阶段对象,如下所示:
// Ranges to check, each given as inclusive [lower, upper] bounds.
var ranges = [ [0, 99], [100, 199], [200, 299] ];

// Build one $group stage holding a "range<i>Count" accumulator per range.
var group = ranges.reduce(function (stage, bounds, position) {
    var lower = bounds[0];
    var upper = bounds[1];
    // true when the document's "number" lies within [lower, upper]
    var isInsideRange = {
        "$and": [
            { "$gte": ["$number", lower] },
            { "$lte": ["$number", upper] }
        ]
    };
    var fieldName = "range" + position + "Count";
    stage["$group"][fieldName] = { "$sum": { "$cond": [isInsideRange, 1, 0] } };
    return stage;
}, { "$group": { "_id": null } });
// Run a pipeline whose only stage is the generated $group object.
db.test.aggregate([group])
使用上面的模板,您可以根据需要自定义范围,然后从结果中找出计数为 0 的范围。