MongoDB:在聚合管道中使用 $addToSet 避免重复

时间:2016-09-22 14:55:01

标签: mongodb mongodb-query aggregation-framework mongodb-aggregation

有聚合管道:

// Pair every element of `dates` with the elements of `numbers`, `goals` and
// `durations` that sit at the same array position.
db.getCollection('yourCollection').aggregate(
    {
        // Emit one document per `dates` element; `idx` receives that element's array index.
        $unwind: {
            path: "$dates",
            includeArrayIndex: "idx"
        }
    },
    {
        // Use `idx` to pick the matching element out of each parallel array.
        $project: {
            _id: 0,
            dates: 1,
            numbers: { $arrayElemAt: ["$numbers", "$idx"] },
            goals: { $arrayElemAt: ["$goals", "$idx"] },
            durations: { $arrayElemAt: ["$durations", "$idx"] }
        }
    }
)

在以下数据(示例文档)上执行:

{
    "_id" : ObjectId("52d017d4b60fb046cdaf4851"),
    "dates" : [
        1399518702000,
        1399126333000,
        1399209192000,
        1399027545000
    ],
    "dress_number" : "4",
    "name" : "J. Evans",
    "numbers" : [
        "5982",
        "5983",
        "5984",
        "5985"
    ],
    "goals": [
        "1",
        "0",
        "4",
        "2"
    ],
   "durations": [
       "78",
       "45",
       "90",
       "90"
   ]
}

{
    "_id" : ObjectId("57e250c1b60fb0213d06737c"),
    "dates" : [
        "1399027545000",
        "1399101432000",
        "1399026850000",
        "1399904504000"
    ],
    "dress_number" : "6",
    "name" : "K. Mitnick",
    "numbers" : [
        "0982",
        "0981",
        "0958",
        "0982"
    ],
    "durations" : [
        98,
        110,
        66,
        92
    ],
    "goals" : [
        "2",
        "3",
        "0",
        "1"
    ]
}

查询效果很好,但是有重复的记录,所以我尝试使用$addToSet运算符来避免重复:

// Attempt to de-duplicate `dates` with $addToSet before projecting the
// per-index values of the parallel arrays.
db.getCollection('yourCollection').aggregate(
    {
        // Keep only documents whose `number` field equals the caller-supplied `number` variable.
        $match: {
            "number": number
        }
    },
    {
        // Emit one document per `dates` element; `idx` receives that element's array index.
        $unwind: {
            path: "$dates",
            includeArrayIndex: "idx"
        }
    },
    {
        // Re-assemble the distinct dates per original document.
        // The output of this stage contains only the fields listed here: `_id` and `dates`.
        $group: {
            _id: '$_id',
            dates: { $addToSet: '$dates' }
        }
    },
    {
        // Look up the element at position `idx` in each parallel array.
        $project: {
            _id: 0,
            dates: 1,
            numbers: { $arrayElemAt: ["$numbers", "$idx"] },
            goals: { $arrayElemAt: ["$goals", "$idx"] },
            durations: { $arrayElemAt: ["$durations", "$idx"] }
        }
    }
)

但我只得到了日期(其他字段为 null):

{ dates: 
     [ '1399026850000',
       '1399101432000',
       '1399027545000',
       '1399904504000',
       '1399024474000',
       '1399126333000' ],
    numbers: null,
    goals: null,
    durations: null },
  { dates: 
     [ '1399027545000',
       '1399024474000',
       '1399518702000',
       '1399126333000',
       '1399209192000',
       '1399356651000' ],
    numbers: null,
    goals: null,
    durations: null },
  { dates: 
     [ '1399026850000',
       '1399101432000',
       '1399027545000',
       '1399904504000',
       '1399024474000' ],
    numbers: null,
    goals: null,
    durations: null } 

有人知道问题出在哪里吗?

1 个答案:

答案 0 :(得分:6)

您需要在 $group 管道阶段中使用 $first 运算符把其他字段也包含进来,如下所示:

// De-duplicate `dates` per document while carrying the other arrays through
// the $group stage, then pair each date with the values at the same index.
db.getCollection('yourCollection').aggregate([
    // Flatten `dates` so that $addToSet can collect the distinct values.
    { "$unwind": "$dates" },
    {
        // $group only outputs the fields named here, so the parallel arrays
        // are carried along explicitly with $first.
        "$group": {
            "_id": "$_id",
            "dates": { "$addToSet": "$dates" },
            "numbers": { "$first": "$numbers" },
            "goals": { "$first": "$goals" },
            "durations": { "$first": "$durations" }
        }
    },
    // Flatten the de-duplicated dates again, this time recording each position in `idx`.
    { "$unwind": {
            "path": "$dates",
            "includeArrayIndex": "idx"
    } },
    {
        // Use `idx` to pick the matching element out of each parallel array.
        "$project": {
            "_id": 0,
            "dates": 1,
            "numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
            "goals": { "$arrayElemAt": ["$goals", "$idx"] },
            "durations": { "$arrayElemAt": ["$durations", "$idx"] }
        }
    }
])

或使用 $setUnion 消除重复项:

// Alternative without $group: de-duplicate `dates` in place with $setUnion,
// then pair each remaining date with the values at the same index.
db.getCollection('yourCollection').aggregate([
    {
        // The union of an array with itself yields its distinct elements.
        // NOTE(review): $setUnion does not guarantee element order, so positions
        // in the resulting `dates` may not line up with the original arrays — verify.
        "$project": {
            "_id": 0,
            "dates": { "$setUnion": ["$dates", "$dates"] },
            "numbers": 1,
            "goals": 1,
            "durations": 1
        }
    },
    {
        // Emit one document per distinct date; `idx` receives its array index.
        "$unwind": {
            "path": "$dates",
            "includeArrayIndex": "idx"
        }
    },
    {
        // Expose the index as `dateIndex` and use it to pick the matching
        // element out of each parallel array.
        "$project": {
            "_id": 0,
            "dates": 1,
            "dateIndex": "$idx",
            "numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
            "goals": { "$arrayElemAt": ["$goals", "$idx"] },
            "durations": { "$arrayElemAt": ["$durations", "$idx"] }
        }
    }
])