MongoDB - 将结果聚合并连接到组

时间:2016-06-29 08:00:46

标签: mongodb mongodb-query aggregation-framework

我有一个名为 items 的集合,内容如下:

[
    { name: 'item1', description: 'description #1', categories: 'cat_A; cat_B'},
    { name: 'item2', description: 'description #2', categories: 'cat_B'},
    { name: 'item3', description: 'description #3', categories: 'cat_C; cat_B'},
    { name: 'item4', description: 'description #4', categories: 'cat_B; cat_A'},
    { name: 'item5', description: 'description #5', categories: 'cat_B'},
    { name: 'item6', description: 'description #6', categories: 'cat_D'}
]

我想按类别查找和过滤结果。我创建了mongo查询:

// Attempted query: keep the items whose raw `categories` string matches
// cat_a or cat_b (case-insensitive), then bucket them by the complete,
// unsplit `categories` string -- so "cat_A; cat_B" and "cat_B; cat_A"
// land in different groups.
db.getCollection('items').aggregate([
    { $match: { categories: { $in: [/cat_a/i, /cat_b/i] } } },
    {
        $group: {
            _id: "$categories",
            items: {
                $push: { name: "$name", description: '$description' }
            }
        }
    }
])

它返回的结果是:

result : [
    {
        "_id" : "cat_C; cat_B",
        "items" : [
            {
                "name" : "item3",
                "description" : "description #3"
            }
        ]
    }, {
        "_id" : "cat_B; cat_A",
        "items" : [
            {
                "name" : "item4",
                "description" : "description #4"
            }
        ]
    }, {
        "_id" : "cat_B",
        "items" : [
            {
                "name" : "item2",
                "description" : "description #2"
            },
            {
                "name" : "item5",
                "description" : "description #5"
            }
        ]
    }, {
        "_id" : "cat_A; cat_B",
        "items" : [
            {
                "name" : "item1",
                "description" : "description #1"
            }
        ]
    }
]

我想要实现的目标是:

result : [
    {
        "_id" : "cat_A",
        "items" : [
            {
                "name" : "item1",
                "description" : "description #1"
            },
            {
                "name" : "item4",
                "description" : "description #4"
            }
        ]
    }, {
        "_id" : "cat_B",
        "items" : [
            {
                "name" : "item1",
                "description" : "description #1"
            },
            {
                "name" : "item2",
                "description" : "description #2"
            },
            {
                "name" : "item3",
                "description" : "description #3"
            },
            {
                "name" : "item4",
                "description" : "description #4"
            },
            {
                "name" : "item5",
                "description" : "description #5"
            }
        ]
    }
]

纯mongo查询可以吗?

1 个答案:

答案 0 :(得分:2)

使用聚合框架时,您需要一种把 categories 字符串拆分成多个独立值的机制,但这样的运算符目前还不存在;最接近的是 $substr 运算符,但它要求事先知道子字符串的起始索引以及要提取的字符数,而这在这里几乎无法确定。因此建议把类别存储为由各个类别名称组成的数组。

-- 编辑 --

如果您希望保留categories字段,那么我建议您创建一个存储类别列表的额外字段,然后就可以在该字段上运行聚合管道以获得所需的结果。

让我们用一个例子来演示上面的方法:

更改架构

a)如果使用MongoDB v3.0或更低版本:

// Backfill a `categoriesList` array on every document of `items` by splitting
// the semicolon-separated `categories` string. Updates are queued on an
// ordered bulk operation and sent to the server in batches of 1000.
var bulk = db.items.initializeOrderedBulkOp(),
    counter = 0;

// cursor.forEach() expects a callback function that receives each document.
db.items.find({}).forEach(function(doc) {
    // Trim leading/trailing whitespace, then split on ";" together with any
    // whitespace around it: 'cat_A; cat_B' -> ['cat_A', 'cat_B'].
    var categoriesList = doc.categories.replace(/^\s+|\s+$/g,"").split(/\s*;\s*/);
    bulk.find({ "_id": doc._id })
        .updateOne({
            "$set": { "categoriesList": categoriesList }
        });
    counter++;

    if (counter % 1000 === 0) {
        // Send the full batch and start a fresh bulk operation for the next one.
        bulk.execute();
        bulk = db.items.initializeOrderedBulkOp();
    }
});

// Flush the final partial batch. Skipped when the count is an exact multiple
// of 1000 (including 0), because nothing is queued in that case.
if (counter % 1000 !== 0) bulk.execute();

b)如果使用MongoDB v3.2.X或更高版本:

// Same backfill using bulkWrite(): collect one updateOne operation per
// document and send them to the server 1000 at a time.
var pendingOps = [];

db.items.find({}).forEach(function (item) {
    // Trim the raw string, then split on ";" with optional surrounding whitespace.
    var trimmed = item.categories.replace(/^\s+|\s+$/g,"");

    pendingOps.push({
        "updateOne": {
            "filter": { "_id": item._id },
            "update": { "$set": { "categoriesList": trimmed.split(/\s*;\s*/) } }
        }
    });

    // Keep accumulating until a full batch of 1000 is ready.
    if (pendingOps.length < 1000) return;

    db.items.bulkWrite(pendingOps);
    pendingOps = [];
});

// Send whatever remains from the last, partial batch.
if (pendingOps.length !== 0) db.items.bulkWrite(pendingOps);

在新架构上运行聚合

// Group items under each requested category, using the `categoriesList` array.
db.items.aggregate([
    // Pre-filter: keep only items tagged with at least one requested category.
    { "$match": { "categoriesList": { "$in": ['cat_A', 'cat_B'] } } },
    // Emit one document per (item, category) pair.
    { "$unwind": "$categoriesList" },
    // $unwind also emits the other categories of the matched items
    // (e.g. cat_C for item3), so filter again to avoid unwanted groups.
    { "$match": { "categoriesList": { "$in": ['cat_A', 'cat_B'] } } },
    {
        "$group": {
            "_id": "$categoriesList",
            "items": { "$push":  { "name": "$name", "description": '$description' } }
        }
    }
])