是否可以对嵌套在数组中的字典执行$lookup?

时间:2019-05-06 22:17:01

标签: mongodb pymongo

假设我具有以下文档模型。

{"emb": [{"emb_a": a1, "emb_b": b1}, {"emb_a": a2, "emb_b": b2}]}

在此结构中,a1,b1,a2,b2都代表不同的ObjectId。

目标是aggregate查询结果,以便将所有结果加载到内存中。

from pymongo import MongoClient
from bson import ObjectId
from pprint import pprint


class Config:
    """Connection settings read by the rest of this script."""

    # Name of the database the script opens on the client.
    DATABASE = "test_db"
    # Connection string passed to MongoClient.
    DATABASE_URI = "mongodb://localhost:27017/test"


# Open a client for the configured URI and select the scratch database.
# NOTE: MongoClient connects lazily, so the "Connected" message below only
# means the client object was created, not that the server was reached.
print(f"Connecting to: [{Config.DATABASE}]...")
client = MongoClient(Config.DATABASE_URI)
db = client[Config.DATABASE]
print(f"Connected: [{Config.DATABASE}]...")


# Seed the two referenced collections and keep each new document's _id so
# the embedded documents can point at them.
# insert_one() is used instead of Collection.insert(), which was deprecated
# in PyMongo 3.0 and removed in PyMongo 4.0; .inserted_id is the same value
# insert() used to return for a single document.
a1 = db.a.insert_one({"a": 1}).inserted_id
a2 = db.a.insert_one({"a": 2}).inserted_id

b1 = db.b.insert_one({"b": 1}).inserted_id
b2 = db.b.insert_one({"b": 2}).inserted_id


def generate_doc():
    """Return one test document whose "emb" array references a/b documents.

    Each array element pairs an _id from collection "a" with an _id from
    collection "b", using the module-level a1/b1 and a2/b2 values.
    """
    reference_pairs = ((a1, b1), (a2, b2))
    embedded = [
        {"emb_a": ref_a, "emb_b": ref_b} for ref_a, ref_b in reference_pairs
    ]
    return {"emb": embedded}


# INSERT A BUNCH OF DOCUMENTS
# Five identical documents, each generated by a separate generate_doc() call.
seed_docs = [generate_doc() for _ in range(5)]
db.test_collection.insert_many(seed_docs)

# AGGREGATION PIPELINE
# One $lookup per referenced collection. Both stages read the ids out of the
# "emb" array and write their result to a path underneath "emb".
lookup_a = {
    "$lookup": {
        "from": "a",
        "localField": "emb.emb_a",
        "foreignField": "_id",
        "as": "emb.emb_a",
    }
}
lookup_b = {
    "$lookup": {
        "from": "b",
        "localField": "emb.emb_b",
        "foreignField": "_id",
        "as": "emb.emb_b",
    }
}

# Materialise the cursor so every result is in memory before printing.
results = list(db.test_collection.aggregate([lookup_a, lookup_b]))
pprint(results)


# Remove the scratch database once the results have been printed.
client.drop_database(Config.DATABASE)

下面是该脚本的结果...

{'_id': ObjectId('5cd0af6deb62e064cd99bae4'),
  'emb': {'emb_a': [{'_id': ObjectId('5cd0af6deb62e064cd99badc'), 'a': 1},
                    {'_id': ObjectId('5cd0af6deb62e064cd99badd'), 'a': 2}],
          'emb_b': []}}

但是我想得到的结果是...

{
    "emb": [
        {"emb_a": {'_id': ObjectId('5cd0af6deb62e064cd99badc'), 'a': 1}, "emb_b": {'_id': ObjectId('5cd0af6deb62e064cd99badd'), 'b': 1}},
        {"emb_a": {'_id': ObjectId('5cd0af6deb62e064cd99bade'), 'a': 2}, "emb_b": {'_id': ObjectId('5cd0af6deb62e064cd99badf'), 'b': 2}}
    ]
}

可以这样做吗?

1 个答案:

答案 0 :(得分:1)

查询不起作用,因为您正在用as子句覆盖emb属性。试试这个:

db.test_collection.aggregate([
    // Stage 1: resolve the emb.emb_a ids against collection "a" and store
    // the matches in a separate top-level field, so "emb" is not overwritten.
    {
        "$lookup": {
            "from": "a",
            "localField": "emb.emb_a",
            "foreignField": "_id",
            "as": "emb_a"
        }
    },
    // Stage 2: same for the emb.emb_b ids against collection "b".
    {
        "$lookup": {
            "from": "b",
            "localField": "emb.emb_b",
            "foreignField": "_id",
            "as": "emb_b"
        }
    },
    // Stage 3: drop _id and the original "emb" array, which is no longer
    // needed once both lookups have run.
    {
        "$project": {
            "_id": 0,
            "emb": 0
        }
    },
    // Stage 4: rebuild "emb" from the two looked-up fields.
    {
        "$replaceRoot": {
            "newRoot": {
                "emb": {
                    "emb_a": "$emb_a",
                    "emb_b": "$emb_b"
                }
            }
        }
    }
]);

在这里,您同时保留了emb以及查找得到的嵌套文档emb_a和emb_b。在第三个流水线阶段,我删除了emb(通过投影),因为我不再需要它来进行查找,最后我使用先前计算的emb_a和emb_b来重建它。