使用 $graphLookup 在 MongoDB 中进行分层查询

时间:2018-09-20 22:01:41

标签: mongodb

我有一个拥有50万条记录的员工集合。每条记录将具有以下详细信息。

mongo文档如下。

{
  "_id": "234463456453643563456",
  "name": "Mike",
  "empId": "10",
  "managerId": "8",
  "projects" : [ "123", "456", "789"]
}
  1. 当我提供任意 empId 时,应返回该经理向下直到最底层的完整层级结构,并支持以下过滤条件。
  

    a. 按位置过滤
    b. 按项目过滤

结果应该像

      10     ->>> Manager
      /\
     /  \
    8    6  ---->> 8 & 6 reporting to manager 10
    /\    /\
   /  \  /  \
  4    5 2   1  ---->> 4 & 5 reporting to manager 8 ...

如果有人能帮我得到带层级(level)的分层结果,将不胜感激。

我无法获得预期的结果。

样本数据:-

// Seed the "employees" collection with a three-level reporting tree:
//   10 manages 8 and 6; 8 manages 4 and 5; 6 manages 2 and 1.
// managerId holds the empId of the employee's manager. In this sample data
// _id is set to the same string as empId.
// Employee 10 is the root of the sample: its managerId ("15") is not inserted here.
db.getCollection("employees").insert({"_id":"10","empId": "10","name":"Employee10","managerId":"15" });

// Direct reports of employee 10.
db.getCollection("employees").insert({"_id":"8","empId": "8","name":"Employee8","managerId":"10" });

db.getCollection("employees").insert({"_id":"6","empId": "6","name":"Employee6","managerId":"10" });

// Direct reports of employee 8.
db.getCollection("employees").insert({"_id":"4","empId": "4","name":"Employee4","managerId":"8" });

db.getCollection("employees").insert({"_id":"5","empId": "5","name":"Employee5","managerId":"8" });

// Direct reports of employee 6.
db.getCollection("employees").insert({"_id":"2","empId": "2","name":"Employee2","managerId":"6" });

db.getCollection("employees").insert({"_id":"1","empId": "1","name":"Employee1","managerId":"6" });

查询:-

// Collect every employee below manager "10" into one flat "reportees" array,
// tagging each entry with its depth in the hierarchy.
db.getCollection('employees').aggregate([
{
    // Start from the single manager whose subtree we want.
    $match: {
        empId : "10"
    }
},
{
   // Recursively find employees whose managerId equals an empId already reached,
   // beginning with the matched manager's own empId.
   $graphLookup: {
      from: "employees",
      startWith: "$empId",
      connectFromField: "empId",
      connectToField: "managerId",
      as: "reportees",
      maxDepth: 4,          // cap on the recursion depth
      depthField: "level"   // depth of each hit; direct reports get 0
   }
},
{
   // Keep only the fields of interest on the root and on each reportee.
   $project: {
     "empId":1,
     "managerId":1,
     "reportees.empId":1,
     "reportees.name":1,
     "reportees.managerId":1,
     "reportees.level":1
   }
}
]);

实际结果:-

{ 
    "_id" : "10", 
    "empId" : "10", 
    "managerId" : "15", 
    "reportees" : [
        {
            "empId" : "1", 
            "name" : "Employee1", 
            "managerId" : "6", 
            "level" : NumberLong(1)
        }, 
        {
            "empId" : "4", 
            "name" : "Employee4", 
            "managerId" : "8", 
            "level" : NumberLong(1)
        }, 
        {
            "empId" : "2", 
            "name" : "Employee2", 
            "managerId" : "6", 
            "level" : NumberLong(1)
        }, 
        {
            "empId" : "5", 
            "name" : "Employee5", 
            "managerId" : "8", 
            "level" : NumberLong(1)
        }, 
        {
            "empId" : "6", 
            "name" : "Employee6", 
            "managerId" : "10", 
            "level" : NumberLong(0)
        }, 
        {
            "empId" : "8", 
            "name" : "Employee8", 
            "managerId" : "10", 
            "level" : NumberLong(0)
        }
    ]
}

预期结果:-

{ 
    "_id" : "10", 
    "empId" : "10", 
    "managerId" : "15", 
    "reportees" : [
        {
            "empId" : "6", 
            "name" : "Employee6", 
            "managerId" : "10", 
            "level" : NumberLong(0),
            "reportees" : [
              {
               "empId" : "1", 
               "name" : "Employee1", 
               "managerId" : "6", 
               "level" : NumberLong(1)
              }, 
              {
               "empId" : "2", 
               "name" : "Employee2", 
               "managerId" : "6", 
               "level" : NumberLong(1)
              }
            ]
        }, 
        {
            "empId" : "8", 
            "name" : "Employee8", 
            "managerId" : "10", 
            "level" : NumberLong(0),
            "reportees" : [
              {
                "empId" : "5", 
                "name" : "Employee5", 
                "managerId" : "8", 
                "level" : NumberLong(1)
              },
              {
                "empId" : "4", 
                "name" : "Employee4", 
                "managerId" : "8", 
                "level" : NumberLong(1)
              }
             ]
        }
    ]
}

问题:-

  1. 是否可以通过 $graphLookup 获得预期的输出?
  2. 此外,是否有可能在最高级别以及每个子级别获得计数?
  3. 如何在各个级别应用投影?
  4. 如何在此之上应用过滤器?

3 个答案:

答案 0 :(得分:3)

这正是 $graphLookup 的用途(至少遍历部分是如此)。对于过滤部分,您可以根据具体想要如何过滤,简单地使用 $filter 或 $match。

看看这个查询的结果:

// For each employee, walk UP the reporting chain: start from the employee's
// managerId and keep following managerId -> empId, collecting the matched
// documents in "managers".
// There is no $match stage, so this runs for every document in the collection.
db.employees.aggregate({
    $graphLookup: {
      from: "employees",
      startWith: "$managerId",
      connectFromField: "managerId",
      connectToField: "empId",
      as: "managers",
    }
})

根据您的澄清更新1:

为了获得您想要的层次结构,可以执行以下操作。但是,我不会将其称为一个漂亮的解决方案,因为它需要您静态定义要下降的级别数并重复部分,但这确实可以完成您的示例。不确定,是否可以轻松扩展到更多级别。我个人认为,客户端循环解决方案将更适合此类工作:

// Build a two-level nested hierarchy under manager "10" by running one
// non-recursive $graphLookup per level. The number of levels is hard-coded:
// each extra level needs another copy of the "level 1" section.
db.employees.aggregate([
{
    // Select the root manager.
    $match: {
        empId : "10"
    }
},
// level 0
{
   // maxDepth: 0 disables recursion, so only direct reports of the root are returned.
   $graphLookup: {
      from: "employees",
      startWith: "$empId",
      connectFromField: "empId",
      connectToField: "managerId",
      as: "reportees",
      maxDepth: 0
   }
},
{
    // One output document per direct report, so the next lookup can start
    // from each report individually.
    $unwind: "$reportees" // flatten
},
{
    $addFields: {
        "reportees.level": 0 // add level field
    }
},
// level 1
{
   // Look up the direct reports of each level-0 reportee and nest them
   // under that reportee as "reportees.reportees".
   $graphLookup: {
      from: "employees",
      startWith: "$reportees.empId",
      connectFromField: "reportees.empId",
      connectToField: "managerId",
      as: "reportees.reportees",
      maxDepth: 0
   }
},
{
    $group: { // group previously flattened documents back together
        _id: "$_id",
        empId: { $first: "$empId" },
        name: { $first: "$name" },
        managerId: { $first: "$managerId" },
        reportees: { $push: "$reportees" },
    }
},
{
    $addFields: {
        "reportees.reportees.level": 1 // add level field
    }
}
])

更新2:

以下查询从输出结构的角度将您带到您想要的位置(我省略了level字段,但是应该很容易添加)。但是,它并不是特别漂亮,并且再次要求您预先定义最大的组织深度。

// Build a nested reportees tree under manager "10".
// 1. $graphLookup gathers every employee below the manager into a flat array.
// 2. The manager itself is appended to that array.
// 3. A $project stage attaches to each employee the array entries that report
//    to them, then keeps only employees that have reportees (i.e. managers).
//    Each repetition of that stage nests the tree one level deeper, so the
//    maximum organisational depth must be known up front.
db.employees.aggregate([
{
    $match: {
        empId : "10"
    }
},
{
   $graphLookup: { // get the relevant documents out of our universe of employees
      from: "employees",
      startWith: "$empId",
      connectFromField: "empId",
      connectToField: "managerId",
      as: "reportees"
   }
},
{
    $project: { // add the employee we are interested in into the array of employees we're looking at
        _id: 0,
        reportees: { $concatArrays: [ "$reportees", [ { _id: "$_id", empId: "$empId", name: "$name", managerId: "$managerId" } ] ] }
    }
},
{
    $project: {
        reportees: {
            $let: {
                vars: {
                    managers: {
                        $filter: { // remove employees with no reportees so keep managers only
                            input: {
                                $map: {
                                    input: "$reportees",
                                    as: "this",
                                    in: {
                                        $mergeObjects: [
                                            "$$this",
                                            {
                                                reportees: {
                                                    $filter: { // extract reportees from list of employees
                                                        input: "$reportees",
                                                        as: "that",
                                                        cond: {
                                                            // managerId refers to the manager's empId (the same
                                                            // key $graphLookup connects on), not to _id; the two
                                                            // only coincide in the sample data.
                                                            $eq: [ "$$this.empId", "$$that.managerId" ]
                                                        }
                                                    }
                                                }
                                            }
                                        ]
                                    }
                                }
                            },
                            as: "this",
                            cond: { $ne: [ "$$this.reportees", [] ] }
                        }
                    }
                },
                in: {
                    $cond: [ // this is to break the processing once we have reached a top level manager
                        { $eq: [ "$$managers", [] ] },
                        "$reportees",
                        "$$managers"
                    ]
                }
            }
        }
    }
},
// second level: exactly identical to the previous stage
// third level: exactly identical to the previous stage
// basically, from here onwards you would need to repeat an exact copy of the previous stage to go one level deeper
]);

答案 1 :(得分:3)

我认为有了 level 字段之后,我们可以使用 $reduce 从数组构建层次结构。为此,我们需要在 $graphLookup 之后让 reportees 按 level 降序排列。不幸的是,目前唯一的办法是使用 $unwind + $sort + $group,这会使聚合管道变得很长。

然后,我们可以使用$reduce处理该有序数组。在每个步骤中,我们只需要向结果集中添加一个雇员,包括上一级的reportees。另外,我们需要检测在处理过程中level何时发生变化,并在这种情况下重新排列辅助数组。

在这种情况下,$addFields 只是替换现有的 reportees 字段。$concatArrays 允许我们将当前员工($$this)追加到结果中。使用 $filter,我们可以从更低一层的员工中取出其 reportees。

// Build a nested reportees tree under manager "10" for any number of levels
// (bounded by maxDepth). The flat $graphLookup result is ordered deepest level
// first, then folded with $reduce so each employee picks up its own reportees
// from the level processed just before it.
db.getCollection('employees').aggregate([
    {
        $match: {
            empId : "10"
        }
    },
    {
        // Flat list of everyone below the manager; "level" is the depth of each hit.
        $graphLookup: {
            from: "employees",
            startWith: "$empId",
            connectFromField: "empId",
            connectToField: "managerId",
            as: "reportees",
            maxDepth: 4,
            depthField: "level"
        }
    },
    {
        $project: {
            "empId":1,
            "managerId":1,
            "reportees.empId":1,
            "reportees.name":1,
            "reportees.managerId":1,
            "reportees.level":1
        }
    },
    // $unwind + $sort + $group: re-create the reportees array ordered by level descending.
    {
        $unwind: "$reportees"
    },
    {
        $sort: { "reportees.level": -1 }
    },
    {
        $group: {
            _id: "$_id",
            empId: { $first: "$empId" },
            managerId: { $first: "$managerId" },
            reportees: { $push: "$reportees" }
        }
    },
    {
        $addFields: {
            reportees: {
                $reduce: {
                    input: "$reportees",
                    // Accumulator: the level being processed, the employees built so far
                    // at that level, and the finished employees of the level handled before it.
                    initialValue: {
                        currentLevel: -1,
                        currentLevelEmployees: [],
                        previousLevelEmployees: []
                    },
                    in: {
                        $let: {
                            vars: {
                                // Same level as the last element: keep the previous-level list.
                                // Level changed: the list just completed becomes the previous level.
                                prev: { 
                                    $cond: [ 
                                        { $eq: [ "$$value.currentLevel", "$$this.level" ] }, 
                                        "$$value.previousLevelEmployees", 
                                        "$$value.currentLevelEmployees" 
                                    ] 
                                },
                                // Same level: keep appending to the current list.
                                // Level changed: start a fresh list for the new level.
                                current: { 
                                    $cond: [ 
                                        { $eq: [ "$$value.currentLevel", "$$this.level" ] }, 
                                        "$$value.currentLevelEmployees", 
                                        [] 
                                    ] 
                                }
                            },
                            in: {
                                currentLevel: "$$this.level",
                                previousLevelEmployees: "$$prev",
                                currentLevelEmployees: {
                                    $concatArrays: [
                                        "$$current", 
                                        [
                                            // Current employee plus the entries of the previous list
                                            // whose managerId is this employee's empId.
                                            { $mergeObjects: [ 
                                                "$$this", 
                                                { reportees: { $filter: { input: "$$prev", as: "e", cond: { $eq: [ "$$e.managerId", "$$this.empId"  ] } } } } 
                                            ] }
                                        ]
                                    ]
                                }
                            }
                        }
                    }
                }
            }
        }
    },
    {
        // Unwrap the accumulator: the last level processed is level 0, i.e. the
        // manager's direct reports with their subtrees attached.
        $addFields: { reportees: "$reportees.currentLevelEmployees" }
    }
]).pretty()

上述解决方案应适用于多个级别。输出:

{
    "_id" : "10",
    "empId" : "10",
    "managerId" : "15",
    "reportees" : [
        {
            "empId" : "6",
            "name" : "Employee6",
            "managerId" : "10",
            "level" : NumberLong(0),
            "reportees" : [
                {
                        "empId" : "1",
                        "name" : "Employee1",
                        "managerId" : "6",
                        "level" : NumberLong(1),
                        "reportees" : [ ]
                },
                {
                        "empId" : "2",
                        "name" : "Employee2",
                        "managerId" : "6",
                        "level" : NumberLong(1),
                        "reportees" : [ ]
                }
            ]
        },
        {
            "empId" : "8",
            "name" : "Employee8",
            "managerId" : "10",
            "level" : NumberLong(0),
            "reportees" : [
                {
                    "empId" : "5",
                    "name" : "Employee5",
                    "managerId" : "8",
                    "level" : NumberLong(1),
                    "reportees" : [ ]
                },
                {
                    "empId" : "4",
                    "name" : "Employee4",
                    "managerId" : "8",
                    "level" : NumberLong(1),
                    "reportees" : [ ]
                }
            ]
        }
    ]
}

答案 2 :(得分:0)

$graphLookup 的官方文档或多或少会有所帮助。

https://docs.mongodb.com/manual/reference/operator/aggregation/graphLookup/

只是一种提醒。