优化MongoDB聚合管道($group、$lookup、$match)

时间:2020-03-01 06:35:33

标签: python mongodb aggregation-framework pymongo

我是NoSQL数据库的新手,我选择MongoDB作为我的第一个NoSQL数据库。我创建了一个聚合管道来显示所需的数据,这是我的文档示例:

Users Collection中的文档样本

{
    "_id": 9,
    "name": "Sample Name",
    "email": "email@example.com",
    "password": "password hash"
}

Pages Collection中的文档样本(这一点并不重要)

{
    "_id": 42,
    "name": "Product Name",
    "description": "Product Description",
    "user_id": 8,
    "rating_categories": [{
        "_id": 114,
        "name": "Build Quality"
    }, {
        "_id": 115,
        "name": "Price"
    }, {
        "_id": 116,
        "name": "Feature"
    }, {
        "_id": 117,
        "name": "Comfort"
    }, {
        "_id": 118,
        "name": "Switch"
    }]
}

Reviews Collection中的文档样本

{
    "_id": 10,
    "page_id": 42, #ID reference from pages collection
    "user_id": 8, #ID reference from users collection
    "review": "The review of the product",
    "ratings": [{
        "_id": 114, #ID Reference from pages collection of what rating category it is
        "rating": 5
    }, {
        "_id": 115,
        "rating":4
    }, {
        "_id": 116,
        "rating": 5
    }, {
        "_id": 117,
        "rating": 3
    }, {
        "_id": 118,
        "rating": 4
    }],
    "created": "1582825968963", #Date Object
    "votes": {
        "downvotes": [],
        "upvotes": [9] #IDs of users who upvote this review
    }
}

我想通过page_id获得评论,可以通过我制作的API进行访问,这是聚合的预期结果:

[
  {
    "_id": 10, #ID of the review
    "created": "Thu, 27 Feb 2020 17:52:48 GMT",
    "downvote_count": 0, #Length of votes.downvotes from reviews collection
    "page_id": 42, #Page ID
    "ratings": [ #Stores what rate at what rating category id
      {
        "_id": 114,
        "rating": 5
      },
      {
        "_id": 115,
        "rating": 4
      },
      {
        "_id": 116,
        "rating": 5
      },
      {
        "_id": 117,
        "rating": 3
      },
      {
        "_id": 118,
        "rating": 4
      }
    ],
    "review": "The Review",
    "upvote_count": 0, #Length of votes.upvotes from reviews collection
    "user": { #User who reviewed
      "_id": 8, #User ID
      "downvote_count": 0, #How many downvotes this user receive from all of the user's reviews
      "name": "Sample Name", #Username
      "review_count": 1, #How many reviews the user made
      "upvote_count": 1 #How many upvotes this user receive from all of the user's reviews
    },
    "vote_state": 0 #Determining vote state from the user (who requested to the API) for this review, 0 for no vote, -1 for downvote, 1 for upvote
  },
  ...
]

以下是我为得到上述结果、在 reviews 集合上编写的聚合管道:

# Request parameters: the user calling the API (used for vote_state) and the
# page whose reviews are being listed.
user_id = 9
page_id = 42

# Aggregation pipeline for the reviews collection. Produces one document per
# (user, page) pair, carrying that user's most recent review of the page, the
# reviewer's profile with aggregate statistics, and the requesting user's
# vote state for the review.
pipeline = [
    # Filter first: every later stage then only processes reviews of this
    # page instead of the whole collection, and MongoDB can use an index on
    # page_id if one exists.
    {"$match": {"page_id": page_id}},
    # $last is only meaningful on ordered input, so sort oldest -> newest to
    # make "$last" below pick each user's latest review.
    {"$sort": {"created": 1}},
    {"$group": {
        "_id": {"user_id": "$user_id", "page_id": "$page_id"},
        "review_id": {"$last": "$_id"},
        "page_id": {"$last": "$page_id"},
        "user_id": {"$last": "$user_id"},
        "ratings": {"$last": "$ratings"},
        "review": {"$last": "$review"},
        "created": {"$last": "$created"},
        "votes": {"$last": "$votes"},
        # votes.upvotes / votes.downvotes may be absent on reviews that were
        # never voted on; default to an empty array so $size yields 0.
        # NOTE(review): these sum over all of the user's reviews of this
        # page, not only the latest one -- confirm that is intended.
        "upvote_count": {
            "$sum": {"$size": {"$ifNull": ["$votes.upvotes", []]}}
        },
        "downvote_count": {
            "$sum": {"$size": {"$ifNull": ["$votes.downvotes", []]}}
        },
    }},
    # Attach the reviewer's user document.
    {"$lookup": {
        "from": "users",
        "localField": "user_id",
        "foreignField": "_id",
        "as": "user",
    }},
    {"$unwind": "$user"},
    # Attach all reviews written by that user (needed for the per-user
    # statistics computed below; removed again in $project).
    {"$lookup": {
        "from": "reviews",
        "localField": "user._id",
        "foreignField": "user_id",
        "as": "user.reviews",
    }},
    {"$addFields": {
        "_id": "$review_id",
        "user.review_count": {"$size": "$user.reviews"},
        # Total votes received across all of the user's reviews.
        "user.upvote_count": {"$sum": {
            "$map": {
                "input": "$user.reviews",
                "in": {"$size": {"$ifNull": ["$$this.votes.upvotes", []]}},
            }
        }},
        "user.downvote_count": {"$sum": {
            "$map": {
                "input": "$user.reviews",
                "in": {"$size": {"$ifNull": ["$$this.votes.downvotes", []]}},
            }
        }},
        # 1 if the requesting user upvoted this review, -1 if downvoted,
        # 0 otherwise. Defaulting to [] keeps $in from ever receiving a
        # non-array, without relying on $and evaluation order.
        "vote_state": {"$switch": {
            "branches": [
                {
                    "case": {"$in": [
                        user_id, {"$ifNull": ["$votes.upvotes", []]}
                    ]},
                    "then": 1,
                },
                {
                    "case": {"$in": [
                        user_id, {"$ifNull": ["$votes.downvotes", []]}
                    ]},
                    "then": -1,
                },
            ],
            "default": 0,
        }},
    }},
    # Strip sensitive and intermediate fields from the output.
    {"$project": {
        "user.password": 0,
        "user.email": 0,
        "user_id": 0,
        "review_id": 0,
        "votes": 0,
        "user.reviews": 0,
    }},
    # Newest reviews first in the final result.
    {"$sort": {"created": -1}},
]

注意:用户可以对同一page_id进行多条评论,但只会显示最新的评论

我正在使用pymongo btw,这就是为什么运算符带有引号

我的问题是:

  1. 是否有空间优化我的聚合管道?

  2. 为了获取上述数据,执行多个小聚合是一种好的做法,还是最好执行一次大聚合(即用尽可能少的聚合次数获取我想要的数据)?

  3. 如您所见,每次我想访问 reviews 集合的文档中的 votes.upvotes 或 votes.downvotes 时,我都会用 $ifNull 检查该字段是否为空。这是因为 votes.upvotes 和 votes.downvotes 字段不是在用户发表评论时创建的,而是在有用户对该评论投票时才创建。我是否应该在用户发表评论时就创建空的 votes.upvotes 和 votes.downvotes 字段,并去掉 $ifNull?这样会提高聚合的性能吗?

谢谢

1 个答案:

答案 0 :(得分:1)

检查此聚合是否具有更好的性能。

如果还没有这些索引,请先创建:

# Ascending single-field index on reviews.page_id (pymongo syntax); an index
# on this field can serve a $match stage that filters reviews by page_id.
db.reviews.create_index([("page_id", 1)])

注意:如果能避免再次对 reviews 集合执行 $lookup,还可以进一步提高性能


// Lists the reviews of one page: one result per reviewer, carrying that
// reviewer's latest review, the reviewer's profile with aggregate statistics,
// and the requesting user's vote state.
// Expects `page_id` (page to list) and `user_id` (requesting user) in scope.
db.reviews.aggregate([
  // Keep only the reviews of the requested page.
  {
    $match: {
      page_id: page_id
    }
  },
  // Newest first, so that data[0] in each group below is the reviewer's
  // latest review rather than whichever document happened to arrive first.
  // NOTE(review): if `created` is stored as an epoch-millisecond string this
  // relies on all values having the same number of digits -- confirm.
  {
    $sort: {
      created: -1
    }
  },
  // Carry the requesting user's id on every document so later stages can
  // read it from the group key.
  {
    $addFields: {
      request_user_id: user_id
    }
  },
  // One group per (page, reviewer); keep the full review documents.
  {
    $group: {
      _id: {
        page_id: "$page_id",
        user_id: "$user_id",
        request_user_id: "$request_user_id"
      },
      data: {
        $push: "$$ROOT"
      }
    }
  },
  // Fetch the reviewer and compute their statistics inside the sub-pipeline,
  // so only {_id, name, review_count, upvote_count, downvote_count} comes back.
  {
    $lookup: {
      from: "users",
      let: {
        root_user_id: "$_id.user_id"
      },
      pipeline: [
        {
          $match: {
            $expr: {
              $eq: ["$$root_user_id", "$_id"]
            }
          }
        },
        {
          $lookup: {
            from: "reviews",
            let: {
              root_user_id: "$$root_user_id"
            },
            pipeline: [
              {
                $match: {
                  $expr: {
                    $eq: ["$$root_user_id", "$user_id"]
                  }
                }
              },
              {
                $project: {
                  // votes.* is absent on reviews nobody has voted on, and
                  // $size fails on a missing field -- default to [].
                  downvote_count: {
                    $size: { $ifNull: ["$votes.downvotes", []] }
                  },
                  upvote_count: {
                    $size: { $ifNull: ["$votes.upvotes", []] }
                  }
                }
              },
              {
                $group: {
                  _id: null,
                  // Every document reaching this stage already belongs to
                  // the reviewer (see $match above), so just count them.
                  review_count: {
                    $sum: 1
                  },
                  upvote_count: {
                    $sum: "$upvote_count"
                  },
                  downvote_count: {
                    $sum: "$downvote_count"
                  }
                }
              },
              {
                $unset: "_id"
              }
            ],
            as: "stats"
          }
        },
        // Flatten the user and the single stats document into one object.
        {
          $project: {
            tmp: {
              $mergeObjects: [
                {
                  _id: "$_id",
                  name: "$name"
                },
                {
                  $arrayElemAt: ["$stats", 0]
                }
              ]
            }
          }
        },
        {
          $replaceWith: "$tmp"
        }
      ],
      as: "user"
    }
  },
  // Build the output document: group fields, overlaid by the latest review
  // (data[0]), overlaid by the computed fields.
  {
    $addFields: {
      first: {
        $mergeObjects: [
          "$$ROOT",
          {
            $arrayElemAt: ["$data", 0]
          },
          {
            user: {
              $arrayElemAt: ["$user", 0]
            },
            created: {
              $toDate: {
                $toLong: {
                  $arrayElemAt: ["$data.created", 0]
                }
              }
            },
            // "$data.votes.downvotes" resolves to an array containing the
            // downvotes array of each grouped review that has one.
            downvote_count: {
              $reduce: {
                input: "$data.votes.downvotes",
                initialValue: 0,
                in: {
                  $add: [
                    "$$value",
                    {
                      $size: "$$this"
                    }
                  ]
                }
              }
            },
            upvote_count: {
              $reduce: {
                input: "$data.votes.upvotes",
                initialValue: 0,
                in: {
                  $add: [
                    "$$value",
                    {
                      $size: "$$this"
                    }
                  ]
                }
              }
            },
            // 1 if the requesting user upvoted, -1 if downvoted, else 0.
            vote_state: {
              $cond: [
                {
                  $gt: [
                    {
                      $size: {
                        $filter: {
                          input: "$data.votes.upvotes",
                          cond: {
                            $in: ["$_id.request_user_id", "$$this"]
                          }
                        }
                      }
                    },
                    0
                  ]
                },
                1,
                {
                  $cond: [
                    {
                      $gt: [
                        {
                          $size: {
                            $filter: {
                              input: "$data.votes.downvotes",
                              cond: {
                                $in: ["$_id.request_user_id", "$$this"]
                              }
                            }
                          }
                        },
                        0
                      ]
                    },
                    -1,
                    0
                  ]
                }
              ]
            }
          }
        ]
      }
    }
  },
  // Drop intermediate fields, then promote `first` to the document root.
  {
    $unset: [
      "first.data",
      "first.votes",
      "first.user_id",
      "first.request_user_id"
    ]
  },
  {
    $replaceWith: "$first"
  },
  // Newest reviews first in the final result.
  {
    $sort: {
      created: -1
    }
  }
])

MongoPlayground