聚合两个集合,然后进行分组($group)/投影($project)

时间:2019-10-07 13:26:52

标签: mongodb aggregation-framework aggregate

我继承了一些旧代码,需要据此生成一份文档,汇总已给出的评分和下载次数。这些数据分布在两个集合中,我需要为每个文档合并出一个结果。同时,我应该既能拉取所有用户的数据,也能只拉取某些用户的数据。

我目前的查询非常混乱且未经优化,而且查询本身无法按允许的用户进行过滤。

const cardQuery = [];
      const toolQuery = [];
      const users = [];
      // Honour req.query.org only when the requester belongs to that organization
      // or has the SUPER role; otherwise no owner filter is added at all.
      if (
        req.query.org &&
        (req.user.organization === req.query.org || req.user.roles.includes('SUPER'))
      ) {
        // Collect the ids of every user in the requested organization.
        const orgUsers = await User.find({organization: req.query.org})
            .select('_id')
            .lean();
        for (const orgUser of orgUsers) {
          users.push(orgUser._id);
        }
        // Restrict the card pipeline to documents owned by those users.
        cardQuery.push({$match: {owner: {$in: users}}});
      }
toolQuery.push(
          // Stage 1: keep only the fields the later stages read.
          {
            $project: {
              _id: '$_id',
              title: '$title',
              ratings: '$ratings',
              theme: '$theme',
            },
          },
          // Stage 2: one document per rating; a tool with no ratings is kept as a
          // single document because of preserveNullAndEmptyArrays.
          {$unwind: {path: '$ratings', preserveNullAndEmptyArrays: true}},
          // Stage 3: flatten the rating; a missing score is treated as 0.
          {
            $project: {
              _id: '$_id',
              title: '$title',
              userId: '$ratings.user',
              rate: {$ifNull: ['$ratings.score', 0]},
              theme: '$theme',
            },
          },
          // Stage 4: regroup per tool, counting and averaging the rates and
          // collecting the individual {rate, userId} pairs.
          {
            $group: {
              _id: '$_id',
              count: {$sum: 1},
              avg: {$avg: '$rate'},
              ratings: {$push: {rate: '$rate', userId: '$userId'}},
              theme: {$first: '$theme'},
              title: {$first: '$title'},
            },
          },
          // Stage 5: default avg to 0, and report a count of 0 whenever avg <= 0
          // (this covers the placeholder document of a tool without ratings).
          {
            $project: {
              _id: '$_id',
              title: '$title',
              ratings: '$ratings',
              theme: '$theme',
              avg: {$ifNull: ['$avg', 0]},
              count: {$cond: {if: {$lte: ['$avg', 0]}, then: 0, else: '$count'}},
            },
          },
          // Stage 6: highest average first, ties broken by rating count.
          {$sort: {avg: -1, count: -1}}
      );
      const toolData = await Tool.aggregate(toolQuery);
      // Count downloads per tool across every progress entry of the memory cards
      // that reach this point of the pipeline.
      cardQuery.push({$unwind: '$progress'});
      cardQuery.push({
        $group: {
          // Group on the tool id alone and sum the flags directly. Grouping on
          // {tool, file_one flag, file_two flag} first (as before) collapsed
          // identical flag combinations from different cards into one document,
          // which capped each count at 2 no matter how many downloads existed.
          _id: '$progress.tool',
          tooldl: {$sum: {$cond: ['$progress.file_one', 1, 0]}},
          worksheetdl: {$sum: {$cond: ['$progress.file_two', 1, 0]}},
        },
      });
      // TODO: this lookup only fetches the tool title, which the separate tool
      // aggregation already provides; it also drops groups whose tool no longer
      // exists (via the $unwind below). Remove once nothing reads `title` here.
      cardQuery.push({
        $lookup: {
          from: 'tools',
          localField: '_id',
          foreignField: '_id',
          as: 'toolData',
        },
      });
      cardQuery.push({$unwind: '$toolData'});
      cardQuery.push({
        $project: {
          _id: '$_id',
          title: '$toolData.title',
          tool_downloads: '$tooldl',
          worksheet_downloads: '$worksheetdl',
        },
      });
      const cardData = await Memorycard.aggregate(cardQuery);
      // Index the per-tool download totals by stringified id so each tool needs
      // one Map lookup instead of a scan over cardData.
      const downloadsByToolId = new Map();
      for (const card of cardData) {
        const key = String(card._id);
        // Keep the first entry per id, matching what Array.prototype.find returned.
        if (!downloadsByToolId.has(key)) {
          downloadsByToolId.set(key, card);
        }
      }
      // Merge rating data with download data; tools without any download entry
      // are left out of the result.
      const formattedData = [];
      for (const tool of toolData) {
        const found = downloadsByToolId.get(String(tool._id));
        if (found) {
          formattedData.push({
            '_id': tool._id,
            'title': tool.title,
            'theme': tool.theme,
            'avg': tool.avg,
            'count': tool.count,
            'ratings': tool.ratings,
            'total_dl': (found.tool_downloads + found.worksheet_downloads),
            'tool_dl': found.tool_downloads,
            'ws_dl': found.worksheet_downloads,
          });
        }
      }

您可以看出这相当笨拙,聚合并不是我喜欢做的事情。最终数据应当是:Tool 集合中的每个 Tool 都对应一个类似 formattedData 的对象。

用户存档(memorycards)文档

{
    "_id" : ObjectId("5d1c032c2330cc00179ea41e"),
    "owner" : ObjectId("5d1c032c2330cc00179ea41d"),
    "progress" : [ 
        {
            "file_one" : true,
            "file_two" : true,
            "_id" : ObjectId("5d1c03e92330cc00179ea44e"),
            "tool" : ObjectId("5c7d4c9971338741c09c6c73"),
            "createdAt" : ISODate("2019-07-03T01:24:57.677Z"),
            "updatedAt" : ISODate("2019-07-03T01:25:40.165Z")
        }
    ],
}

工具文档

{
    "_id" : ObjectId("5c7d4c9971338741c09c6c65"),
    "title" : "Ecosystem Analysis",
    "rating" : 5,
    "ratings" : [ 
        {
            "_id" : ObjectId("5d23b56deac6ce0017bc8fce"),
            "user" : ObjectId("5d163f1f3bc0ec001701b21c"),
            "score" : 5,
            "feedback" : "Woop woop",
            "username" : "max"
        }, 
        {
            "_id" : ObjectId("5d329d0569b61d0017d801e1"),
            "user" : ObjectId("5d247411eac6ce0017bc91c1"),
            "score" : 5,
            "feedback" : "",
            "username" : "demoaccount"
        }
    ],
    "theme" : "strategy",
    "totalratings" : 2
}

[编辑] 我用下面的写法得到了不错的结果,但现在还需要过滤掉不在外部数组中的所有者 ID。

 // Attach to each tool the download flags of every memory card that has a
      // progress entry for it. Owners are NOT filtered here; each entry carries
      // its owner as `byUser` so filtering can happen in a later stage.
      toolQuery.push({
        $lookup: {
          from: 'memorycards',
          let: {toolId: '$_id'},
          pipeline: [
            // Pre-filter: cards whose progress array references this tool.
            {$match: {$expr: {$in: ['$$toolId', '$progress.tool']}}},
            // One document per progress entry ...
            {$unwind: '$progress'},
            // ... keeping only the entries that belong to this tool.
            {$match: {$expr: {$eq: ['$$toolId', '$progress.tool']}}},
            {
              $project: {
                _id: 0,
                byUser: '$owner',
                tool: '$progress.file_one',
                worksheet: '$progress.file_two',
              },
            },
          ],
          as: 'downloads',
        },
      });

[编辑2] 在评论中一些指点的帮助下得到的最终结果

toolQuery.push(
          // Keep only the tool fields the later stages read.
          {
            $project: {
              _id: '$_id',
              title: '$title',
              theme: '$theme',
              ratings: '$ratings',
            },
          },
          // Attach, as `downloads`, one {byUser, tool, worksheet} entry for every
          // memory-card progress entry that references this tool.
          {
            $lookup: {
              from: 'memorycards',
              let: {toolId: '$_id'},
              pipeline: [
                // Pre-filter: cards whose progress array references this tool.
                {$match: {$expr: {$in: ['$$toolId', '$progress.tool']}}},
                // One document per progress entry ...
                {$unwind: '$progress'},
                // ... keeping only the entries that belong to this tool.
                {$match: {$expr: {$eq: ['$$toolId', '$progress.tool']}}},
                {
                  $project: {
                    _id: 0,
                    byUser: '$owner',
                    tool: '$progress.file_one',
                    worksheet: '$progress.file_two',
                  },
                },
              ],
              as: 'downloads',
            },
          }
      );
      // Average score over the tool's ratings; 0 when it has none.
      // NOTE(review): this reads the unfiltered `ratings` array even when the
      // user filter below is active - confirm whether the average should be
      // restricted to the allowed users as well.
      const avgScoreExpr = {
        $cond: [
          {$eq: [{$size: '$ratings'}, 0]},
          0,
          {$divide: [{$sum: '$ratings.score'}, {$size: '$ratings'}]},
        ],
      };
      // Filter as soon as there is at least one allowed user. The previous
      // `> 1` check skipped the filter for a single-user organization and so
      // returned every user's downloads in that case.
      // NOTE(review): an empty `users` array still means "no filter"; if an
      // organization without users should yield nothing, handle that upstream.
      if (users.length > 0) {
        toolQuery.push({
          $project: {
            title: 1,
            theme: 1,
            avg: avgScoreExpr,
            // Keep only download entries made by an allowed user.
            downloads: {
              $filter: {
                input: '$downloads',
                as: 'download',
                cond: {$in: ['$$download.byUser', users]},
              },
            },
            // Keep only ratings given by an allowed user.
            ratings: {
              $filter: {
                input: '$ratings',
                as: 'rating',
                cond: {$in: ['$$rating.user', users]},
              },
            },
          },
        });
      } else {
        toolQuery.push({
          $project: {
            title: 1,
            theme: 1,
            avg: avgScoreExpr,
            downloads: 1,
          },
        });
      }
      toolQuery.push(
          // One document per download entry. Tools whose `downloads` array is
          // empty produce no document and therefore drop out of the result.
          {$unwind: '$downloads'},
          // Regroup per tool: each entry adds 1 for a tool download and 1 for a
          // worksheet download to total_dl.
          {
            $group: {
              _id: '$_id',
              avg: {$first: '$avg'},
              title: {$first: '$title'},
              theme: {$first: '$theme'},
              total_dl: {
                $sum: {
                  $add: [
                    {$sum: {$cond: ['$downloads.tool', 1, 0]}},
                    {$sum: {$cond: ['$downloads.worksheet', 1, 0]}},
                  ],
                },
              },
            },
          },
          // Highest average first, ties broken by total downloads.
          {$sort: {avg: -1, total_dl: -1}}
      );

0 个答案:

没有答案