如何返回MongoDB数组中各个值的最大出现次数

时间:2019-03-30 11:57:02

标签: node.js mongodb aggregation-framework

我在MongoDB中有如下数组:我想从给定的数组中得到devDependenciesList里出现次数最多的值

[{
    "_id" : 0,
    "repoId" : 460078,
    "devDependenciesList" : [ 
        "value1", 
        "value2", 
        "value3", 
        "value4"
    ]
},{
    "_id" : 1,
    "repoId" : 1232,
    "devDependenciesList" : [ 
        "value1", 
        "value4", 
        "value7", 
        "value93"
    ]
},{
    "_id" : 2,
    "repoId" : 5423,
    "devDependenciesList" : [ 
        "value1", 
        "value23", 
        "value3", 
        "value4"
    ]
}]

输出应为:

[value1:3,value4:3,value3:2]

2 个答案:

答案 0 :(得分:3)

基本上,您需要先用$unwind展开数组内容,然后以每个值作为分组键进行$group,并用$sum进行计数:

// Count how many times each value occurs across all devDependenciesList arrays.
db.collection.aggregate([
  // Produce one document per array element.
  { $unwind: "$devDependenciesList" },
  // Use the element value as the grouping key and add 1 per occurrence.
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  }
])

它会返回:

{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }

这就是基本数据,但是如果您确实想要“键/计数”的形式,则可以执行以下操作:

// Count each value, then reshape the sorted counts into a single
// { value: count, ... } document on the server.
db.collection.aggregate([
  // Produce one document per array element.
  { $unwind: "$devDependenciesList" },
  // Use the element value as the grouping key and add 1 per occurrence.
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Highest counts first.
  { $sort: { count: -1 } },
  // Gather every result into one document holding an array of { k, v } pairs.
  {
    $group: {
      _id: null,
      items: { $push: { k: "$_id", v: "$count" } }
    }
  },
  // Turn the { k, v } pairs into an object and make it the output document.
  {
    $replaceRoot: {
      newRoot: { $arrayToObject: "$items" }
    }
  }
])

它会返回:

{
        "value1" : 3,
        "value4" : 3,
        "value3" : 2,
        "value23" : 1,
        "value93" : 1,
        "value7" : 1,
        "value2" : 1
}

附加的$group和$push用于将所有结果收集到单个文档中,该文档包含一个数组,其中每个元素都带有名为"k"和"v"的字段。$arrayToObject运算符需要这种形式的输入,它在下一个$replaceRoot阶段中用于返回最终输出。

使用后一种运算符需要支持它的MongoDB版本,但其实您并不一定需要它:这一步实际上在客户端代码中完成最为高效。例如在shell中使用JavaScript:

// Count and sort on the server, then build the { value: count } object in the client.
db.collection.aggregate([
  // Produce one document per array element.
  { $unwind: "$devDependenciesList" },
  // Use the element value as the grouping key and add 1 per occurrence.
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Highest counts first.
  { $sort: { count: -1 } }
]).toArray().reduce((counts, doc) => {
  // Each result document becomes one key on the accumulated object.
  counts[doc._id] = doc.count;
  return counts;
}, {})

与上面的结果相同。

当然,如果要排除所有只出现一次的结果或类似结果,只需在$group之后添加$match:

// Same as the plain count, but values that occur only once are dropped
// before the client folds the results into a { value: count } object.
db.collection.aggregate([
  // Produce one document per array element.
  { $unwind: "$devDependenciesList" },
  // Use the element value as the grouping key and add 1 per occurrence.
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Keep only values counted more than once.
  { $match: { count: { $gt: 1 } } },
  // Highest counts first.
  { $sort: { count: -1 } }
]).toArray().reduce((counts, doc) => {
  // Each result document becomes one key on the accumulated object.
  counts[doc._id] = doc.count;
  return counts;
}, {})

或者使用Node原生驱动程序,类似于以下内容:

// Same pipeline through the Node native driver: the array of result documents
// is awaited first, then folded into a single { value: count } object.
const documents = await db.collection('collection').aggregate([
  // Produce one document per array element.
  { $unwind: "$devDependenciesList" },
  // Use the element value as the grouping key and add 1 per occurrence.
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Keep only values counted more than once.
  { $match: { count: { $gt: 1 } } },
  // Highest counts first.
  { $sort: { count: -1 } }
]).toArray();

let result = documents.reduce(
  (counts, { _id: value, count: total }) => ({ ...counts, [value]: total }),
  {}
)

注意这里使用了async/await来等待实际数组的返回,并使用了ES6特性(例如对象展开和解构)。

当然,返回的只是:

{ "value1" : 3, "value4" : 3, "value3" : 2 }

仅供参考,这是一个完整的、可复现的代码清单:

const { MongoClient } = require('mongodb');

// Connection string for the MongoDB server (localhost, port 27017).
const uri = 'mongodb://localhost:27017';
// Options passed as the second argument to MongoClient.connect below.
// NOTE(review): useNewUrlParser presumably opts in to the driver's newer
// connection-string parser — confirm against the driver version in use.
const opts = { useNewUrlParser: true };

// Sample documents: three repositories, each with a devDependenciesList array.
const data = [
  {
    _id: 0,
    repoId: 460078,
    devDependenciesList: ["value1", "value2", "value3", "value4"],
  },
  {
    _id: 1,
    repoId: 1232,
    devDependenciesList: ["value1", "value4", "value7", "value93"],
  },
  {
    _id: 2,
    repoId: 5423,
    devDependenciesList: ["value1", "value23", "value3", "value4"],
  },
];

// Print a value to stdout as JSON indented with two spaces.
const log = (value) => {
  const text = JSON.stringify(value, undefined, 2);
  return console.log(text);
};

// Self-contained demo: resets the collection with the sample data, prints the
// { value: count } object for values seen more than once, then prints the
// explain output for the $sortByCount form of the pipeline.
(async function() {

  let client;

  try {
    client = await MongoClient.connect(uri, opts);

    const db = client.db('test');
    const collection = db.collection('collection');

    // Clean data (an empty filter matches every document)
    await collection.deleteMany({});

    // Insert data
    await collection.insertMany(data);

    // Count each value, keep only counts above 1, highest first, then fold
    // the { _id, count } documents into a single { value: count } object.
    const result = (await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      { "$sort": { "count": -1 } }
    ]).toArray()).reduce((o, { _id, count }) => ({ ...o, [_id]: count }), {});

    log(result);

    // Ask for the explain output of the $sortByCount pipeline instead of its results.
    const sample = await collection.aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ], { "explain": true }).toArray();

    log(sample);

  } catch(e) {
    console.error(e);
  } finally {
    // Await the close so the function does not finish with a floating promise
    // while the connection is still being torn down.
    if (client) {
      await client.close();
    }
  }

})();

以下输出显示了期望的结果,以及“explain”输出;后者表明“$sortByCount”并不是一个“真正的”聚合阶段,而只是自MongoDB 2.2起就已存在的写法的一种简写:

{
  "value1": 3,
  "value4": 3,
  "value3": 2
}
[
  {
    "stages": [
      {
        "$cursor": {
          "query": {},
          "fields": {
            "devDependenciesList": 1,
            "_id": 0
          },
          "queryPlanner": {
            "plannerVersion": 1,
            "namespace": "test.collection",
            "indexFilterSet": false,
            "parsedQuery": {},
            "winningPlan": {
              "stage": "COLLSCAN",
              "direction": "forward"
            },
            "rejectedPlans": []
          }
        }
      },
      {
        "$unwind": {
          "path": "$devDependenciesList"
        }
      },
      {
        "$group": {
          "_id": "$devDependenciesList",
          "count": {
            "$sum": {
              "$const": 1
            }
          }
        }
      },
      {
        "$sort": {
          "sortKey": {
            "count": -1
          }
        }
      }
    ],
    "ok": 1,
    "operationTime": "6674186995377373190",
    "$clusterTime": {
      "clusterTime": "6674186995377373190",
      "signature": {
        "hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
        "keyId": 0
      }
    }
  }
]

答案 1 :(得分:1)

请尝试使用$sortByCount和$unwind,如下所示:

// Unwind the array, let $sortByCount count and order the values, then map
// every { _id, count } result to a single-key { value: count } object.
db.getCollection("test").aggregate([
    { $unwind: "$devDependenciesList" },
    { $sortByCount: "$devDependenciesList" }
]).map((doc) => ({ [doc._id]: doc.count }))

这是我能想到的简单而简短的解决方案。