我在 MongoDB 中有一个文档数组,想从给定数组中找出 devDependenciesList 里出现次数最多的值:
[{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}]
输出应为:
[value1:3,value4:3,value3:2]
答案 0 :(得分:3)
基本上,您需要先用 $unwind 展开数组内容,然后用 $group 以每个值作为分组键,并用 $sum 进行计数:
db.collection.aggregate([
  // Emit one document per element of devDependenciesList
  { $unwind: "$devDependenciesList" },
  // Use each distinct value as the grouping key and count its occurrences
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  }
])
它会返回:
{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }
这就是基本数据;但如果您确实想要“键/计数”的形式,可以这样做:
db.collection.aggregate([
  // Emit one document per element of devDependenciesList
  { $unwind: "$devDependenciesList" },
  // Count how many times each distinct value occurs
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Highest counts first
  { $sort: { count: -1 } },
  // Collect every { value, count } pair into a single document as k/v entries
  {
    $group: {
      _id: null,
      items: { $push: { k: "$_id", v: "$count" } }
    }
  },
  // Turn the k/v array into one object and make it the output document
  {
    $replaceRoot: {
      newRoot: { $arrayToObject: "$items" }
    }
  }
])
它会返回:
{
"value1" : 3,
"value4" : 3,
"value3" : 2,
"value23" : 1,
"value93" : 1,
"value7" : 1,
"value2" : 1
}
附加的 $group 和 $push 用于把所有结果收集到单个文档中,该文档含有一个数组,数组的每个元素都带有 "k" 和 "v" 两个字段。$arrayToObject 运算符要求输入采用这种形式,它在接下来的 $replaceRoot 阶段中用于返回最终输出。
您需要使用支持后一种运算符($arrayToObject)的 MongoDB 版本,不过其实并非一定要这样做,因为这一步在客户端代码中完成才是最高效的。例如在 shell 中使用 JavaScript:
db.collection.aggregate([
  // Emit one document per element of devDependenciesList
  { $unwind: "$devDependenciesList" },
  // Count how many times each distinct value occurs
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Highest counts first
  { $sort: { count: -1 } }
]).toArray().reduce((acc, { _id, count }) => {
  // Fold the sorted { _id, count } documents into one { value: count } object
  acc[_id] = count;
  return acc;
}, {})
与上面的结果相同。
当然,如果要排除所有只出现一次的结果(或做类似的过滤),只需在 $group 之后添加 $match:
db.collection.aggregate([
  // Emit one document per element of devDependenciesList
  { $unwind: "$devDependenciesList" },
  // Count how many times each distinct value occurs
  {
    $group: {
      _id: "$devDependenciesList",
      count: { $sum: 1 }
    }
  },
  // Keep only values that occur more than once
  { $match: { count: { $gt: 1 } } },
  // Highest counts first
  { $sort: { count: -1 } }
]).toArray().reduce((acc, { _id, count }) => {
  // Fold the sorted { _id, count } documents into one { value: count } object
  acc[_id] = count;
  return acc;
}, {})
或者使用 Node.js 原生驱动程序,写法类似如下:
// Run the pipeline, wait for the full result array, then fold it into a
// single { value: count } object using object spread and destructuring.
let result = (
  await db.collection('collection').aggregate([
    // Emit one document per element of devDependenciesList
    { $unwind: "$devDependenciesList" },
    // Count how many times each distinct value occurs
    {
      $group: {
        _id: "$devDependenciesList",
        count: { $sum: 1 }
      }
    },
    // Keep only values that occur more than once
    { $match: { count: { $gt: 1 } } },
    // Highest counts first
    { $sort: { count: -1 } }
  ]).toArray()
).reduce((acc, { _id, count }) => ({ ...acc, [_id]: count }), {});
请注意,这里对实际返回的数组使用了 async/await,并使用了 ES6+ 特性(例如对象展开和解构)。
结果当然就是:
{ "value1" : 3, "value4" : 3, "value3" : 2 }
仅供参考,下面是一个完整的、可复现的代码清单:
const { MongoClient } = require('mongodb');

// Connection settings for a local MongoDB instance.
const uri = 'mongodb://localhost:27017';
const opts = { useNewUrlParser: true };

// Sample documents: each one carries a devDependenciesList array whose
// values are counted across the whole collection.
const data = [
  {
    "_id" : 0,
    "repoId" : 460078,
    "devDependenciesList" : [
      "value1",
      "value2",
      "value3",
      "value4"
    ]
  },{
    "_id" : 1,
    "repoId" : 1232,
    "devDependenciesList" : [
      "value1",
      "value4",
      "value7",
      "value93"
    ]
  },{
    "_id" : 2,
    "repoId" : 5423,
    "devDependenciesList" : [
      "value1",
      "value23",
      "value3",
      "value4"
    ]
  }
];

// Pretty-print any value as indented JSON.
const log = data => console.log(JSON.stringify(data, undefined, 2));

(async function() {

  let client;

  try {
    client = await MongoClient.connect(uri, opts);
    const db = client.db('test');

    // Clean data (explicit empty filter: remove every document)
    await db.collection('collection').deleteMany({});

    // Insert data
    await db.collection('collection').insertMany(data);

    // Count each distinct array value, keep those seen more than once,
    // order by count descending, then fold the documents into a single
    // { value: count } object on the client.
    const result = (await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      { "$sort": { "count": -1 } }
    ]).toArray()).reduce((o, { _id, count }) => ({ ...o, [_id]: count }),{});

    log(result);

    // Ask the server to explain a $sortByCount pipeline so the stages it
    // actually runs can be inspected.
    const sample = await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ],{ "explain": true }).toArray();

    log(sample);

  } catch(e) {
    console.error(e);
  } finally {
    // client stays undefined when connect() itself failed. Await the close
    // so the promise it returns is not left floating.
    if (client) {
      await client.close();
    }
  }

})()
输出如下:先是期望的结果,随后是 “explain” 输出,它表明 $sortByCount 并不是一个“真正的”聚合阶段,而只是一种更简短的写法,对应的功能自 MongoDB 2.2 起就已存在:
{
"value1": 3,
"value4": 3,
"value3": 2
}
[
{
"stages": [
{
"$cursor": {
"query": {},
"fields": {
"devDependenciesList": 1,
"_id": 0
},
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.collection",
"indexFilterSet": false,
"parsedQuery": {},
"winningPlan": {
"stage": "COLLSCAN",
"direction": "forward"
},
"rejectedPlans": []
}
}
},
{
"$unwind": {
"path": "$devDependenciesList"
}
},
{
"$group": {
"_id": "$devDependenciesList",
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$sort": {
"sortKey": {
"count": -1
}
}
}
],
"ok": 1,
"operationTime": "6674186995377373190",
"$clusterTime": {
"clusterTime": "6674186995377373190",
"signature": {
"hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId": 0
}
}
}
]
答案 1 :(得分:1)
请尝试使用$sortByCount
和$unwind
,如下所示:
db.getCollection("test").aggregate([
    // Emit one document per element of devDependenciesList
    { $unwind: "$devDependenciesList" },
    // Group by value and order the groups by their count
    { $sortByCount: "$devDependenciesList" }
]).map(({ _id, count }) => ({ [_id]: count }))
这是我能想到的简单而简短的解决方案。