我使用Mongo v2.2.0。
我写了下面的查询,但主要问题在于 $arrayElemAt。用 $unwind + $first 的常规替代方案对我不起作用,我想应该存在更好的解决方案。我受到一个限制:必须把这个聚合管道作为单个操作运行,而不能分别查询正面和负面数据、再在代码中合并结果。我需要对查询结果应用 $sort、$limit 和 $skip,以限制 words 的数量;这些 words 用于过滤另一个集合中的记录,之后在 Java 代码中组合来自两个集合的数据。
聚合查询:
[
    // Stage 1: restrict the input to one merchant, one ISBN and a date window
    // (lower bound inclusive, upper bound exclusive).
    {
        "$match": {
            "merchantId": ObjectId("59520e6ccc7a701fbed31f94"),
            "date": {
                "$gte": NumberLong(1389644800000),
                "$lt": NumberLong(1502409599999)
            },
            "isbn": "a123"
        }
    },
    // Stage 2: keep only the fields the later stages read.
    {
        "$project": {
            "word": 1,
            "sentence": 1,
            "type": 1,
            "date": 1
        }
    },
    // Stage 3: newest first, so that $first in the next stage picks the
    // most recent sentence of each group.
    {
        "$sort": {
            "date": -1
        }
    },
    // Stage 4: one document per (word, type) pair, carrying the latest date,
    // the first sentence in sort order and the number of sentences.
    {
        "$group": {
            "_id": {
                "word": "$word",
                "type": "$type"
            },
            "date": { "$max": "$date" },
            "sentence": { "$first": "$sentence" },
            "sentenceCount": { "$sum": 1 }
        }
    },
    // Stage 5: one document per word. Counts are summed across types; the
    // per-type sentence is collected into an array via $push.
    // "$noval" names a field the sample document does not contain; it is
    // presumably meant to make $push add nothing for the other type -
    // confirm on the target server version.
    {
        "$group": {
            "_id": "$_id.word",
            "word": { "$first": "$_id.word" },
            "positiveCount": {
                "$sum": {
                    "$cond": [
                        { "$eq": ["$_id.type", "positive"] },
                        "$sentenceCount",
                        0
                    ]
                }
            },
            "count": { "$sum": "$sentenceCount" },
            "positiveSentence": {
                "$push": {
                    "$cond": [
                        { "$eq": ["$_id.type", "positive"] },
                        "$sentence",
                        "$noval"
                    ]
                }
            },
            "negativeSentence": {
                "$push": {
                    "$cond": [
                        { "$eq": ["$_id.type", "negative"] },
                        "$sentence",
                        "$noval"
                    ]
                }
            }
        }
    },
    // Stage 6: shape the output. The percentage is guarded against a zero
    // count. $arrayElemAt takes element 0 of each sentence array; this is the
    // operator the question asks to replace.
    {
        "$project": {
            "_id": 0,
            "word": 1,
            "sentimentPercentage": {
                "$cond": [
                    { "$eq": ["$count", 0] },
                    0,
                    {
                        "$multiply": [
                            { "$divide": ["$positiveCount", "$count"] },
                            100
                        ]
                    }
                ]
            },
            "positiveSentence": { "$arrayElemAt": ["$positiveSentence", 0] },
            "negativeSentence": { "$arrayElemAt": ["$negativeSentence", 0] }
        }
    },
    // Stage 7: highest positive share first.
    {
        "$sort": {
            "sentimentPercentage": -1
        }
    },
    // Stage 8: cap the result at 50 words.
    {
        "$limit": 50
    }
]
集合文档的“结构”:
{
"_id" : ObjectId("59887424e4b099e00724aa44"),
"merchantId" : ObjectId("59520e6ccc7a701fbed31f94"),
"isbn" : "a123",
"sentence" : "Great, friendly service.",
"word" : "service",
"type" : "positive",
"date" : NumberLong(1466809200000),
}
预期输出:
{
"word" : "expectations",
"sentimentPercentage" : 100.0,
"positiveSentence" : "The service exceeded our expectations."
},
{
"word" : "representative",
"sentimentPercentage" : 87.5,
"positiveSentence" : "Excellent local representative, met the flight and gave us all the relevant information to ensure a great holiday.",
"negativeSentence" : "The representative at resort was poor."
},
{
"word" : "seats",
"sentimentPercentage" : 0.0,
"negativeSentence" : "Long delay and pre booked seats were lost ."
}
请问,如何用 Mongo <= 2.2.0 中可用的功能替换 $arrayElemAt 运算符?或者更好的是,如何优化此查询以得到所需的输出?
答案 0:(得分:1)
这个查询似乎给出了合理的结果。但是,由于 v2.2 中的 $unwind 阶段不支持 preserveNullAndEmptyArrays 参数,如果某个词没有正面句子或没有负面句子,我认为它将无法正常工作……
// Per-word sentiment summary without $arrayElemAt and without $unwind.
//
// A $push followed by $unwind drops every word that has no positive or no
// negative sentence, because $unwind discards documents whose array is empty.
// After the first $group there is at most one document per (word, type), so
// the per-type sentence can be taken directly with $max over a $cond that
// yields null for the other type: null compares lower than any string, so the
// sentence wins when one exists, and the field is null otherwise.
//
// Output per word: word, sentimentPercentage, positiveSentence and
// negativeSentence (null when the word has no sentence of that type).
// A $match stage can be put in front, and $sort / $skip / $limit stages
// appended, as needed.
db.getCollection('test').aggregate([
    // Keep only the fields the later stages read.
    {
        $project: {
            "word": 1,
            "sentence": 1,
            "type": 1,
            "date": 1
        }
    },
    // Newest first, so that $first below picks the most recent sentence.
    {
        $sort: {
            "date": -1
        }
    },
    // One document per (word, type): most recent sentence and sentence count.
    {
        $group: {
            "_id": {
                "word": "$word",
                "type": "$type"
            },
            "sentence": { $first: "$sentence" },
            "sentenceCount": { $sum: 1 }
        }
    },
    // One document per word: total / positive counts and one sentence per type.
    {
        $group: {
            "_id": "$_id.word",
            "word": { $first: "$_id.word" },
            "positiveCount": {
                $sum: {
                    $cond: [{ $eq: ["$_id.type", "positive"] }, "$sentenceCount", 0]
                }
            },
            "count": { $sum: "$sentenceCount" },
            "positiveSentence": {
                $max: {
                    $cond: [{ $eq: ["$_id.type", "positive"] }, "$sentence", null]
                }
            },
            "negativeSentence": {
                $max: {
                    $cond: [{ $eq: ["$_id.type", "negative"] }, "$sentence", null]
                }
            }
        }
    },
    // Shape the output; the percentage is guarded against a zero count.
    {
        $project: {
            "_id": 0,
            "word": 1,
            "sentimentPercentage": {
                $cond: [
                    { $eq: ["$count", 0] },
                    0,
                    { $multiply: [{ $divide: ["$positiveCount", "$count"] }, 100] }
                ]
            },
            "positiveSentence": 1,
            "negativeSentence": 1
        }
    }
])
您还可以进一步简化这个管道,例如去掉第一个投影阶段和分组阶段。如果您愿意,我可以过几个小时再研究一下。