Question

我的MongoDB文档看起来像这样

{
  "_id": "sdf23sddfsd",
  "the_list": [
    { "Sentiment": ["Negative", "Positive", "Positive"] },
    { "Sentiment": ["Neutral", "Positive"] }
  ],
  "some_other_list": [
    { "Sentiment": ["Positive", "Positive", "Positive"] }
  ]
}

我需要知道 "the_list" 字段中 "Positive" 的总出现次数。在上面的例子中是三次，即 "Positive" 一共出现了三次。

显然，下面的查询只会得到 1（即匹配到一个文档）：

// Counts matching documents, not occurrences: a document is counted once
// no matter how many "Positive" values its the_list.Sentiment arrays hold.
db.tmp.find({ "the_list.Sentiment": { "$in": ["Positive"] } }).count()

下面的聚合同样只得到 1：

// $match keeps whole documents that contain "Positive" somewhere in the_list.Sentiment.
// The $group _id is the literal string "_id" (no "$" prefix), so all matched documents
// fall into a single group and $sum: 1 adds one per document, not one per occurrence.
db.tmp.aggregate(
    { $match: { "the_list.Sentiment": { "$in": ["Positive"] } } }
    , { $group: { _id: "_id", count: { $sum: 1 } } }
)

最后，我尝试了下面的写法，结果仍然是零：

// Groups every document under the literal key "_id" and adds 1 when the $cond test passes.
// "$the_list.Sentiment" resolves to an array of arrays (one inner array per the_list element),
// so the string "Positive" is never a direct element of it; $in is false and $cond yields 0.
db.tmp.aggregate(
  {
        $group: {
            _id: "_id", count: {
                $sum: {
                    $cond: [{"$in": ["Positive","$the_list.Sentiment"]} , 1, 0]
                }
            }
            }
        }
    )

Answer 1

您需要过滤每个子文档中的数组，只保留等于 "Positive" 的元素，然后取结果数组的长度，即该数组中等于 "Positive" 的元素个数。使用 $map 和 $filter 对 "the_list" 中的每个子文档执行此操作，之后只需用 $reduce 运算符把各个结果累加起来：

// Pipeline: per document, count the "Positive" entries across all the_list[].Sentiment arrays.
[
  {
    "$project": {
      "count": {
        // Fold the per-element match arrays into a single number.
        "$reduce": {
          "input": {
            // One array per the_list element, holding only its "Positive" entries.
            "$map": {
              "input": "$the_list",
              "in": {
                "$filter": {
                  // Here $$this is the current the_list element ($map's default variable).
                  "input": "$$this.Sentiment",
                  "cond": {
                    "$eq": [
                      // Inside cond, $$this is the current Sentiment value ($filter's default variable).
                      "$$this",
                      "Positive"
                    ]
                  }
                }
              }
            }
          },
          "initialValue": 0,
          "in": {
            // Running total plus the number of matches in the current filtered array.
            "$add": [
              "$$value",
              {
                "$size": "$$this"
              }
            ]
          }
        }
      }
    }
  }
]

$reduce 是 MongoDB 3.4 新增的运算符；在 3.4 之前的版本（例如 3.2）中，您需要另一种方法，如下面的管道所示：

// Pipeline without $reduce: map each the_list element to its number of "Positive"
// entries, then let $sum add up the resulting array of numbers.
[
  {
    "$project": {
      // Despite the name, this field holds the summed total (a number), not an array.
      "arrayOfSum": {
        "$sum": {
          "$map": {
            "input": "$the_list",
            "in": {
              // Number of "Positive" entries in the current element's Sentiment array.
              "$size": {
                "$filter": {
                  // Here $$this is the current the_list element ($map's default variable).
                  "input": "$$this.Sentiment",
                  "cond": {
                    "$eq": [
                      // Inside cond, $$this is the current Sentiment value ($filter's default variable).
                      "$$this",
                      "Positive"
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  }
]

Answer 2

就个人而言，我会采用更灵活一点的做法：可以看到"所有"可能的值，并同时对它们计数。这基本上提供了两个可调整的地方：

- 选择结果中要分析的是 "the_list"、"some_other_list"，还是两者都分析；

- 根据需要调整要过滤的值，即 "Positive"、"Negative" 和 "Neutral" 的任意组合。

请记住，聚合管道和"所有"MongoDB 查询 DSL 一样，本质上只是一个"数据结构"。因此，只要存在"干净的切入点"，可以把某个"列表" [] 替换为可变输入，从而让输出适应不同需求，我就会这样做。

因此在下面的示例中，其他可选的调整项以注释形式给出：

// Per document, build an object mapping each selected Sentiment value to its number of occurrences.
db.getCollection('tmp').aggregate([
  { "$project": {
    "Sentiment": {
      // Convert the [{ k, v }] pairs produced below into an object { <value>: <count> }.
      "$arrayToObject": {
        // Outer $reduce: walk the filtered values and keep a running list of { k: value, v: count }.
        "$reduce": {
          "input": {
            // Keep only the values listed in the $in array of "cond".
            "$filter": {
              "input": {
                // Inner $reduce: flatten the array of Sentiment arrays into one flat array of values.
                "$reduce": {
                  "input": {
                    // Each "$<list>.Sentiment" path yields an array of arrays; join the chosen lists.
                    "$concatArrays": [
                      "$the_list.Sentiment",
                      //"$some_other_list.Sentiment"
                    ]
                  },
                  "initialValue": [],
                  "in": {
                    "$concatArrays": [ "$$value", "$$this" ]      
                  }
                }
              },
              "as": "l",
              "cond": { "$in": [ "$$l", ["Positive"/*,"Negative","Neutral"*/] ] }
            }
          },
          "initialValue": [],
          "in": {
            "$cond": {
              // Has this value already been counted at least once?
              "if": { "$in": [ "$$this", "$$value.k" ] },
              "then": {
                "$let": {
                   "vars": {
                     // Position of the existing entry for this value.
                     "index": { "$indexOfArray": [ "$$value.k", "$$this" ] }
                   },
                   "in": {
                     // New entry with count + 1, followed by every other entry unchanged.
                     "$concatArrays": [
                       [{ 
                         "k": "$$this",
                         "v": { "$add": [
                           { "$arrayElemAt": [ "$$value.v", "$$index" ] },
                           1
                         ]}
                       }],
                       { "$filter": {
                         "input": "$$value",
                         "as": "v",
                         "cond": { "$ne": [ "$$v.k", "$$this" ] }
                       }}
                     ]
                   }  
                } 
              },
              // First occurrence: append a new entry with a count of 1.
              "else": {
                "$concatArrays": [ 
                  "$$value",
                  [{ "k": "$$this", "v": 1 }]
                ]
              }
            }    
          }
        }
      }
    }
  }}
])

每个文档会生成：

{
    "_id" : "sdf23sddfsd",
    "Sentiment" : {
        "Positive" : 3.0
    }
}

或者取消上面的注释，在两个列表中查找所有值：

{
    "_id" : "sdf23sddfsd",
    "Sentiment" : {
        "Positive" : 6.0,
        "Negative" : 1.0,
        "Neutral" : 1.0
    }
}

如果想把它用于整个集合，我会去掉 $arrayToObject，直接对 $reduce 得到的数组执行 $unwind，以便把所有可能的值传给 $group：

// Collection-wide totals: build per-document { k, v } count pairs, unwind them,
// then sum the counts of each value across all documents.
db.getCollection('tmp').aggregate([
  { "$project": {
    "Sentiment": {
      // Outer $reduce: walk the filtered values and keep a running list of { k: value, v: count }.
      "$reduce": {
        "input": {
          // Keep only the values listed in the $in array of "cond".
          "$filter": {
            "input": {
              // Inner $reduce: flatten the array of Sentiment arrays into one flat array of values.
              "$reduce": {
                "input": {
                  // Each "$<list>.Sentiment" path yields an array of arrays; join both lists.
                  "$concatArrays": [
                    "$the_list.Sentiment",
                    "$some_other_list.Sentiment"
                  ]
                },
                "initialValue": [],
                "in": {
                  "$concatArrays": [ "$$value", "$$this" ]      
                }
              }
            },
            "as": "l",
            "cond": { "$in": [ "$$l", ["Positive","Negative","Neutral"] ] }
          }
        },
        "initialValue": [],
        "in": {
          "$cond": {
            // Has this value already been counted at least once?
            "if": { "$in": [ "$$this", "$$value.k" ] },
            "then": {
              "$let": {
                 "vars": {
                   // Position of the existing entry for this value.
                   "index": { "$indexOfArray": [ "$$value.k", "$$this" ] }
                 },
                 "in": {
                   // New entry with count + 1, followed by every other entry unchanged.
                   "$concatArrays": [
                     [{ 
                       "k": "$$this",
                       "v": { "$add": [
                         { "$arrayElemAt": [ "$$value.v", "$$index" ] },
                         1
                       ]}
                     }],
                     { "$filter": {
                       "input": "$$value",
                       "as": "v",
                       "cond": { "$ne": [ "$$v.k", "$$this" ] }
                     }}
                   ]
                 }  
              } 
            },
            // First occurrence: append a new entry with a count of 1.
            "else": {
              "$concatArrays": [ 
                "$$value",
                [{ "k": "$$this", "v": 1 }]
              ]
            }
          }    
        }
      }
    }
  }},
  // One output document per { k, v } pair.
  { "$unwind": "$Sentiment" },
  // Group by value name and add up the per-document counts.
  { "$group": {
    "_id": "$Sentiment.k",
    "count": { "$sum": "$Sentiment.v" }  
  }}
])

这样每个可能的值会产生一个文档，其中包含该值在所有文档中的总计数：

{ "_id" : "Neutral", "count" : 1.0 }
{ "_id" : "Negative", "count" : 1.0 }
{ "_id" : "Positive", "count" : 6.0 }

如果需要，我们甚至可以得到一个带有"灵活键"的单个文档，只需在该管道的末尾再添加两个阶段：

  // Collapse the per-value documents into one, pushing each value name and count as a { k, v } pair.
  { "$group": {
    "_id": null,
    "Sentiment": { "$push": { "k": "$_id", "v": "$count" } }
  }},
  // Turn the array of { k, v } pairs into an object keyed by value name.
  { "$addFields": {
    "Sentiment": { "$arrayToObject": "$Sentiment" }  
  }}

把最多三个文档合并成一个几乎有些多此一举，但这样至少与单个文档的输出形式一致：

{
    "_id" : null,
    "Sentiment" : {
        "Neutral" : 1.0,
        "Negative" : 1.0,
        "Positive" : 6.0
    }
}

或者，如果您更喜欢在"单个"文档中使用"命名键"，可以在 $group 阶段中列出所有需要的键：

// Collection-wide totals with fixed key names: build a per-document { <value>: <count> }
// object, then sum each named key across all documents in a single $group.
db.getCollection('tmp').aggregate([
  { "$project": {
    "Sentiment": {
      // Convert the [{ k, v }] pairs produced below into an object { <value>: <count> }.
      "$arrayToObject": {
        // Outer $reduce: walk the filtered values and keep a running list of { k: value, v: count }.
        "$reduce": {
          "input": {
            // Keep only the values listed in the $in array of "cond".
            "$filter": {
              "input": {
                // Inner $reduce: flatten the array of Sentiment arrays into one flat array of values.
                "$reduce": {
                  "input": {
                    // Each "$<list>.Sentiment" path yields an array of arrays; join both lists.
                    "$concatArrays": [
                      "$the_list.Sentiment",
                      "$some_other_list.Sentiment"
                    ]
                  },
                  "initialValue": [],
                  "in": {
                    "$concatArrays": [ "$$value", "$$this" ]      
                  }
                }
              },
              "as": "l",
              "cond": { "$in": [ "$$l", ["Positive","Negative","Neutral"] ] }
            }
          },
          "initialValue": [],
          "in": {
            "$cond": {
              // Has this value already been counted at least once?
              "if": { "$in": [ "$$this", "$$value.k" ] },
              "then": {
                "$let": {
                   "vars": {
                     // Position of the existing entry for this value.
                     "index": { "$indexOfArray": [ "$$value.k", "$$this" ] }
                   },
                   "in": {
                     // New entry with count + 1, followed by every other entry unchanged.
                     "$concatArrays": [
                       [{ 
                         "k": "$$this",
                         "v": { "$add": [
                           { "$arrayElemAt": [ "$$value.v", "$$index" ] },
                           1
                         ]}
                       }],
                       { "$filter": {
                         "input": "$$value",
                         "as": "v",
                         "cond": { "$ne": [ "$$v.k", "$$this" ] }
                       }}
                     ]
                   }  
                } 
              },
              // First occurrence: append a new entry with a count of 1.
              "else": {
                "$concatArrays": [ 
                  "$$value",
                  [{ "k": "$$this", "v": 1 }]
                ]
              }
            }    
          }
        }
      }
    }
  }},
  // Sum each named key over all documents; $ifNull supplies 0 when a document lacks that key.
  { "$group": {
    "_id": null,
    "Positive": { "$sum": { "$ifNull": [ "$Sentiment.Positive", 0 ] } },
    "Negative": { "$sum": { "$ifNull": [ "$Sentiment.Negative", 0 ] } },
    "Neutral": { "$sum": { "$ifNull": [ "$Sentiment.Neutral", 0 ] } }    
  }}
])

输出：

{
    "_id" : null,
    "Positive" : 6.0,
    "Negative" : 1.0,
    "Neutral" : 1.0
}

这样您就可以直接进入 $group；如果真的想把那个庞大的表达式塞进去，甚至可以只用一个阶段完成。

此外，如果根本不需要"聚合"，直接对游标返回的文档进行重塑会简单得多。"聚合"应该用于真正的"聚合"场景，您的情况可能正是如此；但对于实际上并不需要它的情况，展示一下客户端代码可以有多简洁也是有益的：

// Client-side alternative: tally the Sentiment values of each document while iterating the cursor.
// Adjust `lists` to choose which array fields are scanned and `wanted` to choose which values
// are counted. Returns one { _id, Sentiment: { <value>: <count> } } object per document.
db.getCollection('tmp').find({}).map( doc => {
  const lists = ["the_list","some_other_list"];
  const wanted = ["Positive","Negative","Neutral"];
  const counts = {};
  lists.forEach( l =>
    // A document lacking one of the lists contributes nothing instead of throwing a TypeError.
    (doc[l] || []).forEach( item =>
      // [].concat copes with an array-valued, scalar, or missing Sentiment field.
      [].concat(item.Sentiment || []).forEach( s => {
        if (wanted.indexOf(s) !== -1) counts[s] = (counts[s] || 0) + 1;
      })
    )
  );
  return { "_id": doc._id, "Sentiment": counts };
})

这里我们再次使用同样"灵活"的形式，因此您可以轻松地调整为只统计"一个"可能的值，或任意数量的组合。当然，输出结果也相同：

{
    "_id" : "sdf23sddfsd",
    "Sentiment" : {
        "Negative" : 1.0,
        "Positive" : 6.0,
        "Neutral" : 1.0
    }
}

所以你想要做的事情都是主观的。对我来说，我宁愿一次性获得所有计数，或者至少可以选择通过简单调整任一条件来获得不同的计数，而不是运行单独的查询，然后再寻找一种结合它们的方法。

这就是我的设计思路：我会寻找最能适应各种情况、最灵活的解决方案，并且可以根据输入参数进行调整。

Answer 3

您可以用 $map 配合 $filter，对每个文档中匹配的情绪值计数并求和，然后用 $group 汇总所有文档的计数。

例如：

// Total number of "Positive" entries in the_list[].Sentiment across the whole collection.
db.tmp.aggregate([
  {
    // Stage 1: per-document count.
    "$project": {
      "count": {
        // Add up the per-element match counts produced by $map.
        "$sum": {
          "$map": {
            "input": "$the_list",
            "as": "result",
            "in": {
              // Number of "Positive" entries in this element's Sentiment array.
              "$size": {
                "$filter": {
                  "input": "$$result.Sentiment",
                  "as": "senti",
                  "cond": {
                    "$eq": [
                      "$$senti",
                      "Positive"
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    // Stage 2: a null _id puts every document in one group, giving a single grand total.
    "$group": {
      "_id": null,
      "count": {
        "$sum": "$count"
      }
    }
  }
])

使用 $reduce 的 MongoDB 3.4 版本：

// Same collection-wide total as a $sum/$map pipeline would give, written with $reduce.
db.tmp.aggregate([
  {
    // Stage 1: per-document count.
    "$project": {
      "count": {
        "$reduce": {
          "input": "$the_list",
          "initialValue": 0,
          "in": {
            // Running total plus the number of "Positive" entries in the current element.
            "$add": [
              "$$value",
              {
                "$size": {
                  "$filter": {
                    // $$this is the current the_list element supplied by $reduce.
                    "input": "$$this.Sentiment",
                    "as": "senti",
                    "cond": {
                      "$eq": [
                        "$$senti",
                        "Positive"
                      ]
                    }
                  }
                }
              }
            ]
          }
        }
      }
    }
  },
  {
    // Stage 2: a null _id puts every document in one group, giving a single grand total.
    "$group": {
      "_id": null,
      "count": {
        "$sum": "$count"
      }
    }
  }
])

从文档中的内部数组获取值的出现总数

3 个答案: