MongoDB:我们如何按字段值对数据进行分组

时间:2018-02-05 08:11:04

标签: mongodb mongoose mongodb-query aggregation-framework

我有以下查询,我想要的是按自定义的分组字段名称和字段值对数据进行分组。

// Summarise the published, non-deleted documents in two passes:
//   stage 2 counts documents per distinct combination of the grouped fields,
//   stage 3 folds those groups into a single summary document.
db.getCollection('mycollection').aggregate([
  // Stage 1: keep only published, non-draft, non-deleted documents whose
  // publishDate..expireDate window contains the reference date.
  {"$match":{
     // The documents store publishDate/expireDate as ISODate values, so the
     // bounds must be dates too; a string literal is a different BSON type.
     "expireDate":{"$gte":ISODate("2018-02-06T00:00:00.000Z")},
     "publishDate":{"$lte":ISODate("2018-02-06T00:00:00.000Z")},
     "isPublished":true,"isDrafted":false,
     "deletedAt":{"$eq":null},"deleted":false
  }},
  // Stage 2: one group per distinct combination of the nine fields below.
  // Each time-window accumulator adds 1 for every document published after
  // its threshold; "count" is the number of documents in the group.
  {"$group":{
     "twentyFourHourAgo":{
        "$sum":{
           "$cond":[
             {"$gt":["$publishDate",ISODate("2018-02-04T08:48:16.892Z")]},1,0
           ]
        }
      },
      "fortyEightHourAgo":{
        "$sum":{
            "$cond":[
               {"$gt":["$publishDate",ISODate("2018-02-01T08:48:16.892Z")]},1,0
            ]
        }
      },
      "thirtyDaysAgo":{
         "$sum":{
            "$cond":[
               {"$gt":["$publishDate",ISODate("2017-12-31T08:48:16.892Z")]},1,0
            ]
         }
      },
      // Missing or null fields are grouped under the placeholder "Unknown".
      // NOTE: because the key is the whole combination, "count" is the number
      // of documents sharing ALL nine values, not a per-field tally, and array
      // fields (locations, requiredLanguages) are compared as whole arrays.
      "_id":{
        "position":{"$ifNull":["$position","Unknown"]},
        "workType":{"$ifNull":["$workType","Unknown"]},
        "functionalArea":{"$ifNull":["$functionalArea","Unknown"]},
        "minimumEducation":{"$ifNull":["$minimumEducation","Unknown"]},
        "gender":{"$ifNull":["$gender","Unknown"]},
        "contractType":{"$ifNull":["$contractType","Unknown"]},
        "locations":{"$ifNull":["$locations","Unknown"]},
        "requiredLanguages":{"$ifNull":["$requiredLanguages","Unknown"]},
        "company":{"$ifNull":["$company.name","Unknown"]}
      },
      "count":{"$sum":1}
     }
 },
 // Stage 3: collapse every stage-2 group into one document. The time-window
 // totals are summed; each field gets the set of distinct {Name, Count} pairs
 // seen across the groups ($addToSet drops identical pairs).
 {"$group":{
     "_id":null,
     // Field paths need the leading "$"; without it "$sum" is handed a plain
     // string and always yields 0.
     "twentyFourHourAgo":{
        "$sum":"$twentyFourHourAgo"
     },
     "fortyEightHourAgo":{
        "$sum":"$fortyEightHourAgo"
     },
     "thirtyDaysAgo":{
        "$sum":"$thirtyDaysAgo"
     },
     "position":{"$addToSet":{"Name":"$_id.position","Count":"$count"}},
     "workType":{"$addToSet":{"Name":"$_id.workType","Count":"$count"}},
     "functionalArea":{
        "$addToSet":{"Name":"$_id.functionalArea","Count":"$count"}
     },
     "minimumEducation":{
        "$addToSet":{"Name":"$_id.minimumEducation","Count":"$count"}
     },
     "gender":{"$addToSet":{"Name":"$_id.gender","Count":"$count"}},
     "contractType":{
        "$addToSet":{"Name":"$_id.contractType","Count":"$count"}
     },
     "locations":{"$addToSet":{"Name":"$_id.locations","Count":"$count"}},
     "requiredLanguages":{
        "$addToSet":{"Name":"$_id.requiredLanguages","Count":"$count"}
     },
     "company":{"$addToSet":{"Name":"$_id.company","Count":"$count"}}
 }}
])

我的集合中的文档结构如下:

/* 1 */
{
    "_id" : ObjectId("59e4540bf14f1607b90ffb81"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2018-01-02T00:00:00.000Z"),
    "expireDate" : ISODate("2018-05-29T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "companyFiles",
            "name" : "abbbff58cd3fda2c59ab2ee620ea5aa0",
            "mime" : ".png",
            "size" : 5806
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "canApplyOnline" : true,
    "skills" : [ 
        "Skill1", 
        "Skill2", 
        "Skill3", 
        "Skill4"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Key1", 
        "Key2"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "requiresTravel" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Full Time",
    "contractType" : "Permanent",
}

/* 2 */
{
    "_id" : ObjectId("59f9402e05d04ebe5653d98f"),
    "vacancyNumber" : "1",
    "position" : "Software Engineer",
    "publishDate" : ISODate("2018-01-03T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-10T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },    
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Afghanistan", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Another Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Keyword", 
        "Key1"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan", 
        "iranian"
    ],
    "workType" : "Full Time",
    "contractType" : "Short-Term",
}

/* 3 */
{
    "_id" : ObjectId("5a03235234f7504f13970abd"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2017-10-10T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-25T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "IT Software",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "My First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Test Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Test Key", 
        "Keyword"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "female",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Part Time",
    "contractType" : "Permanent",
}

现在我想按自定义表达式(twentyFourHourAgo、fortyEightHourAgo、thirtyDaysAgo)以及字段值(functionalArea、position、locations、keywords、workType)对数据进行分组并计数。

我当前的查询结果是

{
    "_id" : null,
    "twentyFourHourAgo" : 0,
    "fortyEightHourAgo" : 0.0,
    "thirtyDaysAgo" : 2.0,
    "position" : [ 
        {
            "Name" : "Software Engineer",
            "Count" : 1.0
        }, 
        {
            "Name" : "Software Tester",
            "Count" : 1.0
        }
    ],
    "workType" : [ 
        {
            "Name" : "Full Time",
            "Count" : 1.0
        }, 
        {
            "Name" : "Part Time",
            "Count" : 1.0
        }
    ],
    "functionalArea" : [ 
        {
            "Name" : "Education",
            "Count" : 1.0
        }, 
        {
            "Name" : "IT Software",
            "Count" : 1.0
        }
    ],
    "minimumEducation" : [ 
        {
            "Name" : "Doctorate",
            "Count" : 1.0
        }
    ],
    "gender" : [ 
        {
            "Name" : "male",
            "Count" : 1.0
        }, 
        {
            "Name" : "female",
            "Count" : 1.0
        }
    ],
    "contractType" : [ 
        {
            "Name" : "Short-Term",
            "Count" : 1.0
        }, 
        {
            "Name" : "Permanent",
            "Count" : 1.0
        }
    ],
    "locations" : [ 
        {
            "Name" : [ 
                "Afghanistan", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }, 
        {
            "Name" : [ 
                "Germany", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }
    ],
    "requiredLanguages" : [ 
        {
            "Name" : [ 
                "Arabic", 
                "English", 
                "Russian", 
                "Dari", 
                "French"
            ],
            "Count" : 1.0
        }
    ],
    "company" : [ 
        {
            "Name" : "First Company",
            "Count" : 1.0
        }, 
        {
            "Name" : "My First Company",
            "Count" : 1.0
        }
    ]
}

如您所见,我有三个文档,而查询结果存在以下问题:

  1. 两个文档具有相同的职位(position)Software Tester,但查询返回的 Software Tester 计数却是 1(这意味着如果多个文档在某个字段上具有相同的值,它们的计数结果就是错误的)。contractType、workType 等其他字段也存在同样的问题。
  2. 对于locations等数组类型字段,我的第一个文档的locations数组中的值是Germany, Italy, Iran,我的第二个文档是Afghanistan, Italy, Iran,我的第三个文档是Germany, Italy, Iran。但查询结果如下:

    "locations" : [ 
      {
        "Name" : [ 
          "Afghanistan", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }, 
      {
        "Name" : [ 
          "Germany", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }
     ],
    
  3. 而结果应该是:Germany => 2,Italy 和 Iran => 3,Afghanistan => 1。其他数组类型字段也存在同样的问题。

1 个答案:

答案 0 :(得分:1)

下面的查询可以满足你的要求:

// Count the matching documents per functionalArea, then gather the per-area
// counts into a single summary document.
db.getCollection('foo').aggregate([
    // Stage 1: keep only published, non-draft, non-deleted documents inside
    // the given expireDate/publishDate bounds.
    {"$match":{
        "expireDate":{"$gte": ISODate("2018-01-02T00:00:00.000Z")},
        "publishDate":{"$lte": ISODate("2018-05-29T00:00:00.000Z")},
        "isPublished":true,"isDrafted":false,
        "deletedAt":{"$eq":null},
        "deleted":false
    }},
    // Stage 2: one group per functionalArea (missing/null becomes "Unknown").
    // NOTE: all three time-window accumulators use the same threshold date
    // here, so they produce identical totals; substitute the real cut-off
    // dates as needed.
    {"$group":{
        "twentyFourHourAgo":{
            "$sum":{
                "$cond":[
                    {"$gte":["$publishDate", ISODate("2018-01-02T00:00:00.000Z")]},1,0
                ]
            }
        },
        "fortyEightHourAgo":{
            "$sum":{
                "$cond":[
                    {"$gte":["$publishDate", ISODate("2018-01-02T00:00:00.000Z")]},1,0
                ]
            }
        },
        "thirtyDaysAgo":{
            "$sum":{
                "$cond":[
                    {"$gte":["$publishDate", ISODate("2018-01-02T00:00:00.000Z")]},1,0
                ]
            }
        },
        "_id":{
            "$ifNull":["$functionalArea","Unknown"]
        },
        /* Changes start from here */
        "count" : { "$sum" : 1 }
    }},
    // Stage 3: fold every per-area group into one document. Grouping on the
    // literal null (not the string "null") yields "_id" : null in the result.
    {"$group":{
        "_id" : null,
        "fortyEightHourAgo" : { "$sum" : "$fortyEightHourAgo" },
        "thirtyDaysAgo" : { "$sum" : "$thirtyDaysAgo" },
        "twentyFourHourAgo" : { "$sum" : "$twentyFourHourAgo" },
        "functionalArea" : { "$addToSet" : { "Name" : "$_id", "Count" : "$count" } }
    }}
])

输出:

{
    "_id" : null,
    "fortyEightHourAgo" : 3.0,
    "thirtyDaysAgo" : 3.0,
    "twentyFourHourAgo" : 3.0,
    "functionalArea" : [ 
        {
            "Name" : "Education",
            "Count" : 1.0
        }, 
        {
            "Name" : "IT Software",
            "Count" : 2.0
        }
    ]
}