$ sort和$ match用于标识要处理的最新记录的汇总

时间:2018-12-12 21:07:59

标签: mongodb

已编辑的问题:我有一个汇总脚本,该脚本尝试经历以下阶段。我遇到的问题是注释部分。

我遇到的问题是下面管道中的3号和6号。我在脚本下方包含一个示例原始文档。

对于数字3,如果我取消注释该部分,它将运行,但仅返回一个文档,并且某些字段消失。当我将其注释掉时,可以根据需要获取正确数量的文档。

对于数字6,我认为当前计数是针对订单项而不是订单项来自的文档。我想按组最多一个级别来计算文档,但不确定如何。

谢谢,马特

01在日期范围内查找某种类型的文档

02对按创建日期排序的文档进行排序以标识重复项

03使用$ first

从所有文档的最新版本中选择值

04 $ unwind line_items并仅对类别的line_items应用$ match

05为YYYYMM创建一个值以按(日期作为数字)左6个位置分组

06按组计数文档(我认为这仅提供line_items的数量,而不是文档的数量)

07预期格式的项目值带有附加标签($ literal值)

db.getCollection("9SP_Data").aggregate([

// find documents of a type and within a number range
{"$match" : {"_id.object_category" : "revenue-transaction"
        ,"_id.transaction_date": {
            $gte: 20160101000000,
            $lt: 20170101000000
            },
}},

// sort into order so that if duplicates, the new document listed at top
{$sort : { "_id.connection":1,
       "_id.company":1,
       "_id.transaction_reference":1,  
       "object_creation_date": -1 }},
/*
// THIS IS MY PROBLEM AREA - I don't have all the fields and it results in 1 document created
{$group : { _id: {"$transaction_reference",  
    "company" : {$first: "$_id.connection"}, 
    "transaction_date" : {$first: "$_id.transaction_date"}, 
    "object_category" : {$first: "$_id.object_category"}, 
    "transaction_status" : {$first: "$_id.transaction_status"}, 
    "object_origin_category" : {$first: "$_id.object_origin_category"}, 
    "object_origin_sub_category" : {$first: "$_id.object_origin_sub_category"}, 
    "object_origin" : {$first: "$_id.object_origin"}, 
    "line_items" : {$first: "$line_items"}}},
*/

    {"$unwind" :  "$line_items"},
    {"$match"  :  {"line_items.item_category":"sales-revenue"}},
    {"$group" : {
       "_id":
           {
            "company" : "$_id.connection",
            "sum_by_date":  {$trunc:{$divide: ["$_id.transaction_date", 100000000 ]}},
            //  10000000000 - by year
            //  100000000 - by month 
            //  1000000 - by date 
            //  10000 - by hour 
            //  100 - by minute 
            "category" : "$line_items.item_category",
            "origin_category" : "$_id.object_origin_category",
            "object_origin_type" : "$_id.object_origin_type",
            "object_origin" : "$_id.object_origin"
           },
        "metric_value"  : { $sum: "$line_items.item_net_total_value" },

        // count number of documents (I think this is counting line_items but I need number of distinct documents by _id.transaction_reference)
        "metric_volume":{$sum:1}}
},

// format the output to include the following values
{$project : {
"_id.company"               : "$_id.company",
"_id.metric_name"           : {$literal : "revenue"},
"_id.metric_category"       : {$literal : "sales"},
"_id.metric_type"           : {$literal : "month"},
"_id.metric_lookup"         : "$_id.sum_by_date",
"_id.object_origin_category": "$_id.origin_category",
"_id.object_origin_type"    : "$_id.object_origin_type",
"_id.object_origin"         : "$_id.object_origin",
"metric_value"              : "$metric_value",
"metric_volume"             : "$metric_volume"
}}
])

集合中的示例文档:

{
"_id" : {
    "connection" : "cb1c4a56-1544-4e9d-a433-abb33429a300",
    "transaction_date" : 20171129170558,
    "transaction_date_utc" : "2017-11-29 17:05:58",
    "object_class" : "goods-service-transaction",
    "object_category" : "revenue-transaction",
    "object_type" : "receipt",
    "object_origin_category" : "point-of-sale",
    "object_origin_type" : "offline",
    "object_origin" : "vend",
    "transaction_status" : "CLOSED",
    "related_reference" : "85"
},
"object_creation_date" : "20181210120904",
"party_identifier" : "WALKIN",
"staff_identifier" : "02dcd191-ae2b-11e6-f485-7967ed9c6343",
"staff_name" : "uat1@9spokes.com",
"line_items" : [
    {
    "item_name" : "Summer Dress / 10",
    "item_system_id" : "02dcd191-ae20-11e6-f485-7967ee5a21ee",
    "item_identifier" : "10017",
    "item_category" : "sales-revenue",
    "item_type" : "goods-service",
    "item_quantity" : 1,
    "item_net_unit_sale_value" : 102.2727,
    "item_net_unit_discount_value" : 0,
    "item_unit_tax_value" : 11.3636,
    "item_net_total_value" : 102.2727,
    "item_total_tax_value" : 11.36364,
    "item_total_gross_value" : 113.63636
},
    {
    "item_name" : "Dress Shirt / Polyester / Medium",
    "item_system_id" : "02dcd191-ae20-11e6-f485-7967eee35001",
    "item_identifier" : "10023",
    "item_category" : "sales-revenue",
    "item_type" : "goods-service",
    "item_quantity" : 1,
    "item_net_unit_sale_value" : 61.3636,
    "item_net_unit_discount_value" : 0,
    "item_unit_tax_value" : 6.8182,
    "item_net_total_value" : 61.3636,
    "item_total_tax_value" : 6.81818,
    "item_total_gross_value" : 68.18182
}
]
}

0 个答案:

没有答案