MapReduce子文档

时间:2012-01-23 22:00:34

标签: mongodb mapreduce

我正在尝试为记录在Mongo数据库中的电子邮件活动绘制图表。每当我发送一封电子邮件时,我都会创建一条记录;之后,当该邮件产生活动时(打开、点击、标记为垃圾邮件),我会向该记录的History数组中添加一个子文档来更新它。

以下是一份示例文件:

{
  "_id" : new BinData(3, "wbbS0lRI0ESx5DyStKq9pA=="),
  "MemberId" : null,
  "NewsletterId" : 4,
  "NewsletterTypeId" : null,
  "Contents" : "[message goes here]",
  "History" : [{
      "EmailActionType" : "spam",
      "DateAdded" : new Date("Sat, 10 Dec 2011 04:17:26 GMT -08:00")
    }, {
      "EmailActionType" : "processed",
      "DateAdded" : new Date("Sun, 11 Dec 2011 04:17:26 GMT -08:00")
    }, {
      "EmailActionType" : "deffered",
      "DateAdded" : new Date("Mon, 12 Dec 2011 04:17:26 GMT -08:00")
    }],
  "DateAdded" : new Date("Mon, 01 Jan 0001 00:00:00 GMT -08:00")
}

我想要查询数据库中的特定历史日期范围。最终结果应该是一个列表,其中包含每天存在活动的项目和每种活动类型的总计:

date: "20111210", spam: 1, processed: 0, deffered: 0
date: "20111211", spam: 0, processed: 1, deffered: 0
date: "20111212", spam: 0, processed: 0, deffered: 1

以下是我目前的情况:

// Inline map/reduce over the email collection, intended to count History events
// per day and per action type.
// NOTE(review): `Email` is a bare identifier here, not a string literal — confirm
// that a variable of that name exists; the collection name is presumably meant.
db.runCommand({ mapreduce: Email, 
 // Map step: emits one pair per document, keyed by NewsletterId, whose value
 // wraps the document's entire History array as { history: [...] }.
 map : function Map() {
    var key   = this.NewsletterId;
    emit(
            key,
            { "history" : this.History }
        ); 
}
 // NOTE(review): there is no comma between the closing brace above and the
 // `reduce` property, so this object literal does not parse as written.
 // Reduce step: builds an array with one counter object per day of a fixed date
 // range and tallies events into it.
 // `history` receives the values emitted for `key`; given the map above, each
 // element is a { history: [...] } wrapper rather than a single history entry.
 reduce : function Reduce(key, history) {
    // Date months are 0-based, so this is 1 Feb 2011, not 1 Jan 2011.
    var from = new Date (2011, 1, 1, 0, 0, 0, 0);
    // `05` is a legacy octal literal with value 5, i.e. June.
    var to = new Date (2013, 05, 15, 23, 59, 59, 0);

    // \/ determine # days in the date range \/
    var ONE_DAY = 1000 * 60 * 60 * 24; // The number of milliseconds in one day
    var from_ms = from.getTime(); // Start of the range in milliseconds
    var to_ms = to.getTime(); // End of the range in milliseconds

    var difference_ms = Math.abs(from_ms - to_ms); // Calculate the difference in milliseconds 
    var numDays = Math.round(difference_ms/ONE_DAY); // Convert back to whole days
    // /\ determine # days between the two days  /\

    var results = new Array(numDays); //array where we will store the results. We will have an entry for each day in the date range.

    //initialize array that will contain our results for each type of emailActivity
    for(var i=0; i < numDays; i++){
        results[i] = {
            numSpam: 0,
            numProcessed: 0,
            numDeffered: 0
        }
    }

    //traverse the history records and count each type of event
    for (var i = 0; i < history.length; i++){
        // NOTE(review): history[i] is a { history: [...] } wrapper (see map), so
        // it has no DateAdded property of its own and this access would fail.
        var to_ms2 = history[i].DateAdded.getTime(); // Event timestamp in milliseconds

        var difference_ms2 = Math.abs(from_ms - to_ms2); // Distance of the event from the range start, in milliseconds 
        // NOTE(review): nothing bounds this index to [0, numDays); a date outside
        // the range would index an undefined slot of `results`.
        var resultsIndex = Math.round(difference_ms2/ONE_DAY); //determine which row in the results array this date corresponds to

        switch(history[i].EmailActionType)
        {
            case 'spam':
               results[resultsIndex].numSpam = ++results[resultsIndex].numSpam;
               break;
            case 'processed':
              results[resultsIndex].numProcessed =  ++results[resultsIndex].numProcessed;
               break;
            case 'deffered':
               results[resultsIndex].numDeffered = ++results[resultsIndex].numDeffered;
               break;
        }
    }
    // Returns an array of per-day counters, which is a different shape from the
    // { history: [...] } objects that map emits.
    return results; 
}
 // NOTE(review): a comma is also missing before `finalize`.
 // Finalize step: copies the three counters off the reduced value.
 // NOTE(review): Reduce returns an array, which has no numSpam/numProcessed/
 // numDeffered properties, so these reads would yield undefined.
 finalize : function Finalize(key, reduced) {
    return { 
        "numSpam": reduced.numSpam,
        "numProcessed": reduced.numProcessed,
        "numDeffered": reduced.numDeffered, 
    };
}
 // NOTE(review): a comma is also missing before `out`.
 // Return the results inline in the command response instead of writing a collection.
 out : { inline : 1 }
 });

当我运行它时,我没有得到任何东西,但我也没有收到任何错误,所以不确定在哪里看。

1 个答案:

答案 0 :(得分:3)

你的问题肯定出在你的Map / Reduce函数中。你的emit与预期输出之间存在脱节。

您的预期输出:

date: "20111210", spam: 1, processed: 0, deffered: 0

Map/Reduce的输出始终是键(key)和值(value)的形式。所以你的输出看起来会像这样:

_id: "20111220", value: { spam: 1, processed: 0, deferred: 0 }

这是基本前提。你的emit需要输出正确格式的数据。所以,如果你调用emit(key, value),那么应该有:

var key='20111220'
var value={spam:1, processed:0, deferred:0}

在您的情况下,当您循环浏览History时,每个文档会发出几次。这很正常。

仅当同一个键有多个值时,才会运行reduce函数。所以如果你有这个:

_id: "20111220", value: { spam: 1, processed: 0, deferred: 0 }
_id: "20111220", value: { spam: 1, processed: 2, deferred: 0 }

然后reduce会把它们拉到一起并给你这个:

_id: "20111220", value: { spam: 2, processed: 2, deferred: 0 }

下面是对答案的一个快速尝试:

/**
 * Map step: emits one (day, counters) pair for every entry in the document's
 * History array.
 *
 * Called with `this` bound to an email document. Each emitted value has the
 * shape { spam, processed, deffered } with exactly one counter set to 1 when
 * the entry's EmailActionType is one of those three names, and all zeros for
 * any other action type (so the day still appears in the output).
 *
 * The key is the entry's DateAdded formatted as "YYYYMMDD" in UTC.
 * Documents without a History array emit nothing.
 */
var map = function () {
  // Defined inside map so the function is self-contained: it does not rely on
  // a helper that would have to exist wherever the map function is executed.
  var toDayKey = function (date) {
    var pad = function (n) { return (n < 10 ? "0" : "") + n; };
    return "" + date.getUTCFullYear() + pad(date.getUTCMonth() + 1) + pad(date.getUTCDate());
  };

  var history = this.History || [];
  // Indexed loop rather than for...in: History is an array, and for...in would
  // also visit any enumerable non-index properties.
  for (var i = 0; i < history.length; i++) {
    var entry = history[i];
    var value = { spam: 0, processed: 0, deffered: 0 };

    // Action types are stored in lowercase ("spam", "processed", "deffered").
    if (entry.EmailActionType === "spam") { value.spam++; }
    else if (entry.EmailActionType === "processed") { value.processed++; }
    else if (entry.EmailActionType === "deffered") { value.deffered++; }

    emit(toDayKey(entry.DateAdded), value);
  }
};

/**
 * Reduce step: sums the per-type counters of all values emitted for one key.
 *
 * @param key    The day key shared by all of the values (not used in the sum).
 * @param values Array of { spam, processed, deffered } counter objects.
 * @returns A single { spam, processed, deffered } object holding the totals.
 *          It has the same shape as each input value, so the result can be
 *          fed back into reduce together with further values.
 */
var reduce = function (key, values) {
  // The accumulator must start at zero for every counter; a non-zero seed
  // would be added again on each reduce pass and inflate the totals.
  var returnValue = { spam: 0, processed: 0, deffered: 0 };
  for (var i = 0; i < values.length; i++) {
    returnValue.spam += values[i].spam;
    returnValue.processed += values[i].processed;
    returnValue.deffered += values[i].deffered;
  }
  return returnValue;
};

请记住emit的结构必须与最终值的结构相匹配。