CouchDB中奇怪的MapReduce行为。是Rereduce的问题吗?

时间:2011-01-14 17:12:32

标签: couchdb mapreduce

我在CouchDB中遇到了一个MapReduce问题(两个函数如下所示):当我用group_level=2(即精确分组)运行视图时,得到的输出是正确的:

{"rows":[
 {"key":["2011-01-11","staff-1"],"value":{"total":895.72,"count":2,"services":6,"services_ignored":6,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":275.72,"count":1},"Review":{"total":620,"count":1}}},
 {"key":["2011-01-11","staff-2"],"value":{"total":8461.689999999999,"count":2,"services":41,"services_ignored":37,"services_liked":4,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":4432.4,"count":1},"Review":{"total":4029.29,"count":1}}},
 {"key":["2011-01-11","staff-3"],"value":{"total":2100.72,"count":1,"services":10,"services_ignored":4,"services_liked":3,"services_disliked":3,"services_disliked_avg":2.3333333333333335,"Revise":{"total":2100.72,"count":1}}},

但是,改为group_level=1之后(此时不同staff键对应的值应当按日期合并在一起),输出就不再正确了(请注意total是正确的,但其他所有字段都是错误的):

{"rows":[
  {"key":["2011-01-11"],"value":{"total":11458.130000000001,"count":2,"services":0,"services_ignored":0,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"None":{"total":11458.130000000001,"count":2}}},

我唯一的猜测是这与rereduce有关,而我还没有学过它。我应该朝这个方向研究,还是我在这里遗漏了别的东西?

这是Map函数:

function(doc) {
if(doc.doc_type == 'Feedback') {
    emit([doc.date.split('T')[0], doc.staff_id], doc);
}
}

这是Reduce函数:

/**
 * Reduce function: folds the values in `vals` into one summary object.
 *
 * The summary holds the summed `total`, the number of values seen (`count`),
 * service counters (all / ignored / liked / disliked), the average score of
 * the disliked services, and one {total, count} bucket per status.
 *
 * NOTE(review): only (keys, vals) are declared and `keys` is never read.
 * Every vals[i] is treated as a document-shaped object (fields `total`,
 * `status`, `groups`, `hidden_services`); there is no separate handling for
 * values that are themselves results of an earlier reduce call.
 *
 * @param keys unused
 * @param vals values to aggregate
 * @returns the summary object `ret`
 */
function(keys, vals) {
// sum all key points by status: total, count, services (liked, rejected, ignored)
var ret = {
    'total':0,
    'count':0, 
    'services': 0,
    'services_ignored': 0,
    'services_liked': 0,
    'services_disliked': 0,
    'services_disliked_avg': 0,
};

// Running sum of user_likes over all services counted as disliked; kept
// outside `ret`, so it is not part of the returned value.
var total_disliked_score = 0;

// handle status
// Adds doc.total and one count to the bucket for the document's status.
// A missing/empty status goes to 'None'; 'Declined' is stored as 'Rejected';
// any other status is used verbatim as the bucket name.
// NOTE(review): `status` is assigned without a `var` declaration in this
// function or in the enclosing function, so it is not a local variable.
function handle_status(doc) {
    if(!doc.status || doc.status == '' || doc.status == undefined) {
        status = 'None';
    } else if (doc.status == 'Declined') {
        status = 'Rejected';
    } else {
        status = doc.status;
    }
    // Create the bucket lazily the first time a status is seen.
    if(!ret[status]) ret[status] = {'total':0, 'count':0};
    ret[status]['total'] += doc.total;  
    ret[status]['count'] += 1;
};

// handle likes / dislikes
// Classifies every entry of `services` by its user_likes value:
//   == 10            -> liked
//   >= 1 (otherwise) -> disliked, and its score feeds the running average
//   anything else    -> ignored
function handle_services(services) {
    ret.services += services.length;
    for(var a in services) {
        if (services[a].user_likes == 10) {
            ret.services_liked += 1;
        } else if (services[a].user_likes >= 1) {
            ret.services_disliked += 1;
            total_disliked_score += services[a].user_likes;
            // The average is recomputed after each disliked service, but only
            // while the score sum is at least the number of disliked services.
            if (total_disliked_score >= ret.services_disliked) {
                ret.services_disliked_avg = total_disliked_score / ret.services_disliked;
            }
        } else {
            ret.services_ignored += 1;
        }
    }
}

// loop thru docs 
for(var i in vals) {
    // increment the total $
    ret.total += vals[i].total;
    // Counts one per element of vals, whatever that element is.
    ret.count += 1;

    // update totals and sums for the status of this route
    handle_status(vals[i]);

    // do the likes / dislikes stats
    // Each group may carry its own `services` list; groups without one are skipped.
    if(vals[i].groups) {
        for(var ii in vals[i].groups) {
            if(vals[i].groups[ii].services) {
                handle_services(vals[i].groups[ii].services); 
            }
        }
    }

    // handle deleted services
    // hidden_services is run through the same like/dislike classification.
    // (The inner check repeats the outer one.)
    if(vals[i].hidden_services) {
        if (vals[i].hidden_services) {
            handle_services(vals[i].hidden_services);
        }
    }
}

return ret;
}

2 个答案:

答案 0 :(得分:8)

这是一个经典的错误。请记住,CouchDB的reduce是分多个步骤进行的,其中一些步骤接收到的输入是其他reduce步骤的结果。但是,您的代码似乎假设vals[i]总是形如{ "groups": _ , "hidden_services": _ , _ }、表示单个文档的对象。当发生 rereduce 时,这段代码就会出错,因为此时vals[i]的形式是{ "count" : _ , "services" : _ , _ },表示的是先前reduce步骤的结果。

因此,举例来说,用ret.count += 1来计数时,在rereduce阶段统计到的是中间reduce结果的个数,而不是文档的个数。

一种解决方案是编写两个版本的reduce代码,一个处理初次reduce,另一个处理 rereduce 步骤。您可以通过查看第三个参数来判断本次调用是初次调用还是rereduce调用(初次调用时为false,rereduce时为true)。

另一种解决方案是让map函数直接发出与reduce函数返回值形式相同的预处理值{ "count" : _ , "services" : _ , _ },这样reduce函数只需把这些值的各个成员相加即可。

答案 1 :(得分:2)

作为参考:在var ret = {...}下面添加以下代码来处理rereduce(reduce函数需要声明第三个参数rereduce),这样就可以正常工作了!

/**
 * Merges one status bucket of a partial reduce result into the accumulator.
 *
 * If `row` has a truthy entry under `stat`, its `total` and `count` are added
 * to `ret[stat]`, creating that bucket as {total: 0, count: 0} when absent.
 * If `row` has no such entry, `ret` is left untouched.
 *
 * @param row  object that may hold a {total, count} bucket under `stat`
 * @param ret  accumulator object; modified in place
 * @param stat name of the status bucket to merge
 * @returns the same `ret` object that was passed in
 */
function rereduce_status(row, ret, stat)
{
    var partial = row[stat];

    // Nothing recorded for this status in the partial result.
    if (!partial) {
        return ret;
    }

    if (!ret[stat]) {
        ret[stat] = {'total': 0, 'count': 0};
    }

    var bucket = ret[stat];
    bucket.total += partial.total;
    bucket.count += partial.count;

    return ret;
}

if(rereduce) {
    for (var i in vals) {
        ret.total += vals[i].total;
        ret.count += vals[i].count;
        ret.services += vals[i].services;
        ret.services_ignored += vals[i].services_ignored;
        ret.services_liked += vals[i].services_liked;
        ret.services_disliked += vals[i].services_disliked;
        ret.services_disliked_score += vals[i].services_disliked_score;
        if (ret.services_disliked_score >= ret.services_disliked) {
            ret.services_disliked_avg = ret.services_disliked_score / ret.services_disliked;
        }
        ret = rereduce_status(vals[i], ret, 'None');
        ret = rereduce_status(vals[i], ret, 'Review');
        ret = rereduce_status(vals[i], ret, 'Revise');
        ret = rereduce_status(vals[i], ret, 'Rejected');
        ret = rereduce_status(vals[i], ret, 'Booked');
    }

    return ret;
}