如何使用map reduce合并Couchbase中的数据?

时间:2017-07-07 04:10:15

标签: javascript mapreduce couchbase

我的应用程序把多种类型的文档存储在同一个存储桶(bucket)中。我知道这不是好的做法,但我的服务器上可创建的桶数量有限,目前还没有别的解决办法。文档的键以其类型作为前缀,所以获取文档时,我只需把前缀和id拼接起来得到键,就可以按键查找。

我需要创建一个报告,从多个文档类型中提取信息。

我的map函数如下所示:

function(doc, meta) {

  var getStep = function(stepName, exit, mapper) {
    if (meta.id.indexOf(stepName) !== -1) {
      var hotelId = parseInt(meta.id.replace(stepName + '_', ''));
      if (hotelId > 0) {
        var result = {
          hotelId: hotelId,
          exit: exit
        };
        if (mapper !== undefined) {
          mapper(result);
        }
        return result;
      }
    }
    return null;
  };

  var photos = getStep('PHOTOS', 7);
  if (photos != null) {
    emit(photos.hotelId, photos);
  }
  var pricing = getStep('PICR', 5);
  if (pricing != null) {
    emit(pricing.hotelId, pricing);
  }
  var owner = getStep('OWNER', 1);
  if (owner != null) {
    emit(owner.hotelId, owner);
  }
  var amenity = getStep('AM', 4);
  if (amenity != null) {
    emit(amenity.hotelId, amenity);
  }
  var description = getStep('HDESC', 3, function(result) {
    result.description = doc.description;
    result.hotelRoomTypeId = doc.hotelRoomTypeId;
    result.starRating = doc.starRating;
  });
  if (description != null) {
    emit(description.hotelId, description);
  }
  var contact = getStep('DC', 3, function(result) {
    result.email = doc.emailAddress;
    result.contact = doc.mainContactName;
  });
  if (contact != null) {
    emit(contact.hotelId, contact);
  }
  var location = getStep('LOC', 2, function(result) {
    result.city = doc.cityName;
    result.zip = doc.postalCode;
    result.country = doc.countryName;
    result.street = doc.stateName + ', ' + doc.streetName;
  });
  if (location != null) {
    emit(location.hotelId, location);
  }
  var property = getStep('PRP', 1, function(result) {
    result.paymentMethodId = doc.paymentMethodId
  });
  if (property != null) {
    emit(property.hotelId, property);
  }
}

它会生成此输出:

"total_rows":...,"rows":[
{"id":"DC_1","key":1,"value":{"hotelId":1,"exit":3,"email":"test@example.com","contact":"Jeno"}},
{"id":"HDESC_1","key":1,"value":{"hotelId":1,"exit":3,"description":".","hotelRoomTypeId":0,"starRating":5}},
{"id":"LOC_1","key":1,"value":{"hotelId":1,"exit":2,"city":"Barcelona","zip":"1222","country":"Spain","street":"Catalonia, someplacenice"}},
{"id":"PRP_1","key":1,"value":{"hotelId":1,"exit":1}},
{"id":"PRP_2","key":2,"value":{"hotelId":2,"exit":1}},
{"id":"AM_3","key":3,"value":{"hotelId":3,"exit":4}},
{"id":"AM_4","key":4,"value":{"hotelId":4,"exit":4}},
{"id":"PHOTOS_4","key":4,"value":{"hotelId":4,"exit":7}},
{"id":"PRP_4","key":4,"value":{"hotelId":4,"exit":1}},
{"id":"AM_4","key":4,"value":{"hotelId":4,"exit":4}},
{"id":"PRP_4","key":4,"value":{"hotelId":4,"exit":1}},
{"id":"PHOTOS_5","key":5,"value":{"hotelId":5,"exit":7}}
...

]

我正在尝试按hotelId(也就是新的键)对数据进行分组,并使用自定义的reduce函数把各个字段合并到一个文档中。根据写法不同,我会得到不同的错误,但所有错误似乎都表明reduce函数可以返回的数据量是有限制的。如果我把返回类型从对象改为关联数组(两者的工作方式非常相似),得到的错误信息会更明确一些。

function(key, values, rereduce) { 
  // Custom reduce: folds the incoming rows into one merged record per hotelId.
  if (rereduce) {
    // On rereduce the partial results are handed back as-is, without merging.
    return values;
  } else {
    var results = {}; // plain object used as a hotelId -> record dictionary
    for (var i = 0; i < values.length; i++) {
      var row = values[i];
      if (!results[row.hotelId]) {
        // First row seen for this hotel: seed the record with defaults.
        results[row.hotelId] = {
          phone: '',
          exit: 1
        };
      }
      var result = results[row.hotelId];
      // Copy every field of the row onto the record; later rows overwrite
      // fields written by earlier ones.
      for (var name in row) {
        result[name] = row[name];
      }
      // NOTE(review): this compares row.exit with itself, so the branch can
      // never run; presumably result.exit was meant on one side - confirm.
      if (row.exit > row.exit) {
        result.exit = row.exit;
      }
    };

    return results;
  }
}

给我RangeError: Maximum call stack size exceeded

function(key, values, rereduce) { 
  // Same reduce as the object-based variant, but accumulating into an array.
  if (rereduce) {
    // On rereduce the partial results are handed back as-is, without merging.
    return values;
  } else {
    var results = []; // array indexed directly by hotelId
    for (var i = 0; i < values.length; i++) {
      var row = values[i];
      if (!results[row.hotelId]) {
        // First row seen for this hotel: seed the record with defaults.
        results[row.hotelId] = {
          phone: '',
          exit: 1
        };
      }
      var result = results[row.hotelId];
      // Copy every field of the row onto the record; later rows overwrite
      // fields written by earlier ones.
      for (var name in row) {
        result[name] = row[name];
      }
      // NOTE(review): this compares row.exit with itself, so the branch can
      // never run; presumably result.exit was meant on one side - confirm.
      if (row.exit > row.exit) {
        result.exit = row.exit;
      }
    };

    return results;
  }
}

给我reduction too large error

function(key, values, rereduce) { 
  // Pass-through reduce: both the first pass and the rereduce pass return
  // their input list unchanged.
  if (rereduce) {
    return values;
  } else {    
    return values;
  }
}

给我RangeError: Maximum call stack size exceeded

如果我运行:

function(key, values, rereduce) { 
  // Diagnostic reduce: reports how many rows each first-pass call receives.
  if (rereduce) {
    // The partial counts are returned as a list rather than summed.
    return values;
  } else {        
    return values.length;
  }
}

我得到的结果是:

[ 68, 72, 65, 66, 68, 68, 70, 114 ]

JavaScript引擎应该能够对最多114个元素的数组执行reduce,而且输出的数据只会更小。据我了解,reduce可以返回的数据量受max_kv_size_per_doc限制(1MB),另外还有10秒的执行时间限制,但我遇到的似乎是别的问题。有没有办法通过修改算法、改变返回值的结构(例如返回数组)等方式来绕过这些限制?我能在map函数里做些什么,或者在rereduce中使用某些技巧吗?

1 个答案:

答案 0 :(得分:3)

我明白了。如果我使用复合键和group_level,它可以工作。

因此,如果我修改map函数,把酒店ID放在数组里作为键(key)返回,并设置group_level=1,这些值就会按照我最初预期的方式分组:

function(doc, meta) {

  var getStep = function(stepName, exit, mapper) {
    if (meta.id.indexOf(stepName) !== -1) {
      var hotelId = parseInt(meta.id.replace(stepName + '_', ''));
      if (hotelId > 0) {
        var result = {
          hotelId: hotelId,
          exit: exit
        };
        if (mapper !== undefined) {
          mapper(result);
        }
        return result;
      }
    }
    return null;
  };

  var photos = getStep('PHOTOS', 7);
  if (photos != null) {
    emit([photos.hotelId], photos); // array as key
  }
  var pricing = getStep('PICR', 5); // array as key
  if (pricing != null) {
    emit([pricing.hotelId], pricing);
  }
  var owner = getStep('OWNER', 1); // array as key
  if (owner != null) {
    emit([owner.hotelId], owner);
  }
  var amenity = getStep('AM', 4); // array as key
  if (amenity != null) {
    emit([amenity.hotelId], amenity);
  }
  var description = getStep('HDESC', 3, function(result) {
    result.description = doc.description;
    result.hotelRoomTypeId = doc.hotelRoomTypeId;
    result.starRating = doc.starRating;
  });
  if (description != null) {
    emit([description.hotelId], description); // array as key
  }
  var contact = getStep('DC', 3, function(result) {
    result.email = doc.emailAddress;
    result.contact = doc.mainContactName;
  });
  if (contact != null) {
    emit([contact.hotelId], contact); // array as key
  }
  var location = getStep('LOC', 2, function(result) {
    result.city = doc.cityName;
    result.zip = doc.postalCode;
    result.country = doc.countryName;
    result.street = doc.stateName + ', ' + doc.streetName;
  });
  if (location != null) {
    emit([location.hotelId], location); // array as key
  }
  var property = getStep('PRP', 1, function(result) {
    result.paymentMethodId = doc.paymentMethodId
  });
  if (property != null) {
    emit([property.hotelId], property); // array as key
  } 
}

然后我需要设置group_level=1和reduce=true。您可以在视图编辑器或查询字符串中进行设置。

最后一步是reduce函数:

function(key, values, rereduce) { 
  if (rereduce) {
    return values;
  } else {           
    var result = {};
    values.forEach(function(item){
        for(var name in item){
            result[name] = item[name];
        }
    });

    return result;
  }
}

结果将由hotelId按预期合并:)