NodeJS中BatchWrite到DynamoDB时的指数退避

时间:2017-03-20 18:24:21

标签: node.js amazon-web-services amazon-dynamodb aws-lambda

我有一段可以正常工作的代码:从S3读取CSV文件,每25行分为一组放入一个BatchWriteItem DynamoDB请求并发送。BatchWrite经常会返回成功,但响应中带有UnprocessedItems,其中包含部分项目(不是全部25项)。随后的重新提交也可能失败(部分或完全)。我想在发送后续请求时实现指数退避,但我找到的所有库都假设每次重试的任务相同。而在我的情况下,重试的项目可能与先前请求中的项目相同,也可能不同。

我对Node.js不太熟悉。是否有任何库/方法来实现具有(不同)上下文的重新尝试的任务?

我正在使用AWS Lambda,因此无法使用全局变量。

写入DDB并重试1次的辅助函数:

/**
 * Sends a BatchWriteItem request to DynamoDB and, if the response reports
 * unprocessed items, resubmits those items exactly once.
 *
 * Outcomes are only logged to the console; nothing is returned and no
 * callback is invoked.
 *
 * Relies on `dynamodb` (a DynamoDB client) and `util` being in scope.
 *
 * @param {Object} params - BatchWriteItem parameters (must contain RequestItems).
 */
function batchWriteDDB(params) {
  var inspectOptions = { showHidden: false, depth: null };

  dynamodb.batchWriteItem(params, function(err, data) {
    if (err) {
      console.error("Batchwrite failed: " + err, err.stack);
      return;
    }

    // Treat a missing UnprocessedItems map the same as an empty one.
    var unprocessed = (data && data.UnprocessedItems) || {};
    if (Object.keys(unprocessed).length === 0) {
      console.log("Processed all items.");
      return;
    }

    // Some items were not written: resubmit only those items.
    console.warn("Batchwrite did not to complete: " + util.inspect(unprocessed, inspectOptions));
    console.log("Retry btachwriting...");
    var retryParams = { RequestItems: unprocessed };

    dynamodb.batchWriteItem(retryParams, function(retryErr, retryData) {
      // Check the retry's own error, not the (always null here) error of the
      // first request; otherwise a failed retry dereferences a null response.
      if (retryErr) {
        console.error("Retry failed: " + retryErr, retryErr.stack);
        return;
      }

      var stillUnprocessed = (retryData && retryData.UnprocessedItems) || {};
      if (Object.keys(stillUnprocessed).length === 0) {
        console.log("Retry processed all items.");
      } else {
        console.error("Failed AGAIN to complete: " + util.inspect(stillUnprocessed, inspectOptions));
      }
    });
  });
}

2 个答案:

答案 0 :(得分:7)

AWS SDK支持指数退避和重试机制。你可以配置它。

将所有服务的基本重试延迟设置为300毫秒

// Set retryDelayOptions.base to 300 on the global AWS config
// (milliseconds, per the accompanying text).
AWS.config.update({retryDelayOptions: {base: 300}});
// Delays with maxRetries = 3: 300, 600, 1200

设置自定义退避函数,以便在重试时提供延迟值

// Register a customBackoff callback under retryDelayOptions on the global AWS config.
// The callback receives the retry count.
AWS.config.update({retryDelayOptions: {customBackoff: function(retryCount) {
  // returns delay in ms
  // NOTE(review): the body is a placeholder and currently returns nothing;
  // fill in the delay computation based on retryCount.
}}});

专门针对AWS DynamoDB服务进行配置: -

// Create a DynamoDB client with maxRetries set to 5 for this client only.
var dynamodb = (new AWS.DynamoDB({maxRetries: 5}))

专门针对AWS DynamoDB服务同时配置最大重试次数和延迟: -

  • maxRetries = 5
  • 延迟= 300毫秒

配置: -

// Create a DynamoDB client with maxRetries set to 5 and retryDelayOptions.base set to 300
// (milliseconds, per the accompanying text).
var dynamodb = new AWS.DynamoDB({maxRetries: 5, retryDelayOptions: {base: 300} });

MaxRetry properties

答案 1 :(得分:0)

下面是一个递归方法,用于处理写入DynamoDB时返回的未处理项目。

/**
 * Writes `items` to a DynamoDB table with BatchWriteItem, 25 requests per
 * call, and keeps resubmitting any UnprocessedItems until none remain.
 *
 * Each resubmission is delayed by `retryDelayMs * attempt`, where `attempt`
 * counts the resubmissions made so far across all batches.
 *
 * Relies on `AWS` (AWS SDK v2) and `logger` being in scope.
 *
 * @param {Array<Object>} items - Write requests (e.g. `{ PutRequest: ... }`).
 *     The array is drained: requests are popped from its end as they are batched.
 * @param {string} [table='Merchants'] - Target table name.
 * @param {function(?Error, Object)} callback - Invoked exactly once after every
 *     batch has either been fully written or failed. Receives the first error
 *     encountered (or null) and the response of the last completed call.
 * @param {Object} [options]
 * @param {number} [options.retryDelayMs=10000] - Delay unit for resubmissions.
 */
var batchWrite = function (items, table, callback, options) {
    const MAX_BATCH_SIZE = 25; // BatchWriteItem limit used by the original code
    const tableName = table || 'Merchants';
    const retryDelayMs =
        options && typeof options.retryDelayMs === 'number' ? options.retryDelayMs : 10000;

    logger.info('batchWrite initial length of items: ' + items.length);

    // Build every batch as its own array up front so that no request shares
    // mutable state with another one and completion accounting is known
    // before the first response can arrive.
    const batches = [];
    while (items.length > 0) {
        const batch = [];
        while (batch.length < MAX_BATCH_SIZE && items.length > 0) {
            batch.push(items.pop());
        }
        batches.push(batch);
    }

    // Nothing to write: still honour the callback contract.
    if (batches.length === 0) {
        callback(null, { UnprocessedItems: {} });
        return;
    }

    const dynamo = new AWS.DynamoDB({ apiVersion: '2012-08-10' });
    let attempt = 0;
    let pending = batches.length; // batches not yet finished (written or failed)
    let firstError = null;

    batches.forEach(function (batch) {
        logger.info("Calling BatchWriteItem with a new batch of " + batch.length + " items");
        dynamo.batchWriteItem({ RequestItems: { [tableName]: batch } }, onBatchWritten);
    });

    // Marks one batch as finished and reports to the caller after the last one.
    function finishBatch(data) {
        pending--;
        if (pending > 0) {
            return;
        }
        if (!firstError) {
            logger.info("batchWrite processed all batches");
        }
        callback(firstError, data);
    }

    // Handles one BatchWriteItem response; resubmits unprocessed items if any.
    function onBatchWritten(err, data) {
        if (err) {
            logger.info(err);
            // Remember the failure so that it is not masked by a later success.
            firstError = firstError || err;
            finishBatch(data);
            return;
        }

        console.dir(data);
        const unprocessed = data && data.UnprocessedItems;
        if (unprocessed && tableName in unprocessed) {
            // The batch stays pending: it is finished only once nothing is left over.
            const retryParams = { RequestItems: unprocessed };
            attempt++;
            const delayMs = retryDelayMs * attempt;
            logger.info("Calling BatchWriteItem again to retry "
                + unprocessed[tableName].length + " UnprocessedItems in " + (delayMs / 1000) + " seconds");
            setTimeout(function () {
                dynamo.batchWriteItem(retryParams, onBatchWritten);
            }, delayMs);
            return;
        }

        logger.info("BatchWriteItem processed all items in the batch, pending batches = " + (pending - 1));
        finishBatch(data);
    }
}

使用集合和参数调用batchWrite函数。

// Write `collection` to the given table and log the outcome.
batchWrite(collection, 'your-table-name', (err, data) => {
    if (err) {
        logger.info('error ');
        // Stop here so that a failure is not also reported as a success.
        return;
    }
    logger.info('success ');
});