我是Amazon DynamoDB的新手。我目前有20000行需要添加到表中。但是,基于我所读到的内容,我似乎只能使用带有25个WriteRequests的BatchWriteItem类一次最多写入25行。有可能增加这个吗?我怎样一次写超过25行?目前大约需要15分钟来写入所有20000行。谢谢。
答案 0(得分:5)
我正在寻找一些使用JavaScript SDK执行此操作的代码。我找不到它,所以我自己把它放在一起。我希望这有助于其他人!
/**
 * Writes an array of items to a DynamoDB table using BatchWriteItem,
 * 25 items per request (the hard limit on a single BatchWriteItem call),
 * firing all batches in parallel.
 *
 * @param {string} table - DynamoDB table name.
 * @param {Object[]} data - Plain item objects (DocumentClient format).
 * @param {function(Error|boolean)} cb - Invoked exactly once after every
 *   batch settles; receives the first error encountered, or false when
 *   all batches succeeded.
 */
function multiWrite(table, data, cb) {
  var AWS = require('aws-sdk');
  var db = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});

  // Split the items into batches of at most 25 write requests.
  // (Index loop instead of for...in — for...in over arrays yields string
  // keys and any inherited enumerable properties.)
  var batches = [];
  for (var i = 0; i < data.length; i += 25) {
    batches.push(data.slice(i, i + 25).map(function(item) {
      return {PutRequest: {Item: item}};
    }));
  }

  // With no items the original never invoked cb; report success explicitly.
  if (batches.length === 0) {
    return cb(false);
  }

  var completed_requests = 0;
  var errors = false; // holds the first error seen, or false

  // Returns a per-request callback that records completion and fires `cb`
  // exactly once, after the last outstanding batch settles.
  function handler(request) {
    return function(err, resp) {
      completed_requests++;
      if (err) {
        errors = errors || err; // keep the first error object
        console.error(JSON.stringify(err, null, 2));
        console.error("Request that caused database error:");
        console.error(JSON.stringify(request, null, 2));
      }
      // NOTE(review): BatchWriteItem can partially succeed — a robust
      // writer would also retry resp.UnprocessedItems here.
      if (completed_requests === batches.length) {
        cb(errors);
      }
    };
  }

  // Fire all batches in parallel; DynamoDB accepts any number of
  // concurrent BatchWriteItem requests (subject to provisioned throughput).
  // Params are built as plain objects — no need to round-trip through JSON.
  for (var j = 0; j < batches.length; j++) {
    var params = {RequestItems: {}};
    params.RequestItems[table] = batches[j];
    db.batchWrite(params, handler(params));
  }
}
答案 1(得分:4)
您只能在一个BatchWriteItem请求中发送最多25个项目,但您可以一次发送任意数量的BatchWriteItem请求。假设您已经预置了足够的写入吞吐量(provisioned enough write throughput),您应该能够通过在多个线程/进程/主机之间拆分这些20k行并将它们并行推送到数据库来显着加快速度。
对于那么小的数据集,它可能有点重量级,但您可以使用AWS Data Pipeline从S3中提取数据。它基本上自动化了创建Hadoop集群的过程,以便从S3中吸取数据,并在一堆并行的BatchWriteItem请求中将其发送到DynamoDB。
答案 2(得分:0)
/**
 * Persists `data` to the history table via BatchWriteItem, 25 items per
 * request (the DynamoDB limit), driving the batches with async.every.
 *
 * @param {Object[]} data - Items to put into TABLES.historyTable.
 * @param {function} cb - Node-style callback; receives the first batch
 *   error, or {allWritten: true|false} depending on whether every batch
 *   reported success.
 */
function putInHistory(data, cb) {
  // lodash splits the input into chunks of at most 25 items each.
  var chunks = _.chunk(data, 25);

  async.every(chunks, function(chunk, callback) {
    // Build one BatchWriteItem payload for this chunk.
    var writeRequests = chunk.map(function(item) {
      return {
        PutRequest: {
          Item: item
        }
      };
    });
    var params = {
      RequestItems: {
        [TABLES.historyTable]: writeRequests
      }
    };

    docClient.batchWrite(params, function(err, result) {
      if (err) {
        console.log(err);
        callback(err);
        return;
      }
      console.log(result);
      callback(null, true);
    });
  }, function(err, everyBatchOk) {
    if (err) {
      cb(err);
      return;
    }
    if (everyBatchOk) {
      cb(null, {allWritten: true});
    } else {
      cb(null, {allWritten: false});
    }
  });
}
您可以使用lodash从数组中创建数据块,然后使用async库的每个/每个方法在25个元素的块上执行batchWrite
答案 3(得分:0)
来自@Geerek的答案是带有lambda函数的解决方案:
exports.handler = (event, context, callback) => {
console.log(`EVENT: ${JSON.stringify(event)}`);
var AWS = require('aws-sdk');
AWS.config.update({ region: process.env.REGION })
var docClient = new AWS.DynamoDB.DocumentClient();
const {data, table, cb} = event
// Build the batches
var batches = [];
var current_batch = [];
var item_count = 0;
for (var i = 0; i < data.length; i++) {
// Add the item to the current batch
item_count++
current_batch.push({
PutRequest: {
Item: data[i],
},
})
// If we've added 25 items, add the current batch to the batches array
// and reset it
if (item_count % 25 === 0) {
batches.push(current_batch)
current_batch = []
}
}
// Add the last batch if it has records and is not equal to 25
if (current_batch.length > 0 && current_batch.length !== 25) {
batches.push(current_batch)
}
// Handler for the database operations
var completed_requests = 0
var errors = false
function handler (request) {
console.log('in the handler: ', request)
return function (err, data) {
// Increment the completed requests
completed_requests++;
// Set the errors flag
errors = (errors) ? true : err;
// Log the error if we got one
if(err) {
console.error(JSON.stringify(err, null, 2));
console.error("Request that caused database error:");
console.error(JSON.stringify(request, null, 2));
callback(err);
}else {
callback(null, data);
}
// Make the callback if we've completed all the requests
if(completed_requests === batches.length) {
cb(errors);
}
}
}
// Make the requests
var params;
for (var j = 0; j < batches.length; j++) {
// Items go in params.RequestItems.id array
// Format for the items is {PutRequest: {Item: ITEM_OBJECT}}
params = '{"RequestItems": {"' + table + '": []}}'
params = JSON.parse(params)
params.RequestItems[table] = batches[j]
console.log('before db.batchWrite: ', params)
// Perform the batchWrite operation
docClient.batchWrite(params, handler(params))
}
};
答案 4(得分:0)
我编写了一个 npm package,它应该可以作为 batchWrite
方法的简单替代品,您只需要将 dynamoDB 实例作为第一个参数传递,事情应该可以正常工作:
https://www.npmjs.com/package/batch-write-all
检查项目自述文件中的示例:
// Use below instead of this: dynamodb.batchWrite(params).promise();
batchWriteAll(dynamodb, params).promise();