使用bash删除同时具有分区键和排序键的DynamoDB表中的所有项目

时间:2018-08-02 18:36:39

标签: amazon-web-services amazon-dynamodb aws-cli

我正在尝试使用bash中的AWS CLI删除同时具有分区键和排序键的DynamoDB表中的所有项目。到目前为止,我找到的最好的方法是:

# Scan the table fetching only the $KEY attribute, print each item's string (S)
# value for that attribute as text, put one value per line, and run one
# delete-item call per value (xargs -t echoes each command before running it).
# Only a single string-typed key attribute is sent in --key.
aws dynamodb scan --table-name $TABLE_NAME --attributes-to-get "$KEY" \
--query "Items[].$KEY.S" --output text | \
tr "\t" "\n" | \
xargs -t -I keyvalue aws dynamodb delete-item --table-name $TABLE_NAME \
--key "{\"$KEY\": {\"S\": \"keyvalue\"}}"

但是,这不适用于同时具有分区键和排序键的表,而且我还无法使其与这样的表一起使用。知道如何修改脚本以使其适用于具有组合键的表吗?

6 个答案:

答案 0 :(得分:5)

根据表的大小,这可能会过于昂贵并导致停机。请记住,删除操作的成本与写入操作的成本相同,因此,置备的WCU将使您受到限制。删除并重新创建表将更加简单和快捷

# This uses jq to remove the JSON fields that describe-table reports about an
# existing table but that are not actually part of the table schema/definition,
# so the remaining JSON can be passed back to create-table.
aws dynamodb describe-table --table-name "$table_name" | jq '.Table | del(.TableId, .TableArn, .ItemCount, .TableSizeBytes, .CreationDateTime, .TableStatus, .ProvisionedThroughput.NumberOfDecreasesToday)' > schema.json
# delete the table
aws dynamodb delete-table --table-name "$table_name"
# delete-table returns while the table is still being deleted; block until it
# is really gone, otherwise create-table fails because the name is still in use
aws dynamodb wait table-not-exists --table-name "$table_name"
# create table with same schema (including name and provisioned capacity)
aws dynamodb create-table --cli-input-json file://schema.json

如果您确实要逐个删除项目,那么您的思路是对的:只需在扫描投影和删除命令中同时指定哈希键和范围键即可。

# Scan only the key attributes, then delete every returned item by its key.
#   jq --compact-output '.[]'  prints each item on its own line
#   tr '\n' '\0'               NUL-terminates the lines so that xargs -0 passes
#                              the JSON through without interpreting special
#                              characters
#   xargs -I keyItem           uses the whole item as the key to delete
#                              (dynamo keys *are* dynamo items)
# The comments are kept above the pipeline on purpose: a comment written after
# a trailing backslash ends the command and breaks the pipe that follows it.
aws dynamodb scan \
  --attributes-to-get "$HASH_KEY" "$RANGE_KEY" \
  --table-name "$TABLE_NAME" --query "Items[*]" --output json \
  | jq --compact-output '.[]' \
  | tr '\n' '\0' \
  | xargs -0 -t -I keyItem \
    aws dynamodb delete-item --table-name "$TABLE_NAME" --key=keyItem

如果您想更进一步,可以使用describe-table调用来获取哈希键和范围键,以填充$HASH_KEY和$RANGE_KEY,但这就留给您作为练习了。

答案 1 :(得分:1)

更正@Cheruvian发布的内容。以下命令有效:在创建schema.json时,还有另外几个字段需要排除。

# Export the table definition, dropping the descriptive fields (including the
# stream and throughput timestamps) that must be excluded from create-table input.
aws dynamodb describe-table --table-name "$table_name" | jq '.Table | del(.TableId, .TableArn, .ItemCount, .TableSizeBytes, .CreationDateTime, .TableStatus, .LatestStreamArn, .LatestStreamLabel, .ProvisionedThroughput.NumberOfDecreasesToday, .ProvisionedThroughput.LastIncreaseDateTime)' > schema.json

# Delete the table and wait until the deletion has completed; creating a table
# with the same name while it is still being deleted fails.
aws dynamodb delete-table --table-name "$table_name"
aws dynamodb wait table-not-exists --table-name "$table_name"

# Recreate the table from the saved definition.
aws dynamodb create-table --cli-input-json file://schema.json

答案 2 :(得分:1)

如果您对使用Node.js感兴趣,请看一下此示例(我在这里使用TypeScript)。可以在AWS docs中找到更多相关信息。

import AWS from 'aws-sdk';
// Shared DocumentClient used by the helpers below, configured for eu-west-1.
const DynamoDb = new AWS.DynamoDB.DocumentClient({ region: 'eu-west-1' });
/**
 * Reads every item of a table by scanning it page by page.
 *
 * A single scan call only returns one page of results; while more pages
 * remain the response carries a LastEvaluatedKey, which is fed back as
 * ExclusiveStartKey until the whole table has been read.
 *
 * @param TableName name of the table to scan
 * @returns all items of the table
 */
export const getAllItemsFromTable = async TableName => {
  const items = [];
  let ExclusiveStartKey;
  do {
    const page = await DynamoDb.scan({ TableName, ExclusiveStartKey }).promise();
    for (const item of page.Items || []) {
      items.push(item);
    }
    ExclusiveStartKey = page.LastEvaluatedKey;
  } while (ExclusiveStartKey);
  return items;
};

/**
 * Deletes the given items from a table using batchWrite, 25 items per request.
 *
 * @param TableName name of the table to delete from
 * @param items     items to delete; only the hash key attribute of each item is read
 * @param hashKey   name of the table's hash key attribute (defaults to 'id')
 */
export const deleteAllItemsFromTable = async (
  TableName = '',
  items: Record<string, any>[],
  hashKey = 'id'
) => {
  let counter = 0;
  // 25 items is the maximum for one batchWrite call, so work in patches of 25.
  // split() is given a copy so the caller's array is left untouched, and the
  // returned promise is awaited so callers resume only after every patch was sent.
  await asyncForEach(split([...items], 25), async patch => {
    const RequestItems = {
      // Computed key: the request map must be keyed by the actual table name.
      [TableName]: patch.map(item => ({
        DeleteRequest: {
          Key: {
            [hashKey]: item[hashKey]
          }
        }
      }))
    };
    await DynamoDb.batchWrite({ RequestItems }).promise();
    counter += patch.length;
    console.log('counter : ', counter);
  });
};

/**
 * Splits an array into consecutive chunks of at most `n` elements.
 * The input array is not modified.
 *
 * @param {Array} arr array to split
 * @param {number} n  maximum chunk size; fractional values are truncated
 * @returns {Array<Array>} the chunks, in order; empty when `arr` is empty
 * @throws {RangeError} when `n` truncates to less than 1 or is not a number
 */
function split(arr, n) {
  const size = Math.trunc(n);
  // A chunk size below 1 can never consume the input, so reject it up front.
  if (!(size >= 1)) {
    throw new RangeError(`split: chunk size must be at least 1, got ${n}`);
  }
  const res = [];
  for (let start = 0; start < arr.length; start += size) {
    res.push(arr.slice(start, start + size));
  }
  return res;
}

/**
 * Calls `callback(element, index, array)` for each element in order, waiting
 * for each call to finish before starting the next one.
 */
async function asyncForEach(array, callback) {
  let position = 0;
  while (position < array.length) {
    await callback(array[position], position, array);
    position += 1;
  }
}

const tableName = "table";
// assuming table hashKey is named "id"
// getAllItemsFromTable is async, so its promise has to resolve to the item
// array before the items are handed to deleteAllItemsFromTable.
getAllItemsFromTable(tableName)
  .then(items => deleteAllItemsFromTable(tableName, items))
  .catch(err => {
    console.error(`failed to delete items from ${tableName}`, err);
  });

答案 3 :(得分:1)

我已经创建了一个节点模块来执行此操作:

https://www.npmjs.com/package/dynamodb-empty

# Install the dynamodb-empty command globally with yarn.
yarn global add dynamodb-empty
# Run it against the table named tableName
# (presumably deletes all of its items; confirm in the package README).
dynamodb-empty --table tableName

答案 4 :(得分:0)

我们有一些带有索引的表,因此必须再删除一些字段,另外还有“.ProvisionedThroughput.LastDecreaseDateTime”。因为我对jq完全陌生,所以花了一点时间摸索;-) 但下面就是对我们有效的做法:

    # Write schema.json from the describe-table output, deleting the listed
    # table-level fields plus, for every entry of .GlobalSecondaryIndexes, its
    # IndexSizeBytes, NumberOfDecreasesToday, IndexStatus, IndexArn and ItemCount.
    aws dynamodb describe-table --table-name $table_name | jq '.Table | del(.TableId, .TableArn, .ItemCount, .TableSizeBytes, .CreationDateTime, .TableStatus, .LatestStreamArn, .LatestStreamLabel, .ProvisionedThroughput.NumberOfDecreasesToday, .ProvisionedThroughput.LastIncreaseDateTime, .ProvisionedThroughput.LastDecreaseDateTime, .GlobalSecondaryIndexes[].IndexSizeBytes, .GlobalSecondaryIndexes[].ProvisionedThroughput.NumberOfDecreasesToday, .GlobalSecondaryIndexes[].IndexStatus, .GlobalSecondaryIndexes[].IndexArn, .GlobalSecondaryIndexes[].ItemCount)' > schema.json

答案 5 :(得分:0)

从这里的@Adel和@codeperson答案,我使用Amplify CLI(带有Hello World模板)创建了一个函数,其中表名必须使用事件对象传递:

/* Amplify Params - DO NOT EDIT
    API_DEALSPOON_GRAPHQLAPIENDPOINTOUTPUT
    API_DEALSPOON_GRAPHQLAPIIDOUTPUT
Amplify Params - DO NOT EDIT */

const AWS = require('aws-sdk')
// Settings read from the process environment: the environment name, the AWS
// region, and the GraphQL API id output.
const {
    ENV: environment,
    REGION: region,
    API_DEALSPOON_GRAPHQLAPIIDOUTPUT: apiDealspoonGraphQLAPIIdOutput,
} = process.env;

exports.handler = async (event) => {

    const DynamoDb = new AWS.DynamoDB.DocumentClient({region});

    // const tableName = "dev-invite";
    // const hashKey = "InviteToken";
    let {tableName, hashKey} = event
    
    tableName = `${tableName}-${apiDealspoonGraphQLAPIIdOutput}'-'${environment}`
    
    // Customization 4: add logic to determine which (return true if you want to delete the respective item)
    // If you don't want to filter anything out, then just return true in this function (or remove the filter step below, where this filter is used)
    const shouldDeleteItem = (item) => {
        return item.Type === "SECURE_MESSAGE" || item.Type === "PATIENT";
    };

    const getAllItemsFromTable = async (lastEvaluatedKey) => {
        const res = await DynamoDb.scan({
            TableName: tableName,
            ExclusiveStartKey: lastEvaluatedKey
        }).promise();
        return {items: res.Items, lastEvaluatedKey: res.LastEvaluatedKey};
    };

    const deleteAllItemsFromTable = async (items) => {
        let numItemsDeleted = 0;
        // Split items into patches of 25
        // 25 items is max for batchWrite
        await asyncForEach(split(items, 25), async (patch, i) => {
            const requestItems = {
                [tableName]: patch.filter(shouldDeleteItem).map(item => {
                    numItemsDeleted++;
                    return {
                        DeleteRequest: {
                            Key: {
                                [hashKey]: item[hashKey]
                            }
                        }
                    };
                })
            };
            if (requestItems[tableName].length > 0) {
                await DynamoDb.batchWrite({RequestItems: requestItems}).promise();
                console.log(`finished deleting ${numItemsDeleted} items this batch`);
            }
        });

        return {numItemsDeleted};
    };

    function split(arr, n) {
        const res = [];
        while (arr.length) {
            res.push(arr.splice(0, n));
        }
        return res;
    }

    async function asyncForEach(array, callback) {
        for (let index = 0; index < array.length; index++) {
            await callback(array[index], index, array);
        }
    }

    let lastEvaluatedKey;
    let totalItemsFetched = 0;
    let totalItemsDeleted = 0;

    console.log(`------ Deleting from table ${tableName}`);

    do {
        const {items, lastEvaluatedKey: lek} = await getAllItemsFromTable(lastEvaluatedKey);
        totalItemsFetched += items.length;
        console.log(`--- a group of ${items.length} was fetched`);

        const {numItemsDeleted} = await deleteAllItemsFromTable(items);
        totalItemsDeleted += numItemsDeleted;
        console.log(`--- ${numItemsDeleted} items deleted`);

        lastEvaluatedKey = lek;
    } while (!!lastEvaluatedKey);

    console.log("Done!");
    console.log(`${totalItemsFetched} items total fetched`);
    console.log(`${totalItemsDeleted} items total deleted`);
};