从Cosmos DB集合中批量删除项目(文档)

时间:2020-05-06 15:08:35

标签: azure azure-cosmosdb azure-cosmosdb-sqlapi

我在cosmos db集合中大约有70,000个文档。我正在使用以下存储过程来批量删除项目(文档)。

但是当我执行此存储过程时,仅删除了500个文档。如何一次性删除所有项目(文档)?

这是我的删除查询:

SELECT COUNT(1) FROM c WHERE c.UserUUId=null

存储过程:

/**
 * Stored procedure that deletes the documents matched by `query`.
 *
 * Work alternates between two steps: run the query, then delete the returned
 * documents one at a time. Once a batch is fully deleted the query is issued
 * again from the start (without a continuation token), so deletes are favoured
 * over paging through results.
 *
 * The response body is `{ deleted: <count>, continuation: <boolean> }`.
 * `continuation` stays true whenever a query or delete request is not
 * accepted, which tells the caller to invoke the procedure again; it is set to
 * false only when a query returns no documents and no continuation token.
 *
 * @param {string} query - Query selecting the documents to delete. The
 *     returned documents must expose `_self`, which is used as the delete link.
 * @throws {Error} If `query` is falsy, or if a query/delete callback reports
 *     an error (the error is rethrown as-is).
 */
function bulkDeleteSproc(query) {
    var container = getContext().getCollection();
    var containerLink = container.getSelfLink();
    var sprocResponse = getContext().getResponse();

    // Progress report; `continuation` is flipped to false only on completion.
    var progress = {
        deleted: 0,
        continuation: true
    };

    // Reject a missing query before any request is issued.
    if (!query) throw new Error("The query is undefined or null.");

    queryNextBatch();

    // Publishes the current progress as the stored procedure's response body.
    function publishProgress() {
        sprocResponse.setBody(progress);
    }

    // Issues the query, optionally resuming from a continuation token.
    function queryNextBatch(token) {
        var options = {continuation: token};
        var accepted = container.queryDocuments(containerLink, query, options, onQueryResult);

        // Request was not accepted: report progress with continuation still true.
        if (!accepted) {
            publishProgress();
        }
    }

    // Handles one page of query results.
    function onQueryResult(err, batch, pageInfo) {
        if (err) throw err;

        // Documents came back: start deleting right away instead of paging on.
        if (batch.length > 0) {
            deleteBatch(batch);
            return;
        }

        // Empty page that still carries a token: keep querying with that token.
        if (pageInfo.continuation) {
            queryNextBatch(pageInfo.continuation);
            return;
        }

        // Empty page and no token: nothing is left to delete.
        progress.continuation = false;
        publishProgress();
    }

    // Deletes the documents in `pending` front to back, then queries again.
    function deleteBatch(pending) {
        var hasPending = pending.length > 0;

        // Batch exhausted: restart the query from the beginning.
        if (!hasPending) {
            queryNextBatch();
            return;
        }

        var accepted = container.deleteDocument(pending[0]._self, {}, function (err, deleteInfo) {
            if (err) throw err;

            progress.deleted++;
            // Drop the document just deleted and continue with the rest.
            pending.shift();
            deleteBatch(pending);
        });

        // Request was not accepted: report progress with continuation still true.
        if (!accepted) {
            publishProgress();
        }
    }
}

这是执行存储过程后的输出:

enter image description here

2 个答案:

答案 0 :(得分:2)

存储过程的执行范围限定在单个逻辑分区内。因此,每次执行存储过程时都必须传入分区键值。为此,您需要先运行一个跨分区查询,获取分区键路径上的所有不重复值,然后遍历该结果集,针对每个分区键值分别调用一次存储过程。

需要说明的是,如果您要删除集合中的所有数据,那么直接删除集合本身再重新创建它,成本要低得多。

希望有帮助。

答案 1 :(得分:1)

正如Mark所指出的,删除集合本身而不是单个项目可能会更便宜。

如果由于某些原因要删除项目而不是整个集合,则可以考虑使用TTL设置。请参阅https://docs.microsoft.com/en-us/azure/cosmos-db/time-to-live

TTL既可以在容器级别设置,也可以在项目级别设置。另外,由于因TTL到期而执行的删除只使用剩余的RU,因此成本更低。

希望这会有所帮助