我正在尝试使用异步方法将大量文件从一个 S3 位置复制到另一个 S3 位置。为此,我把这些文件分成若干批次,并把每个批次交给一个异步方法处理。问题是:每个批次包含超过 1000 个文件,但每个异步方法只处理了其中 1 个文件,之后就不再回到循环继续处理剩余的文件,我不明白这是为什么。
代码如下:
/// <summary>
/// Wraps every batch of keys in a delegate that calls <c>RenameFilesAsync</c>,
/// then invokes the delegates one after another, logging a running index
/// after each invocation.
/// </summary>
/// <param name="srcBucket">Bucket name forwarded to <c>RenameFilesAsync</c>.</param>
/// <param name="pdfFileList">Batches of object keys; one delegate is created per batch.</param>
/// <param name="s3client">S3 client forwarded to <c>RenameFilesAsync</c>.</param>
/// <remarks>
/// The delegates are plain <c>Action</c>s and this method returns <c>void</c>,
/// so nothing here waits for the asynchronous work started by an invocation.
/// </remarks>
public void CreateAndExecuteSpawn(string srcBucket, List<List<string>> pdfFileList, IAmazonS3 s3client)
{
    LambdaLogger.Log("PDF Set count: " + pdfFileList.Count.ToString());

    // Build all delegates first, then run them in a second pass.
    var batchJobs = new List<Action>();
    foreach (var batch in pdfFileList)
    {
        batchJobs.Add(() => RenameFilesAsync(srcBucket, batch, s3client));
    }

    for (int index = 0; index < batchJobs.Count; index++)
    {
        batchJobs[index]();
        LambdaLogger.Log("Mehtod invoked: " + index.ToString());
    }
}
/// <summary>
/// Copies each key in <paramref name="pdfFiles"/> (skipping keys that contain
/// "index.xml") to a "Renamed" path inside the same bucket, awaiting one copy
/// before starting the next.
/// </summary>
/// <param name="srcBucket">Bucket used as both source and destination of every copy.</param>
/// <param name="pdfFiles">Object keys to copy.</param>
/// <param name="s3client">Client used to issue the copy requests.</param>
/// <remarks>
/// Declared <c>async void</c>, so callers receive no Task to await. Any exception
/// ends the loop; its message and the most recent destination key are logged.
/// </remarks>
public async void RenameFilesAsync(string srcBucket, List<string> pdfFiles, IAmazonS3 s3client)
{
    LambdaLogger.Log("In RenameFileAsync method");

    // A single request object is reused for every file; only the keys change.
    var copyRequest = new CopyObjectRequest
    {
        SourceBucket = srcBucket,
        DestinationBucket = srcBucket
    };

    try
    {
        foreach (var sourceKey in pdfFiles)
        {
            if (sourceKey.Contains("index.xml"))
            {
                continue;
            }

            // Destination = first two '/' segments + "/Renamed/" + the text after the first '{'.
            string[] pathParts = sourceKey.Split('/');
            string[] braceParts = sourceKey.Split('{');
            copyRequest.SourceKey = sourceKey;
            copyRequest.DestinationKey = pathParts[0] + "/" + pathParts[1] + "/Renamed/" + braceParts[1];

            LambdaLogger.Log("About to rename File: " + sourceKey);
            // Asker's observation: after copying one file, execution does not come back to this loop.
            await s3client.CopyObjectAsync(copyRequest);
            LambdaLogger.Log("Rename done: ");
        }
    }
    catch (Exception ex)
    {
        LambdaLogger.Log(ex.Message);
        LambdaLogger.Log(copyRequest.DestinationKey);
    }
}
/// <summary>
/// Handler that receives an S3Event and an ILambdaContext and hands the work
/// to CreateAndExecuteSpawn.
/// </summary>
/// <remarks>
/// NOTE(review): presumably this is the AWS Lambda entry point — confirm against
/// the function configuration. Both this method and CreateAndExecuteSpawn return
/// void, so nothing here waits for work started by that call.
/// </remarks>
public void FunctionHandler(S3Event evnt, ILambdaContext context)
{
//Some code here
// NOTE(review): bucket, pdfFileSet and s3client are not declared in the visible
// code; presumably the elided code above sets them up.
CreateAndExecuteSpawn(bucket, pdfFileSet, s3client);
}
答案 0 :(得分:2)
首先,您需要修复批次的处理方式,使其一次处理一个批次。避免使用 `async void`;改用 `async Task`:
/// <summary>
/// Creates one Task-returning delegate per batch and awaits them strictly in
/// order, so a batch starts only after the previous one has completed.
/// Logs a running index after each batch.
/// </summary>
/// <param name="srcBucket">Bucket name forwarded to <c>RenameFilesAsync</c>.</param>
/// <param name="pdfFileList">Batches of object keys; one delegate is created per batch.</param>
/// <param name="s3client">S3 client forwarded to <c>RenameFilesAsync</c>.</param>
/// <returns>A task that completes once every batch has been awaited.</returns>
public async Task CreateAndExecuteSpawnAsync(string srcBucket, List<List<string>> pdfFileList, IAmazonS3 s3client)
{
    LambdaLogger.Log("PDF Set count: " + pdfFileList.Count.ToString());

    // Build all delegates first, then await them in a second pass.
    var batchJobs = new List<Func<Task>>();
    foreach (var batch in pdfFileList)
    {
        batchJobs.Add(() => RenameFilesAsync(srcBucket, batch, s3client));
    }

    for (int index = 0; index < batchJobs.Count; index++)
    {
        await batchJobs[index]();
        LambdaLogger.Log("Mehtod invoked: " + index.ToString());
    }
}
public async Task RenameFilesAsync(string srcBucket, List<string> pdfFiles, IAmazonS3 s3client)
然后您可以在每个批次内部加入异步并发。当前代码只是一个 `foreach` 循环,所以当然一次只处理一个文件。您可以用 `Select` 启动各个任务,然后在最后执行 `Task.WhenAll`,从而把它改为异步并发:
LambdaLogger.Log("In RenameFileAsync method");
// Template holding only the bucket names. It is never mutated below, so it is
// safe to read from every concurrently running copy.
CopyObjectRequest copyRequest = new CopyObjectRequest
{
    SourceBucket = srcBucket,
    DestinationBucket = srcBucket
};
try
{
    // Start one copy per file (skipping index.xml keys) and wait for all of them.
    var tasks = pdfFiles
        .Where(file => !file.Contains("index.xml"))
        .Select(async file =>
        {
            string[] newFilename = file.Split('{');
            string[] destKey = file.Split('/');
            // Every copy gets its own request object. Sharing one mutable request
            // across concurrent copies would let a later file overwrite
            // SourceKey/DestinationKey while an earlier copy is still in flight.
            var fileRequest = new CopyObjectRequest
            {
                SourceBucket = copyRequest.SourceBucket,
                DestinationBucket = copyRequest.DestinationBucket,
                SourceKey = file,
                // First two '/' segments + "/Renamed/" + the text after the first '{'.
                DestinationKey = destKey[0] + "/" + destKey[1] + "/Renamed/" + newFilename[1]
            };
            LambdaLogger.Log("About to rename File: " + file);
            await s3client.CopyObjectAsync(fileRequest);
            LambdaLogger.Log("Rename done: ");
        })
        .ToList();
    await Task.WhenAll(tasks);
}