我正在尝试使用多个线程下载多个文件。该程序使用BFS算法来获取给定特定URL的所有文件:http://www.police.am/Hanraqve/问题是,当释放多个线程时,可以多次下载同一文件。我正在考虑一种同步下载过程的方法,这样只有在Mutexes或Semaphores的帮助下才能下载每个文件一次。任何想法或实际代码将非常感激。这是我的初始代码
public static async Task Download()
{
nodes.Enqueue(root);
while (nodes.Count() != 0)
{
String currentNode = "";
if (nodes.TryDequeue(out currentNode))
{
if (!visitedNodes.Contains(currentNode))
{
visitedNodes.Add(currentNode);
if (isFolder(currentNode))
{
List<String> urls = GetUrlsFromHtml(currentNode);
foreach (String url in urls)
{
nodes.Enqueue(url);
}
}
else
{
string fileName = currentNode.Remove(0, currentNode.LastIndexOf('/') + 1);
using (WebClient webClient = new WebClient())
{
await webClient.DownloadFileTaskAsync(new Uri(currentNode), destinationFolderPath + @"\" + fileName);
files.Enqueue(destinationFolderPath + @"\" + fileName);
}
}
}
}
}
//cts.Cancel();
}
public static List<String> GetUrlsFromHtml(string url)
{
HtmlWeb hw = new HtmlWeb();
HtmlDocument doc = hw.Load(url);
List<String> urls = new List<String>();
foreach (HtmlNode htmlNode in doc.DocumentNode.SelectNodes("//a[@href]"))
{
string hrefValue = htmlNode.Attributes["href"].Value;
if (hrefValue[0] >= '1' && hrefValue[0] <= '9')
{
urls.Add(url + hrefValue);
}
}
return urls;
}
public static bool isFolder(string url)
{
return url.EndsWith("/");
}
}
}