我遇到的问题是:
我尝试下载 1000 多张图片 -> 程序可以工作,但每张图片都要很长时间才能完全下载完,而程序并不等待,会继续下载下一张图片,依此类推……结果已经轮到第 100 张图片了,第 8 张图片却还没下载完。
所以我想了解为什么我在这里遇到这样的问题和/或如何解决这个问题。
希望有人能看出问题所在。
/// <summary>
/// Downloads the content at <paramref name="url"/> as a UTF-8 string using a short-lived WebClient.
/// </summary>
/// <param name="url">Address to fetch; the same value is also sent as the Origin header.</param>
/// <returns>The downloaded text, or an empty string when the request failed.</returns>
private string DownloadSourceCode(string url)
{
    try
    {
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.UTF8;
            client.Headers.Add("Accept", "image / webp, */*");
            client.Headers.Add("Accept-Language", "fr, fr - FR");
            client.Headers.Add("Cache-Control", "max-age=1");
            client.Headers.Add("DNT", "1");
            client.Headers.Add("Origin", url);
            client.Headers.Add("TE", "Trailers");
            client.Headers.Add("user-agent", Fichier.LoadUserAgent());
            return client.DownloadString(url);
        }
    }
    catch (WebException e)
    {
        if (e.Status == WebExceptionStatus.ProtocolError && e.Response is HttpWebResponse response)
        {
            // The server answered with an HTTP error: show status code, description and server name.
            string status = string.Format("{0}", response.StatusCode);
            LabelID.TextInvoke(string.Format("{0} {1} {2} ", status,
                response.StatusDescription,
                response.Server));
        }
        else
        {
            // Timeouts, DNS and connection failures carry no HTTP response. Surface them
            // instead of dropping them, otherwise a stalled download is invisible.
            LabelID.TextInvoke(string.Format("{0} {1} ", e.Status, e.Message));
        }
    }
    catch (NotSupportedException a)
    {
        MessageBox.Show(a.Message);
    }

    // Every failure path falls through to the empty-string result callers test for.
    return "";
}
/// <summary>
/// Downloads one image from <paramref name="URL"/> into <paramref name="filePath"/> and
/// increments NBRIMAGESDWLD when the download completes.
/// </summary>
/// <param name="URL">Address of the image to download.</param>
/// <param name="filePath">Destination file for the downloaded bytes.</param>
/// <remarks>
/// STARTNBR is deliberately not advanced here. DownloadImages steps it once after every
/// attempt; stepping it here as well made each successful download skip the following ID.
/// </remarks>
private void DownloadImage(string URL, string filePath)
{
    try
    {
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.UTF8;
            client.Headers.Add("Accept", "image / webp, */*");
            client.Headers.Add("Accept-Language", "fr, fr - FR");
            client.Headers.Add("Cache-Control", "max-age=1");
            client.Headers.Add("DNT", "1");
            client.Headers.Add("Origin", "https://myprivatesite.fr//" + STARTNBR.ToString());
            client.Headers.Add("user-agent", Fichier.LoadUserAgent());
            client.DownloadFile(URL, filePath);
            NBRIMAGESDWLD++;
        }
    }
    catch (IOException)
    {
        LabelID.TextInvoke("Accès non autorisé au fichier");
    }
    catch (WebException e)
    {
        if (e.Status == WebExceptionStatus.ProtocolError && e.Response is HttpWebResponse response)
        {
            // The server answered with an HTTP error: show status code, description and server name.
            LabelID.TextInvoke(string.Format("{0} / {1} / {2} ", response.StatusCode,
                response.StatusDescription,
                response.Server));
        }
        else
        {
            // Timeouts, DNS and connection failures carry no HTTP response. Surface them
            // instead of dropping them, otherwise a stalled download is invisible.
            LabelID.TextInvoke(string.Format("{0} / {1} ", e.Status, e.Message));
        }
    }
    catch (NotSupportedException a)
    {
        MessageBox.Show(a.Message);
    }
}
/// <summary>
/// Handles the page identified by the current STARTNBR: downloads its source, extracts the
/// image URL from it, saves the image as "&lt;STARTNBR&gt;.png" under PATHIMAGES, persists the
/// options, and finally steps STARTNBR (down when CheckBoxBack is checked, up otherwise).
/// </summary>
private void DownloadImages()
{
    const string URL = "https://myprivatesite.fr/";

    string pageSource = DownloadSourceCode(string.Concat(URL, STARTNBR));

    // An empty page source means there is nothing to parse, so the image URL stays empty too.
    string imageNameURL = pageSource != string.Empty
        ? Fichier.GetURLImage(pageSource)
        : string.Empty;

    if (imageNameURL != string.Empty)
    {
        string imagePath = PATHIMAGES + STARTNBR + ".png";
        LabelID.TextInvoke(STARTNBR.ToString());
        LabelImageURL.TextInvoke(imageNameURL + "\r");
        DownloadImage(imageNameURL, imagePath);
        Extension.SaveOptions(STARTNBR, CheckBoxBack.Checked);
    }

    // The step happens whether or not an image was found for this ID.
    STARTNBR = CheckBoxBack.Checked ? --STARTNBR : ++STARTNBR;
}
// END FUNCTIONS
// 1 while a DownloadImages run started by the timer is still executing, 0 otherwise.
private int _downloadTickRunning;

/// <summary>
/// Toggles the download timer: "Start" switches the button to "Pause" and creates the timer
/// (first tick immediately, then every Trackbar.Value milliseconds); "Pause" calls EndTimer.
/// The options are saved afterwards in both cases. Does nothing when Fichier.RGBIMAGES is null.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void BoutonStartPause_Click(object sender, EventArgs e)
{
    if (Fichier.RGBIMAGES == null)
    {
        return;
    }

    if (boutonStartPause.Text == "Start")
    {
        boutonStartPause.ForeColor = Color.DarkRed;
        boutonStartPause.Text = "Pause";
        if (myTimer == null)
        {
            // Still hand the work to a Task so an exception escaping DownloadImages never
            // reaches the timer callback itself.
            myTimer = new System.Threading.Timer(_ => Task.Run(() => RunDownloadTickIfIdle()), null, 0, Trackbar.Value);
        }
    }
    else if (boutonStartPause.Text == "Pause")
    {
        EndTimer();
    }

    Extension.SaveOptions(STARTNBR, CheckBoxBack.Checked);
}

/// <summary>
/// Runs DownloadImages unless a previous tick is still running. The timer fires on a fixed
/// period regardless of how long a download takes; without this guard slow downloads pile
/// up and many of them work on the same STARTNBR at once.
/// </summary>
private void RunDownloadTickIfIdle()
{
    // Atomically claim the "running" flag; a non-zero previous value means another tick owns it.
    if (System.Threading.Interlocked.CompareExchange(ref _downloadTickRunning, 1, 0) != 0)
    {
        return;
    }

    try
    {
        DownloadImages();
    }
    finally
    {
        System.Threading.Interlocked.Exchange(ref _downloadTickRunning, 0);
    }
}
答案 0 :(得分:0)
所以我想了解为什么我在这里遇到这样的问题和/或如何解决这个问题。
我能想到的原因可能有两个。
连接/端口耗尽
当您尝试一次创建太多连接时,或者当您之前建立的连接尚未释放时,就会发生这种情况。当您使用 WebClient 时,它占用的资源有时不会立即释放。这会导致从该对象被释放(Dispose)到下一个 WebClient 真正能够使用同一端口/连接之间存在一段延迟。
最有可能导致连接/端口耗尽的示例:
int i = 1_000;
// Counts i down to zero; every pass constructs a brand-new WebClient and disposes it
// before the next pass begins.
while (i-- > 0)
{
    using (var client = new WebClient())
    {
        // do some webclient stuff
    }
}
当您创建大量 WebClient 时(由于 WebClient 在并发方面的固有特性,这有时是必需的),有可能在下一个 WebClient 实例化时,上一个 WebClient 使用的端口还不可用,从而导致延迟(等待端口释放),或者更糟:下一个 WebClient 会打开另一个端口/连接。这可能导致打开的连接越来越多、永无止境,最终使一切陷入停顿!
线程池耗尽(Thread Pool Exhaustion)
这是由于一次创建了过多的 Task 或 Thread 对象,而它们又阻塞了自身的执行(通过 Thread.Sleep 或长时间运行的操作)造成的。
通常这不是问题,因为内置的 TaskScheduler 在跟踪大量任务并确保它们都能轮流执行代码方面做得非常好。
问题在于,TaskScheduler 没有任何上下文可以判断哪些任务更重要,或者哪些任务需要比其他任务更多的时间才能完成。因此,当许多任务正在处理长时间运行的操作、被阻塞或抛出异常时,TaskScheduler 必须等待这些任务完成才能开始新的任务。如果您特别不走运,TaskScheduler 可能会启动一批全部处于阻塞状态的任务,导致没有其他任务可以启动,即使所有等待中的任务都很小并且会立即完成。
您通常应该使用尽可能少的任务来提高可靠性并避免线程池耗尽。
你能做什么
您有几个选项可以帮助提高此代码的可靠性和性能。
1. 改用 HttpClient。我了解您可能必须使用 WebClient,因此本答案只使用 WebClient 给出了解决方案。
2. 使用一个 WebClient 帮助器类来限制可以同时处于活动状态的网络客户端数量,并且在您要多次访问同一个网站时能够保持网络客户端打开。

WebClient 帮助器类
我创建了一个非常简单的帮助器类来帮助您入门。它允许您异步发起 WebClient 请求,而不必担心一次创建太多客户端。默认上限是客户端机器处理器的内核数(这是随意选定的)。
/// <summary>
/// Caps how many <see cref="WebClient"/> requests run at the same time and can keep named
/// clients alive so that later requests with the same name reuse them.
/// </summary>
/// <remarks>
/// NOTE(review): a single WebClient instance rejects overlapping operations, so requests that
/// share an Id should not be issued concurrently; this class does not serialize them.
/// </remarks>
public class ConcurrentWebClient
{
    /// <summary>Maximum number of requests allowed to run at once (the processor count).</summary>
    public static int MaxConcurrentDownloads => Environment.ProcessorCount;

    // Clients that are kept open between requests, keyed by the Id passed to NewRequest.
    private static readonly ConcurrentDictionary<string, WebClient> Clients;

    /// <summary>Holds one permit per request that is currently allowed to run.</summary>
    public static readonly SemaphoreSlim Locker;

    // Source of the token observed by every request; replaced by ForceCancelAll.
    private static CancellationTokenSource TokenSource = new();

    static ConcurrentWebClient()
    {
        Clients = new ConcurrentDictionary<string, WebClient>();
        Locker = new SemaphoreSlim(MaxConcurrentDownloads, MaxConcurrentDownloads);
    }

    // Returns a throw-away client for non-persistent requests, or the shared client
    // registered under `name` (creating and registering it on first use).
    private static WebClient GetOrCreateClient(string name, bool persistent)
    {
        if (persistent is false)
        {
            // Never hand out a shared client here: the caller disposes non-persistent clients.
            return new WebClient();
        }

        if (Clients.TryGetValue(name, out WebClient existing))
        {
            return existing;
        }

        WebClient candidate = new();
        WebClient registered = Clients.GetOrAdd(name, candidate);
        if (ReferenceEquals(registered, candidate) is false)
        {
            // Another request registered this name first; drop ours instead of leaking it.
            candidate.Dispose();
        }

        return registered;
    }

    /// <summary>
    /// Waits for a free slot, then runs <paramref name="Expression"/> on a thread-pool thread
    /// with a WebClient and returns its result.
    /// </summary>
    /// <typeparam name="T">Type produced by <paramref name="Expression"/>.</typeparam>
    /// <param name="Expression">Work to perform with the client.</param>
    /// <param name="MaxTimeout">Milliseconds to wait for a free slot; 100 000 when null.</param>
    /// <param name="Id">
    /// Name of a client to keep open and reuse; when null a fresh client is created and
    /// disposed once the expression has finished.
    /// </param>
    /// <returns>The value returned by <paramref name="Expression"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="Expression"/> is null.</exception>
    /// <exception cref="TimeoutException">No slot became free within the timeout.</exception>
    /// <exception cref="OperationCanceledException">ForceCancelAll was called while waiting.</exception>
    public async Task<T> NewRequest<T>(Func<WebClient, T> Expression, int? MaxTimeout = null, string Id = null)
    {
        if (Expression is null)
        {
            throw new ArgumentNullException(nameof(Expression));
        }

        // Read the token once so the wait and the work observe the same cancellation source.
        CancellationToken token = TokenSource.Token;

        // WaitAsync returns false when the timeout elapses WITHOUT granting a permit. Running
        // anyway would exceed the limit and the Release below would over-release the semaphore.
        bool slotAcquired = await Locker.WaitAsync(MaxTimeout ?? 100_000, token);
        if (slotAcquired is false)
        {
            throw new TimeoutException("Timed out waiting for a free WebClient slot.");
        }

        bool persistent = Id != null;
        WebClient client = null;
        try
        {
            client = GetOrCreateClient(Id ?? string.Empty, persistent);
            return await Task.Run<T>(() => Expression(client), token);
        }
        finally
        {
            if (persistent is false)
            {
                // Dispose on failure as well as on success so a throwing expression cannot leak the client.
                client?.Dispose();
            }

            Locker.Release();
        }
    }

    /// <summary>
    /// Applies the encoding and request headers shared by every download. Headers are set
    /// rather than appended, so calling this again on a reused client does not duplicate values.
    /// </summary>
    /// <param name="client">Client to configure.</param>
    public static void AssignDefaultHeaders(WebClient client)
    {
        client.Encoding = System.Text.Encoding.UTF8;
        // NOTE(review): the spaces inside "image / webp" and "fr - FR" look unintended; confirm
        // against what the server expects before changing them.
        client.Headers["Accept"] = "image / webp, */*";
        client.Headers["Accept-Language"] = "fr, fr - FR";
        client.Headers["Cache-Control"] = "max-age=1";
        client.Headers["DNT"] = "1";
        // Fichier is defined elsewhere in the project.
        client.Headers["user-agent"] = Fichier.LoadUserAgent();
    }

    /// <summary>
    /// Cancels and disposes the persistent client registered under <paramref name="Name"/>,
    /// removing it from the registry so a later request creates a fresh one.
    /// </summary>
    /// <param name="Name">Id the client was registered with.</param>
    /// <returns>A completed task; faulted with ArgumentNullException when the name is null.</returns>
    public Task Cancel(string Name)
    {
        if (Name is null)
        {
            return Task.FromException(new ArgumentNullException(nameof(Name)));
        }

        // TryRemove both finds and unregisters the client in one atomic step, so no request
        // can pick up the instance after it has been disposed.
        if (Clients.TryRemove(Name, out WebClient foundClient))
        {
            foundClient.CancelAsync();
            foundClient.Dispose();
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Emergency stop: cancels the token observed by all requests and disposes every
    /// persistent client. Pending requests fail with OperationCanceledException.
    /// </summary>
    public void ForceCancelAll()
    {
        // Swap in a fresh source first so requests started from now on are not born cancelled.
        CancellationTokenSource previous = Interlocked.Exchange(ref TokenSource, new CancellationTokenSource());
        previous.Cancel();
        previous.Dispose();

        // Keys is a snapshot; removing entries one by one means a client added meanwhile is
        // either disposed here or left registered, never dropped without being disposed.
        foreach (string name in Clients.Keys)
        {
            if (Clients.TryRemove(name, out WebClient client))
            {
                client.CancelAsync();
                client.Dispose();
            }
        }
    }
}
一次请求多项内容
在这里,我所做的只是切换到使用 helper 类,并使它可以使用相同的连接请求多个事物
/// <summary>
/// Downloads every address in <paramref name="urls"/> as a string, one after another, through
/// a single client obtained from ConcurrentWebClient.
/// </summary>
/// <param name="urls">Addresses to fetch; each one is also sent as its own Origin header.</param>
/// <returns>The downloaded strings, in the same order as <paramref name="urls"/>.</returns>
public async Task<string[]> DownloadSourceCode(string[] urls)
{
    // Executed by NewRequest once a download slot is available.
    string[] FetchAll(WebClient client)
    {
        ConcurrentWebClient.AssignDefaultHeaders(client);
        client.Headers.Add("TE", "Trailers");

        string[] pages = new string[urls.Length];
        int slot = 0;
        foreach (string url in urls)
        {
            // Origin changes per address, so drop the previous value before adding the new one.
            client.Headers.Remove("Origin");
            client.Headers.Add("Origin", url);
            pages[slot] = client.DownloadString(url);
            slot++;
        }

        return pages;
    }

    ConcurrentWebClient downloader = new ConcurrentWebClient();
    string[] sources = await downloader.NewRequest<string[]>(FetchAll);
    return sources;
}
/// <summary>
/// Downloads each URL into the file path at the same index, one after another, through a
/// single client obtained from ConcurrentWebClient. Only as many pairs as the shorter of
/// the two arrays are processed.
/// </summary>
/// <param name="URLs">Image addresses to download.</param>
/// <param name="filePaths">Destination file for the URL at the same index.</param>
/// <returns>True once every pair has been downloaded; a failing download surfaces as an exception.</returns>
private async Task<bool> DownloadImage(string[] URLs, string[] filePaths)
{
    // Executed by NewRequest once a download slot is available.
    bool SaveAll(WebClient client)
    {
        ConcurrentWebClient.AssignDefaultHeaders(client);

        int pairCount = Math.Min(URLs.Length, filePaths.Length);
        int index = 0;
        while (index < pairCount)
        {
            // side-note, this is assuming the websites you're visiting aren't mutating the headers
            client.Headers.Remove("Origin");
            client.Headers.Add("Origin", "https://myprivatesite.fr//" + STARTNBR.ToString());
            client.DownloadFile(URLs[index], filePaths[index]);

            // Bookkeeping per finished file: bump the counter, then step the current ID.
            NBRIMAGESDWLD++;
            STARTNBR = CheckBoxBack.Checked ? --STARTNBR : ++STARTNBR;
            index++;
        }

        return true;
    }

    ConcurrentWebClient downloader = new ConcurrentWebClient();
    return await downloader.NewRequest<bool>(SaveAll);
}