在 C# 中加载图像需要很长时间

时间:2021-05-18 01:40:56

标签: c#

我遇到的问题是:

我尝试下载 1000 多张图片 -> 功能本身可以运行,但是每张图片都要很长时间才能完全下载完,而程序并不等待,而是继续去下载下一张图片……结果是:程序已经处理到第 100 张图片了,第 8 张图片却还没有下载完。

所以我想了解为什么我在这里遇到这样的问题和/或如何解决这个问题。

希望有人能看出问题所在。

/// <summary>
/// Downloads the document at <paramref name="url"/> and returns it as UTF-8 text.
/// </summary>
/// <param name="url">Address to request; the same value is sent as the <c>Origin</c> header.</param>
/// <returns>
/// The response body, or an empty string when the request failed with a
/// <see cref="WebException"/> or a <see cref="NotSupportedException"/>.
/// </returns>
private string DownloadSourceCode(string url)
{
    try
    {
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.UTF8;
            client.Headers.Add("Accept", "image / webp, */*");
            client.Headers.Add("Accept-Language", "fr, fr - FR");
            client.Headers.Add("Cache-Control", "max-age=1");
            client.Headers.Add("DNT", "1");
            client.Headers.Add("Origin", url);
            client.Headers.Add("TE", "Trailers");
            client.Headers.Add("user-agent", Fichier.LoadUserAgent());

            return client.DownloadString(url);
        }
    }
    catch (WebException e)
    {
        // Cast once; "as" keeps us safe if the response is missing or not an HTTP response.
        HttpWebResponse response = e.Response as HttpWebResponse;

        if (e.Status == WebExceptionStatus.ProtocolError && response != null)
        {
            // The server answered with an error status: show code, description and server name.
            LabelID.TextInvoke(string.Format("{0} {1} {2} ",
                                             response.StatusCode,
                                             response.StatusDescription,
                                             response.Server));
        }
        else
        {
            // Timeouts, DNS failures, aborted connections, ... were previously dropped
            // without any feedback; surface them so a failed request is visible.
            LabelID.TextInvoke(e.Message);
        }
    }
    catch (NotSupportedException a)
    {
        MessageBox.Show(a.Message);
    }

    // Every failure path ends here; callers treat an empty string as "nothing downloaded".
    return string.Empty;
}

/// <summary>
/// Downloads the resource at <paramref name="URL"/> into the file <paramref name="filePath"/>.
/// On success the downloaded-image counter is incremented and <c>STARTNBR</c> is stepped
/// backwards or forwards depending on <c>CheckBoxBack.Checked</c>.
/// </summary>
/// <param name="URL">Address of the image to download.</param>
/// <param name="filePath">Destination file path.</param>
private void DownloadImage(string URL, string filePath)
{
    try
    {
        using (WebClient client = new WebClient())
        {
            client.Encoding = Encoding.UTF8;
            client.Headers.Add("Accept", "image / webp, */*");
            client.Headers.Add("Accept-Language", "fr, fr - FR");
            client.Headers.Add("Cache-Control", "max-age=1");
            client.Headers.Add("DNT", "1");
            client.Headers.Add("Origin", "https://myprivatesite.fr//" + STARTNBR.ToString());
            client.Headers.Add("user-agent", Fichier.LoadUserAgent());

            client.DownloadFile(URL, filePath);

            NBRIMAGESDWLD++;
        }

        // NOTE(review): DownloadImages() also steps STARTNBR after calling this method, so a
        // successful download moves the counter twice. Left unchanged here; confirm whether
        // one of the two steps should be removed.
        STARTNBR = CheckBoxBack.Checked ? --STARTNBR : ++STARTNBR;
    }
    catch (IOException)
    {
        LabelID.TextInvoke("Accès non autorisé au fichier");
    }
    catch (WebException e)
    {
        // Cast once; "as" keeps us safe if the response is missing or not an HTTP response.
        HttpWebResponse response = e.Response as HttpWebResponse;

        if (e.Status == WebExceptionStatus.ProtocolError && response != null)
        {
            LabelID.TextInvoke(string.Format("{0} / {1} / {2} ",
                                             response.StatusCode,
                                             response.StatusDescription,
                                             response.Server));
        }
        else
        {
            // Timeouts, DNS failures, aborted connections, ... were previously dropped
            // without any feedback; surface them so an incomplete download is visible.
            LabelID.TextInvoke(e.Message);
        }
    }
    catch (NotSupportedException a)
    {
        MessageBox.Show(a.Message);
    }
}

/// <summary>
/// Handles one image id: fetches the page for the current <c>STARTNBR</c>, extracts the
/// image address from it, downloads the image to disk and saves the options. The counter
/// is stepped at the end whether or not an image was found.
/// </summary>
private void DownloadImages()
{
    const string URL = "https://myprivatesite.fr/";

    string pageSource = DownloadSourceCode(URL + STARTNBR);

    // Only parse the page when something was actually downloaded.
    string imageNameURL = pageSource != string.Empty
        ? Fichier.GetURLImage(pageSource)
        : string.Empty;

    if (imageNameURL != string.Empty)
    {
        string destination = PATHIMAGES + STARTNBR + ".png";

        // Show the id and the image address before the download starts.
        LabelID.TextInvoke(STARTNBR.ToString());
        LabelImageURL.TextInvoke(imageNameURL + "\r");

        DownloadImage(imageNameURL, destination);

        Extension.SaveOptions(STARTNBR, CheckBoxBack.Checked);
    }

    // Move to the next id in the direction selected by the check box.
    if (CheckBoxBack.Checked)
    {
        STARTNBR--;
    }
    else
    {
        STARTNBR++;
    }
}

// END FUNCTIONS

/// <summary>
/// Click handler for the Start/Pause button. "Start" switches the button to "Pause" and
/// creates the download timer if none exists; "Pause" calls <c>EndTimer()</c>. The options
/// are saved afterwards in both cases. Does nothing while <c>Fichier.RGBIMAGES</c> is null.
/// </summary>
private void BoutonStartPause_Click(object sender, EventArgs e)
{
    if (Fichier.RGBIMAGES == null)
    {
        return;
    }

    switch (boutonStartPause.Text)
    {
        case "Start":
            boutonStartPause.ForeColor = Color.DarkRed;
            boutonStartPause.Text = "Pause";

            if (myTimer == null)
            {
                // Each tick creates and starts a new task running DownloadImages; the timer
                // does not wait for earlier tasks to finish before starting the next one.
                myTimer = new System.Threading.Timer(
                    state =>
                    {
                        Task job = new Task(DownloadImages);
                        job.Start();
                    },
                    null,
                    0,
                    Trackbar.Value);
            }

            break;

        case "Pause":
            EndTimer();
            break;
    }

    Extension.SaveOptions(STARTNBR, CheckBoxBack.Checked);
}

1 个答案:

答案 0 :(得分:0)

<块引用>

所以我想了解为什么我在这里遇到这样的问题和/或如何解决这个问题。

我能想到的原因可能有两个。

  • 连接/端口耗尽
  • 线程池耗尽

连接/端口耗尽:当您尝试一次创建太多连接,或者之前建立的连接尚未被释放时,就会发生这种情况。使用 WebClient 时,它占用的资源有时不会立即释放。这会导致从该对象被释放(Dispose)到下一个 WebClient 能够真正使用同一端口/连接之间存在一段延迟。

最有可能导致连接/端口耗尽的示例:
// Anti-pattern demo: creates and disposes 1,000 WebClient instances back to back.
for (int remaining = 1_000; remaining > 0; remaining--)
{
    using var client = new WebClient();
    // do some webclient stuff
}

当您创建大量 WebClient 时(由于 WebClient 固有的并发特性,有时不得不这样做),有可能在下一个 WebClient 实例化时,上一个客户端使用的端口仍不可用,从而导致延迟(等待端口释放),或者更糟——下一个 WebClient 会另外打开一个端口/连接。这可能导致打开的连接越积越多、永无止境,最终使一切陷入停顿!

线程池耗尽(Thread Pool Exhaustion)
这是由于一次创建了过多的 Task 或 Thread 对象,而这些对象又(通过 Thread.Sleep 或长时间运行的操作)阻塞了自身的执行。

通常这不是问题,因为内置的 TaskScheduler 在跟踪大量任务并确保它们都轮流执行代码方面做得非常好。

问题在于 TaskScheduler 并不知道哪些任务更重要,也不知道哪些任务需要比其他任务更长的时间才能完成。因此,当许多任务正在执行长时间运行的操作、被阻塞或抛出异常时,TaskScheduler 必须等这些任务完成之后才能开始新的任务。如果特别不走运,TaskScheduler 可能启动了一批全部处于阻塞状态的任务,此时即使其余等待中的任务都很小、可以立即完成,也没有任何任务能够启动。

您通常应该使用尽可能少的任务来提高可靠性并避免线程池耗尽。

你能做什么
您有几个选项可以帮助提高此代码的可靠性和性能。

  • 考虑改用 HttpClient。我了解您可能需要使用 WebClient,因此我仅使用 WebClient 提供了答案。
  • 考虑在同一任务中请求多个下载/字符串以避免线程池耗尽
  • 考虑使用 WebClient 帮助器类来限制可以同时处于活动状态的可用网络客户端,并且在您要多次访问同一个网站时能够保持网络客户端打开。

WebClient 助手类
我创建了一个非常简单的帮助类来帮助您入门。这将允许您异步创建 WebClient 请求,而不必担心一次创建太多客户端。默认限制是客户端处理器中的内核数(这是任意选择的)。

/// <summary>
/// Runs <see cref="WebClient"/> work on the thread pool while capping how many requests
/// are in flight at once. Clients requested with an id are kept in a shared dictionary and
/// reused; clients requested without an id are created for one request and disposed
/// afterwards. All state is static, so every instance shares the same limit, the same
/// client dictionary and the same cancellation source.
/// </summary>
/// <remarks>
/// NOTE(review): two concurrent requests that pass the same id receive the same
/// <see cref="WebClient"/> instance; callers should not do that unless they know the
/// client tolerates it.
/// </remarks>
public class ConcurrentWebClient
{
    /// <summary>Maximum number of requests allowed to run at once (the processor count).</summary>
    public static int MaxConcurrentDownloads => Environment.ProcessorCount;

    // Clients that stay open between requests, keyed by the caller-supplied id.
    private static readonly ConcurrentDictionary<string, WebClient> Clients;

    /// <summary>Semaphore that enforces <see cref="MaxConcurrentDownloads"/>.</summary>
    public static readonly SemaphoreSlim Locker;

    // Source of the token handed to every request; swapped out by ForceCancelAll.
    private static CancellationTokenSource TokenSource = new();

    static ConcurrentWebClient()
    {
        Clients = new ConcurrentDictionary<string, WebClient>();

        Locker = new SemaphoreSlim(MaxConcurrentDownloads, MaxConcurrentDownloads);
    }

    // Returns the client to use for one request: a fresh one when not persistent,
    // otherwise the single shared instance registered under the given name.
    private static WebClient GetClient(string name, bool persistent)
    {
        if (!persistent)
        {
            // Never consult the dictionary here: a one-shot client is disposed after the
            // request, and that must not hit a persistent client stored under the same key.
            return new WebClient();
        }

        if (Clients.TryGetValue(name, out WebClient existing))
        {
            return existing;
        }

        WebClient created = new();

        // GetOrAdd returns whichever instance ended up stored, so a lost race cannot
        // spin forever the way a TryAdd retry loop would.
        WebClient stored = Clients.GetOrAdd(name, created);

        if (!ReferenceEquals(stored, created))
        {
            created.Dispose();
        }

        return stored;
    }

    /// <summary>
    /// Waits for a free slot, then runs <paramref name="Expression"/> on the thread pool
    /// with a <see cref="WebClient"/> and returns its result.
    /// </summary>
    /// <typeparam name="T">Type of the value produced by <paramref name="Expression"/>.</typeparam>
    /// <param name="Expression">Work to perform with the client.</param>
    /// <param name="MaxTimeout">
    /// Milliseconds to wait for a free slot; defaults to 100,000 (chosen by the original
    /// author to match WebClient's default timeout of 100 s).
    /// </param>
    /// <param name="Id">
    /// When not null, the client is stored under this id and reused by later requests;
    /// when null, a one-shot client is created and disposed after the request.
    /// </param>
    /// <returns>The value returned by <paramref name="Expression"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="Expression"/> is null.</exception>
    /// <exception cref="TimeoutException">No slot became free within the timeout.</exception>
    /// <exception cref="OperationCanceledException">The shared token was cancelled.</exception>
    public async Task<T> NewRequest<T>(Func<WebClient, T> Expression, int? MaxTimeout = null, string Id = null)
    {
        if (Expression is null)
        {
            throw new ArgumentNullException(nameof(Expression));
        }

        // Read the token once so the whole request observes the same cancellation source
        // even if ForceCancelAll swaps it while we are running.
        CancellationToken token = TokenSource.Token;

        // WaitAsync returns false on timeout; in that case we do NOT own a slot and must
        // neither run the request nor release the semaphore.
        bool acquired = await Locker.WaitAsync(MaxTimeout ?? 100_000, token);

        if (!acquired)
        {
            throw new TimeoutException("Timed out waiting for a free download slot.");
        }

        bool persistent = Id is not null;
        WebClient client = null;

        try
        {
            client = GetClient(Id ?? string.Empty, persistent);

            return await Task.Run(() => Expression(client), token);
        }
        finally
        {
            try
            {
                // One-shot clients are disposed even when the expression throws.
                if (!persistent)
                {
                    client?.Dispose();
                }
            }
            finally
            {
                // Always give the slot back, whatever happened above.
                Locker.Release();
            }
        }
    }

    /// <summary>
    /// Applies the encoding and the default request headers to <paramref name="client"/>.
    /// </summary>
    /// <param name="client">Client to configure.</param>
    public static void AssignDefaultHeaders(WebClient client)
    {
        client.Encoding = System.Text.Encoding.UTF8;
        client.Headers.Add("Accept", "image / webp, */*");
        client.Headers.Add("Accept-Language", "fr, fr - FR");
        client.Headers.Add("Cache-Control", "max-age=1");
        client.Headers.Add("DNT", "1");
        // Fichier is declared elsewhere in the project; the original author left this untested.
        client.Headers.Add("user-agent", Fichier.LoadUserAgent());
    }

    /// <summary>
    /// Cancels and disposes the persistent client registered under <paramref name="Name"/>,
    /// if there is one, and removes it from the dictionary so it is not handed out again.
    /// </summary>
    /// <param name="Name">Id the client was registered under.</param>
    /// <returns>A completed task, or a faulted task when <paramref name="Name"/> is null.</returns>
    public Task Cancel(string Name)
    {
        if (Name is null)
        {
            return Task.FromException(new ArgumentNullException(nameof(Name)));
        }

        // TryRemove both fetches and unregisters the client atomically; leaving a disposed
        // client in the dictionary would hand it to the next request with the same id.
        if (Clients.TryRemove(Name, out WebClient foundClient))
        {
            foundClient?.CancelAsync();
            foundClient?.Dispose();
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Emergency stop: cancels the shared token, then cancels, disposes and unregisters
    /// every persistent client. Pending requests fail with
    /// <see cref="OperationCanceledException"/>; callers should be prepared to catch it.
    /// </summary>
    public void ForceCancelAll()
    {
        // Install the fresh source first so new requests never pick up the cancelled one.
        CancellationTokenSource previous = Interlocked.Exchange(ref TokenSource, new CancellationTokenSource());

        previous.Cancel();
        previous.Dispose();

        // Remove entries one by one instead of Clear() so that a client added while we
        // iterate is never dropped from the dictionary without being disposed.
        foreach (string key in Clients.Keys)
        {
            if (Clients.TryRemove(key, out WebClient client))
            {
                client?.CancelAsync();
                client?.Dispose();
            }
        }
    }
}

一次请求多项内容
在这里,我所做的只是切换到使用 helper 类,并使它可以使用相同的连接请求多个事物

/// <summary>
/// Downloads every address in <paramref name="urls"/> as text, one after another, using a
/// single client obtained from <c>ConcurrentWebClient</c>.
/// </summary>
/// <param name="urls">Addresses to download; each one is also sent as its own <c>Origin</c> header.</param>
/// <returns>The downloaded texts, in the same order as <paramref name="urls"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="urls"/> is null.</exception>
public async Task<string[]> DownloadSourceCode(string[] urls)
{
    if (urls is null)
    {
        throw new ArgumentNullException(nameof(urls));
    }

    var downloader = new ConcurrentWebClient();

    return await downloader.NewRequest<string[]>((WebClient client) =>
    {
        string[] result = new string[urls.Length];

        for (int i = 0; i < urls.Length; i++)
        {
            string url = urls[i];

            // Rebuild the whole header set for every request so that later requests never
            // depend on what the previous request left in client.Headers.
            client.Headers.Clear();

            ConcurrentWebClient.AssignDefaultHeaders(client);

            client.Headers.Add("TE", "Trailers");

            client.Headers.Add("Origin", url);

            result[i] = client.DownloadString(url);
        }

        return result;
    });
}
/// <summary>
/// Downloads each address in <paramref name="URLs"/> to the file path at the same index in
/// <paramref name="filePaths"/>, one after another, using a single client obtained from
/// <c>ConcurrentWebClient</c>. After every file the downloaded-image counter is incremented
/// and <c>STARTNBR</c> is stepped according to <c>CheckBoxBack.Checked</c>.
/// </summary>
/// <param name="URLs">Addresses of the images to download.</param>
/// <param name="filePaths">Destination paths; entries beyond the shorter array are ignored.</param>
/// <returns>
/// <c>true</c> once the loop has completed; a failed download surfaces as an exception
/// from the awaited task instead of a <c>false</c> result.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="URLs"/> or <paramref name="filePaths"/> is null.
/// </exception>
private async Task<bool> DownloadImage(string[] URLs, string[] filePaths)
{
    if (URLs is null)
    {
        throw new ArgumentNullException(nameof(URLs));
    }

    if (filePaths is null)
    {
        throw new ArgumentNullException(nameof(filePaths));
    }

    var downloader = new ConcurrentWebClient();

    bool downloadsSucessful = await downloader.NewRequest<bool>((WebClient client) =>
    {
        // Only pairs that have both an address and a destination are processed.
        int len = Math.Min(URLs.Length, filePaths.Length);

        for (int i = 0; i < len; i++)
        {
            // Rebuild the whole header set for every request so that later requests never
            // depend on what the previous request left in client.Headers.
            client.Headers.Clear();

            ConcurrentWebClient.AssignDefaultHeaders(client);

            client.Headers.Add("Origin", "https://myprivatesite.fr//" + STARTNBR.ToString());

            client.DownloadFile(URLs[i], filePaths[i]);

            NBRIMAGESDWLD++;

            STARTNBR = CheckBoxBack.Checked ? --STARTNBR : ++STARTNBR;
        }

        return true;
    });

    return downloadsSucessful;
}