Question

我从网站下载图片。这是代码：

/// <summary>
/// A <see cref="WebClient"/> that can issue HEAD requests instead of GET requests,
/// which lets callers inspect response headers without downloading the body.
/// </summary>
class MyClient : WebClient
        {
            /// <summary>
            /// When true, requests that would have used the GET verb are sent as HEAD.
            /// </summary>
            public bool HeadOnly { get; set; }

            /// <summary>
            /// Creates the request through the base class and rewrites its verb to HEAD
            /// when <see cref="HeadOnly"/> is set and the verb is GET.
            /// </summary>
            /// <param name="address">The address the request is created for.</param>
            /// <returns>The request produced by the base class, possibly with its verb changed.</returns>
            protected override WebRequest GetWebRequest(Uri address)
            {
                WebRequest request = base.GetWebRequest(address);

                // Leave the request untouched unless HEAD mode is on and the verb is GET.
                bool switchToHead = HeadOnly && request.Method == "GET";
                if (!switchToHead)
                {
                    return request;
                }

                request.Method = "HEAD";
                return request;
            }
        }

然后方法：

/// <summary>
/// Fetches <paramref name="url"/> and parses it with HtmlAgilityPack, but only when the
/// server reports a <c>text/html</c> content type.
/// </summary>
/// <remarks>
/// A HEAD request is issued first to read the Content-Type header without downloading the
/// body. Only HTML resources are then downloaded, with a single GET request. Resources such
/// as images (<c>image/jpeg</c>) are not HTML, so the method returns <c>null</c> for them.
/// </remarks>
/// <param name="url">Address of the resource to fetch.</param>
/// <param name="useProxy">
/// When true and <paramref name="proxyIp"/> is non-empty, the GET request goes through that proxy;
/// otherwise the default system proxy is used.
/// </param>
/// <param name="proxyIp">Host name or IP address of the proxy.</param>
/// <param name="proxyPort">Port of the proxy.</param>
/// <param name="usename">Proxy user name; proxy credentials are set only when this is non-empty.</param>
/// <param name="password">Proxy password; <c>null</c> is treated as an empty string.</param>
/// <returns>
/// The parsed document for HTML resources (left empty if the download or parse failed);
/// <c>null</c> when the resource is not HTML, has no Content-Type header, or the HEAD probe failed.
/// </returns>
public static HtmlAgilityPack.HtmlDocument getHtmlDocumentWebClient(string url, bool useProxy, string proxyIp, int proxyPort, string usename, string password)
        {
            HtmlAgilityPack.HtmlDocument doc = null;
            try
            {
                // Probe with HEAD so the body of a binary resource is never downloaded.
                string type;
                using (MyClient probe = new MyClient())
                {
                    probe.HeadOnly = true;
                    probe.DownloadData(url);
                    WebHeaderCollection headers = probe.ResponseHeaders;
                    type = headers == null ? null : headers["content-type"];
                }

                if (type == null)
                {
                    return null;
                }

                // Media types are case-insensitive, and the header may carry a "; charset=..." suffix.
                if (type.StartsWith("text/html", System.StringComparison.OrdinalIgnoreCase))
                {
                    // Created before the download so that a failed download still yields an
                    // (empty) document rather than null, as callers have come to expect.
                    doc = new HtmlAgilityPack.HtmlDocument();
                    try
                    {
                        using (WebClient client = new WebClient())
                        {
                            client.Credentials = CredentialCache.DefaultCredentials;
                            client.Proxy = WebRequest.DefaultWebProxy;

                            if (useProxy && !string.IsNullOrEmpty(proxyIp))
                            {
                                WebProxy p = new WebProxy(proxyIp, proxyPort);
                                if (!string.IsNullOrEmpty(usename))
                                {
                                    p.Credentials = new NetworkCredential(usename, password ?? string.Empty);
                                }

                                // Without this assignment the configured proxy is never used.
                                client.Proxy = p;
                            }

                            using (System.IO.Stream body = client.OpenRead(url))
                            {
                                doc.Load(body);
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        // Best effort: keep returning the (empty) document, but leave a trace.
                        System.Diagnostics.Trace.TraceWarning("getHtmlDocumentWebClient: failed to load " + url + ": " + ex.Message);
                    }
                }

                if (doc == null)
                {
                    // Reached when the content type is something other than text/html.
                    MessageBox.Show("Doc is null   " + doc + " The link that did it was    " + url);
                }
            }
            catch (System.Exception ex)
            {
                // The HEAD probe failed (network error, HTTP error status, invalid URL, ...).
                System.Diagnostics.Trace.TraceWarning("getHtmlDocumentWebClient: failed to probe " + url + ": " + ex.Message);
            }
            return doc;
        }

我尝试在方法中添加了常规的 try/catch，但程序仍然执行到 MessageBox，而没有进入 catch 块。

无论如何，我试图下载的图片链接是：

http://members.tripod.com/~DannyWest/bundy.jpg

然后我在下面这一行设置了断点：

if (type.StartsWith(@"text/html"))

它跳转到MessageBox.Show ...

现在我看到 type 的值是 image/jpeg。我想知道问题是不是因为它不是 text/html，还是有其他原因，比如链接本身有问题？

**编辑**

我尝试修改这个方法，把判断条件改成：

if (type.StartsWith(@"text/html")|| type.StartsWith(@"image/jpeg"))

也就是添加了 image/jpeg 这部分判断。但之后程序执行到另一个类中的这个方法：

/// <summary>
/// Collects the <c>href</c> values of all anchor elements in <paramref name="document"/>
/// that start with <c>http://</c>, <c>https://</c> or <c>www</c>.
/// </summary>
/// <param name="document">
/// The parsed page. May be <c>null</c>, or contain no anchors (for example when the
/// downloaded resource was an image rather than HTML); both cases yield an empty list.
/// </param>
/// <returns>The matching links in document order; never <c>null</c>.</returns>
private List<string> getLinks(HtmlAgilityPack.HtmlDocument document)
        {
                List<string> mainLinks = new List<string>();

                // A missing document has no links; returning early avoids a NullReferenceException.
                if (document == null)
                {
                    return mainLinks;
                }

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var linkNodes = document.DocumentNode.SelectNodes("//a[@href]");
                if (linkNodes == null)
                {
                    return mainLinks;
                }

                foreach (HtmlNode link in linkNodes)
                {
                    var href = link.Attributes["href"].Value;

                    // Keep absolute-looking links only; ordinal comparison avoids culture-dependent matching.
                    if (href.StartsWith("http://", System.StringComparison.Ordinal)
                        || href.StartsWith("https://", System.StringComparison.Ordinal)
                        || href.StartsWith("www", System.StringComparison.Ordinal))
                    {
                        mainLinks.Add(href);
                    }
                }

                return mainLinks;
        }

linkNodes 始终为 null。当 type 以 image/jpeg 开头时，linkNodes 始终为 null；当 type 以 text/html 开头时，linkNodes 不为 null。

如果需要，我可以将我的项目上传到我的skydrive。

为什么变量doc一直返回null？

0 个答案: