为什么不下载文件?

时间:2016-06-09 11:29:01

标签: c# html dom webclient html-agility-pack

大家好,我写了这个小程序来抓取网页中的一些 HTML 信息,但它没能下载其中的某些文件......

以下是代码:

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
    // Console program that loads one fixed article URL with HtmlAgilityPack, reads
    // Open Graph <meta> values, an icon <link> and the page <title>, downloads every
    // <img> src with WebClient, and prints the collected values to the console.
    class Program
    {


        // Entry point; `args` is not read. Blocks on Console.ReadLine() before exiting.
        static void Main(string[] args)
        {
            using (System.Net.WebClient client = new WebClient())
            {

                // Holds the <img> src strings copied from `urls` further down.
                List<string> source = new List<string>();

                HtmlWeb web = new HtmlWeb();
                string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
                HtmlDocument document = web.Load(url);

                var head = document.DocumentNode.SelectSingleNode("//head");
                // NOTE(review): `meta` and `link` are assigned here but never read in this block.
                var meta = head.SelectNodes("//meta").AsEnumerable();
                var link = document.DocumentNode.SelectSingleNode("//head").SelectNodes("//link").AsEnumerable();
                // Non-empty src attribute values of every <img>, taken exactly as written in
                // the page; nothing here resolves them against `url`.
                var urls = document.DocumentNode.Descendants("img")
                        .Select(e => e.GetAttributeValue("src", null))
                        .Where(s => !String.IsNullOrEmpty(s));

                var titulo = "";
                var descricao = "";
                var linkImg = "";
                var linkIcon = "";
                var linkImgAlt = "";
                int length = 0;

                Uri myUri = new Uri(url);
                string host = myUri.Host;


                // Maps each matching <meta> node's property attribute to its content attribute;
                // an empty sequence is substituted when SelectNodes returns null.
                var fbProperties = (head.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
                    .ToDictionary(n => n.Attributes["property"].Value, n => n.Attributes["content"].Value);


                // Icon preference order: apple-touch-icon href, then any rel containing 'icon',
                // then host + "/favicon.ico" (host only, no scheme prefix).
                linkIcon = (head.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
                    (head.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value) ??
                    host + "/favicon.ico";


                var title = head.SelectSingleNode("//title")?.InnerText;



                // Title: og:title, else the <title> text, else the host name.
                if (fbProperties.TryGetValue("og:title", out titulo) == false || titulo == null)
                {
                    titulo = (title ?? host);
                }

                // Description: og:description, else the literal "none".
                if (fbProperties.TryGetValue("og:description", out descricao) == false || descricao == null)
                {
                    descricao = ("none");
                }

                // NOTE(review): linkImgAlt is still "" at this point (it is only assigned in the
                // loop further down), so the `?? "none"` branch cannot be taken here.
                if (fbProperties.TryGetValue("og:image", out linkImg) == false || linkImg == null)
                {
                    linkImg = (linkImgAlt ?? "none");
                }

                foreach (var node in urls)
                {
                    source.Add(node);
                }

                // NOTE(review): each src string is handed to DownloadData unchanged. The error
                // reported with this program (WebException, file not found under C:\i\...) is
                // consistent with a relative src being treated as a local path - confirm.
                foreach (var links in source)
                {
                    length = client.DownloadData(links).Length;

                    // NOTE(review): this downloads the same address a second time and compares
                    // its size with the size just stored in `length`, so the branch presumably
                    // only runs if the two downloads differ in size - confirm intent.
                    if (length<client.DownloadData(links).Length)
                    {
                        linkImgAlt = links;
                    }

                }




                Console.WriteLine("");
                Console.WriteLine("Titulo:");
                Console.WriteLine(titulo);
                Console.WriteLine("");
                Console.WriteLine("Descriçao:");
                Console.WriteLine(descricao);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImg);
                Console.WriteLine("");
                Console.WriteLine("Link do Icon:");
                Console.WriteLine(linkIcon);
                Console.WriteLine("");
                // The value printed under this label is `length` (a byte count), not a link.
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(length);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImgAlt);

                // Keep the console window open until Enter is pressed.
                Console.ReadLine();
            }
        }
    }
}

每件事情都很好,只是这个小部分给了我一些问题:

// Copy every collected <img> src string into the `source` list.
foreach (var node in urls)
{
    source.Add(node);
}

// Download each entry and record its size in `length`.
// NOTE(review): the src strings are passed to DownloadData exactly as collected.
foreach (var links in source)
{            
    length = client.DownloadData(links).Length;

    // NOTE(review): compares `length` with a second download of the same address.
    if (length<client.DownloadData(links).Length)
    {
        linkImgAlt = links;
    }
}

以下是运行程序时出现的错误:

System.dll 中出现未处理的“System.Net.WebException”类型异常

其他信息:找不到文件'C:\i\closePestana.png'。

打印:http://i.imgur.com/C9JPjtk.png

我认为它并没有下载文件,所以才会出现这个错误信息。你们可以帮我解决这个问题吗?

感谢。

1 个答案:

答案 0 :(得分:1)

我没有得到任何回答......但如果有人遇到和我一样的问题,下面就是解决方案。

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
    /// <summary>
    /// Console scraper: loads one article page, reads its Open Graph metadata and icon
    /// link, probes every <c>&lt;img&gt;</c> with an HTTP HEAD request to find the largest
    /// one, and prints the results.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Loads the page, gathers the metadata, prints it, and
        /// waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
            Uri myUri = new Uri(url);
            string host = myUri.Host;

            HtmlDocument document = new HtmlWeb().Load(url);

            // Every XPath below starts with "//", i.e. it is evaluated against the whole
            // document, so the document node is queried directly. This also avoids a
            // NullReferenceException on pages that have no <head> element.
            HtmlNode root = document.DocumentNode;

            Dictionary<string, string> fbProperties = ReadOpenGraphProperties(root);

            // Icon preference: apple-touch-icon, then any rel containing "icon", then the
            // conventional /favicon.ico. The result is always resolved to an absolute URL.
            string iconHref = (root.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
                (root.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value);
            string linkIcon = ResolveHttpUrl(myUri, iconHref) ?? new Uri(myUri, "/favicon.ico").AbsoluteUri;

            string title = root.SelectSingleNode("//title")?.InnerText;

            string titulo;
            if (!fbProperties.TryGetValue("og:title", out titulo) || string.IsNullOrEmpty(titulo))
            {
                titulo = title ?? host;
            }

            string descricao;
            if (!fbProperties.TryGetValue("og:description", out descricao) || string.IsNullOrEmpty(descricao))
            {
                descricao = "none";
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            // Each src is resolved against the page URL so that relative, root-relative
            // and protocol-relative references all become absolute http(s) URLs;
            // anything else (e.g. data: URIs) is dropped.
            List<string> source = (root.SelectNodes("//img") ?? Enumerable.Empty<HtmlNode>())
                .Select(img => ResolveHttpUrl(myUri, img.GetAttributeValue("src", null)))
                .Where(resolved => resolved != null)
                .Distinct()
                .ToList();

            string linkImgAlt;
            long length;
            using (var client = new HttpClient())
            {
                linkImgAlt = FindLargestImage(client, source, out length);
            }

            // The fallback must be chosen after the scan above, otherwise linkImgAlt is
            // still empty and the page's largest image can never be used.
            string linkImg;
            if (!fbProperties.TryGetValue("og:image", out linkImg) || string.IsNullOrEmpty(linkImg))
            {
                linkImg = string.IsNullOrEmpty(linkImgAlt) ? "none" : linkImgAlt;
            }

            Console.WriteLine("");
            Console.WriteLine("Titulo:");
            Console.WriteLine(titulo);
            Console.WriteLine("");
            Console.WriteLine("Descriçao:");
            Console.WriteLine(descricao);
            Console.WriteLine("");
            Console.WriteLine("Link da Imagem:");
            Console.WriteLine(linkImg);
            Console.WriteLine("");
            Console.WriteLine("Link do Icon:");
            Console.WriteLine(linkIcon);
            Console.WriteLine("");
            // This value is a byte count, so it is no longer labelled as a link.
            Console.WriteLine("Tamanho da Imagem (bytes):");
            Console.WriteLine(length);
            Console.WriteLine("");
            Console.WriteLine("Link da Imagem (alt):");
            Console.WriteLine(linkImgAlt);

            Console.ReadLine();
        }

        /// <summary>
        /// Collects the <c>property</c>/<c>content</c> pairs of all <c>&lt;meta&gt;</c>
        /// nodes whose <c>property</c> attribute contains "og:".
        /// </summary>
        /// <param name="root">Node the XPath query is run from.</param>
        /// <returns>
        /// Property name to content value. Nodes missing either attribute are skipped,
        /// and when a property is repeated the first occurrence wins.
        /// </returns>
        private static Dictionary<string, string> ReadOpenGraphProperties(HtmlNode root)
        {
            var properties = new Dictionary<string, string>();
            IEnumerable<HtmlNode> metaNodes =
                root.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>();

            foreach (HtmlNode metaNode in metaNodes)
            {
                string name = metaNode.GetAttributeValue("property", null);
                string content = metaNode.GetAttributeValue("content", null);

                // Pages commonly repeat og:image and similar tags; adding a duplicate
                // key would throw, so only the first value is kept.
                if (name != null && content != null && !properties.ContainsKey(name))
                {
                    properties.Add(name, content);
                }
            }

            return properties;
        }

        /// <summary>
        /// Resolves <paramref name="candidate"/> against <paramref name="baseUri"/>.
        /// </summary>
        /// <param name="baseUri">Absolute URL of the page the reference came from.</param>
        /// <param name="candidate">Absolute or relative reference; may be null or empty.</param>
        /// <returns>
        /// The absolute URL when it resolves to an http or https address; otherwise null.
        /// </returns>
        private static string ResolveHttpUrl(Uri baseUri, string candidate)
        {
            if (string.IsNullOrWhiteSpace(candidate))
            {
                return null;
            }

            Uri resolved;
            if (!Uri.TryCreate(baseUri, candidate.Trim(), out resolved))
            {
                return null;
            }

            bool isHttp = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
            return isHttp ? resolved.AbsoluteUri : null;
        }

        /// <summary>
        /// Sends a HEAD request for each URL and returns the one reporting the largest
        /// Content-Length. Each probed URL and its reported size is written to the console.
        /// </summary>
        /// <param name="client">HTTP client used for the requests.</param>
        /// <param name="imageUrls">Absolute image URLs to probe.</param>
        /// <param name="largestLength">Content-Length of the returned URL, or 0 if none qualified.</param>
        /// <returns>The URL with the largest reported size, or an empty string if none qualified.</returns>
        private static string FindLargestImage(HttpClient client, IEnumerable<string> imageUrls, out long largestLength)
        {
            string largestUrl = "";
            largestLength = 0L;

            foreach (string imageUrl in imageUrls)
            {
                try
                {
                    // GetAwaiter().GetResult() rethrows the original exception instead of
                    // wrapping it in AggregateException, so the filter below can match it.
                    using (var request = new HttpRequestMessage(HttpMethod.Head, imageUrl))
                    using (HttpResponseMessage response = client.SendAsync(request).GetAwaiter().GetResult())
                    {
                        if (!response.IsSuccessStatusCode)
                        {
                            // An error response must not be mistaken for the largest image.
                            Console.WriteLine($"{imageUrl}: HTTP {(int)response.StatusCode}");
                            continue;
                        }

                        long? fileLength = response.Content?.Headers.ContentLength;

                        Console.WriteLine($"{imageUrl}: {fileLength} bytes");

                        // Servers may omit Content-Length; such responses are ignored.
                        if (fileLength.HasValue && fileLength.Value > largestLength)
                        {
                            largestUrl = imageUrl;
                            largestLength = fileLength.Value;
                        }
                    }
                }
                catch (Exception e) when (e is HttpRequestException || e is OperationCanceledException)
                {
                    // Best effort: one unreachable or timed-out image must not abort the scan.
                    Console.WriteLine(e);
                }
            }

            return largestUrl;
        }
    }
}