无法在C#中读取UNICODE URL

时间:2012-09-12 05:11:35

标签: c# unicode web-crawler idn

以下代码不起作用:

using System;
using System.IO;
using System.Net;
using System.Web;

namespace Proyecto_Prueba_04
{
    class Program
    {
        /// <summary>
        /// Requests <paramref name="url"/> over HTTP and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute URL to request; it is handed unchanged to <c>WebRequest.Create</c>.</param>
        /// <returns>The complete response body as read by a <see cref="StreamReader"/>.</returns>
        public static string GetWebText(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.UserAgent = "A .NET Web Crawler";

            // Dispose the response, its stream and the reader even when reading
            // throws; otherwise the underlying connection is not released.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        } // End of the GetWebText method.

        /// <summary>
        /// Console entry point: unescapes a test URL, path-encodes it, downloads it with
        /// <see cref="GetWebText"/> and prints every intermediate value.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // NOTE(review): the run of '?' characters looks like a non-ASCII host name
            // that was lost to a text-encoding problem - confirm the intended URL.
            string urlPrueba = Uri.UnescapeDataString("http://?????????.??/");
            Console.WriteLine("urlPrueba" + " = " + urlPrueba);

            // NOTE(review): UrlPathEncode targets the path part of a URL and presumably
            // does not turn an internationalized host name into Punycode - confirm.
            var encoded = HttpUtility.UrlPathEncode(urlPrueba);
            Console.WriteLine("encoded" + " = " + encoded);

            string codigoHTML = GetWebText(encoded);
            Console.WriteLine("codigoHTML" + " = " + codigoHTML);

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        } // End of the Main method.
    } // End of the Program class.
} // End of the Proyecto_Prueba_04 namespace.

我无法理解如何处理UNICODE URL。

有什么想法吗?

感谢。

2 个答案:

答案 0 :(得分:2)

您可以使用 IdnMapping 类。

  // Internationalized (Unicode) host name to be requested.
  const string unicodeHost = "президент.рф";

  // IdnMapping.GetAscii yields the ASCII form of the host name; print it for inspection.
  var punycodeHost = new IdnMapping().GetAscii(unicodeHost);
  Console.WriteLine(punycodeHost);

  // Download the page through the ASCII host name and print the result.
  string html = GetWebText("http://" + punycodeHost);
  Console.WriteLine(html);

答案 1 :(得分:0)

请使用 System.Uri,而不是以字符串形式保存的 URL。

这很有效,我试过了:

using System;
using System.IO;
using System.Net;
using System.Web;

namespace Proyecto_Prueba_04
{
    internal class Program
    {
        /// <summary>
        /// Requests <paramref name="uri"/> over HTTP and returns the response body as text.
        /// </summary>
        /// <param name="uri">Address of the resource to download.</param>
        /// <returns>The complete response body, or <c>null</c> when the response exposes no stream.</returns>
        public static string GetWebText(Uri uri)
        {
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(uri);
            webRequest.UserAgent = "A .NET Web Crawler";

            // Both the response and its stream are released when this scope is left.
            using (WebResponse webResponse = webRequest.GetResponse())
            using (Stream body = webResponse.GetResponseStream())
            {
                if (body == null)
                {
                    return null;
                }

                using (StreamReader bodyReader = new StreamReader(body))
                {
                    return bodyReader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Console entry point: builds a <see cref="Uri"/> from a Unicode URL, prints its
        /// absolute form, downloads the page and prints the returned text.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            Uri uri = new Uri("http://президент.рф/");
            Console.WriteLine("urlPrueba" + " = " + uri.AbsoluteUri);

            string pageText = GetWebText(uri);
            Console.WriteLine("codigoHTML" + " = " + pageText);

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
    }
}