我想抓取需要身份验证和安全证书的网站

时间:2017-11-10 04:56:08

标签: c# asp.net-mvc

我想抓取需要身份验证的网站。验证后,我转到启用了安全证书的新页面。创建SSL连接后,它返回的响应不完整,无法进行爬网。

要点: 在登录页面上使用凭据成功登录后,我又向需要 SSL 客户端证书的页面发起了另一个请求。附上证书后,我可以检索到该页面,但它返回的响应是不完整的。谢谢,请帮帮我。

/// <summary>
/// Logs in to the site with a form POST, then fetches a page that requires a
/// client certificate and parses the returned HTML with HtmlAgilityPack.
/// </summary>
/// <remarks>
/// The login request and the certificate-protected request share a single
/// <see cref="CookieContainer"/>, so session cookies issued at login are sent
/// with the second request whenever their domain/path match the target URL.
/// </remarks>
/// <returns>The default view for this action.</returns>
/// <exception cref="HttpRequestException">The login request returned a non-success status code.</exception>
/// <exception cref="WebException">The certificate-protected request failed.</exception>
public async Task<ActionResult> TestWebsite()
{
    string loginUrl = "http://www.example.com/login";
    // NOTE(review): "~/" is an ASP.NET virtual path, while Import expects a file-system
    // path - confirm this is mapped to a physical path before use (e.g. Server.MapPath).
    string uri = "~/Security_certificate.p12";
    // NOTE(review): credentials and the certificate password are hard-coded; move them to
    // protected configuration before this leaves a test environment.
    string CertPass = "00804";
    string host = "https://www.example.net/ex";

    // One cookie jar for both requests; without it the second request is sent
    // without the session established by the login.
    CookieContainer myContainer = new CookieContainer();

    var nvc = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("Username", "user"),
        new KeyValuePair<string, string>("Password", "123123")
    };

    // Getting certificate ready
    X509Certificate2Collection certificates = new X509Certificate2Collection();
    certificates.Import(uri, CertPass, X509KeyStorageFlags.MachineKeySet | X509KeyStorageFlags.PersistKeySet);

    // Step 1: log in. The handler stores any Set-Cookie headers in myContainer.
    using (var handler = new HttpClientHandler { CookieContainer = myContainer })
    using (var client = new HttpClient(handler))
    using (var loginContent = new FormUrlEncodedContent(nvc))
    using (var res = await client.PostAsync(loginUrl, loginContent))
    {
        // Fail fast instead of crawling the protected page with no session.
        res.EnsureSuccessStatusCode();
    }

    // Step 2: request the certificate-protected page, re-using the login cookies.
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(host);
    req.AllowAutoRedirect = true;
    req.Method = "GET";
    req.Headers.Add("cache-control", "no-cache");
    req.ClientCertificates = certificates;
    req.CookieContainer = myContainer;

    // SECURITY: this accepts ANY server certificate. It is scoped to this single request
    // (rather than ServicePointManager, which is process-wide) so that other outgoing
    // connections keep normal TLS validation. Replace with real validation when possible.
    req.ServerCertificateValidationCallback = (a, b, c, d) => true;

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // Await the response instead of blocking the request thread, and dispose the
    // response and its stream once the document has been read.
    using (WebResponse resp = await req.GetResponseAsync())
    using (var result = resp.GetResponseStream())
    {
        doc.Load(result);
    }

    return View();
}

0 个答案:

没有答案