我想抓取一个需要身份验证的网站。登录验证通过后,我会跳转到一个启用了安全证书的新页面。SSL 连接建立后,服务器返回的响应不完整,导致无法抓取该页面。
要点:我在登录页面上使用凭据成功登录后,又创建了另一个请求,用于访问需要 SSL 证书校验的页面。在安装了证书的情况下,我可以取到该页面,但它返回的响应是不完整的。请帮帮我,谢谢!
/// <summary>
/// Logs in to the site with a form POST, then requests a page over HTTPS
/// while presenting a client certificate, and parses the returned HTML.
/// </summary>
/// <remarks>
/// Both requests share one <see cref="CookieContainer"/>, so any session
/// cookie issued by the login response is offered to the second request.
/// A cookie is only sent to a host that matches the cookie's domain.
/// </remarks>
/// <returns>The default view. The parsed document is not passed to it.</returns>
public async Task<ActionResult> TestWebsite()
{
    string loginUrl = "http://www.example.com/login";
    // NOTE(review): "~/" is an ASP.NET virtual path; Import() below treats this
    // string as a plain file-system path. Confirm it resolves, or map it to a
    // physical path first.
    string uri = "~/Security_certificate.p12";
    // NOTE(review): certificate password and login credentials are hard-coded;
    // move them to protected configuration.
    string CertPass = "00804";
    string host = "https://www.example.net/ex";

    // Shared by the login request and the certificate-protected request.
    var cookies = new CookieContainer();

    var nvc = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("Username", "user"),
        new KeyValuePair<string, string>("Password", "123123"),
    };

    // Load the client certificate(s) that will be presented to the HTTPS host.
    X509Certificate2Collection certificates = new X509Certificate2Collection();
    certificates.Import(uri, CertPass, X509KeyStorageFlags.MachineKeySet | X509KeyStorageFlags.PersistKeySet);

    // WARNING: this accepts every server certificate and is a process-wide
    // setting, so it disables TLS server validation for all requests in the
    // application. Kept to preserve existing behavior; replace with real
    // validation before production use.
    ServicePointManager.ServerCertificateValidationCallback = (a, b, c, d) => true;

    var doc = new HtmlAgilityPack.HtmlDocument();

    // Step 1: log in. The handler stores any Set-Cookie values in `cookies`.
    using (var handler = new HttpClientHandler { CookieContainer = cookies })
    using (var client = new HttpClient(handler))
    using (var loginRequest = new HttpRequestMessage(HttpMethod.Post, loginUrl) { Content = new FormUrlEncodedContent(nvc) })
    using (var loginResponse = await client.SendAsync(loginRequest))
    {
        var loginHtml = await loginResponse.Content.ReadAsStringAsync();
        doc.LoadHtml(loginHtml);
    }

    // Step 2: request the protected page with the client certificate and the
    // cookies collected during login.
    var req = (HttpWebRequest)WebRequest.Create(host);
    req.AllowAutoRedirect = true;
    req.Method = "GET";
    req.Headers.Add("cache-control", "no-cache");
    req.ClientCertificates = certificates;
    req.CookieContainer = cookies;

    // Dispose the response and its stream so the connection is released.
    using (var resp = await req.GetResponseAsync())
    using (var body = resp.GetResponseStream())
    {
        // Replaces the login page content previously loaded into `doc`.
        doc.Load(body);
    }

    return View();
}