我正在尝试抓取某个网站上的HTML信息,以便解析后存入我们的数据库。问题是第二个和第三个responseFromServer
返回的内容相同。但是,当我在真实的网络浏览器中依次点击这些链接时,却能得到正确的信息(正确的页面)。
我认为每个WebRequest基本上都是打开一个“新的”浏览器实例,而我想让它们使用同一个实例。
有没有办法指定(比如使用WebClient?),让每个请求都包含在“同一个浏览器”中(想不到更好的说法)?
/// <summary>
/// Searches the Maricopa County assessor site for <paramref name="Address"/>, extracts the
/// parcel number from the result page, then requests the treasurer parcel page and the
/// 2013 tax-details page.
/// </summary>
/// <remarks>
/// All three requests share a single <see cref="CookieContainer"/>, so cookies issued by
/// one response are sent with the following requests, the way a browser session would.
/// Without a shared container every request starts with no cookies.
/// </remarks>
/// <param name="Address">Street address to search for; spaces are sent as '+'.</param>
static void CountyInfo(string Address)
{
    // One cookie store for the whole sequence of requests.
    CookieContainer sessionCookies = new CookieContainer();

    Address = Address.Replace(" ", "+");
    string url1 = "http://mcassessor.maricopa.gov/?s=" + Address;
    string responseFromServer1 = DownloadWithCookies(url1, sessionCookies);

    // The result page links to the treasurer site; keep only the digits of the parcel id.
    string ParcelNum = getBetween(responseFromServer1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
    ParcelNum = new String(ParcelNum.Where(Char.IsDigit).ToArray());

    // NOTE(review): presumably this request makes the server remember the parcel for the
    // session, which the tax-details URL below relies on (it carries no parcel id) - confirm.
    string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=" + ParcelNum;
    DownloadWithCookies(url2, sessionCookies);

    // The method returns nothing, so the final page is fetched and discarded as before.
    string url3 = "http://treasurer.maricopa.gov/Parcel/" + "TaxDetails.aspx?taxyear=2013";
    DownloadWithCookies(url3, sessionCookies);
}

/// <summary>
/// Issues a GET for <paramref name="url"/> using the given cookie container and returns the
/// response body as text. The response and reader are disposed before returning.
/// </summary>
private static string DownloadWithCookies(string url, CookieContainer cookies)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    // Cookies are only sent and stored when a container is attached to the request.
    request.CookieContainer = cookies;

    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        return reader.ReadToEnd();
    }
}
编辑:刚注意到这一点。request1给了我正确的页面(查询结果页面),但request2和request3都把我带回了网站的“主页”,即使我已分别将url2和url3传给了request2和request3。
答案 0 :(得分:1)
您需要管理Cookie。获取Web服务器提供的cookie,并在下一次请求时将其传回。
答案 1 :(得分:0)
很抱歉贴了这么长的代码,但我就是这样让它工作起来的。如果你能把它精简一下,请告诉我。
// Cookie store shared between requests: assigned in obtainCookies and attached to every
// HttpWebRequest created in MARICOPAcounty.
static public CookieContainer cookieJar;
/// <summary>
/// Program entry point: loads the order for a control number, primes the cookie jar and
/// then runs the Maricopa County lookup for the order's address.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // "######" is the placeholder control number from the original post.
    GetOrderInfo order = new GetOrderInfo("######");

    obtainCookies();
    MARICOPAcounty(order.OrderAddress);
}
/// <summary>
/// Sends an initial POST to the assessor home page and stores the cookie container used
/// for that request in <c>cookieJar</c>, so that any cookies the server issues are
/// available to later requests. Request/response diagnostics are written to the console.
/// </summary>
static void obtainCookies()
{
    const string siteUrl = "http://mcassessor.maricopa.gov/";

    // A single container is attached to the request AND published as cookieJar; cookies
    // set by the response are collected in the container attached to the request.
    CookieContainer sessionCookies = new CookieContainer();

    // Seed the analytics cookie. It is registered against the site URI because a cookie
    // domain must be a host name, not a full URL.
    sessionCookies.Add(new Uri(siteUrl), new Cookie("_ga", "GA1.2.1813386723.1386802842"));

    // NOTE(review): the body posted is just the site URL, as in the original; a plain GET
    // would probably serve equally well - confirm before changing.
    byte[] byteData = new UTF8Encoding().GetBytes(siteUrl);

    HttpWebRequest postReq = (HttpWebRequest)WebRequest.Create(siteUrl);
    postReq.Method = "POST";
    postReq.KeepAlive = true;
    postReq.CookieContainer = sessionCookies;
    postReq.ContentType = "text/html; charset=utf-8";
    postReq.Referer = siteUrl;
    postReq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
    postReq.ContentLength = byteData.Length;

    using (Stream postReqStream = postReq.GetRequestStream())
    {
        postReqStream.Write(byteData, 0, byteData.Length);
    }

    using (HttpWebResponse postResponse = (HttpWebResponse)postReq.GetResponse())
    using (StreamReader postReqReader = new StreamReader(postResponse.GetResponseStream()))
    {
        // Read the body to the end before releasing the response; the text is not needed.
        postReqReader.ReadToEnd();

        cookieJar = sessionCookies;

        Console.WriteLine("\r\n\r\n postResponse COOKIES");
        // Lists the cookies the server actually returned (possibly none).
        foreach (Cookie cookie in postResponse.Cookies)
        {
            Console.WriteLine(cookie);
        }
        Console.WriteLine("\r\n\r\n postReq Headers");
        Console.WriteLine(postReq.Headers.ToString());
        Console.WriteLine("\r\n\r\n postResponse Headers");
        Console.Write("\t" + postResponse.Headers);
    }
}
/// <summary>
/// Looks up <paramref name="Address"/> on the Maricopa County assessor site, follows the
/// parcel link to the treasurer site and returns the HTML of the 2013 tax-details page.
/// </summary>
/// <remarks>
/// Every request uses the shared <c>cookieJar</c>, so cookies issued by one response are
/// sent with the next request. Request and response headers are written to the console.
/// </remarks>
/// <param name="Address">Street address to search for; spaces are sent as '+'.</param>
/// <returns>The body of the tax-details page.</returns>
public static string MARICOPAcounty(string Address)
{
    // A null container would disable cookies on every request, so create one if the
    // caller has not primed the jar.
    if (cookieJar == null)
    {
        cookieJar = new CookieContainer();
    }

    Address = Address.Replace(" ", "+");
    string url1 = "http://mcassessor.maricopa.gov/?s=" + Address;
    string sourceCode1 = FetchPage(url1, null, "1");

    // The result page links to the treasurer site; keep only the digits of the parcel id.
    // NOTE(review): if the link is not found ParcelNum may be empty - behaviour of
    // getBetween on a miss is not visible here; confirm.
    string ParcelNum = getBetween(sourceCode1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
    ParcelNum = new String(ParcelNum.Where(Char.IsDigit).ToArray());

    // Uses the parcel number that was just extracted rather than a fixed debugging value.
    string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=" + ParcelNum;
    FetchPage(url2, url1, "2");

    string url3 = "http://treasurer.maricopa.gov/Parcel/TaxDetails.aspx?taxyear=2013";
    return FetchPage(url3, url2, "3");
}

/// <summary>
/// Performs one browser-like GET with the shared cookie jar, logs the request and response
/// headers under the given tag, and returns the response body.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <param name="referer">Value for the Referer header, or null to send none.</param>
/// <param name="tag">Suffix used in the console log labels (e.g. "1").</param>
private static string FetchPage(string url, string referer, string tag)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.CookieContainer = cookieJar;
    request.Method = "GET";
    request.Accept = "text/html, application/xhtml+xml, */*";
    request.Headers.Add("Accept-Language: en-US");
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
    // Lets the framework advertise gzip/deflate AND decode the body; adding the
    // Accept-Encoding header by hand would hand compressed bytes to the StreamReader.
    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    if (referer != null)
    {
        request.Referer = referer;
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        string sourceCode = reader.ReadToEnd();

        Console.WriteLine("\r\n\r\nrequest" + tag + " Headers");
        Console.WriteLine(request.Headers.ToString());
        Console.WriteLine("\r\n\r\nresponse" + tag + " Headers");
        Console.Write("\t" + response.Headers);

        return sourceCode;
    }
}