我刚接触 ASP.NET 页面抓取,需要你的帮助来修正以下代码片段:
代码的目的是抓取网站的第二个标签页,该标签页是通过 __doPostBack 调用加载的。
通过使用 HttpFox,我已经知道 POST 请求中传递的参数,想重新构建这个 POST 请求以使其正常工作。
现在,在对参数使用 urlencode 之后,我得到了
[FormatException: Base-64 字符串中的字符无效。]
错误;而当我没有发送正确的 VIEWSTATE 时,则会收到"无效的 VIEWSTATE"错误。
/**
 * Send an HTTP POST request with cURL and return the response body.
 *
 * @param string       $url         URL to post to.
 * @param array|string $fields_data POST payload, handed to CURLOPT_POSTFIELDS unchanged.
 *                                  An array is sent by cURL as multipart/form-data with
 *                                  the values transmitted verbatim, so they must NOT be
 *                                  URL-encoded by the caller. A string is sent as the
 *                                  request body as-is and must already be URL-encoded
 *                                  (e.g. built with http_build_query()).
 * @return string|false Response body, or false when the cURL handle cannot be created
 *                      or the transfer fails (an E_USER_WARNING describes the cause).
 */
function get_data_with_post_fields($url, $fields_data) {
    $ch = curl_init();
    if ($ch === false) {
        trigger_error('curl_init() failed', E_USER_WARNING);
        return false;
    }

    $timeout = 5; // seconds allowed for establishing the connection

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
    // Return the response from curl_exec() instead of echoing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // Empty string: advertise every content encoding this cURL build supports.
    curl_setopt($ch, CURLOPT_ENCODING, "");
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_data);
    // Do not reuse a cached connection for this request.
    curl_setopt($ch, CURLOPT_FRESH_CONNECT, 1);

    $data = curl_exec($ch);
    if ($data === false) {
        // Surface the reason for the failure; the return value stays false as before.
        trigger_error(
            'cURL request to ' . $url . ' failed: ' . curl_error($ch),
            E_USER_WARNING
        );
    }

    curl_close($ch);
    return $data;
}
// Rebuild the __doPostBack form submission that switches to the second tab.
//
// The raw (un-encoded) values go into the array; the whole payload is URL-encoded
// exactly once by http_build_query(). Encoding the values by hand and then passing
// an array to cURL sends the percent-escapes literally, which corrupts the Base-64
// __VIEWSTATE on the server side.
$VIEWSTATE = "/wEPDwUJNzU5NTE3MDU3..........moreeee........Qay9urEI=";
$url_to_scrape = 'http://www.awebsite.com/en/product/393633343239/ProductDetails.aspx';

// NOTE(review): __VIEWSTATE (and __EVENTVALIDATION, if the page emits one) presumably
// have to be taken from a fresh GET of this page rather than hard-coded — confirm.
$PF = array(
    '__VIEWSTATE'       => $VIEWSTATE,
    '__EVENTVALIDATION' => "",
    '__EVENTARGUMENT'   => "",
    // Single quotes are required: in a double-quoted string PHP would interpolate
    // $cphMain and $lbTab2 as (undefined) variables and send just "ctl00".
    '__EVENTTARGET'     => 'ctl00$cphMain$lbTab2',
);

// Passing a string makes cURL send an application/x-www-form-urlencoded body,
// which is what a browser form post produces.
print $raw = get_data_with_post_fields($url_to_scrape, http_build_query($PF, '', '&'));
如何重建我的POST请求?