我正在使用cURL来获取网站。我可以获取登录页面并使用cURL登录成功通过POST表单发送凭证详细信息。当我尝试使用GET完成下一页时(因为我的Web浏览器在调试实用程序的网络部分中报告它),Web浏览器显示您通常看到的页面,但cURL向我显示另一页(登录页面)。我在我的cURL脚本中启用了cookie。我尝试禁用Javascript以防它正在使用cookie但它仍然在没有它的浏览器中工作。我还缺少什么?
以下是我的curl脚本:
function HttpGet($url)
{
$header = array();
$header[] = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
$header[] = 'Cache-Control: max-age=0';
$header[] = 'Connection: keep-alive';
$header[] = 'Keep-Alive: 300';
$header[] = 'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7';
$header[] = 'Accept-Language: en-us,en;q=0.5';
$header[] = 'Pragma: ';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)');
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
//curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
if (! $html_content = curl_exec($ch))
{
trigger_error(curl_error($ch));
}
echo "CURLINFO_EFFECTIVE_URL: " . curl_getinfo($ch, CURLINFO_EFFECTIVE_URL) . "\n";
echo "CURLINFO_HTTP_CODE: " . curl_getinfo($ch, CURLINFO_HTTP_CODE) . "\n";
curl_close ($ch);
return $html_content;
}
和
function HttpPost($address, $fields, $outputFilename, $currentUrl)
{
$header = array();
$header[] = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
$header[] = 'Cache-Control: max-age=0';
$header[] = 'Connection: keep-alive';
$header[] = 'Keep-Alive: 300';
$header[] = 'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7';
$header[] = 'Accept-Language: en-us,en;q=0.5';
$header[] = 'Pragma: ';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $address);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)');
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
foreach($fields as $key=>$value) { $fields_string .= $key.'='.$value.'&'; }
$fields_string = rtrim($fields_string, '&');
curl_setopt($ch,CURLOPT_POST, count($fields));
curl_setopt($ch,CURLOPT_POSTFIELDS, $fields_string);
if (! $html_content = curl_exec($ch))
{
trigger_error(curl_error($ch));
}
$currentUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
echo "CURLINFO_EFFECTIVE_URL: " . curl_getinfo($ch, CURLINFO_EFFECTIVE_URL) . "\n";
echo "CURLINFO_HTTP_CODE: " . curl_getinfo($ch, CURLINFO_HTTP_CODE) . "\n";
curl_close ($ch);
return $html_content;
}
答案 0 :(得分:0)
找出原因。我使用的源代码中有错误。这一行:
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
用于在每次执行脚本时重置cookie文件。 通过删除它,cookie保持不变并且一切正常。