我想用cURL请求一个URL,并跟踪它经过的每一个重定向URL。出于某种原因,如果不进行(并不理想的)递归cURL调用,我就无法做到这一点。也许我漏掉了某个简单的选项。有什么想法吗?
// Fetch a URL with cURL, letting libcurl follow redirects on its own, and
// dump the transfer statistics that curl_getinfo() reports afterwards.
$url = "some url with redirects";
$ch = curl_init($url);
// Return the response from curl_exec() as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Follow "Location:" redirects automatically.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
// Include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, true);
// false = also download the body (a normal request, not headers only).
curl_setopt($ch, CURLOPT_NOBODY, false);
// Both timeouts are given in seconds.
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0");
$html = curl_exec($ch);
$info = array();
// curl_errno() returns 0 when the transfer finished without an error.
if(!curl_errno($ch))
{
// Only summary data about the transfer is available here (final URL,
// redirect_count, timings); the intermediate URLs are not part of it.
$info = curl_getinfo($ch);
echo "<pre>";
print_r($info);
echo "</pre>";
}
我得到的输出如下:
Array
(
[url] => THE LAST URL THAT WAS HIT
[content_type] => text/html; charset=utf-8
[http_code] => 200
[header_size] => 1942
[request_size] => 1047
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 2 <---- I WANT THESE
[total_time] => 0.799589
[namelookup_time] => 0.000741
[connect_time] => 0.104206
[pretransfer_time] => 0.104306
[size_upload] => 0
[size_download] => 49460
[speed_download] => 61856
[speed_upload] => 0
[download_content_length] => 49460
[upload_content_length] => 0
[starttransfer_time] => 0.280781
[redirect_time] => 0.400723
)
答案 0 :(得分:10)
你的代码里有这一行:
// Tells cURL to follow redirects by itself.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
这意味着cURL会自动跟随重定向,并且只返回最终页面,不含Location标头。
要手动跟随Location重定向:
/**
 * Fetch $url and follow HTTP redirects by hand so every hop can be observed.
 *
 * libcurl's own redirect handling is switched off; after each request the
 * status code is inspected and, for a redirect, the target is read from the
 * response headers and requested with a recursive call.
 *
 * Known limitation: only absolute redirect targets (with scheme and host)
 * are followed. A relative "Location:" value is not resolved against the
 * current URL; the redirect response itself is returned in that case.
 *
 * @param string        $url              URL to request.
 * @param callable|null $redirectcallback Optional. Invoked as
 *                                        $redirectcallback($newUrl, $currentUrl)
 *                                        just before a redirect is followed.
 * @param int           $maxRedirects     How many more redirects may be followed.
 *                                        Guards against redirect loops.
 * @return string|false Raw response (headers followed by body) of the last
 *                      request made, or false if that request failed.
 */
function getWebPage($url, $redirectcallback = null, $maxRedirects = 10){
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_NOBODY, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0");
    $html = curl_exec($ch);
    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    // Release the handle now so handles do not pile up while recursing.
    unset($ch);

    // Transfer failed: there are no headers to inspect.
    if ($html === false) {
        return false;
    }

    // Not a redirect, or the redirect budget is used up: this is the result.
    $redirect_codes = array(301, 302, 303, 307, 308);
    if (!in_array($http_code, $redirect_codes) || $maxRedirects <= 0) {
        return $html;
    }

    // CURLOPT_HEADER puts the headers in front of the body; cut them out
    // using the size cURL reports rather than searching for a blank line.
    $httpheader = substr($html, 0, $header_size);

    // Header names are case-insensitive, hence /i. Anchoring at the start of
    // a line (/m) keeps "Content-Location:" from matching.
    $matches = array();
    if (!preg_match('/^(?:Location|URI):[ \t]*(\S.*?)\s*$/mi', $httpheader, $matches)) {
        return $html;
    }
    $nurl = $matches[1];

    // parse_url() returns false for seriously malformed URLs; also require
    // scheme and host so an empty or relative value is never requested.
    $url_parsed = parse_url($nurl);
    if ($url_parsed === false || !isset($url_parsed['scheme'], $url_parsed['host'])) {
        return $html;
    }

    if ($redirectcallback) { // callback
        call_user_func($redirectcallback, $nurl, $url);
    }
    return getWebPage($nurl, $redirectcallback, $maxRedirects - 1);
}
/**
 * Redirect callback: prints one line per hop in the form
 * "<current URL> ---> <new URL>", terminated by CRLF.
 *
 * @param string $newUrl     URL the redirect points to.
 * @param string $currentUrl URL that issued the redirect.
 */
function trackAllLocations($newUrl, $currentUrl){
    $hop = array($currentUrl, $newUrl);
    echo implode(' ---> ', $hop), "\r\n";
}
// Example: fetch the page and report every redirect through trackAllLocations().
getWebPage('some url with redirects', 'trackAllLocations');
答案 1 :(得分:3)
使用libcurl时,你可以通过CURLINFO_REDIRECT_URL这个getinfo变量查出请求将被重定向到的URL(即假如启用了自动跟随重定向,它会跳转到的地址)。这样程序就可以很方便地自行逐个遍历重定向。
这种方法比其他人在此处建议的解析Location:标头的做法更好、更简单,因为那样你的代码还必须自己重建相对路径等;CURLINFO_REDIRECT_URL会自动为你处理好这些。
PHP的cURL绑定在PHP 5.3.7中添加了对此功能的支持:
// URL the transfer would be redirected to next (needs PHP >= 5.3.7).
$url = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
修复此问题的提交:
https://github.com/php/php-src/commit/689268a0ba4259c8f199cae6343b3d17cab9b6a5
答案 2 :(得分:2)
我可以提出建议......
// Case-sensitive pattern: matches only headers written exactly "Location:" or "URI:".
preg_match('/(Location:|URI:)(.*?)\n/', $httpheader, $matches);
将正则表达式改为/(Location:|URI:)(.*?)\n/i,使其不区分大小写。我注意到有些网站使用的是location:,其中的L是小写的。
只是想帮一下那些奇怪它为什么有时不起作用的人……可以检查一下这一点。