我的函数应该获取 $url 的目标网址:
/**
 * Request $url with cURL, following HTTP redirects, and return the transfer
 * information reported by curl_getinfo().
 *
 * The author's stated intent is to obtain the target address of $url.
 *
 * @param string $url Address to request.
 * @return array Transfer information for the finished request.
 */
function getUrl($url)
{
    $connectTimeout = 10; // seconds; zero disables the connect timeout
    $agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_USERAGENT, $agent);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $connectTimeout);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_HEADER, 1);

    // The response text itself is not used; only the transfer info is returned.
    curl_exec($handle);
    $info = curl_getinfo($handle);
    curl_close($handle);

    return $info;
}
/**
 * Demo driver: pass one hard-coded URL to getUrl() and dump the returned
 * info array, followed by an HTML line break.
 */
function get_url_list()
{
    $info = getUrl("http://www.webliste.ch/click.aspx?nr=148252");
    print_r($info);
    echo "<br>";
}

// Run the demo immediately.
get_url_list();
这导致以下结果:
Array
(
[url] => http://www.webliste.ch/click.aspx?nr=148252
[content_type] => text/html; charset=iso-8859-1
[http_code] => 200
[header_size] => 320
[request_size] => 139
...
[redirect_time] => 0
[certinfo] => Array
(
)
[redirect_url] =>
)
我很困惑,因为该网址确实会重定向:如果我直接输出(echo)$ch 的执行结果,得到的就是重定向后的网站。
有人知道这是什么原因吗?
以下也不起作用:
// Ask cURL for the effective URL recorded on the handle $ch.
$final_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
输出与 $result['url'] 相同,这不是我要找的。
答案 0 :(得分:1)
我已经分析了实际发生的情况,现在我发现重定向不是由该页面上的重定向标题引起的,而是通过JavaScript立即提交表单并将您重定向到起始页面。
可能很难确定网页的网址,但您可以做的是查找 <form> 标签,然后在其 action 属性中找到该网址。
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
<head id="Head1">
<title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta name="ROBOTS" content="NOINDEX, NOFOLLOW" />
</head>
<body>
<form id="form1" action="http://www.taxiherold.ch">
<div id="panGo" align="center">
<script type="text/javascript">
document.getElementById('form1').submit();
</script>
</div>
</form>
</body>
</html>
现在尝试使用此代码:
// Fetch the intermediate page itself. Redirect following is switched off
// because the target is taken from the page's <form action="...">.
$ch = curl_init('http://www.webliste.ch/click.aspx?nr=148252');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
$data = curl_exec($ch);
curl_close($ch);

// Stays null when the request fails or the page does not contain exactly one form.
$url = null;

// curl_exec() returns false on failure, and loadHTML() cannot parse an empty document.
if (is_string($data) && $data !== '') {
    // Collect markup warnings internally instead of silencing them with "@".
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DOMXPath($dom);
    $forms = $xpath->query('//body/form');
    if ($forms->length == 1) {
        $url = $forms->item(0)->getAttribute('action');
    }
}
var_dump($url);
对于上面的 HTML,将输出:
string(24) "http://www.taxiherold.ch"
答案 1 :(得分:0)
我写的这个类可以帮助你。
此类返回所有响应头信息,例如重定向等。
/**
 * Split a raw HTTP header dump into one associative array per header block.
 *
 * An empty line starts the next block. Inside a block, a "Name: value" line is
 * stored as name => value, where the value is everything after the first colon
 * (leading whitespace is kept unchanged). A line without a colon, such as the
 * status line, is stored as line => line. When a name occurs more than once,
 * its entries are collected into a list in order of appearance.
 *
 * @param string|array $response Header text, or an array of lines when $String is not 1.
 * @param callable|string $Run   Optional callback, invoked as $Run($name, $value) for every line.
 * @param int $String            1 to split $response on "\r\n" first; otherwise $response is iterated as given.
 * @return array                 Header blocks keyed by block number, starting at 0.
 */
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
    if($String==1){
        $response=explode("\r\n",$response);
    }
    $PartHeader=0;
    $out=array($PartHeader=>array());
    // foreach instead of each(): each() no longer exists in PHP 8.
    foreach($response as $val){
        $val=(string)$val;
        $colon=strpos($val,':');
        $flag=($colon!==false);
        if($flag){
            $name=(string)substr($val,0,$colon);
            $value=(string)substr($val,$colon+1);
            // Lines passed in array mode may still carry their terminator; cut the value there.
            $eol=strpos($value,"\r\n");
            if($eol!==false){
                $value=(string)substr($value,0,$eol);
            }
        }else{
            $name=$val;
            $value='';
        }
        // Colon-less lines are recorded under their own text.
        $stored=$flag ? $value : $name;
        if($name==='' && $value===''){
            // Blank line (or a bare ":"): the following lines belong to the next block.
            $PartHeader++;
        }elseif(!isset($out[$PartHeader][$name])){
            $out[$PartHeader][$name]=$stored;
        }elseif(is_array($out[$PartHeader][$name])){
            $out[$PartHeader][$name][]=$stored;
        }else{
            // Second occurrence: turn the single entry into a list.
            $out[$PartHeader][$name]=array($out[$PartHeader][$name],$stored);
        }
        if(!empty($Run)){
            $Run($name,$value);
        }
    }
    return $out;
}
/**
 * Small cURL wrapper that performs GET/POST requests with browser-like
 * headers and returns the parsed response headers, body, status and final URL.
 *
 * Header parsing is delegated to the global HeaderProc() function.
 */
class cURL {
    /** @var array Request header lines sent with every request. */
    public $headers = array();
    /** @var string Value passed to CURLOPT_USERAGENT. */
    public $user_agent;
    /** @var string Value passed to CURLOPT_ENCODING. */
    public $compression;
    /** @var string Path used as both cookie file and cookie jar. */
    public $cookie_file;
    /** @var string Proxy passed to CURLOPT_PROXY; skipped when empty. */
    public $proxy;
    /** @var array Result of the most recent CookieAnalysis() call. */
    public $Cookie;
    /** @var bool Whether the cookie file/jar options are applied. */
    public $cookies = false;

    /**
     * Convert a cookie header string into an array of name => value pairs.
     *
     * Every "name=value" segment terminated by ";" is returned, cookie
     * attributes included. Segments without "=" are skipped.
     *
     * @param string $Cookie Cookie header value.
     * @return array Trimmed names mapped to trimmed values.
     */
    function CookieAnalysis($Cookie){
        $this->Cookie=array();
        // Negated classes keep a valueless segment from being glued onto the next key.
        if(preg_match_all('~([^=;]*)=([^;]*);~',' '.$Cookie.'; ',$pairs,PREG_SET_ORDER)){
            foreach($pairs as $pair){
                $this->Cookie[trim($pair[1])]=trim($pair[2]);
            }
        }
        return $this->Cookie;
    }

    /**
     * @param bool   $cookies     Enable the cookie file/jar.
     * @param string $cookie      Cookie file path; created when missing and $cookies is on.
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Optional proxy.
     */
    function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
        // No manual Accept-Encoding line: CURLOPT_ENCODING (set per request) controls it,
        // and the previous hand-written value also advertised "sdch".
        $this->headers = array(
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Language:en-US,en;q=0.8',
            'Cache-Control:max-age=0',
            'Connection:keep-alive',
        );
        // CURLOPT_USERAGENT expects the bare value, without a "User-Agent:" prefix.
        $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
        $this->compression=$compression;
        $this->proxy=$proxy;
        $this->cookies=$cookies;
        if ($this->cookies == TRUE) $this->cookie($cookie);
    }

    /**
     * Remember the cookie file path, creating an empty file when it does not exist.
     * Calls error() (which terminates the script) when the file cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            $handle = fopen($cookie_file,'w');
            if ($handle === false) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            // Close the handle that was actually opened (not the path string).
            fclose($handle);
        }
        $this->cookie_file=$cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url Address to request.
     * @return array Keys: Header, Body, HTTP_State, URL, Error.
     */
    function GET($url) {
        return $this->request($url, false, null);
    }

    /**
     * Perform a POST request.
     *
     * @param string       $url  Address to request.
     * @param string|array $data Value for CURLOPT_POSTFIELDS; defaults to an empty body.
     * @return array Keys: Header, Body, HTTP_State, URL, Error.
     */
    function POST($url,$data='') {
        return $this->request($url, true, $data);
    }

    /**
     * Shared implementation of GET() and POST().
     *
     * @param string $url    Address to request.
     * @param bool   $isPost Whether to send $data as a POST body.
     * @param mixed  $data   POST payload; ignored for GET.
     * @return array Header (blocks from HeaderProc, with multi-valued Set-Cookie
     *               entries parsed by CookieAnalysis), Body, HTTP_State, URL and
     *               Error (curl_error() text, empty on success).
     */
    private function request($url, $isPost, $data) {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_HEADER, 1);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        if ($isPost) curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        if ($isPost) curl_setopt($process, CURLOPT_POST, 1);

        $response = curl_exec($process);
        $error = curl_error($process);
        if ($response === false) {
            // Failed transfer: continue with an empty response; the reason is reported in 'Error'.
            $response = '';
        }
        $header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);

        $result = array();
        $result['Header'] = HeaderProc((string)substr($response, 0, $header_size),'',1);
        foreach($result['Header'] as $HeaderK=>$HeaderP){
            // Only a repeated Set-Cookie header is a list; a single one stays a plain string.
            if(!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie'])) continue;
            foreach($HeaderP['Set-Cookie'] as $key=>$val){
                $result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
            }
        }
        $result['Body'] = (string)substr($response, $header_size);
        $result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
        $result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
        $result['Error'] = $error;
        curl_close($process);
        return $result;
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display.
     */
    function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }
}
示例:
// Example: send a POST request and dump the parsed result.
// POST() takes the request body as its second argument; an empty body is sent here.
$cc = new cURL();
print_r( $cc->POST('http://www.domain.com', ''));
适用于旧版 PHP 的版本:
/**
 * Parse raw HTTP header text into a list of header blocks.
 *
 * Each empty line moves on to the next block. A line containing a colon is
 * split at the first colon into name => value (the value keeps any leading
 * whitespace). A line without a colon, e.g. the status line, is recorded as
 * line => line. Repeated names are gathered into a list, in input order.
 *
 * Uses only constructs available in old and current PHP versions.
 *
 * @param string|array $response Header text, or an array of lines when $String is not 1.
 * @param string $Run            Optional function name, called as $Run($name, $value) for every line.
 * @param int $String            1 to split $response on "\r\n" first; otherwise $response is iterated as given.
 * @return array                 Header blocks keyed by block number, starting at 0.
 */
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
    if($String==1){
        $response=explode("\r\n",$response);
    }
    $PartHeader=0;
    $out=array();
    $out[$PartHeader]=array();
    // foreach works on every PHP version; each() is gone in PHP 8.
    foreach($response as $line){
        $line=(string)$line;
        $name=$line;
        $value='';
        $pos=strpos($line,':');
        $flag=($pos!==false);
        if($flag){
            $name=(string)substr($line,0,$pos);
            $value=(string)substr($line,$pos+1);
            // A line handed over in array mode may still end in "\r\n"; stop the value there.
            $end=strpos($value,"\r\n");
            if($end!==false){
                $value=(string)substr($value,0,$end);
            }
        }
        if($name==='' && $value===''){
            // Empty line (or a lone ":"): switch to the next block.
            $PartHeader++;
        }else{
            // Lines without a colon are stored under, and as, their own text.
            $entry=$flag ? $value : $name;
            if(!isset($out[$PartHeader][$name])){
                $out[$PartHeader][$name]=$entry;
            }elseif(is_array($out[$PartHeader][$name])){
                $out[$PartHeader][$name][]=$entry;
            }else{
                // Second occurrence: promote the existing entry to a list.
                $first=$out[$PartHeader][$name];
                $out[$PartHeader][$name]=array($first,$entry);
            }
        }
        if(!empty($Run)){
            $Run($name,$value);
        }
    }
    return $out;
}
/**
 * cURL wrapper (variant written for older PHP versions) that sends GET/POST
 * requests with browser-like headers and returns the parsed response headers,
 * body, status code and final URL.
 *
 * Header parsing is delegated to the global HeaderProc() function.
 */
class cURL {
    /** @var array Request header lines sent with every request. */
    public $headers = array();
    /** @var string Value passed to CURLOPT_USERAGENT. */
    public $user_agent;
    /** @var string Value passed to CURLOPT_ENCODING. */
    public $compression;
    /** @var string Path used as both cookie file and cookie jar. */
    public $cookie_file;
    /** @var string Proxy passed to CURLOPT_PROXY; skipped when empty. */
    public $proxy;
    /** @var array Result of the most recent CookieAnalysis() call. */
    public $Cookie;
    /** @var bool Whether the cookie file/jar options are applied. */
    public $cookies = false;

    /**
     * Extract the first "name=value" pair of a cookie header string.
     *
     * @param string $Cookie Cookie header value.
     * @return array One trimmed name => value entry, or an empty array when
     *               the string contains no "name=value" pair.
     */
    function CookieAnalysis($Cookie){
        $this->Cookie=array();
        // Only touch the match groups when the pattern actually matched.
        if(preg_match("~(.*?)=(.*?);~si",' '.$Cookie.'; ',$M)){
            $this->Cookie[trim($M[1])]=trim($M[2]);
        }
        return $this->Cookie;
    }

    /**
     * @param bool   $cookies     Enable the cookie file/jar.
     * @param string $cookie      Cookie file path; created when missing and $cookies is on.
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Optional proxy.
     */
    function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
        $this->headers = array();
        $this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
        $this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
        // Accept-Encoding is left to CURLOPT_ENCODING (set per request); the former
        // hand-written line also advertised "sdch".
        $this->headers[] = 'Accept-Language:en-US,en;q=0.8';
        $this->headers[] = 'Cache-Control:max-age=0';
        $this->headers[] = 'Connection:keep-alive';
        // Bare value only: CURLOPT_USERAGENT adds the header name itself.
        $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
        $this->compression=$compression;
        $this->proxy=$proxy;
        $this->cookies=$cookies;
        if ($this->cookies == TRUE) $this->cookie($cookie);
    }

    /**
     * Store the cookie file path; an empty file is created first when none exists.
     * Calls error() (which terminates the script) when the file cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    function cookie($cookie_file) {
        if (file_exists($cookie_file)) {
            $this->cookie_file=$cookie_file;
            return;
        }
        $fp = fopen($cookie_file,'w');
        if (!$fp) {
            $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
        }
        $this->cookie_file=$cookie_file;
        // Release the handle returned by fopen(), not the path string.
        fclose($fp);
    }

    /**
     * Perform a GET request.
     *
     * @param string $url Address to request.
     * @return array Keys: Header, Body, HTTP_State, URL, Error.
     */
    function GET($url) {
        return $this->send($url, false, null);
    }

    /**
     * Perform a POST request.
     *
     * @param string       $url  Address to request.
     * @param string|array $data Value for CURLOPT_POSTFIELDS; defaults to an empty body.
     * @return array Keys: Header, Body, HTTP_State, URL, Error.
     */
    function POST($url,$data='') {
        return $this->send($url, true, $data);
    }

    /**
     * Common request routine behind GET() and POST().
     *
     * @param string $url     Address to request.
     * @param bool   $withBody Whether $data is sent as a POST body.
     * @param mixed  $data    POST payload; unused for GET.
     * @return array Header (blocks from HeaderProc, with multi-valued Set-Cookie
     *               entries parsed by CookieAnalysis), Body, HTTP_State, URL and
     *               Error (curl_error() text, empty on success).
     */
    private function send($url, $withBody, $data) {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_HEADER, 1);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
        if ($this->cookies == TRUE) curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        if ($withBody) curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        if ($withBody) curl_setopt($process, CURLOPT_POST, 1);

        $response = curl_exec($process);
        $failure = curl_error($process);
        if ($response === false) {
            // Transfer failed: carry on with an empty response and report the reason in 'Error'.
            $response = '';
        }
        $header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);

        $result = array();
        $result['Header'] = HeaderProc((string)substr($response, 0, $header_size),'',1);
        foreach($result['Header'] as $HeaderK=>$HeaderP){
            // Skip blocks without Set-Cookie, and single (string) Set-Cookie values.
            if(!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie']))continue;
            foreach($HeaderP['Set-Cookie'] as $key=>$val){
                $result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
            }
        }
        $result['Body'] = (string)substr($response, $header_size);
        $result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
        $result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
        $result['Error'] = $failure;
        curl_close($process);
        return $result;
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display.
     */
    function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }
}
示例:
// Example: send a POST request and dump the parsed result.
// POST() takes the request body as its second argument; an empty body is sent here.
$cc = new cURL();
print_r( $cc->POST('http://www.domain.com', ''));
Yahoo 网站的示例响应头输出:
[Header] => Array
(
[0] => Array
(
[HTTP/1.1 302 Found] => HTTP/1.1 302 Found
[Date] => Sat, 02 Mar 2013 14:37:19 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => fpc=d=a2polPzlISX4q5OZQBxq.CKduGwG2Wm1YrPD59ENCUl3uTzrs.8HlnpJROO8MWa6M.B8e1JuCsbW25qwqY5zEs.mA0_EVlAVPMhFCdfCxhZf6vWmmqpPm9bVzGYs8Y7IyTG7IFp9p0MN_FPQmzNM7I8XBu4iGCI8MbHWFvOMKmhN9MTkPC4KbNJ2izSK9xBXTedDnYw-&v=2; expires=Sun, 02-Mar-2014 14:37:19 GMT; path=/; domain=www.yahoo.com
[Location] => http://en-maktoob.yahoo.com/?p=us
[Vary] => Accept-Encoding
[Content-Type] => text/html; charset=utf-8
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
[1] => Array
(
[HTTP/1.1 200 OK] => HTTP/1.1 200 OK
[Date] => Sat, 02 Mar 2013 14:37:20 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => Array
(
[0] => Array
(
[IU] => deleted
)
[1] => Array
(
[PH] => deleted
)
[2] => Array
(
[MSC] => t=1362235040X
)
[3] => Array
(
[fpc] => d=_7tfRPjaISWhpxKrzORZ47ywABwHrUd0vF3WBQH9UYD6KMC7fyjTBdcMMh1FYiufGwiXnhHgDV9gK_VrwVf.q.n_MoJj3B4OMV5Lw42TXrYN_xGhwsnsyUPvQTy79LJ.twkY0IQ3culhr0osKxe0MvGIPSRcYDWH13TUS5YhrnIP731WRyEDZlPh2gPUXxNc1nRtr7Y-&v=2
)
[4] => Array
(
[fpms] => p_30345347=%7B%22loc%22%3A%7B%22id%22%3A1940330%2C%22city%22%3A%22Abu+Dhabi%22%2C%22state%22%3A%22%22%2C%22country%22%3A%22UAE%22%7D%7D
)
[5] => Array
(
[fpc] => d=SaiDIsbaISXJV8ztcJqpafzGA13Lsq0TPQ7HJOn_.yLYWvNZF75ELqLKTLekfVYxmFj0OxOH_thzdIa9UNQIiwYXt99qJ8HNsqpWubAPIFaO1o36VbPBUz9Qu0Rzgzh6Qh.rJQnhPnj1m3NMeFlpYZ7kpVAsjL88RMdcGP82RMUEENd9mWXC7SkuY_CIR76Ne3pEgotZlVDVMABYyxJbM4N4jqG5zkC23Gy8epD4JzxcUuTWDyUn.LZaIqX1Gn6Fcn_f6de3&v=2
)
[6] => Array
(
[fpps] => _page=%7B%22wsid%22%3A%2221445690%22%7D
)
[7] => Array
(
[fpt] => d=Zc7DH53Xe9za_cphyvUoTpFDnmIFF977Sv9yIyBJtqtpcN4aLM18CC3FKuMd6AMXylr7FJjRBtWkJYiIdmrER9MPUOFt22FcF8rNk8Lu_kQMbAEra9CnHEhP0N8DVz6iKlRji6wGv_.3pOxmx_7Td1bq2D4RtVTE93P1kVGFgxlSV7Vtdf8JUxoRTq3dMKZuNQD5vY76rjiXf64lrQ89ONTWEpCGE3MxGVHnegZ71MiuKmYPLxH.AdNFzgw_EoD5QFWyxBxC3GNq7CarXzwJ5D4Uoiw690kzihlRQ66UgGj6sAdIIB_haiXQ6pJ7Q_w86gen6FBolLLiIBrDaujASks1fNzrWOfSH7HDn3GfqcCycIXcJDw_Xb8eGBgJVZFK2yuM0BF68NOW.nkACke1I.ufHsJXrvZH51Pg4dh9hMIsqeI-&v=1
)
[8] => Array
(
[fpc_s] => d=jbVQS4TaISWRQmb4Qu6ANMqdtfYe_QawTKJ.rdl.9vdhjLe6UHD_z3Pvh2HhUHGn2i4oPThLzibGfAmid4zCCnYjxdTbby8pCY566kgiSjnvroDbRszWKfTL4j8Bew5x1VnLUqLfpKWUq2jwAOj1WdBhiSajBzp_hg.8q8O1M0XO.hd7YXRtm66BnbOtcTli3arG1nfT96JakB5i8cyNrUMl1m4czoVB7MqJDipKCfQ.19r98RG0dJELW.fFXfry0AApcU8cweMqTTIuks1LAeVRngCAX7eRfB0eknd5DOqTpZlrMTmW.JjNnbI-&v=2
)
)
[Vary] => Accept-Encoding
[Content-Type] => text/html;charset=utf-8
[Content-Encoding] => gzip
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
)