我正在尝试使用cURL从本网站提取下载链接。
打开链接后会出现一个下载按钮,点击该按钮会调用一个 AJAX 脚本,该脚本从以下 URL 返回一些 JSON 数据(即下文代码中请求的 http://abelhas.pt/action/License/Download):
此 URL 需要两个 POST 参数:fileId(int)和 __RequestVerificationToken(str);此外还必须发送请求头 X-Requested-With: XMLHttpRequest。
这是我用来登录,并从项目页面中获取 __RequestVerificationToken 和 fileId 的脚本:
// Fetch the item page using the logged-in session cookie and scrape the two
// values the download endpoint expects: the anti-forgery token and the file id.
//
// Reads:   $url    - address of the item page
//          $cookie - raw cookie string of the authenticated session
// Returns: array('id' => string|null, 'token' => string|null); a value is
//          null when the page could not be loaded or the element is absent.
$options = array(
    'http' => array(
        'method' => "GET",
        'header' => "Accept-language: en\r\n" .
            "Cookie:".$cookie.";\r\n" . // see stream_context_create on php.net
            "User-Agent: Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.102011-10-16 20:23:10\r\n"
    )
);
$context = stream_context_create($options);
$html = file_get_html($url, false, $context);

$token = null;
$id = null;

// NOTE(review): file_get_html() presumably yields false on a failed fetch and
// find($selector, 0) yields null when nothing matches - confirm against the
// Simple HTML DOM version in use. Dereferencing either one is a fatal error,
// so both are guarded here.
if ($html) {
    $tokenInput = $html->find('input[name=__RequestVerificationToken]', 0);
    if ($tokenInput) {
        $token = $tokenInput->value;
    }

    $fileLink = $html->find('a.fileCopyAction', 0);
    if ($fileLink) {
        $id = $fileLink->rel;
    }
}

return array(
    'id' => $id,
    'token' => $token
);
接下来是我用来提取下载链接的代码(它无法正常工作):
// POST the file id and anti-forgery token to the download endpoint as an AJAX
// request and collect the response.
//
// Reads:   $file_id - id of the file scraped from the item page
//          $token   - __RequestVerificationToken scraped from the same page
//          $cookie  - raw cookie string of the authenticated session
// Returns: array with keys
//          'content' - response body, or false when the transfer failed
//          'url'     - last effective URL reported by cURL
//          'errno'   - cURL error number (0 on success)
//          'errmsg'  - cURL error message ('' on success)
$res = array();

// Build the form body with http_build_query() so that characters such as
// '+', '/' and '=' in the token are percent-encoded. Concatenating the raw
// token lets the server decode '+' as a space and reject or mishandle it.
$postBody = http_build_query(
    array(
        'fileId' => $file_id,
        '__RequestVerificationToken' => $token,
    ),
    '',
    '&' // fixed separator, independent of the arg_separator.output ini setting
);

// NOTE(review): the token page is fetched with a browser-like User-Agent while
// this request identifies as "spider"; if the site ties the session to the
// User-Agent, the two requests may need to send the same value - confirm.
$options = array(
    CURLOPT_RETURNTRANSFER => true,      // return the body instead of printing it
    CURLOPT_HEADER         => false,     // do not include response headers in the body
    CURLOPT_FOLLOWLOCATION => true,      // follow redirects
    CURLOPT_USERAGENT      => "spider",  // who am i
    CURLOPT_AUTOREFERER    => true,      // set referer on redirect
    CURLOPT_CONNECTTIMEOUT => 120,       // timeout on connect
    CURLOPT_TIMEOUT        => 120,       // timeout on response
    CURLOPT_MAXREDIRS      => 10,        // stop after 10 redirects
    CURLOPT_POST           => true,      // make the request method explicit
    CURLOPT_POSTFIELDS     => $postBody,
    CURLOPT_HTTPHEADER     => array(
        // A single Accept header; the original sent two conflicting ones.
        'Accept: */*',
        'Accept-Language: en-US,en;q=0.8',
        'Cache-Control: max-age=0',
        'Connection: keep-alive',
        'Content-Type: application/x-www-form-urlencoded',
        'Cookie: '.$cookie,
        // No manual Host header: cURL derives it from the request URL.
        'Origin: http://abelhas.pt',
        'X-Requested-With: XMLHttpRequest',
    ),
);

$ch = curl_init("http://abelhas.pt/action/License/Download");
curl_setopt_array($ch, $options);
$content = curl_exec($ch);
$err     = curl_errno($ch);
$errmsg  = curl_error($ch);
$header  = curl_getinfo($ch);
curl_close($ch);

$res['content'] = $content;
$res['url']     = $header['url'];
// Expose the transfer status so callers can tell a failed request from an
// empty response.
$res['errno']   = $err;
$res['errmsg']  = $errmsg;
return $res;
这段代码确实返回了 JSON 数据,但与用 Chrome / Firefox 检查该请求时网站返回的内容并不相同。
所以问题仍然是:应该怎样做,才能得到与自己用浏览器访问时相同的结果?
提前致谢。