我需要解析网站 http://klassprof.org/,但它启用了 Cloudflare 的反 DDoS 保护。我该如何解决这个问题?我从 Chrome 浏览器复制了 Cookie,并在我的 curl 函数中进行了设置,但没有效果。我的函数如下:
/**
 * Download a page with cURL and report the outcome.
 *
 * The request follows up to 10 redirects, waits at most 120 seconds for
 * both connecting and the whole transfer, skips TLS peer/host verification
 * and sends a fixed User-Agent and Cookie string.
 *
 * @param string $url Address to request.
 * @return array The curl_getinfo() array extended with the keys 'errno',
 *               'errmsg' and 'content' (whatever curl_exec() returned).
 */
function get_web_page($url)
{
    $userAgent = "Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.14";
    $cookieLine = "PHPSESSID=f9sofurv85om9qq91rpt3rgkh2;__cfduid=db1ddd834ca4ec4c1ebdff85f5b00f7e51492411567;cf_clearance=33fc7099c3c9c000dc60be8e546e09182bb9df68-1492411571-1800";

    $handle = curl_init($url);

    // A proxy (CURLOPT_PROXY) and a cookie file (CURLOPT_COOKIEFILE) were
    // tried earlier and remain disabled.
    curl_setopt_array($handle, array(
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_USERAGENT      => $userAgent,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT        => 120,
        CURLOPT_MAXREDIRS      => 10,
        CURLOPT_SSL_VERIFYPEER => FALSE,
        CURLOPT_SSL_VERIFYHOST => FALSE,
        CURLOPT_COOKIE         => $cookieLine,
    ));

    $body = curl_exec($handle);

    // Collect transfer details and error state before the handle is released.
    $result = curl_getinfo($handle);
    $result['errno'] = curl_errno($handle);
    $result['errmsg'] = curl_error($handle);
    $result['content'] = $body;

    curl_close($handle);

    return $result;
}
答案 0 :(得分:0)
下面的代码有效(与问题中的代码相比,它额外发送了一组与浏览器一致的请求头,包括 User-Agent 和 Cookie):
/**
 * Download a page with cURL, sending browser-like request headers together
 * with the cookies copied from a browser session.
 *
 * @param string $url Address to request.
 * @return array The curl_getinfo() array extended with the keys 'errno',
 *               'errmsg' and 'content' (whatever curl_exec() returned).
 *               If the cURL handle cannot be created, only those three keys
 *               are present and 'content' is false.
 */
function get_web_page($url){
    // Single source of truth for the User-Agent. Previously an Opera string
    // was passed to CURLOPT_USERAGENT and then replaced on the wire by a
    // 'User-Agent:' entry in CURLOPT_HTTPHEADER, which left a dead, misleading
    // value in the code.
    // NOTE(review): presumably this has to match the browser the cookies
    // below were copied from - confirm against the protection in use.
    $uagent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36";

    $ch = curl_init( $url );
    if ($ch === false) {
        // Without a handle none of the calls below can work; report the
        // failure using the same keys callers already read.
        return [
            'errno' => CURLE_FAILED_INIT,
            'errmsg' => 'curl_init() failed',
            'content' => false,
        ];
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    curl_setopt($ch, CURLOPT_USERAGENT, $uagent);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);

    // NOTE(review): certificate and host-name verification are switched off,
    // as before; enable both before using this outside of experiments.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);

    // Let cURL generate Accept-Encoding and decode the response itself.
    // The former hand-written header also advertised "sdch", which cURL
    // cannot decode, so a server honouring it would yield an unreadable body.
    curl_setopt($ch, CURLOPT_ENCODING, "gzip, deflate");

    // No explicit Host header: cURL derives it from $url, so the function
    // also works for addresses other than klassprof.org.
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language:en-US,en;q=0.8,vi;q=0.6,ar;q=0.4,zh-CN;q=0.2,zh;q=0.2,de;q=0.2',
        'Cache-Control:max-age=0',
        'Connection:keep-alive',
        'Cookie: __cfduid=d8994a468f6c05474b2624a246b28e1a91492417194; PHPSESSID=pcfpgp75jhusb216a762p7ofe7; cf_clearance=4e7defa3af691c33e6c567fff872c8e58aa1583a-1492423220-1800',
        'Upgrade-Insecure-Requests:1'
    ]);

    $content = curl_exec( $ch );
    $err = curl_errno( $ch );
    $errmsg = curl_error( $ch );
    $header = curl_getinfo( $ch );
    curl_close( $ch );

    $header['errno'] = $err;
    $header['errmsg'] = $errmsg;
    $header['content'] = $content;
    return $header;
}
// Fetch the site's front page and print the response body.
$page = get_web_page('http://klassprof.org/');
echo $page['content'];
输出:
https://gist.github.com/nguyenbathanh/6710dd78a6d870f8a41769f04f347e68