我想索引几个网站,以便将其数据实现到我自己的框架中。有些网站使用像bit.ly这样的服务来创建较小的网址,这非常不方便。我想使用file_get_contents来检索实际的url作为它的内容。
我该怎么做?
答案 0 :(得分:0)
@Renaud的功能
function get_web_page( $url ) {
$res = array();
$options = array(
CURLOPT_RETURNTRANSFER => true, // return web page
CURLOPT_HEADER => false, // do not return headers
CURLOPT_FOLLOWLOCATION => true, // follow redirects
CURLOPT_USERAGENT => "spider", // who am i
CURLOPT_AUTOREFERER => true, // set referer on redirect
CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
CURLOPT_TIMEOUT => 120, // timeout on response
CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
);
$ch = curl_init( $url );
curl_setopt_array( $ch, $options );
$content = curl_exec( $ch );
$err = curl_errno( $ch );
$errmsg = curl_error( $ch );
$header = curl_getinfo( $ch );
curl_close( $ch );
$res['content'] = $content;
$res['url'] = $header['url'];
return $res;
}
print_r(get_web_page("http://google.com/"));