我正在编写一个与 Google PageRank 相关的脚本:输入一个关键字后,脚本会返回包含该关键字的网址,同时返回每个网址的 PageRank 和 Alexa 流量。
这是我的表单:
<!-- Search form: posts the keyword and both drop-down selections to post.php. -->
<form method = "post" action = "post.php">
    <input class="enter_key_word" type = "text" name = "key" placeholder = "Enter key words..." />
    <!-- A form control without a name attribute is never submitted, so each select carries one. -->
    <select class="drop-down" name="pagerank">
        <option value = "type"> Set page rank </option>
        <option value="1">1</option>
        <option value="2">2</option>
        <option value="3">3</option>
        <option value="4">4</option>
        <option value="5">5</option>
        <option value="6">6</option>
        <option value="7">7</option>
        <option value="8">8</option>
        <option value="9">9</option>
        <option value="10">10</option>
    </select>
    <select class="dropdown3" name="pages">
        <option value = "type"> How many google pages to check </option>
        <option value="1">1</option>
        <option value="2">2</option>
        <option value="3">3</option>
        <option value="4">4</option>
        <option value="5">5</option>
        <option value="6">6</option>
        <option value="7">7</option>
        <option value="8">8</option>
        <option value="9">9</option>
        <option value="10">10</option>
    </select>
    <input class="submit_button" type = "submit" value = "Search" name = "submit" />
</form>
这是表单提交的代码..
// Load the GoogleRankChecker class; fail immediately if the file is missing
// instead of hitting a "class not found" error on the next statement.
require_once("alexa.php");
$newGoogleRankChecker = new GoogleRankChecker();

// Keyword posted by the search form. Fall back to an empty string when the
// script is opened without a form submission or "key" is not a plain string.
$key = (isset($_POST['key']) && is_string($_POST['key'])) ? trim($_POST['key']) : '';
$newquery = $key;

// Proxy settings: a real boolean and a real array instead of the placeholder
// strings 'TRUE_OR_FALSE' / 'ARRAY_PROXIES' (the former is truthy, the latter
// is not iterable). When proxies are enabled, $arrayproxies maps CURLOPT_*
// constants to their values.
$useproxies = false;
$arrayproxies = array();

// Only start the (slow, remote) lookup when there is something to search for.
$googledata = null;
if($newquery !== '')
{
    $googledata = $newGoogleRankChecker->find($newquery, $useproxies, $arrayproxies);
}
这是脚本 'alexa.php',它根据上述表单中提交的关键字,显示匹配的网址及其 PageRank 和 Alexa 流量。
if(!class_exists('GoogleRankChecker'))
{
    /**
     * Fetches Google result pages for a keyword and, for every result URL found
     * in them, prints the URL together with its PageRank (looked up through
     * tools.digitalpoint.com) and its Alexa reach rank (looked up through
     * data.alexa.com).
     */
    class GoogleRankChecker
    {
        /** @var int First Google result page to fetch (1-based). */
        public $start;

        /** @var int Page bound; result offsets up to and including $end * 10 are fetched. */
        public $end;

        /**
         * @param int $start First result page to fetch (1-based).
         * @param int $end   Page bound, see find() for how it is applied.
         */
        public function __construct($start=1, $end=2)
        {
            $this->start = $start;
            $this->end = $end;
        }

        /**
         * Fetches the configured result pages for $keyword and echoes one HTML
         * line per result URL (URL, PageRank, Alexa reach rank).
         *
         * @param string $keyword   Search phrase; it is trimmed and URL-encoded here.
         * @param mixed  $useproxie Truthy to apply $proxies to the Google request.
         * @param mixed  $proxies   Array of curl option constant => value pairs.
         *
         * @return array|null Null when no fetched page failed. Otherwise the status of
         *                    the most recent failed page:
         *                    array("rank" => "zerox", "url" => <curl error message>) for a
         *                    transport error, or array("rank" => "zero", "url" => "") when
         *                    the response was empty or a block/captcha page.
         */
        public function find($keyword, $useproxie, $proxies)
        {
            // Browser-like User-Agent strings, rotated per result page (used on the
            // non-curl path only; _curl() sends its own fixed User-Agent).
            $userAgents = array(
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36',
                'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1',
            );

            // Defined up front so the method never returns an undefined variable
            // when every page succeeds or the loop does not run at all.
            $results = null;

            // Encode once, outside the loop, so characters such as "&" or "+" in
            // the keyword cannot corrupt the query string.
            $query = urlencode(trim($keyword));

            // NOTE(review): the bound is inclusive of offset end * 10, so the
            // defaults (1, 2) request three pages (offsets 0, 10, 20). Kept as-is
            // for backward compatibility - confirm whether $end was meant to be
            // the last page.
            $lastOffset = $this->end * 10;

            for($start = ($this->start - 1) * 10; $start <= $lastOffset; $start += 10)
            {
                $url = "https://www.google.com/search?ie=UTF-8&q=$query&start=$start&num=0";

                if($this->_isCurl())
                {
                    $data = $this->_curl($url, $useproxie, $proxies);
                }
                else
                {
                    // Rotate through the list so any offset maps to a valid entry
                    // (offsets 0, 10, 20 select the same agents as before).
                    $agent = $userAgents[abs((int) ($start / 10)) % count($userAgents)];
                    $options = array(
                        "http" => array(
                            "method" => "GET",
                            "header" => "Accept-language: en\r\n" .
                                        "Cookie: biztech=indonesia\r\n" .
                                        "User-Agent: ".$agent)
                    );
                    $data = @file_get_contents($url, false, stream_context_create($options));
                }

                if(is_array($data))
                {
                    // _curl() reports transport failures as an array.
                    $results = array("rank" => "zerox", "url" => $data['errmsg']);
                }
                elseif($data == false
                    || strpos($data, "To continue, please type the characters below") !== false
                    || strpos($data, "We're sorry") !== false)
                {
                    // Empty response or Google's captcha / block page.
                    $results = array("rank" => "zero", "url" => "");
                }
                else
                {
                    $this->_printResults($data);
                }

                // NOTE(review): without curl only the first page is processed;
                // this early return is kept from the original implementation.
                if($this->_isCurl() === false)
                {
                    return $results;
                }

                // Pause between Google requests; there is nothing to wait for
                // after the last page.
                if($start + 10 <= $lastOffset)
                {
                    sleep(rand(20,25));
                }
            }

            return $results;
        }

        /**
         * Extracts every result URL from a Google result page and echoes it with
         * its PageRank and Alexa reach rank.
         *
         * @param string $data HTML of one Google result page.
         */
        private function _printResults($data)
        {
            $pos = -1;
            while(($pos = stripos($data, '<cite class="vurls">', $pos + 1)) !== false)
            {
                $close = stripos($data, "</cite>", $pos);
                if($close === false)
                {
                    // Truncated markup: no complete <cite> element is left.
                    break;
                }

                // Plain-text URL: drop the markup and decode HTML entities so the
                // value can be re-encoded correctly for each context below.
                $link = trim(html_entity_decode(strip_tags(substr($data, $pos, $close - $pos)), ENT_QUOTES, 'UTF-8'));
                $encodedLink = urlencode($link);

                $pagerank = $this->_firstMatch(
                    '/<dd style="font-weight:bold">(.*?)<\/dd>/i',
                    $this->_get("https://tools.digitalpoint.com/pagerank?url=$encodedLink")
                );
                $alexarank = $this->_firstMatch(
                    '/<REACH RANK="([^"]*)"/i',
                    $this->_get("http://data.alexa.com/data?cli=10&dat=snbamz&url=$encodedLink")
                );

                // Everything printed here originates from remote pages, so it is
                // escaped before being written into the HTML output.
                $safeLink = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
                $safeRank = htmlspecialchars($pagerank, ENT_QUOTES, 'UTF-8');
                $safeAlexa = htmlspecialchars($alexarank, ENT_QUOTES, 'UTF-8');
                echo "URL - <a href = 'http://$safeLink'> $safeLink </a>, PAGERANK - $safeRank, TRAFFIC(Unique Visitors) - $safeAlexa <br />";
            }
        }

        /**
         * Returns the first capture group of $pattern in $subject, or an empty
         * string when the pattern does not match.
         *
         * @param string       $pattern PCRE pattern with at least one capture group.
         * @param string|false $subject Text to search; false is treated as empty.
         *
         * @return string
         */
        private function _firstMatch($pattern, $subject)
        {
            if(preg_match($pattern, (string) $subject, $match) === 1 && isset($match[1]))
            {
                return $match[1];
            }
            return '';
        }

        /**
         * Performs a plain GET request for the PageRank / Alexa lookups.
         *
         * @param string $url Fully built request URL.
         *
         * @return string|false Response body, or false when the request failed.
         */
        private function _get($url)
        {
            if(!$this->_isCurl())
            {
                // Keeps the lookups usable when the curl extension is missing.
                return @file_get_contents($url);
            }

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            // NOTE(review): certificate verification is disabled, as in the
            // original code; enable it once a CA bundle is configured.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            $output = curl_exec($ch);
            curl_close($ch);
            return $output;
        }

        /**
         * Tells whether the curl extension is available.
         *
         * @return bool
         */
        private function _isCurl()
        {
            return function_exists("curl_version");
        }

        /**
         * Fetches $url with curl, optionally applying caller-supplied options.
         *
         * @param string $url          URL to fetch.
         * @param mixed  $useproxie    Truthy to apply $arrayproxies.
         * @param mixed  $arrayproxies Array of curl option constant => value pairs;
         *                             anything that is not an array is ignored.
         *
         * @return string|array Response body on success, or
         *                      array("errno" => int, "errmsg" => string) on a curl error.
         */
        private function _curl($url, $useproxie, $arrayproxies)
        {
            $ch = curl_init($url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_HEADER, false);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36");
            curl_setopt($ch, CURLOPT_AUTOREFERER, true);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
            curl_setopt($ch, CURLOPT_TIMEOUT, 120);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
            // NOTE(review): certificate verification is disabled, as in the
            // original code; enable it once a CA bundle is configured.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            // CURLOPT_SSLVERSION is deliberately left unset: the previous value 3
            // forced SSLv3, which current TLS stacks refuse, so every HTTPS
            // request failed. libcurl now negotiates the protocol version.

            if($useproxie && is_array($arrayproxies))
            {
                foreach($arrayproxies as $param => $val)
                {
                    curl_setopt($ch, $param, $val);
                }
            }

            $content = curl_exec($ch);
            $errno = curl_errno($ch);
            $error = curl_error($ch);
            curl_close($ch);

            if($errno == 0)
            {
                return $content;
            }
            return array("errno" => $errno, "errmsg" => $error);
        }
    }
}
这是我使用像 'form' 这样的关键字时显示的输出示例。
URL - www.form.net.au/ , PAGERANK - 5, TRAFFIC(Unique Visitors) - 2253913
我有两个问题:
如何通过上面的下拉菜单指定 PageRank,使结果只返回 PageRank 等于所选值的网址?例如,如果我在下拉菜单中将 PageRank 设置为 2,则只返回 PageRank 为 2 的网址。
是否有现成的方法,可以让我指定从 Google 搜索结果的第几页返回结果?