我需要在 Google 上检查一组域名针对某些关键字的排名位置。
示例:
Domain1.com
Domain2.com
这是我的代码:
<?php
///////// GOOGLE KEYWORD RANK - start
// Issues an HTTP GET for the given URL on the supplied cURL handle (sharing one cookie file) and returns the response.
/**
 * Configures the given cURL handle for a plain GET request and executes it.
 *
 * The same file is used both to read cookies from and to store cookies in,
 * so consecutive calls with the same $ckfile share one cookie jar.
 * No User-Agent or Referer header is set explicitly.
 *
 * @param mixed  $ch         cURL handle obtained from curl_init().
 * @param string $ckfile     Path of the cookie file to read from and write to.
 * @param string $google_url URL to fetch.
 * @return mixed Whatever curl_exec() returns: the response body as a string
 *               (CURLOPT_RETURNTRANSFER is enabled), or false on failure.
 */
function getmethod_setCurloption($ch,$ckfile,$google_url)
{
    // Options are applied one by one, in this order.
    $curlOptions = array(
        CURLOPT_COOKIEFILE     => $ckfile,      // file the cookies are read from
        CURLOPT_URL            => $google_url,  // address to request
        CURLOPT_COOKIEJAR      => $ckfile,      // file the cookies are saved to when the handle closes
        CURLOPT_HEADER         => 0,            // keep response headers out of the output
        CURLOPT_AUTOREFERER    => 1,            // set Referer automatically when following a redirect
        CURLOPT_RETURNTRANSFER => 1,            // hand the body back instead of printing it
        CURLOPT_POST           => 0,            // regular GET, not a form POST
        CURLOPT_FOLLOWLOCATION => 1,            // follow Location: redirects
        CURLOPT_HTTPHEADER     => array ("Accept: text/plain"),
    );

    foreach ($curlOptions as $optionId => $optionValue)
    {
        curl_setopt($ch, $optionId, $optionValue);
    }

    return curl_exec($ch);
}
/**
 * Removes every "www." occurrence from a URL or host name.
 *
 * Strings that merely contain "www" without the trailing dot
 * (e.g. "awwwards.com") are returned unchanged, and URLs that embed a second
 * "www." (e.g. a redirect URL carrying another URL in its query string) keep
 * all of their remaining text.
 *
 * @param string $urlstring URL or host name to normalise.
 * @return string The input with all "www." substrings removed.
 */
function removew($urlstring) // function for removed www
{
    return str_replace("www.", "", $urlstring);
}
/**
 * Returns the current date as six digits: day, month, two-digit year.
 *
 * The date is formatted with dashes first and the dashes are then dropped.
 *
 * @return string Today's date without separators.
 */
function getTodayDate()
{
    $dateParts = explode("-", date("d-m-y"));

    return implode("", $dateParts);
}
/**
 * Extracts the text found between two delimiters.
 *
 * Both delimiters are matched case-insensitively. The search for $end begins
 * right after the first occurrence of $start.
 *
 * @param string $string Text to search in.
 * @param string $start  Delimiter that precedes the wanted portion.
 * @param string $end    Delimiter that follows the wanted portion.
 * @return string The trimmed text between the delimiters, or an empty string
 *                when either delimiter is not present.
 */
function extract_string($string, $start, $end)// extract portion of string between two delimiters
{
    $startPos = stripos($string, $start);
    if ($startPos === false)
    {
        // Without this guard "false" would be used as offset 0 and an
        // unrelated slice of the input would be returned.
        return '';
    }

    $afterStart = substr($string, $startPos + strlen($start));

    $endPos = stripos($afterStart, $end);
    if ($endPos === false)
    {
        return '';
    }

    return trim(substr($afterStart, 0, $endPos)); // remove surrounding whitespace
}
/**
 * Looks up where a domain first appears in Google's results for a keyword.
 *
 * Result pages are fetched one at a time (the "start" offset grows by 10 per
 * page, with a random 2-5 second pause between requests). On every page the
 * result links are scanned in order; the first link whose URL contains
 * $filestring ends the search. "www." is ignored on both sides of the
 * comparison.
 *
 * The search stops without a match when $position links have been examined,
 * when a page does not contain the text "Next", or when a request returns no
 * data (for example because the request failed).
 *
 * @param string     $keyword    Search phrase; it is URL-encoded before use.
 * @param string     $filestring Domain (or URL fragment) to look for.
 * @param int|string $position   Maximum number of result links to examine.
 * @param string     $searchname Google host without "www.", e.g. "google.ro".
 * @return array array($filestring, $url, $totalcount, $pagination) for the
 *               first matching link, where $totalcount is the 1-based index
 *               of that link and $pagination the 1-based page number;
 *               an empty array when nothing matched.
 */
function googlerank($keyword,$filestring,$position,$searchname)//function for google rank
{
    if(!$ch = curl_init()) //Check for error while initializing cURL session
    {
        echo "Could not initialize cURL session.\n";
        die;
    }

    $searchDetails = array();               // stays empty when the domain is not found
    $filestring = removew($filestring);     // normalise once instead of once per link
    $pagination = 1;                        // current result page (1-based)
    $no_of_pagelink = 0;                    // "start" offset of the current page
    $totalcount = 1;                        // 1-based index of the link being examined
    $ckfile = tempnam("/tmp", "CURLCOOKIE"); // cookie file shared by all requests

    // Every request uses hl=en: pagination is detected through the English
    // word "Next", and mixing interface languages between pages is inconsistent.
    $searchUrl = "http://www.".$searchname."/search?hl=en&q=".urlencode($keyword);
    $data = getmethod_setCurloption($ch,$ckfile,$searchUrl);

    $finished = false;
    while (!$finished)
    {
        // A failed request yields false and some responses are empty; there is
        // nothing to parse then, and DOMDocument::loadHTML() rejects an empty
        // string on PHP 8.
        if (!is_string($data) || $data === '')
        {
            break;
        }

        $dom = new DOMDocument();
        @$dom->loadHTML($data); // real-world markup triggers parser warnings; they are irrelevant here
        $xpath = new DOMXPath($dom);
        $hrefs = $xpath->evaluate("/html/body//li[@class='g']//h3[@class='r']//a");

        foreach ($hrefs as $href)
        {
            $url = removew($href->getAttribute('href'));
            if (strpos($url, $filestring) !== false)
            {
                $searchDetails = array($filestring,$url,$totalcount,$pagination);
                $finished = true;
                break;
            }
            if ($totalcount == $position)
            {
                // Examined as many links as requested without a match.
                $finished = true;
                break;
            }
            $totalcount++;
        }

        if ($finished || strpos($data, "Next") === false)
        {
            break; // match found, limit reached, or no further page
        }

        sleep(rand(2,5)); // pause between requests
        $no_of_pagelink = $no_of_pagelink + 10;
        $pagination++;
        flush();
        $searchUrl = "http://www.".$searchname."/search?hl=en&q=".urlencode($keyword)."&start=".$no_of_pagelink."&sa=N";
        $data = getmethod_setCurloption($ch,$ckfile,$searchUrl);
    }

    // Release the handle first: cURL writes the cookie jar when the handle is
    // closed, which would otherwise recreate the file after it is deleted.
    unset($ch);
    if (is_string($ckfile) && is_file($ckfile))
    {
        unlink($ckfile);
    }

    return $searchDetails;
}
/**
 * Returns the Google position of a domain for a keyword.
 *
 * Searches google.ro and examines at most the first 100 result links.
 *
 * @param string $dom Domain to look for in the results.
 * @param string $key Keyword to search for.
 * @return int 1-based position of the first result link containing the
 *             domain, or 0 when the domain was not found.
 */
function getPosition($dom,$key){
    $position = '100';          // stop after this many result links
    $searchname = 'google.ro';  // Google host that is queried

    $siteDetails = googlerank($key, $dom, $position, $searchname);

    // googlerank() yields array(domain, url, position, page) on a match.
    // Anything else (no value, empty array) means "not found"; check before
    // indexing so that a missing result is never dereferenced or counted.
    if (!is_array($siteDetails) || !isset($siteDetails[2]))
    {
        return 0;
    }

    return $siteDetails[2];
}
我这样调用我的函数:$position = getPosition(DOMAIN_NAME, KEYWORD); 它可以工作,但有时几乎立即返回 0。我有一些网站有 10 个关键字,我的脚本只对前 4-5 个关键字返回正确的位置,之后所有关键字都返回 0。
我该如何解决?或者我可以在哪里找到另一个脚本?
答案 0 :(得分:1)
我怀疑这是因为谷歌识别出您的搜索速度太快,不像是人类在操作。如果您把 Google 返回的页面内容打印出来,我想您就会看到这一点。
尝试使用 sleep(_SOME_NUMBER_);,其中 _SOME_NUMBER_ 是您希望脚本在每次搜索之间暂停的秒数。我会先从 sleep(60); 开始尝试。
根据我自己的经验和记忆,这是未经测试的。
最佳, 添
P.S. 如果这能解决您的问题,请告诉我。然后请把修正后可以正常工作的代码重新发布出来,供后来的人参考。