我的项目去年运作良好,但现在同一个项目运行不正常我没有改变任何代码,但现在它没有从谷歌中提取链接。谷歌是否已经更改了从中提取链接的任何设置.. 我的代码如下
<?php
if(isset($_POST['operation']))
{
$op = $_POST['operation'];
$start = time();
if($op == 'search_crawler')
{
if(isset($_POST['text']))
{
include_once('mysqllibrary.php');
mysqlStart();
$start = time();
$text = $_POST['text'];
//Get the words from the text
$words = explode(" ",$text);
$query = "SELECT t1.url url,t1.content content,t2.number_of_clicks number_of_clicks FROM tbllinks t1,tblclicks t2 WHERE t1.id=t2.lid ORDER by t2.number_of_clicks DESC";
$res = searchDatabase($query);
$found = 0;
if(sizeof($res) > 0)
{
for($count=0;$count<sizeof($res);$count++)
{
$content = $res[$count]['content'];
$content_count = 0;
$url = $res[$count]['url'];
for($count2=0;$count2<sizeof($words);$count2++)
{
if(strstr(strtolower($content),strtolower($words[$count2])) === false)
{
}
else
{
$content_count = $content_count + 1;
$found = 1;
}
}
if($content_count > 0)
{
echo "<a href='add_user_url.php?user_id=$user_id&url=$url'>$url</a><br/>";
}
}
}
$end = time();
if($found == 1)
{
echo "<hr/>Time needed for output from crawler:" . ($end-$start) . " ms<hr/>";
}
}
}
else if($op == 'search_crawler2')
{
if(isset($_POST['text']))
{
include_once('simple_html_dom.php');
$text = $text . " audio video images";
$text = str_replace(' ','+',$_POST['text']);
$user_id = $_POST['user_id'];
$file_name = 'test.txt';
$ch = curl_init("https://www.google.com/search?q=" . $text);
$fp = fopen($file_name, "w");
$start = time();
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_exec($ch);
curl_close($ch);
fclose($fp);
$html = file_get_html($file_name);
// Find all links
foreach($html->find('a') as $element)
{
$url = $element->href;
$index = strpos($url,'q=https://');
if($index > 0)
{
$index2 = strpos($url,'webcache');
if($index2 === false)
{
$index2 = strpos($url,'sa=U');
$url = substr($element->href,$index+2,$index2-$index-3);
echo "<a href='add_user_url.php?user_id=$user_id&url=$url'>$url</a>";
echo '<br/>';
}
}
}
$end = time();
echo "<hr/>Time needed for search output:" . ($end-$start) . " ms<hr/>";
}
}
else if($op == 'search')
{
if(isset($_POST['text']))
{
include_once('simple_html_dom.php');
$text = $text . " audio video images";
$text = str_replace(' ','+',$_POST['text']);
$user_id = $_POST['user_id'];
$file_name = 'test.txt';
//$ch = curl_init("https://in.yahoo.com/search?q=" . $text);
$ch = curl_init("https://www.google.com/search?q=" . $text);
$fp = fopen($file_name, "w");
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
sleep(4);
curl_exec($ch);
curl_close($ch);
fclose($fp);
$html = file_get_html($file_name);
// Find all links
foreach($html->find('a') as $element) //---this is line no 130
{
$url = $element->href;
$index = strpos($url,'q=https://');
if($index > 0)
{
$index2 = strpos($url,'webcache');
if($index2 === false)
{
$index2 = strpos($url,'sa=U');
$url = substr($element->href,$index+2,$index2-$index-3);
echo "<a href='$url'>$url</a>";
echo '<hr/>';
}
}
}
$end = time();
echo "<hr/>Time needed for search output:" . ($end-$start) . " ms<hr/>";
}
}
}
?>
在本地主机上运行代码时,显示错误 -
( ! ) Fatal error: Call to a member function find() on a non-object in
C:\wamp\www\crawler_based_search_engine\ajax_requests.php on line 130
# Time Memory Function Location
1 0.0151 162928 {main}( ) ..\ajax_requests.php:0
由于此错误,我的程序没有从谷歌
获取链接