我有一个要求,我必须在谷歌上反向查找图像并提取打印在“此图像的最佳猜测:”标题上的名称。 不,我对网上现有的卷曲代码做了一些修改,并且到目前为止:
<?php
function fetch_google($terms="sample search",$numpages=1,$user_agent='Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0')
{
$searched="";
for($i=0;$i<=$numpages;$i++)
{
$ch = curl_init();
$url="http://www.google.com/searchbyimage?hl=en&image_url=".urlencode($terms);
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt ($ch, CURLOPT_HEADER, 0);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_REFERER, 'http://www.google.com/');
curl_setopt ($ch,CURLOPT_CONNECTTIMEOUT,120);
curl_setopt ($ch,CURLOPT_TIMEOUT,120);
curl_setopt ($ch,CURLOPT_MAXREDIRS,10);
curl_setopt ($ch,CURLOPT_COOKIEFILE,"cookie.txt");
curl_setopt ($ch,CURLOPT_COOKIEJAR,"cookie.txt");
$searched=$searched.curl_exec ($ch);
curl_close ($ch);
}
$xml = new DOMDocument();
@$xml->loadHTML($searched);
foreach($xml->getElementsByTagName('div') as $div)
{
if(strpos($div->nodeValue,"Best guess for this image:"))
return $div->nodeValue;
}
}
$content = fetch_google("http://media.il.edmunds-media.com/aston-martin/as/03/de/aston-martin_front_03-de-as_1_276.jpg",1);
echo $content."<br>";
?>
但它给了我很多文字,我无法得到它的确切div。 因为'a'没有类属性,所以我必须这样做。
请帮忙!
答案 0 :(得分:3)
您可以改用preg_match。
当您从CURL获取HTML时,您可以使用Regex来匹配文本:
function fetch_google($terms="sample search",$numpages=1,$user_agent='Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0')
{
$searched="";
for($i=0;$i<=$numpages;$i++)
{
$ch = curl_init();
$url="http://www.google.com/searchbyimage?hl=en&image_url=".urlencode($terms);
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt ($ch, CURLOPT_HEADER, 0);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_REFERER, 'http://www.google.com/');
curl_setopt ($ch,CURLOPT_CONNECTTIMEOUT,120);
curl_setopt ($ch,CURLOPT_TIMEOUT,120);
curl_setopt ($ch,CURLOPT_MAXREDIRS,10);
curl_setopt ($ch,CURLOPT_COOKIEFILE,"cookie.txt");
curl_setopt ($ch,CURLOPT_COOKIEJAR,"cookie.txt");
$searched=$searched.curl_exec ($ch);
curl_close ($ch);
}
$matches = array();
preg_match('/Best guess for this image:[^<]+<a[^>]+>([^<]+)/', $searched, $matches);
return (count($matches) > 1 ? $matches[1] : false);
}
答案 1 :(得分:2)
如果要使用DOMDocument,可以通过以下修改获取值。
<?php
function fetch_google($terms="sample search",$numpages=1,$user_agent='Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0')
{
$searched="";
for($i=0;$i<=$numpages;$i++)
{
$ch = curl_init();
$url="http://www.google.com/searchbyimage?hl=en&image_url=".urlencode($terms);
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt ($ch, CURLOPT_HEADER, 0);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_REFERER, 'http://www.google.com/');
curl_setopt ($ch,CURLOPT_CONNECTTIMEOUT,120);
curl_setopt ($ch,CURLOPT_TIMEOUT,120);
curl_setopt ($ch,CURLOPT_MAXREDIRS,10);
curl_setopt ($ch,CURLOPT_COOKIEFILE,"cookie.txt");
curl_setopt ($ch,CURLOPT_COOKIEJAR,"cookie.txt");
$searched=$searched.curl_exec ($ch);
curl_close ($ch);
}
$xml = new DOMDocument();
@$xml->loadHTML($searched);
if(true == ($topblock = $xml->getElementByID('topstuff')))
{
foreach($topblock->getElementsByTagName('div') as $div){
if(strstr(strtolower($div->nodeValue), "guess")){
foreach($div->getElementsByTagName('a') as $a){
$last = $a->nodeValue;
}
}
}
}
return $last;
}
$content = fetch_google($_GET['img'],1);
echo $content."<br>";
?>