如何从外部网站的网址获取元描述、标题和图像?我已经用 PHP 实现了这一点,但不知道如何在 Yii 控制器中使用它。我的代码如下:
/**
 * Download the body of a URL through cURL.
 *
 * Redirects are followed and response headers are excluded from the result.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function file_get_contents_curl($url)
{
    $handle = curl_init();

    // Same four options as before, applied in the same order in one call.
    curl_setopt_array($handle, array(
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_URL            => $url,
        CURLOPT_FOLLOWLOCATION => 1,
    ));

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// Fetch the page; file_get_contents_curl() hands back false when the request fails.
$html = file_get_contents_curl("http://www.example.com");

// Defaults so the output below never reads an undefined variable when a tag is absent.
$title = '';
$description = '';
$keywords = '';
$language = '';

// DOMDocument::loadHTML() rejects an empty document, so only parse a real body.
if (is_string($html) && $html !== '') {
    //parsing begins here:
    $doc = new DOMDocument();
    // "@" silences the libxml warnings raised for malformed real-world markup.
    @$doc->loadHTML($html);

    // A page may have no <title>; item(0) is null in that case.
    $titleNode = $doc->getElementsByTagName('title')->item(0);
    if ($titleNode !== null) {
        $title = $titleNode->nodeValue;
    }

    //get and display what you need:
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        // Attribute values are matched case-insensitively ("Description" is common).
        $name = strtolower($meta->getAttribute('name'));
        $content = $meta->getAttribute('content');

        if ($name == 'description') {
            $description = $content;
        }
        if ($name == 'keywords') {
            $keywords = $content;
        }
        // The language is carried as <meta name="language" content="...">,
        // like the other two tags, not as a "language" attribute.
        if ($name == 'language') {
            $language = $content;
        }
    }
}

// The values come from a remote page, so escape them before emitting HTML.
echo "Title: " . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . '<br/><br/>';
echo "Description: " . htmlspecialchars($description, ENT_QUOTES, 'UTF-8') . '<br/><br/>';
echo "Keywords: " . htmlspecialchars($keywords, ENT_QUOTES, 'UTF-8');
我是 Yii 的新手,任何帮助我都将不胜感激。
答案 0 :(得分:3)
我在您的代码基础上(做了少量修改)创建了以下文件。请将其保存为 protected/components/HttpDetails.php。
(注意:错误处理未实现 - 如果是http失败或其他原因)
/**
 * Fetches a remote page and extracts its title, meta description and
 * meta keywords.
 */
class HttpDetails {
    /**
     * Download the body of a URL through cURL, following redirects.
     *
     * @param string $url Address to request.
     * @return string|false Response body, or false when curl_exec() fails.
     */
    private static function file_get_contents_curl($url) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        $data = curl_exec($ch);
        curl_close($ch);
        return $data;
    }

    /**
     * Read the title, description and keywords of the page at $url.
     *
     * Each entry is the string 'Not set' when the page does not provide it,
     * or when the page could not be fetched at all.
     *
     * @param string $url Address of the page to inspect.
     * @return array Keys 'title', 'description' and 'keywords', in that order.
     */
    public static function getDetails($url) {
        $details = array(
            'title' => 'Not set',
            'description' => 'Not set',
            'keywords' => 'Not set',
        );

        $html = self::file_get_contents_curl($url);
        // A failed request yields false, and DOMDocument::loadHTML() rejects an
        // empty document, so report everything as missing instead of parsing.
        if (!is_string($html) || $html === '') {
            return $details;
        }

        //parsing begins here:
        $doc = new DOMDocument();
        // "@" silences the libxml warnings raised for malformed real-world markup.
        @$doc->loadHTML($html);

        // A page may have no <title>; item(0) is null in that case.
        $titleNode = $doc->getElementsByTagName('title')->item(0);
        if ($titleNode !== null) {
            $details['title'] = $titleNode->nodeValue;
        }

        foreach ($doc->getElementsByTagName('meta') as $meta) {
            // Attribute values are matched case-insensitively ("Description" is common).
            $name = strtolower($meta->getAttribute('name'));
            if ($name == 'description' || $name == 'keywords') {
                // When a tag is repeated, the last occurrence wins.
                $details[$name] = $meta->getAttribute('content');
            }
        }

        return $details;
    }
}
修改 protected\config\main.php 中的 import 数组,
使其包含 'application.components.HttpDetails':
...
// Fragment of the 'import' array in protected/config/main.php; the "..." lines
// stand for the existing entries, which stay as they are.
'import' => array(
...
// Entry for the HttpDetails class saved under protected/components.
'application.components.HttpDetails',
),
要读取某个页面的详细信息,请在任意控制器(或应用程序的其他位置)中执行以下代码:
// Page to inspect (written here without a scheme).
$url = "www.cnn.com";
// getDetails() returns an array with 'title', 'description' and 'keywords' keys;
// a value it could not find is reported as the string 'Not set'.
$details = HttpDetails::getDetails($url);
$title = $details['title'];
$description = $details['description'];
$keywords = $details['keywords'];
上面的代码原样经过测试,运行正常。如果遇到错误,请检查托管 Yii 的 PHP 环境中是否已安装 DOM / libxml 扩展。