在 Yii 框架中获取外部站点的标题、元描述和图像

时间:2014-04-19 07:37:58

标签: php yii

如何根据外部网站的 URL 获取其元描述、标题和图像?我已经用纯 PHP 实现了这一功能,但不知道如何在 Yii 控制器中使用它。我的代码如下:

/**
 * Fetches the body of a URL with cURL, following HTTP redirects.
 *
 * Response headers are not included in the returned data. The transfer is
 * bounded by connect/total timeouts and a redirect cap so that a stalled
 * host or a redirect loop cannot block the caller indefinitely.
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function file_get_contents_curl($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // No handle could be created; report failure the same way curl_exec() does.
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Bound the request: at most 5 redirects, 10s to connect, 30s overall.
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}

// Download the page, then pull out its <title> and selected <meta> tags.
$html = file_get_contents_curl("http://www.example.com");

// Defaults keep the output below well-defined when the download fails or a
// tag is missing from the page.
$title = '';
$description = '';
$keywords = '';
$language = '';

// Only parse when a non-empty body was returned; loadHTML() rejects an empty
// source and curl_exec() yields false on failure.
if (is_string($html) && $html !== '') {
    // Collect markup warnings internally instead of silencing them with "@".
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    // The first <title> element, if the document has one.
    $titleNode = $doc->getElementsByTagName('title')->item(0);
    if ($titleNode !== null) {
        $title = $titleNode->nodeValue;
    }

    foreach ($doc->getElementsByTagName('meta') as $meta) {
        // Compare names case-insensitively so "Description" is matched too.
        $name = strtolower($meta->getAttribute('name'));
        $content = $meta->getAttribute('content');

        if ($name === 'description') {
            $description = $content;
        } elseif ($name === 'keywords') {
            $keywords = $content;
        } elseif ($name === 'language') {
            // The original test ended in a stray ";" and compared a
            // "language" attribute with itself, so it ran for every tag.
            $language = $content;
        }
    }
}

// The values come from a remote page, so escape them before emitting HTML.
echo 'Title: ' . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . '<br/><br/>';
echo 'Description: ' . htmlspecialchars($description, ENT_QUOTES, 'UTF-8') . '<br/><br/>';
echo 'Keywords: ' . htmlspecialchars($keywords, ENT_QUOTES, 'UTF-8');

我是 Yii 的新手,任何帮助都将不胜感激。

1 个答案:

答案 0 :(得分:3)

我使用您的代码(做了一些小改动)创建了以下文件。请将其保存为 protected/components/HttpDetails.php(注意:未实现错误处理,例如 HTTP 请求失败或其他异常情况)。

/**
 * Fetches a web page and extracts its <title> together with the
 * "description" and "keywords" <meta> tags.
 *
 * NOTE: the URL is handed to cURL without validation; callers passing
 * user-supplied URLs should validate them first.
 */
class HttpDetails {

  /** Placeholder reported for any detail that could not be determined. */
  const NOT_SET = 'Not set';

  /**
   * Downloads the body of a URL with cURL, following HTTP redirects.
   *
   * @param string $url Address to fetch.
   * @return string|false The response body, or false when the handle cannot
   *                      be created or the transfer fails.
   */
  private static function file_get_contents_curl($url) {
    $ch = curl_init();
    if ($ch === false) {
      return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Bound the request: at most 5 redirects, 10s to connect, 30s overall.
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
  }

  /**
   * Returns the title, description and keywords of the page at $url.
   *
   * Every entry falls back to 'Not set' when the page cannot be fetched or
   * parsed, or when the corresponding tag is absent. If a meta name occurs
   * more than once, the last occurrence wins.
   *
   * @param string $url Address of the page to inspect.
   * @return array Map with the keys 'title', 'description' and 'keywords'.
   */
  public static function getDetails($url) {
    $details = array(
            'title' => self::NOT_SET,
            'description' => self::NOT_SET,
            'keywords' => self::NOT_SET,
    );

    $html = self::file_get_contents_curl($url);
    // A failed transfer yields false and loadHTML() rejects an empty source,
    // so there is nothing to parse in either case.
    if (!is_string($html) || $html === '') {
      return $details;
    }

    // Collect markup warnings internally instead of silencing them with "@".
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if (!$loaded) {
      return $details;
    }

    // The first <title> element, if the document has one.
    $titleNode = $doc->getElementsByTagName('title')->item(0);
    if ($titleNode !== null) {
      $details['title'] = $titleNode->nodeValue;
    }

    foreach ($doc->getElementsByTagName('meta') as $meta) {
      // Compare names case-insensitively so "Description" is matched too.
      $name = strtolower($meta->getAttribute('name'));
      if ($name === 'description' || $name === 'keywords') {
        $details[$name] = $meta->getAttribute('content');
      }
    }

    return $details;
  }

}

修改 protected/config/main.php 中的 import 数组,使其包含 'application.components.HttpDetails':

...
'import' => array(
    ...
    'application.components.HttpDetails',
),

要读取某个页面的详细信息,请在任意控制器(或应用程序的其他位置)中执行以下操作:

// Example: look up the details of a page through the HttpDetails component.
// NOTE(review): the URL has no scheme; presumably cURL falls back to HTTP — confirm.
$url = "www.cnn.com";
// getDetails() returns an array with the keys 'title', 'description' and 'keywords'.
$details = HttpDetails::getDetails($url);

// Each entry holds the extracted text, or 'Not set' when it was not found.
$title = $details['title'];
$description = $details['description'];
$keywords = $details['keywords'];

上面的代码已经过测试,运行正常。如果遇到错误,请检查运行 Yii 的 PHP 环境中是否已启用 DOM / libxml 扩展。