DOMDocument::getElementsByTagName 在 PHP 中无法获取 channel 元素

时间:2012-02-23 21:18:36

标签: php xml xml-parsing

我在用 DOMDocument 读取 RSS 提要时遇到了一个奇怪的问题。如果读取像 http://rss.slashdot.org/Slashdot/slashdot 这样的 RSS 提要,一切正常;但我真正想解析的这个 RSS 提要却出了问题:http://www.ryanhache.com/feed

代码似乎找不到 channel 标签,因此也就无法遍历其中的内容。我接手的函数如下,通过 RSS_Retrieve($url) 调用。是这些函数里遗漏了什么,还是我读取的 Feed 本身有问题?

/**
 * Extract the title, link, publication date and description of an RSS node.
 *
 * Only DIRECT child elements of $item are inspected, and they are matched by
 * their full node name. getElementsByTagName() searches every descendant, so
 * for a <channel> it could return values belonging to the first <item> (for
 * example its <pubDate>) and may also match namespaced elements such as
 * <atom:link>, which carry no text at all.
 *
 * The whole text content of the matched element is used rather than only its
 * first child node, so values made of several nodes (whitespace followed by a
 * CDATA section, for instance) are returned in full.
 *
 * @param DOMNode $item The <channel> or <item> element to read.
 * @param mixed   $type Caller-defined marker stored unchanged under "type".
 *
 * @return array Keys "title", "link", "date", "description" (string, or null
 *               when the element is absent) followed by "type".
 */
function RSS_Tags($item, $type)
{
    // Result key => RSS element name.
    $fields = array(
        "title" => "title",
        "link" => "link",
        "date" => "pubDate",
        "description" => "description",
    );

    // Pre-fill so every key exists, in a stable order, even when the feed
    // omits the corresponding element.
    $y = array();
    foreach ($fields as $key => $tag) {
        $y[$key] = null;
    }

    foreach ($item->childNodes as $child) {
        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }

        // nodeName includes any namespace prefix, so "atom:link" does not
        // collide with the plain RSS "link" element.
        $key = array_search($child->nodeName, $fields, true);

        // Keep the first occurrence only.
        if ($key !== false && $y[$key] === null) {
            $y[$key] = $child->textContent;
        }
    }

    $y["type"] = $type;

    return $y;
}

/**
 * Append one channel and all of its articles to the global $RSS_Content list.
 *
 * The channel's own entry is pushed first with type 0; every <item> element
 * found beneath the channel follows with type 1.
 *
 * @param DOMElement $channel The <channel> element to process.
 *
 * @return void
 */
function RSS_Channel($channel)
{
    global $RSS_Content;

    // Channel header entry (type 0).
    array_push($RSS_Content, RSS_Tags($channel, 0));

    // One entry per article (type 1), in document order.
    foreach ($channel->getElementsByTagName("item") as $article) {
        array_push($RSS_Content, RSS_Tags($article, 1));
    }
}

/**
 * Load an RSS feed and collect its channels and articles.
 *
 * The global $RSS_Content is reset to an empty array and then filled by
 * RSS_Channel() for every <channel> element in the document. The same array
 * is also returned so callers do not have to rely on the global.
 *
 * @param string $url Location of the feed, passed to DOMDocument::load().
 *
 * @return array The collected entries; empty when the document could not be
 *               loaded or contains no <channel> element.
 */
function RSS_Retrieve($url)
{
    global $RSS_Content;

    // Reset first so a failed load never leaves entries from an earlier call.
    $RSS_Content = array();

    $doc = new DOMDocument();

    // load() reports failure through its return value; stop here instead of
    // walking an empty document.
    if (!$doc->load($url)) {
        return $RSS_Content;
    }

    foreach ($doc->getElementsByTagName("channel") as $channel) {
        RSS_Channel($channel);
    }

    return $RSS_Content;
}

1 个答案:

答案 0 :(得分:1)

您的代码似乎有效,但可以更简单地完成任务,而无需使用全局变量。

/**
 * Copy selected child elements of a SimpleXML node into a flat array.
 *
 * @param SimpleXMLElement $node Node whose children are read.
 * @param array            $map  Element name => key to use in the result.
 * @param mixed            $type Marker stored unchanged under 'type'.
 *
 * @return array One string per mapped element (empty string when the element
 *               is absent), plus the 'type' entry.
 */
function RSS_Tags($node, $map, $type) {
    $fields = array();

    foreach (array_keys($map) as $tagName) {
        // Casting the child element to string yields its text content.
        $text = (string) $node->{$tagName};
        $fields[$map[$tagName]] = $text;
    }

    $fields['type'] = $type;

    return $fields;
}

/**
 * Load an RSS feed with SimpleXML and flatten it into a list of entries.
 *
 * For every <channel> one entry of type 0 is produced, followed by one entry
 * of type 1 for each of its <item> elements.
 *
 * @param string $url Location of the feed, passed to simplexml_load_file().
 *
 * @return array List of entries as built by RSS_Tags(); empty when the feed
 *               could not be loaded or has no <channel>.
 */
function RSS_Retrieve($url) {
    $feed = array();

    $rss = simplexml_load_file($url);

    // simplexml_load_file() returns false on failure; reading ->channel from
    // false would only raise further warnings.
    if ($rss === false) {
        return $feed;
    }

    // RSS element name => key used in the resulting entries.
    $elements = array('title'=>'title', 'link'=>'link',
        'pubDate'=>'date', 'description'=>'description');

    foreach ($rss->channel as $channel) {
        $feed[] = RSS_Tags($channel, $elements, 0);
        foreach ($channel->item as $item) {
            $feed[] = RSS_Tags($item, $elements, 1);
        }
    }

    return $feed;
}

// Address of the feed to read.
$url = 'http://www.ryanhache.com/feed';
// Keep whatever RSS_Retrieve() returns for this feed in $RSS_Content.
$RSS_Content = RSS_Retrieve($url);