通过Bing News Feed解析元素

时间:2015-09-05 10:23:56

标签: php rss bing

// Fetch the Bing News RSS feed for the query "obama" and re-emit the first
// item as an RSS <item> fragment (title, link, description + image, pubDate).
$rss = new DOMDocument();
$rss->load('http://www.bing.com/news/search?q=obama&format=rss');
$feed = array();
// Collect the fields of every <item> element into $feed.
foreach ($rss->getElementsByTagName('item') as $node) {
    // Each lookup takes the first matching descendant element of the item.
    // NOTE(review): the 'image' lookup passes the prefixed name 'News:Image';
    // the question below reports that the image is not captured, so this
    // name presumably does not match - confirm against the
    // getElementsByTagName / getElementsByTagNameNS documentation.
    $item = array ( 
        'title' => $node->getElementsByTagName('title')->item(0)->nodeValue,
        'desc' => $node->getElementsByTagName('description')->item(0)->nodeValue,
        'link' => $node->getElementsByTagName('link')->item(0)->nodeValue,
        'date' => $node->getElementsByTagName('pubDate')->item(0)->nodeValue,
        'image' => $node->getElementsByTagName('News:Image')->item(0)->nodeValue,
        );
    array_push($feed, $item);
}
// Number of feed items to output.
$limit = 1;
for($x=0;$x<$limit;$x++) {
    // Only an ampersand surrounded by spaces is escaped in the title.
    $title = str_replace(' & ', ' &amp; ', $feed[$x]['title']);
    $link = $feed[$x]['link'];
    $description = $feed[$x]['desc'];
    $image = $feed[$x]['image'];
    // Prefix every '<div style=' in the description with a CDATA opener.
    $description = str_replace('<div style=', '<![CDATA[<div style=', $description);
    // Append '.jpg' after the 'pid=News' part of the image URL.
    $image = str_replace('pid=News', 'pid=News.jpg', $image);
    // NOTE(review): $date is computed here but not used in the visible code below.
    $date = date('l F d, Y', strtotime($feed[$x]['date']));
    echo '<item>';
    echo '<title>'.$title.'</title>';
    echo '<link>'.$link.'</link>';
    // The image is appended to the description inside its own CDATA section.
    echo '<description>'.$description.' <![CDATA[<img align="left" hspace="5" src="'.$image.'"/>]]></description>';
    // date() is called without a timestamp, so this prints the current time
    // rather than the item's pubDate; the assignment to $returnValue happens
    // inside the echo expression.
    echo '<pubDate>'.$returnValue = date('D, d M Y g:i:s O').'</pubDate>';
    echo '</item>';
    }

这是PHP代码。

我可以获取该 Feed 中每一项的标题(title)和描述(description),但无法获取 Bing Feed 中的图片。

怎么做?

2 个答案:

答案 0 :(得分:0)

// Fetch the Bing News RSS feed and flatten every <item> into an array keyed
// by child element name (e.g. 'title', 'link', 'pubDate', 'News:Image').
$rss = new DOMDocument();
$rss->load('http://www.bing.com/news/search?q=obama&format=rss');

$feed = array();
foreach ($rss->getElementsByTagName('item') as $itemNode) {
    $dataArray = array();
    foreach ($itemNode->childNodes as $childNode) {
        // Skip whitespace/text nodes between elements; only child elements
        // carry feed data (otherwise a stray '#text' key is stored).
        if ($childNode->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        $dataArray[$childNode->nodeName] = $childNode->nodeValue;
    }
    $feed[] = $dataArray;
}

// Maximum number of items to emit.
$limit = 1;

foreach (array_slice($feed, 0, $limit) as $feedItem) {

    // Read every field defensively: an item is not guaranteed to carry all of them.
    $rawTitle = isset($feedItem['title']) ? $feedItem['title'] : '';
    $rawDescription = isset($feedItem['description']) ? $feedItem['description'] : '';
    $rawPubDate = isset($feedItem['pubDate']) ? $feedItem['pubDate'] : 'now';
    $link = isset($feedItem['link']) ? $feedItem['link'] : '';

    // Extract the actual news URL from the Bing redirect URL, when present.
    // parse_url() returns null/false if there is no query string, and the
    // 'url' parameter is only used when it really exists, so a plain link
    // is kept unchanged.
    $query = parse_url($link, PHP_URL_QUERY);
    if (is_string($query)) {
        parse_str($query, $linkParams);
        if (isset($linkParams['url']) && is_string($linkParams['url']) && $linkParams['url'] !== '') {
            $link = $linkParams['url'];
        }
    }

    // Title and link are emitted as XML text content, so escape every
    // special character (not only ampersands surrounded by spaces).
    $title = htmlspecialchars($rawTitle, ENT_XML1 | ENT_NOQUOTES, 'UTF-8');
    $link = htmlspecialchars($link, ENT_XML1 | ENT_NOQUOTES, 'UTF-8');

    $description = str_replace('<div style=', '<![CDATA[<div style=', $rawDescription);

    // DATE_RSS is 'D, d M Y H:i:s O' (24-hour clock); the 12-hour 'g' format
    // without AM/PM would make afternoon timestamps ambiguous.
    $pubDateObject = new DateTime($rawPubDate);
    $pubDate = $pubDateObject->format(DATE_RSS);

    // Append the thumbnail only when the item actually has a News:Image element.
    $descriptionWithImage = $description;
    if (isset($feedItem['News:Image']) && $feedItem['News:Image'] !== '') {
        $image = str_replace('pid=News', 'pid=News.jpg', $feedItem['News:Image']);
        $descriptionWithImage .= '<![CDATA[<img align="left" hspace="5" src="' . $image . '"/>]]>';
    }

    echo sprintf(
        '<item><title>%s</title><link>%s</link><description>%s</description><pubDate>%s</pubDate></item>',
        $title, $link, $descriptionWithImage, $pubDate
    ) . PHP_EOL;
}

答案 1 :(得分:0)

您可以试试我这个使用 XPath 的解决方案:

<?php
// Fetch the Bing News RSS feed and collect title, description, link, date
// and image URL of every <item> into $feed.
$rss = new DOMDocument();
$rss->load('http://www.bing.com/news/search?q=obama&format=rss');
$feed = array();
$xpath = new DOMXPath($rss);

foreach ($rss->getElementsByTagName('item') as $node) {
    // Look the image up relative to the current item instead of pairing a
    // document-wide image list with items by position: an item without a
    // News:Image element would otherwise shift every following image onto
    // the wrong item. query() returns false if the expression cannot be
    // evaluated (e.g. the News prefix is not declared in the feed).
    $imageNodes = $xpath->query('News:Image', $node);
    $imageNode = ($imageNodes !== false) ? $imageNodes->item(0) : null;

    $item = array(
        'title' => $node->getElementsByTagName('title')->item(0)->nodeValue,
        'desc' => $node->getElementsByTagName('description')->item(0)->nodeValue,
        'link' => $node->getElementsByTagName('link')->item(0)->nodeValue,
        'date' => $node->getElementsByTagName('pubDate')->item(0)->nodeValue,
        // Empty string when the item carries no image.
        'image' => ($imageNode !== null) ? $imageNode->nodeValue : '',
    );

    $feed[] = $item;
}

工作正常。