在网站上获取RSS源

时间:2010-01-15 21:08:21

标签: php rss

我有这个代码是我去年写的,它会在一段时间内返回RSS feed:

<html>
<body>

<form name="form" action="search.php" method="get">
  <input type="text" name="q" />
  <input type="submit" name="Submit" value="Search" />
</form>

<?php


require_once "RSS.php";


// want to parse the $row[1] variable to get out words, make new string with a + instead of spaces


$college = "college";

$collegelen= strlen($college);

for($i = 0; $i < $collegelen; $i++){
if ($college{$i} == " ")
{$spaced = $spaced . "+";}
else
{$spaced = $spaced . $college{$i};}
}

echo("Yahoo! News RSS");

$rss_yahoo =& new XML_RSS("http://news.search.yahoo.com/news/rss?p=" . $spaced . "&ei=UTF-8&fl=0&x=wrt");
$rss_yahoo->parse();

foreach ($rss_yahoo->getItems() as $item) {
  echo "<li><a href=\"" . $item['link'] . "\">" . $item['title'] . "</a></li><br>";
//echo $item['pubDate'] ."<br>";
  echo $item['description'] . "<br><br>\n"; }

//echo("Google RSS");

//$rss_google =& new XML_RSS("http://news.google.com/news?hl=en&ned=us&q=" . $spaced . "&ie=UTF-8&nolr=1&output=rss");
//$rss_google->parse();

//foreach ($rss_google->getItems() as $item) {
//  echo "<li><a href=\"" . $item['link'] . "\">" . $item['title'] . "</a></li><br>";
//  echo $item['description'] . "<br><br>\n"; }

echo("Google RSS");

$rss_google =& new XML_RSS("http://news.google.com/news?hl=en&ned=us&q=" . $spaced . "&ie=UTF-8&nolr=1&output=rss");
$rss_google->parse();

foreach ($rss_google->getItems() as $item) {
  echo "<li><a href=\"" . $item['link'] . "\">" . $item['title'] . "</a></li><br>";
- Hide quoted text -
 "<br><br>\n"; }

?>

</body>
</html>

RSS.php

<?php
// vim: set expandtab tabstop=4 shiftwidth=4 fdm=marker:
// +----------------------------------------------------------------------+
// | PHP Version 4                                                        |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2003 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.02 of the PHP license,      |
// | that is bundled with this package in the file LICENSE, and is        |
// | available at through the world-wide-web at                           |
// | http://www.php.net/license/2_02.txt.                                 |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Authors: Martin Jansen <mj@php.net>                                  |
// |                                                                      |
// +----------------------------------------------------------------------+
//
// $Id: RSS.php,v 1.28 2006/09/14 08:40:05 clay Exp $
//

require_once 'Parser.php';

/**
* RSS parser class.
*
* This class is a parser for Resource Description Framework (RDF) Site
* Summary (RSS) documents. For more information on RSS see the
* website of the RSS working group (http://www.purl.org/rss/).
*
* @author Martin Jansen <mj@php.net>
* @version $Revision: 1.28 $
* @access  public
*/
class XML_RSS extends XML_Parser
{
    // {{{ properties

    /**
     * @var string
     */
    var $insideTag = '';

    /**
     * @var array
     */
    var $insideTagStack = array();

    /**
     * @var string
     */
    var $activeTag = '';

    /**
     * @var array
     */
    var $channel = array();

    /**
     * @var array
     */
    var $items = array();

    /**
     * @var array
     */
    var $item = array();

    /**
     * @var array
     */
    var $image = array();

    /**
     * @var array
     */
    var $textinput = array();

    /**
     * @var array
     */
    var $textinputs = array();

    /**
     * @var array
     */
    var $attribs;

    /**
     * @var array
     */
    var $parentTags = array('CHANNEL', 'ITEM', 'IMAGE', 'TEXTINPUT');

    /**
     * @var array
     */
    var $channelTags = array('TITLE', 'LINK', 'DESCRIPTION', 'IMAGE',
                              'ITEMS', 'TEXTINPUT', 'LANGUAGE', 'COPYRIGHT',
                              'MANAGINGEditor', 'WEBMASTER', 'PUBDATE', 'LASTBUILDDATE',
                              'CATEGORY', 'GENERATOR', 'DOCS', 'CLOUD', 'TTL',
                              'RATING');

    /**
     * @var array
     */
    var $itemTags = array('TITLE', 'LINK', 'DESCRIPTION', 'PUBDATE', 'AUTHOR', 'CATEGORY',
                          'COMMENTS', 'ENCLOSURE', 'GUID', 'PUBDATE', 'SOURCE',
                          'CONTENT:ENCODED');

    /**
     * @var array
     */
    var $imageTags = array('TITLE', 'URL', 'LINK', 'WIDTH', 'HEIGHT');


    var $textinputTags = array('TITLE', 'DESCRIPTION', 'NAME', 'LINK');

    /**
     * List of allowed module tags
     *
     * Currently supported:
     *
     *   Dublin Core Metadata
     *   blogChannel RSS module
     *   CreativeCommons
     *   Content
     *   Syndication
     *   Trackback
     *   GeoCoding
     *   Media
     *   iTunes
     *
     * @var array
     */
    var $moduleTags = array('DC:TITLE', 'DC:CREATOR', 'DC:SUBJECT', 'DC:DESCRIPTION',
                            'DC:PUBLISHER', 'DC:CONTRIBUTOR', 'DC:DATE', 'DC:TYPE',
                            'DC:FORMAT', 'DC:IDENTIFIER', 'DC:SOURCE', 'DC:LANGUAGE',
                            'DC:RELATION', 'DC:COVERAGE', 'DC:RIGHTS',
                            'BLOGCHANNEL:BLOGROLL', 'BLOGCHANNEL:MYSUBSCRIPTIONS',
                            'BLOGCHANNEL:BLINK', 'BLOGCHANNEL:CHANGES',
                            'CREATIVECOMMONS:LICENSE', 'CC:LICENSE', 'CONTENT:ENCODED',
                            'SY:UPDATEPERIOD', 'SY:UPDATEFREQUENCY', 'SY:UPDATEBASE',
                            'TRACKBACK:PING', 'GEO:LAT', 'GEO:LONG',
                            'MEDIA:GROUP', 'MEDIA:CONTENT', 'MEDIA:ADULT',
                            'MEDIA:RATING', 'MEDIA:TITLE', 'MEDIA:DESCRIPTION',
                            'MEDIA:KEYWORDS', 'MEDIA:THUMBNAIL', 'MEDIA:CATEGORY',
                            'MEDIA:HASH', 'MEDIA:PLAYER', 'MEDIA:CREDIT',
                            'MEDIA:COPYRIGHT', 'MEDIA:TEXT', 'MEDIA:RESTRICTION',
                            'ITUNES:AUTHOR', 'ITUNES:BLOCK', 'ITUNES:CATEGORY',
                            'ITUNES:DURATION', 'ITUNES:EXPLICIT', 'ITUNES:IMAGE',
                            'ITUNES:KEYWORDS', 'ITUNES:NEW-FEED-URL', 'ITUNES:OWNER',
                            'ITUNES:PUBDATE', 'ITUNES:SUBTITLE', 'ITUNES:SUMMARY'
                            );

    /**
     * @var array
     */
    var $last = array();

    // }}}
    // {{{ Constructor

    /**
     * Constructor
     *
     * @access public
     * @param mixed File pointer, name of the RSS file, or an RSS string.
     * @param string  Source charset encoding, use null (default) to use
     *                default encoding (ISO-8859-1)
     * @param string  Target charset encoding, use null (default) to use
     *                default encoding (ISO-8859-1)
     * @return void
     */
    function XML_RSS($handle = '', $srcenc = null, $tgtenc = null)
    {
        if ($srcenc === null && $tgtenc === null) {
            $this->XML_Parser();
        } else {
            $this->XML_Parser($srcenc, 'event', $tgtenc);
        }

        $this->setInput($handle);

        if ($handle == '') {
            $this->raiseError('No input passed.');
        }
    }

    // }}}
    // {{{ startHandler()

    /**
     * Start element handler for XML parser
     *
     * @access private
     * @param  object XML parser object
     * @param  string XML element
     * @param  array  Attributes of XML tag
     * @return void
     */
    function startHandler($parser, $element, $attribs)
    {
        if (substr($element, 0, 4) == "RSS:") {
            $element = substr($element, 4);
        }

        switch ($element) {
            case 'CHANNEL':
            case 'ITEM':
            case 'IMAGE':
            case 'TEXTINPUT':
                $this->insideTag = $element;
                array_push($this->insideTagStack, $element);
                break;

            case 'ENCLOSURE' :
                $this->attribs = $attribs;
                break;

            default:
                $this->activeTag = $element;
        }
    }

    // }}}
    // {{{ endHandler()

    /**
     * End element handler for XML parser
     *
     * If the end of <item>, <channel>, <image> or <textinput>
     * is reached, this method updates the structure array
     * $this->struct[] and adds the field "type" to this array,
     * that defines the type of the current field.
     *
     * @access private
     * @param  object XML parser object
     * @param  string
     * @return void
     */
    function endHandler($parser, $element)
    {
        if (substr($element, 0, 4) == "RSS:") {
            $element = substr($element, 4);
        }

        if ($element == $this->insideTag) {
            array_pop($this->insideTagStack);
            $this->insideTag = end($this->insideTagStack);

            $this->struct[] = array_merge(array('type' => strtolower($element)),
                                          $this->last);
        }

        if ($element == 'ITEM') {
            $this->items[] = $this->item;
            $this->item = '';
        }

        if ($element == 'IMAGE') {
            $this->images[] = $this->image;
            $this->image = '';
        }

        if ($element == 'TEXTINPUT') {
            $this->textinputs = $this->textinput;
            $this->textinput = '';
        }

        if ($element == 'ENCLOSURE') {
            if (!isset($this->item['enclosures'])) {
                $this->item['enclosures'] = array();
            }

            $this->item['enclosures'][] = array_change_key_case($this->attribs, CASE_LOWER);
            $this->attribs = array();
        }

        $this->activeTag = '';
    }

    // }}}
    // {{{ cdataHandler()

    /**
     * Handler for character data
     *
     * @access private
     * @param  object XML parser object
     * @param  string CDATA
     * @return void
     */
    function cdataHandler($parser, $cdata)
    {
        if (in_array($this->insideTag, $this->parentTags)) {
            $tagName = strtolower($this->insideTag);
            $var = $this->{$tagName . 'Tags'};

            if (in_array($this->activeTag, $var) ||
                in_array($this->activeTag, $this->moduleTags)) {
                $this->_add($tagName, strtolower($this->activeTag),
                            $cdata);
            }

        }
    }

    // }}}
    // {{{ defaultHandler()

    /**
     * Default handler for XML parser
     *
     * @access private
     * @param  object XML parser object
     * @param  string CDATA
     * @return void
     */
    function defaultHandler($parser, $cdata)
    {
        return;
    }

    // }}}
    // {{{ _add()

    /**
     * Add element to internal result sets
     *
     * @access private
     * @param  string Name of the result set
     * @param  string Fieldname
     * @param  string Value
     * @return void
     * @see    cdataHandler
     */
    function _add($type, $field, $value)
    {
        if (empty($this->{$type}) || empty($this->{$type}[$field])) {
            $this->{$type}[$field] = $value;
        } else {
            $this->{$type}[$field] .= $value;
        }

        $this->last = $this->{$type};
    }

    // }}}
    // {{{ getStructure()

    /**
     * Get complete structure of RSS file
     *
     * @access public
     * @return array
     */
    function getStructure()
    {
        return (array)$this->struct;
    }

    // }}}
    // {{{ getchannelInfo()

    /**
     * Get general information about current channel
     *
     * This method returns an array containing the information
     * that has been extracted from the <channel>-tag while parsing
     * the RSS file.
     *
     * @access public
     * @return array
     */
    function getChannelInfo()
    {
        return (array)$this->channel;
    }

    // }}}
    // {{{ getItems()

    /**
     * Get items from RSS file
     *
     * This method returns an array containing the set of items
     * that are provided by the RSS file.
     *
     * @access public
     * @return array
     */
    function getItems()
    {
        return (array)$this->items;
    }

    // }}}
    // {{{ getImages()

    /**
     * Get images from RSS file
     *
     * This method returns an array containing the set of images
     * that are provided by the RSS file.
     *
     * @access public
     * @return array
     */
    function getImages()
    {
        return (array)$this->images;
    }

    // }}}
    // {{{ getTextinputs()

    /**
     * Get text input fields from RSS file
     *
     * @access public
     * @return array
     */
    function getTextinputs()
    {
        return (array)$this->textinputs;
    }

    // }}}

}
?>

但是,此代码不再起作用,或者我没有正确测试...我在Mac上的xampp上运行它。

我想做这样的事情:

用户输入一个术语,我想显示来自Yahoo,Google或NYTimes的RSS源。

谢谢!

编辑:去年我从一个网站上获得了两个文件RSS.php和Parser.php,现在看起来该网站已关闭。我试图看看是否有人可以提供另一种解决方案。谢谢!

1 个答案:

答案 0 :(得分:0)

也许你可以试试SimplePie。它易于使用并具有积极的开发。您可以start using right away或阅读文档了解更多advance usage