我最近继承了RSS / XML解析器,虽然看起来效果很好,但我发现有些东西丢失了。
例如,从博客中提取RSS源。它缺少项目中的所有类别。它显示每个项目只有一个类别,而实际上它应该显示为具有多个类别。
链接到实际Feed:http://o7thblog.com/feed/
您可以看到feed
中的第一项如何在第一项中总共有8个类别。 (可能需要查看来源)
但是,在Demo
中,您可以看到它只显示1个类别
这是我的全部代码:
<?php
class o7thRssFeedPuller{
public $FeedUrl = ''; // URL of the feed to pull in
public $ReturnJson = false; // Return the array as a JSON encoded string instead?
public $MaxItems = 0; // 0 = unlimited (except by feed), only applicable to GetItems
// Internal holders
private $document;
private $channel;
private $items;
// Get the full RSS feed
public function GetRSS($includeAttributes = false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
return ($this->ReturnJson) ? json_encode($this->document) : $this->document;
}
// Return either an array or a json encoded string
return ($this->ReturnJson) ? json_encode($this->valueReturner()) : $this->valueReturner();
}
// Get the channel data
public function GetChannel($includeAttributes = false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
return ($this->ReturnJson) ? json_encode($this->channel) : $this->channel;
}
// Return either an array or a json encoded string
return ($this->ReturnJson) ? json_encode($this->valueReturner($this->channel)) : $this->valueReturner($this->channel);
}
// Get the items
public function GetItems($includeAttributes=false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
$arr = ($this->MaxItems == 0) ? $this->items : array_slice($this->items, 0, $this->MaxItems);
return ($this->ReturnJson) ? json_encode($arr) : $arr;
}
// Return either an array or a json encoded string
$arr = ($this->MaxItems == 0) ? $this->valueReturner($this->items) : array_slice($this->valueReturner($this->items), 0, $this->MaxItems);
return ($this->ReturnJson) ? json_encode($arr) : $arr;
}
// -------------------------------------------------------------------------------------------------
// Internal Methods
private function loadParser($rss=false) {
if($rss) {
$this->document = array();
$this->channel = array();
$this->items = array();
$DOMDocument = new DOMDocument;
$DOMDocument->strictErrorChecking = false;
$DOMDocument->loadXML($rss);
$this->document = $this->extractDOM($DOMDocument->childNodes);
}
}
private function valueReturner($valueBlock=false) {
if(!$valueBlock) {
$valueBlock = $this->document;
}
foreach($valueBlock as $valueName => $values) {
if(isset($values['value'])) {
$values = $values['value'];
}
if(is_array($values)) {
$valueBlock[$valueName] = $this->valueReturner($values);
} else {
$valueBlock[$valueName] = $values;
}
}
return $valueBlock;
}
private function extractDOM($nodeList,$parentNodeName=false) {
$itemCounter = 0;
foreach($nodeList as $values) {
if(substr($values->nodeName,0,1) != '#') {
if($values->nodeName == 'item') {
$nodeName = $values->nodeName.':'.$itemCounter;
$itemCounter++;
} else {
$nodeName = $values->nodeName;
}
$tempNode[$nodeName] = array();
if($values->attributes) {
for($i=0;$values->attributes->item($i);$i++) {
$tempNode[$nodeName]['properties'][$values->attributes->item($i)->nodeName] = $values->attributes->item($i)->nodeValue;
}
}
if(!$values->firstChild) {
$tempNode[$nodeName]['value'] = $values->textContent;
} else {
$tempNode[$nodeName]['value'] = $this->extractDOM($values->childNodes, $values->nodeName);
}
if(in_array($parentNodeName, array('channel','rdf:RDF'))) {
if($values->nodeName == 'item') {
$this->items[] = $tempNode[$nodeName]['value'];
} elseif(!in_array($values->nodeName, array('rss','channel'))) {
$this->channel[$values->nodeName] = $tempNode[$nodeName];
}
}
} elseif(substr($values->nodeName,1) == 'text') {
$tempValue = trim(preg_replace('/\s\s+/',' ',str_replace("\n",' ', $values->textContent)));
if($tempValue) {
$tempNode = $tempValue;
}
} elseif(substr($values->nodeName,1) == 'cdata-section'){
$tempNode = $values->textContent;
}
}
return (!isset($tempNode)) ? null : $tempNode;
}
// Load in a random header to pass
private function randomContext() {
$headerstrings = array();
$headerstrings['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.'.rand(0,2).'; en-US; rv:1.'.rand(2,9).'.'.rand(0,4).'.'.rand(1,9).') Gecko/2007'.rand(10,12).rand(10,30).' Firefox/2.0.'.rand(0,1).'.'.rand(1,9);
$headerstrings['Accept-Charset'] = rand(0,1) ? 'en-gb,en;q=0.'.rand(3,8) : 'en-us,en;q=0.'.rand(3,8);
$headerstrings['Accept-Language'] = 'en-us,en;q=0.'.rand(4,6);
$setHeaders = 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'."\r\n".
'Accept-Charset: '.$headerstrings['Accept-Charset']."\r\n".
'Accept-Language: '.$headerstrings['Accept-Language']."\r\n".
'User-Agent: '.$headerstrings['User-Agent']."\r\n";
$contextOptions = array(
'http'=>array(
'method'=>"GET",
'header'=>$setHeaders
)
);
return stream_context_create($contextOptions);
}
}
?>
对于演示页面:
<?php
require_once($_SERVER['DOCUMENT_ROOT'] . '/rss/o7th.rss.feed.puller.php');
$fp = new o7thRssFeedPuller();
$fp->FeedUrl = 'http://o7thblog.com/feed';
$fp->MaxItems = 2;
echo '<table width="100%" cellpadding="0" cellspacing="0">';
echo ' <tr>';
echo ' <td>';
echo ' <textarea cols="120" rows="30">';
print_r($fp->GetItems());
echo ' </textarea>';
echo ' </td>';
echo ' </tr>';
echo '</table>';
?>
所以,我认为这个问题存在于valueReturner
方法或extractDOM
方法的某个地方,但我不确定在哪里,也不知道如何才能获得所有类别返回数组。
你能帮忙吗?
答案 0 :(得分:2)
我建议使用SimpleXML
来解析Feed。
以下是如何做到这一点:
$feed_url = 'http://o7thblog.com/feed/';
$feed = simplexml_load_file($feed_url, null, LIBXML_NOCDATA);
$channel = $feed->channel;
echo "<h1><a href=\"{$channel->link}\">{$channel->title}</a></h1>\n";
echo "{$channel->description}\n";
echo "<dl>\n";
foreach ($channel->item as $item) {
echo "<dt><a href=\"{$item->link}\">{$item->title}</a></dt>\n"
. "<dd style=\"margin-bottom: 30px;\"><div style=\"font-size: small;\">{$item->pubDate}</div>\n"
. "<div>{$item->description}</div>\n"
. "Categories: <strong>".implode('</strong>, <strong>', (array) $item->category) . "</strong>\n</dd>";
}
echo "</dl>\n";
上面显示了所有类别。
答案 1 :(得分:0)
您已编写了一个自定义解析器,只需使用一行代码即可完成!
$feed = (array) simplexml_load_file('http://o7thblog.com/feed/', null, LIBXML_NOCDATA);