我想解析一个 RSS Feed,但解析结果会在某些字符处被截断,例如遇到“>”字符时,该字符及其之后的内容全部丢失。
示例:
<title>[Maths I > Theory] Maths I, T1.pdf: One file added.</title>
输出:
[Maths I
这是我的RSSHandler:
/**
 * SAX handler that fills an RSSFeed (and its RSSItem entries) from the
 * title, description, link and pubdate elements of a document.
 *
 * NOTE(review): text is stored directly from each characters() callback and
 * the state is reset right after, so only the first chunk of text reported
 * for an element is kept.
 */
public class RSSHandler extends DefaultHandler {
// Parser states: which element's text the next characters() call belongs to.
final int state_unknown = 0;
final int state_title = 1;
final int state_description = 2;
final int state_link = 3;
final int state_pubdate = 4;
int currentState = state_unknown;
// Feed being built, and the item currently being filled.
RSSFeed feed;
RSSItem item;
// Set once the first "item" element is seen; never cleared afterwards.
boolean itemFound = false;
RSSHandler(){
}
/** Returns the feed built so far (null until startDocument() has run). */
RSSFeed getFeed(){
return feed;
}
/** Creates a fresh feed and a placeholder item at the start of the document. */
@Override
public void startDocument() throws SAXException {
feed = new RSSFeed();
item = new RSSItem();
}
/** No work is done at the end of the document. */
@Override
public void endDocument() throws SAXException {
}
/**
 * Selects the state for the element that just opened, comparing localName
 * case-insensitively. An "item" element also starts a new RSSItem.
 */
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (localName.equalsIgnoreCase("item")){
itemFound = true;
item = new RSSItem();
currentState = state_unknown;
}
else if (localName.equalsIgnoreCase("title")){
currentState = state_title;
}
else if (localName.equalsIgnoreCase("description")){
currentState = state_description;
}
else if (localName.equalsIgnoreCase("link")){
currentState = state_link;
}
else if (localName.equalsIgnoreCase("pubdate")){
currentState = state_pubdate;
}
else{
currentState = state_unknown;
}
}
/** Adds the current item to the feed when an "item" element closes. */
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (localName.equalsIgnoreCase("item")){
feed.addItem(item);
}
}
/**
 * Stores the reported text in the field selected by currentState: on the
 * current item once an "item" has been seen, otherwise on the feed.
 *
 * NOTE(review): a SAX parser may report one element's text in several
 * calls. Each call here replaces the stored value, and the reset at the end
 * of this method makes every later chunk fall into the default branch, so
 * the text is truncated to its first chunk.
 */
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
String strCharacters = new String(ch,start,length);
if (itemFound==true){
// "item" tag found, it's item's parameter
switch(currentState){
case state_title:
item.setTitle(strCharacters);
break;
case state_description:
item.setDescription(strCharacters);
break;
case state_link:
item.setLink(strCharacters);
break;
case state_pubdate:
item.setPubdate(strCharacters);
break;
default:
break;
}
}
else{
// not "item" tag found, it's feed's parameter
switch(currentState){
case state_title:
feed.setTitle(strCharacters);
break;
case state_description:
feed.setDescription(strCharacters);
break;
case state_link:
feed.setLink(strCharacters);
break;
case state_pubdate:
feed.setPubdate(strCharacters);
break;
default:
break;
}
}
// Resetting here discards any further text chunks of the same element.
currentState = state_unknown;
}
}
答案 0 :(得分:1)
对于这个标题,characters 方法至少可能被回调 5 次:
1st: [Maths I
2nd: >
3rd: Theory
4th: ]
5th: Maths I, T1.pdf: One file added.
您不应在 characters 方法的最后一行更改 currentState,而必须先把各次回调得到的 String 缓冲起来,稍后再将它们拼接在一起。
答案 1 :(得分:1)
这是一个稍微修改过的版本,可以很好地解析RSS文件。我希望它有所帮助。
首先是 State 枚举:
/**
 * Names the RSS element whose text is currently being read.
 */
public enum State {
    /** No tracked element is open. */
    unknown,
    /** Inside a {@code title} element. */
    title,
    /** Inside a {@code description} element. */
    description,
    /** Inside a {@code link} element. */
    link,
    /** Inside a {@code pubdate} element. */
    pubdate
}
然后是处理程序类:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX content handler that builds an {@link RSSFeed} from an RSS document.
 *
 * <p>The text of {@code title}, {@code description}, {@code link} and
 * {@code pubdate} elements is accumulated over all {@code characters}
 * callbacks and stored when the element closes: on the current
 * {@link RSSItem} while inside an {@code item} element, otherwise on the feed.
 * Elements are matched by name only (case-insensitively), wherever they are
 * nested.
 */
public class RSSHandler extends DefaultHandler {
    private State currentState = State.unknown;
    private RSSFeed feed;
    private RSSItem item;
    private boolean itemFound = false;
    // Never null, so characters() is safe whatever the callback order.
    private StringBuilder tagContent = new StringBuilder();

    public RSSHandler() {
    }

    /** Resets all parsing state so the handler can be reused for a new document. */
    @Override
    public void startDocument() throws SAXException {
        feed = new RSSFeed();
        item = new RSSItem();
        itemFound = false;
        currentState = State.unknown;
        tagContent.setLength(0);
    }

    /**
     * Starts collecting text for a tracked element; an {@code item} element
     * opens a new {@link RSSItem} instead.
     */
    @Override
    public void startElement(final String uri, final String localName,
            final String qName, final Attributes attributes)
            throws SAXException {
        final String name = elementName(localName, qName);
        tagContent.setLength(0);
        if (name.equalsIgnoreCase("item")) {
            itemFound = true;
            item = new RSSItem();
            currentState = State.unknown;
        } else {
            currentState = stateFor(name);
        }
    }

    /**
     * Stores the text collected for the element that just closed, or adds the
     * finished item to the feed when an {@code item} element closes.
     */
    @Override
    public void endElement(final String uri, final String localName,
            final String qName) throws SAXException {
        final String name = elementName(localName, qName);
        if (name.equalsIgnoreCase("item")) {
            feed.addItem(item);
            // Elements after </item> belong to the feed again.
            itemFound = false;
        } else if (currentState != State.unknown) {
            storeValue(tagContent.toString());
        }
        // Reset so whitespace between elements and enclosing end tags
        // cannot overwrite a field that has already been stored.
        currentState = State.unknown;
        tagContent.setLength(0);
    }

    /**
     * Buffers one chunk of text. A parser may split the text of a single
     * element into any number of calls, so chunks are only appended here.
     */
    @Override
    public void characters(final char[] ch, final int start, final int length)
            throws SAXException {
        if (currentState != State.unknown) {
            tagContent.append(ch, start, length);
        }
    }

    /** Returns the feed built so far ({@code null} before parsing starts). */
    public RSSFeed getFeed() {
        return feed;
    }

    /** Picks the usable element name: localName is empty without namespace processing. */
    private static String elementName(final String localName, final String qName) {
        if (localName == null || localName.length() == 0) {
            return qName == null ? "" : qName;
        }
        return localName;
    }

    /** Maps an element name to the state that tracks it, or {@code unknown}. */
    private static State stateFor(final String name) {
        if (name.equalsIgnoreCase("title")) {
            return State.title;
        }
        if (name.equalsIgnoreCase("description")) {
            return State.description;
        }
        if (name.equalsIgnoreCase("link")) {
            return State.link;
        }
        if (name.equalsIgnoreCase("pubdate")) {
            return State.pubdate;
        }
        return State.unknown;
    }

    /** Writes the value to the item (inside an item) or to the feed, per the current state. */
    private void storeValue(final String value) {
        if (itemFound) {
            switch (currentState) {
            case title:
                item.setTitle(value);
                break;
            case description:
                item.setDescription(value);
                break;
            case link:
                item.setLink(value);
                break;
            case pubdate:
                item.setPubdate(value);
                break;
            default:
                break;
            }
        } else {
            switch (currentState) {
            case title:
                feed.setTitle(value);
                break;
            case description:
                feed.setDescription(value);
                break;
            case link:
                feed.setLink(value);
                break;
            case pubdate:
                feed.setPubdate(value);
                break;
            default:
                break;
            }
        }
    }
}