我解析了一些RSS提要(尝试了不同的...),每次相当随机的字符被切断。 我究竟做错了什么?为什么它在某些情况下有效,在其他情况下则不然? 还有另一种方法吗? XML(在大多数情况下)将包括UTF-8字符(如ä,ö,ü等),因此解决方案也应该与这些字符一起使用。
如果您需要更多信息(更多代码,更多详细信息等),请告诉我们!
这是我的代码:
public class RSSHandler extends DefaultHandler {
final int state_unknown = 0;
final int state_title = 1;
final int state_description = 2;
final int state_link = 3;
final int state_pubdate = 4;
int currentState = state_unknown;
StringBuilder strCharacters;
RSSFeed feed;
RSSItem item;
boolean inEntity = false;
String entityName = "";
boolean itemFound = false;
public RSSHandler() {
strCharacters = new StringBuilder();
}
public RSSFeed getFeed() {
return feed;
}
@Override
public void startDocument() throws SAXException {
feed = new RSSFeed();
item = new RSSItem();
}
@Override
public void endDocument() throws SAXException {
}
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
strCharacters = new StringBuilder();
if (localName.equalsIgnoreCase("item")) {
itemFound = true;
item = new RSSItem();
currentState = state_unknown;
} else if (localName.equalsIgnoreCase("title")) {
currentState = state_title;
} else if (localName.equalsIgnoreCase("description")) {
currentState = state_description;
} else if (localName.equalsIgnoreCase("link")) {
currentState = state_link;
} else if (localName.equalsIgnoreCase("pubdate")) {
currentState = state_pubdate;
} else {
currentState = state_unknown;
}
}
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (itemFound == true) {
switch (currentState) {
case state_title:
item.setTitle(strCharacters.toString());
break;
case state_description:
break;
case state_link:
item.setLink(strCharacters.toString());
break;
case state_pubdate:
String dateStr = strCharacters.toString();
SimpleDateFormat curFormater = new SimpleDateFormat(
"EEE, dd MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
Date dateObj = null;
try {
dateObj = curFormater.parse(dateStr);
SimpleDateFormat postFormater = new SimpleDateFormat(
"dd.MM.yyyy HH:mm");
String newDateStr = postFormater.format(dateObj);
item.setPubdate(newDateStr);
} catch (ParseException e) {
e.printStackTrace();
}
break;
default:
break;
}
} else {
switch (currentState) {
case state_title:
feed.setTitle(strCharacters.toString());
break;
case state_description:
break;
case state_link:
feed.setLink(strCharacters.toString());
break;
case state_pubdate:
feed.setPubdate(strCharacters.toString());
break;
default:
break;
}
}
currentState = state_unknown;
if (localName.equalsIgnoreCase("item")) {
feed.addItem(item);
}
}
public void startEntity(String name) throws SAXException {
inEntity = true;
entityName = name;
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
strCharacters = new StringBuilder();
if (inEntity) {
inEntity = false;
strCharacters.append("&" + entityName + ";");
} else {
for (int i = start; i < start + length; i++) {
strCharacters.append(ch[i]);
}
}
// strCharacters.append(ch, start, length);
}
}
答案 0 :(得分:0)
您正在为每个StringBuilder
电话创建新的characters()
。这是不正确的。每个元素有几次调用characters()
- 你需要连接所有这些结果,而不只是收集最后一块。