解析RSS提要字符

时间:2012-06-04 09:03:45

标签: android parsing rss character

我解析了一些RSS提要(尝试了不同的...),每次相当随机的字符被切断。 我究竟做错了什么?为什么它在某些情况下有效,在其他情况下则不然? 还有另一种方法吗? XML(在大多数情况下)将包括UTF-8字符(如ä,ö,ü等),因此解决方案也应该与这些字符一起使用。

如果您需要更多信息(更多代码,更多详细信息等),请告诉我们!

这是我的代码:

public class RSSHandler extends DefaultHandler {

final int state_unknown = 0;
final int state_title = 1;
final int state_description = 2;
final int state_link = 3;
final int state_pubdate = 4;
int currentState = state_unknown;
StringBuilder strCharacters;

RSSFeed feed;
RSSItem item;

boolean inEntity = false;
String entityName = "";

boolean itemFound = false;

public RSSHandler() {
    strCharacters = new StringBuilder();
}

public RSSFeed getFeed() {
    return feed;
}

@Override
public void startDocument() throws SAXException {
    feed = new RSSFeed();
    item = new RSSItem();
}

@Override
public void endDocument() throws SAXException {
}

@Override
public void startElement(String uri, String localName, String qName,
        Attributes attributes) throws SAXException {
    strCharacters = new StringBuilder();
    if (localName.equalsIgnoreCase("item")) {
        itemFound = true;
        item = new RSSItem();
        currentState = state_unknown;
    } else if (localName.equalsIgnoreCase("title")) {
        currentState = state_title;
    } else if (localName.equalsIgnoreCase("description")) {
        currentState = state_description;
    } else if (localName.equalsIgnoreCase("link")) {
        currentState = state_link;
    } else if (localName.equalsIgnoreCase("pubdate")) {
        currentState = state_pubdate;
    } else {
        currentState = state_unknown;
    }

}

@Override
public void endElement(String uri, String localName, String qName)
        throws SAXException {
    if (itemFound == true) {
        switch (currentState) {
            case state_title:
                item.setTitle(strCharacters.toString());
                break;
            case state_description:
                break;
            case state_link:
                item.setLink(strCharacters.toString());
                break;
            case state_pubdate:
                String dateStr = strCharacters.toString();
                SimpleDateFormat curFormater = new SimpleDateFormat(
                        "EEE, dd MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
                Date dateObj = null;
                try {
                    dateObj = curFormater.parse(dateStr);
                    SimpleDateFormat postFormater = new SimpleDateFormat(
                            "dd.MM.yyyy HH:mm");
                    String newDateStr = postFormater.format(dateObj);
                    item.setPubdate(newDateStr);
                } catch (ParseException e) {
                    e.printStackTrace();
                }
                break;
            default:
                break;
        }
    } else {
        switch (currentState) {
            case state_title:
                feed.setTitle(strCharacters.toString());
                break;
            case state_description:
                break;
            case state_link:
                feed.setLink(strCharacters.toString());
                break;
            case state_pubdate:
                feed.setPubdate(strCharacters.toString());
                break;
            default:
                break;
        }
    }

    currentState = state_unknown;

    if (localName.equalsIgnoreCase("item")) {
        feed.addItem(item);
    }
}

public void startEntity(String name) throws SAXException {
    inEntity = true;
    entityName = name;
}

@Override
public void characters(char[] ch, int start, int length)
        throws SAXException {

    strCharacters = new StringBuilder();
    if (inEntity) {
        inEntity = false;
        strCharacters.append("&" + entityName + ";");
    } else {
        for (int i = start; i < start + length; i++) {
            strCharacters.append(ch[i]);
        }
    }

    // strCharacters.append(ch, start, length);
}

}

1 个答案:

答案 0 :(得分:0)

您正在为每个StringBuilder电话创建新的characters()。这是不正确的。每个元素有几次调用characters() - 你需要连接所有这些结果,而不只是收集最后一块。