使用SAX Parser获取特定的子节点

时间:2019-11-12 11:04:54

标签: java xml xml-parsing sax saxparser

我有以下xml:

<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
    <title>Game Analysis</title>
    <item>
        <title>Game</title>
        <description>ABC</description>
        <releaseDate>Sat, 21 Feb 2012 05:18:23 GMT</releaseDate>       
    </item>
    <item>
        <title>CoD</title>
        <description>XYZ</description>
        <releaseDate>Sat, 21 Feb 2011 05:18:23 GMT</releaseDate>            
    </item>
</channel>
</rss>

我必须解析此xml,获取“item”下的所有子节点(childNodes),然后检查其中是否包含“releaseDate”节点。如果不包含,我必须抛出异常。

我也尝试过使用xpath,但是它不起作用。

    // Build an XPath expression that selects every <item> element that is a
    // direct child of a <channel> element, anywhere in the document.
    XPathFactory xPathfactory = XPathFactory.newInstance();
    XPath xpath = xPathfactory.newXPath();
    XPathExpression expr = xpath.compile("//channel/item");

    // `document` is not defined in this fragment; it must be supplied by the
    // surrounding code. NODESET asks for the result as a node list.
    Object result = expr.evaluate(document, XPathConstants.NODESET);
    NodeList nodes = (NodeList) result;
    for (int i = 0; i < nodes.getLength(); i++) {
        // Prints the NodeList object returned by getChildNodes() itself rather
        // than each child node.
        // NOTE(review): presumably this is why the output is not useful -
        // iterate over the child list to inspect individual children.
        System.out.println(nodes.item(i).getChildNodes());
    }

2 个答案:

答案 0 :(得分:0)

尝试此代码。 不要忘记在您的项目中包括SAX解析器库,并从XML文档中删除rss-string(希望可以接受)。

/**
 * Small command-line driver: parses an XML file with a SAX parser and
 * {@link MyHandler}, then fetches the items collected for the channel.
 */
public class SaxParserTest {

    /**
     * Parses the XML file and obtains the parsed items.
     *
     * <p>Any exception raised while creating the parser, parsing, or reading
     * the result is caught and its stack trace is printed.
     *
     * @param argv command-line arguments (not used)
     */
    public static void main(String... argv) {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            MyHandler rssHandler = new MyHandler();
            File xmlFile = new File("your path to XML-file here");
            parser.parse(xmlFile, rssHandler);

            Channel parsedChannel = rssHandler.getChannel();
            List<Item> items = parsedChannel.getItems();
            // your check of item release dates here
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * SAX handler that builds a {@link Channel} (with its {@link Item}s) from a
 * document containing a {@code <channel>} element with {@code <item>} children.
 *
 * <p>Element names other than {@code rss} are matched case-insensitively.
 * An item's {@code title}, {@code description} and {@code releaseDate} are
 * {@code null} in the resulting {@link Item} when the corresponding child
 * element is absent, so callers can detect missing children.
 */
class MyHandler extends DefaultHandler {
    /** Text collected since the most recent start tag that reset the buffer. */
    private final StringBuilder data = new StringBuilder();

    private Channel channel;

    // Values of the <item> currently being parsed; null until the child is seen.
    private String itemTitle;
    private String itemDescription;
    private String itemReleaseDate;

    /** True while the parser is between {@code <item>} and {@code </item>}. */
    private boolean isItem;

    /**
     * Starts a new channel or item as appropriate and clears the text buffer.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // The buffer is left untouched for an element named exactly "rss".
        if (!qName.equals("rss")) {
            if (qName.equalsIgnoreCase("channel")) {
                channel = new Channel();
            } else if (qName.equalsIgnoreCase("item")) {
                isItem = true;
            }
            data.setLength(0);
        }
    }

    /**
     * Stores the buffered text into the channel or the current item, and adds
     * the finished item to the channel on {@code </item>}.
     *
     * @throws SAXException if a {@code <title>} or {@code <item>} is closed
     *         while no {@code <channel>} has been opened
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equalsIgnoreCase("title")) {
            if (!isItem) {
                requireChannel(qName).setTitle(data.toString());
            } else {
                itemTitle = data.toString();
            }
        } else if (qName.equalsIgnoreCase("item")) {
            requireChannel(qName).addItem(new Item(itemTitle, itemDescription, itemReleaseDate));
            itemTitle = null;
            itemDescription = null;
            itemReleaseDate = null;
            isItem = false;
        } else if (isItem && qName.equalsIgnoreCase("description")) {
            // Only item-level values are recorded; a channel-level <description>
            // must not leak into an item that has no <description> of its own.
            itemDescription = data.toString();
        } else if (isItem && qName.equalsIgnoreCase("releaseDate")) {
            itemReleaseDate = data.toString();
        }
    }

    /**
     * Appends a chunk of character data to the buffer. The parser may deliver
     * the text of one element in several chunks, hence append rather than set.
     */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        data.append(ch, start, length);
    }

    /**
     * @return the channel built so far, or {@code null} if no
     *         {@code <channel>} element has been encountered
     */
    public Channel getChannel() {
        return channel;
    }

    /** Returns the current channel or fails with a descriptive parse error. */
    private Channel requireChannel(String qName) throws SAXException {
        if (channel == null) {
            throw new SAXException("<" + qName + "> found outside of a <channel> element");
        }
        return channel;
    }
}

/**
 * Mutable holder for a channel's title and its list of {@link Item}s.
 */
class Channel {
    private String title;
    // Never null, so getItems() can always be iterated without a null check,
    // even when no item was added.
    private List<Item> items = new ArrayList<Item>();

    /** @return the channel title, or {@code null} if none was set */
    public String getTitle() {
        return title;
    }

    /** @param title the channel title */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the live list of items; never {@code null}, empty when the
     *         channel has no items
     */
    public List<Item> getItems() {
        return items;
    }

    /**
     * Replaces the item list. The given list is stored as-is (not copied).
     *
     * @param items the new list; {@code null} is treated as an empty list
     */
    public void setItems(List<Item> items) {
        this.items = (items != null) ? items : new ArrayList<Item>();
    }

    /** @param item the item to append to this channel's list */
    public void addItem(Item item) {
        items.add(item);
    }
}

/**
 * Immutable value object holding the title, description and release date
 * text of a single item.
 *
 * <p>The constructor stores the values exactly as given, including
 * {@code null}.
 */
class Item {
    private final String title;
    private final String description;
    private final String releaseDate;

    /**
     * @param title       the item title, may be {@code null}
     * @param description the item description, may be {@code null}
     * @param releaseDate the release date text, may be {@code null}
     */
    public Item(String title, String description, String releaseDate) {
        this.title = title;
        this.description = description;
        this.releaseDate = releaseDate;
    }

    /** @return the title passed to the constructor */
    public String getTitle() {
        return title;
    }

    /** @return the description passed to the constructor */
    public String getDescription() {
        return description;
    }

    /** @return the release date text passed to the constructor */
    public String getReleaseDate() {
        return releaseDate;
    }
}

答案 1 :(得分:0)

XPath应该可以正常工作,甚至可以用来写出更简短的解决方案。表达式 //channel/item[not(releaseDate)] 将返回所有**没有** releaseDate 子节点的 item 节点。因此,下面的代码应该能满足您的需求:

    // Create a namespace-aware DOM parser factory.
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);

    // Parse the XML into a DOM document; `...` is a placeholder for the
    // actual input (file, stream, etc.) and must be filled in by the reader.
    Document document = dbf
            .newDocumentBuilder()
            .parse(...);

    XPath xpath = XPathFactory
            .newInstance()
            .newXPath();

    // Select every <item> under a <channel> that has NO <releaseDate> child.
    NodeList list = (NodeList) xpath.evaluate("//channel/item[not(releaseDate)]", document, XPathConstants.NODESET);
    // A non-empty result means at least one <item> lacks <releaseDate>.
    if (list.getLength() != 0) {
        throw new Exception("Found <item> without <releaseDate>");
    }