以下是我的Saxparser类,它检索标记名称“item”中的所有元素。
class SAXHelper {
public HashMap<String, String> userList = new HashMap<String, String>();
private URL url2;
public SAXHelper(String url1) throws MalformedURLException {
this.url2 = new URL(url1);
}
public RSSHandler parseContent(String parseContent) {
RSSHandler df = new RSSHandler();
try {
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
xr.setContentHandler(df);
xr.parse(new InputSource(url2.openStream()));
} catch (Exception e) {
e.printStackTrace();
}
return df;
}
}
class RSSHandler extends DefaultHandler {
private Post currentPost = new Post();
StringBuffer chars = new StringBuffer();
@Override
public void startElement(String uri, String localName, String qName,
Attributes atts) {
chars = new StringBuffer();
if (localName.equalsIgnoreCase("item")) {
}
}
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (localName.equalsIgnoreCase("title")
&& currentPost.getTitle() == null) {
currentPost.setTitle(chars.toString());
System.out.println("title1: " + currentPost.getTitle());
}
if (localName.equalsIgnoreCase("category")
&& currentPost.getCategory() == null) {
currentPost.setCategory(chars.toString());
System.out.println("category: " + currentPost.getCategory());
}
if (localName.equalsIgnoreCase("description")
&& currentPost.getDescription() == null) {
currentPost.setDescription(chars.toString());
}
if (localName.equalsIgnoreCase("link")
&& currentPost.getLink() == null) {
currentPost.setLink(chars.toString());
System.out.println("link: " + currentPost.getLink());
}
if (localName.equalsIgnoreCase("pubDate")
&& currentPost.getPubDate() == null) {
currentPost.setPubDate(chars.toString());
String x = currentPost.getPubDate();
String last = x.substring(0, x.length() - 4);
int start = last.length() - 8;
int end = last.length() - 3;
String result = x.substring(start, end);
result = Stringreplace(result);
}
if (localName.equalsIgnoreCase("item")) {
PostListNormal.add(currentPost);
currentPost = new Post();
}
}
@Override
public void characters(char ch[], int start, int length) {
chars.append(new String(ch, start, length));
}
}
这是要提取的xml标记。
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>matches en direct</title>
<link>http://www.match.com/</link>
<atom:link href="http://www.match.com/news,60,0,UTF-8,fulltext.rss" rel="self" type="application/rss+xml" />
<description>Foot 01 - N°1 de l'actu du match, du mercato et des matches en direct</description>
<language>fr-FR</language>
<pubDate>Tue, 11 Mar 2014 06:12:38 GMT</pubDate>
<lastBuildDate>Tue, 11 Mar 2014 06:12:38 GMT</lastBuildDate>
<category>Sports</category>
<copyright>© 2011 match.com</copyright>
<ttl>1</ttl>
<item>
<category>Espagne, match </category>
<title>a</title>
<link>aaaa</link>
<guid isPermaLink="true">http://www.match.com/foot-europeen/espagne/27e-j-les-match-definitifs,138131</guid>
<description>
match progress
</description>
<pubDate>Mon, 10 Mar 2014 22:52:02 GMT</pubDate>
</item>
<item>
<category>x, match </category>
<title>a</title>
<link>aaaa</link>
<guid isPermaLink="true">http://www.match.com/foot-europeen/espagne/27e-j-les-match-definitifs,138131</guid>
<description>
match progress
</description>
<pubDate>Mon, 10 Mar 2014 22:52:02 GMT</pubDate>
</item>
<item>
<category>x, match </category>
<title>a</title>
<link>aaaa</link>
<guid isPermaLink="true">http://www.match.com/foot-europeen/espagne/27e-j-les-match-definitifs,138131</guid>
<description>
match progress
</description>
<pubDate>Mon, 10 Mar 2014 22:52:02 GMT</pubDate>
</item>
<item>
<category>x, match </category>
<title>a</title>
<link>aaaa</link>
<guid isPermaLink="true">http://www.match.com/foot-europeen/espagne/27e-j-les-match-definitifs,138131</guid>
<description>
match progress
</description>
<pubDate>Mon, 10 Mar 2014 22:52:02 GMT</pubDate>
</item>
</channel>
</rss>
当我运行代码时,我的结果是标题的标签打印
直接匹配, 一个, 一个, 一个。
预期结果应为4“a”元素。
简而言之,打印通道标签中的标题,然后忽略第一个“item”标签,最后打印第二个标签项后的所有标签项。
我怎样才能打印所有标签项目。抱歉我的英语不好。
答案 0 :(得分:1)
维护boolean
以识别item
内的当前元素,如下所示......
class RSSHandler extends DefaultHandler {
private Post currentPost = new Post();
StringBuffer chars = new StringBuffer();
boolean isItem = false;
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) {
chars = new StringBuffer();
if (localName.equalsIgnoreCase("item")) {
isItem = true;
}
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (localName.equalsIgnoreCase("title") && currentPost.getTitle() == null && isItem == true) {
currentPost.setTitle(chars.toString());
System.out.println("title1: " + currentPost.getTitle());
}
if (localName.equalsIgnoreCase("category") && currentPost.getCategory() == null) {
currentPost.setCategory(chars.toString());
System.out.println("category: " + currentPost.getCategory());
}
if (localName.equalsIgnoreCase("description") && currentPost.getDescription() == null) {
currentPost.setDescription(chars.toString());
}
if (localName.equalsIgnoreCase("link") && currentPost.getLink() == null) {
currentPost.setLink(chars.toString());
System.out.println("link: " + currentPost.getLink());
}
if (localName.equalsIgnoreCase("pubDate") && currentPost.getPubDate() == null) {
currentPost.setPubDate(chars.toString());
String x = currentPost.getPubDate();
String last = x.substring(0, x.length() - 4);
int start = last.length() - 8;
int end = last.length() - 3;
String result = x.substring(start, end);
result = Stringreplace(result);
}
if (localName.equalsIgnoreCase("item")) {
PostListNormal.add(currentPost);
currentPost = new Post();
isItem = false;
}
}
@Override
public void characters(char ch[], int start, int length) {
chars.append(new String(ch, start, length));
}
}