我有一堆XML文件,其中包含以下标记中帖子和图像的链接:
<url>
<loc>http://sample.com/sample-post</loc>
<lastmod>2015-12-27T16:42:07-01:00</lastmod>
<image:image>
<image:loc>http://sample.com/1234/5678.jpg</image:loc>
<image:title><![CDATA[tag1,tag2]]></image:title>
</image:image>
<image:image>
<image:loc>http://sample.com/1234/5678.jpg</image:loc>
<image:caption><![CDATA[tag1,tag2]]></image:caption>
</image:image>
</url>
我想提取图像的链接并删除重复项,但这些标记中没有href
属性,所以我无法像平常那样用jQuery按该属性选取并提取它们。有没有办法用这个标记做到这一点?任何帮助将不胜感激。
P.S:这个问题在互联网上有多个答案,但正如我所提到的,我的XML缺少href
属性。
答案 0 :(得分:0)
您可以参考我给出的链接,使用Java中的SAX解析器。
这是一个可以帮助您入门的基本骨架。它会从您的XML中提取两个URL。骨架还会打印出每个回调方法收到的内容,以便您了解各个方法会接收到什么。
package xmlparse;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Command-line entry point: parses one XML file with a SAX parser and hands
 * every parsing event to a {@link MySAXHandler}.
 */
public class xmlparse {
    /**
     * Parses the XML file and lets the handler report what it found.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to
     *             parse. When absent, {@code "images.xml"} in the working
     *             directory is used.
     * @throws ParserConfigurationException if a parser with the requested
     *                                      configuration cannot be created
     * @throws SAXException                 if the document is not well-formed
     *                                      or a requested feature is rejected
     * @throws IOException                  if the file cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException,
            SAXException, IOException {
        String path = (args != null && args.length > 0) ? args[0] : "images.xml";
        File inputFile = new File(path);

        SAXParserFactory factory = SAXParserFactory.newInstance();
        // The input may come from third parties; do not let it pull in
        // external entities (XXE) while it is being parsed.
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        SAXParser saxParser = factory.newSAXParser();
        saxParser.parse(inputFile, new MySAXHandler());
    }
}
/**
 * SAX handler that logs every event it receives and collects the text content
 * of each element whose qualified name is {@code image:loc}.
 *
 * <p>Collected values are stored in {@link #imageLocList} in document order.
 * Duplicates are kept; copy the list into a {@code LinkedHashSet} if unique
 * values are needed.
 */
class MySAXHandler extends DefaultHandler {
    /** Qualified name of the element whose text is collected. */
    private static final String IMAGE_LOC = "image:loc";

    /** Qualified name of the most recently opened element, or "" after an element closes. */
    String currentQName = "";

    /** Text of every {@code image:loc} element seen so far, trimmed, in document order. */
    List<String> imageLocList = new ArrayList<>();

    /** Accumulates the text of the {@code image:loc} element currently being read. */
    private final StringBuilder imageLocText = new StringBuilder();

    /**
     * Logs the start-tag details and remembers which element is now open.
     */
    @Override
    public void startElement(String uri, String localName,
            String qName, Attributes attributes) throws SAXException {
        System.out.println("startElement got: uri: " + uri);
        System.out.println("startElement got localName: " + localName);
        System.out.println("startElement got Name: " + qName);
        System.out.println("startElement got attributes: " + attributes);
        System.out.println();
        this.currentQName = qName;
        if (IMAGE_LOC.equals(qName)) {
            // Start from a clean buffer for each image:loc element.
            imageLocText.setLength(0);
        }
    }

    /**
     * Logs the received text and, while inside an {@code image:loc} element,
     * appends it to the buffer for that element.
     *
     * @param ch     buffer owned by the parser; only the given slice is valid
     * @param start  index of the first valid character in {@code ch}
     * @param length number of valid characters
     */
    @Override
    public void characters(char ch[], int start, int length) {
        // Copy only the slice handed to us, not the parser's whole buffer.
        String chunk = new String(ch, start, length);
        System.out.format(" Received characters (s=%d, length=%s): %s%n",
                start, length, chunk.trim());
        if (IMAGE_LOC.equals(currentQName)) {
            // A parser may split one text node across several calls, so the
            // value is only complete once the end tag arrives.
            imageLocText.append(chunk);
        }
    }

    /**
     * Logs the end-tag details and, when an {@code image:loc} element closes,
     * stores its accumulated text if it is not blank.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        System.out.println("endElement got: uri: " + uri);
        System.out.println("endElement got localName: " + localName);
        System.out.println("endElement got qName: " + qName);
        System.out.println();
        if (IMAGE_LOC.equals(qName)) {
            String url = imageLocText.toString().trim();
            if (!url.isEmpty()) {
                imageLocList.add(url);
            }
            imageLocText.setLength(0);
        }
        this.currentQName = "";
    }

    /**
     * Prints every collected URL once the whole document has been read.
     */
    @Override
    public void endDocument() {
        System.out.println("Document ended. Listing URLs:");
        imageLocList.forEach(System.out::println);
    }
}