我使用Java默认的DocumentBuilder来解析一个不到100行的XML文档。解析文档需要35毫秒,执行单个XPath表达式需要15毫秒。如何减少XML解析和XPath求值所需的时间?
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Reads an installer configuration file and collects, for every {@code file} element of a
 * chosen {@code configuration}, the attribute sets of its {@code property} descendants.
 */
public class XMLParser {
    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Keyed by {@code "<path>:<type>"} of a file element; values are the attribute maps of its properties. */
    private Map<String,List<NamedNodeMap>> fileVsProperties = new HashMap<String, List<NamedNodeMap>>();

    /** Parsed document; {@code null} when {@link XMLUtil#getDocument(File)} could not parse the file. */
    private Document document;

    /**
     * Parses the given file immediately.
     *
     * @param file the XML configuration file to load
     */
    public XMLParser(File file){
        this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of every {@code property} element found below the given
     * {@code file} element. Elements without a {@code path} attribute are logged and skipped.
     *
     * @param file a {@code file} element of the configuration document
     */
    public void setProperties(Element file){
        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>();
        String type = file.getAttribute("type");
        String path = file.getAttribute("path");
        // DOM returns "" (never null) for a missing attribute.
        if("".equals(path)){
            LOGGER.log(Level.INFO,"Attribute path is required for a file.");
            return;
        }
        path = path+":"+type;
        for(int i = 0;i<properties.getLength();i++){
            Element property = (Element) properties.item(i);
            props.add(property.getAttributes());
        }
        setProperties(props,path);
    }

    /**
     * Adds the given attribute maps to the entry for {@code path}, creating the entry when
     * it does not exist yet.
     */
    private void setProperties(List<NamedNodeMap> properties , String path){
        List<NamedNodeMap> previousValue = fileVsProperties.get(path);
        if(previousValue != null){
            previousValue.addAll(properties);
        }else{
            fileVsProperties.put(path,properties);
        }
    }

    /**
     * Looks up the {@code configuration} element whose {@code name} attribute equals the
     * given branch name.
     *
     * @param branchName the configuration name to search for
     * @return the matching element, or {@code null} when there is no match, when
     *         {@code branchName} is {@code null}, or when the document could not be parsed
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException{
        if(document == null || branchName == null){
            return null;
        }
        String expression = "/configurations/configuration[@name="+toXPathLiteral(branchName)+"]";
        return (Element)XMLUtil.getElements(expression,document.getDocumentElement(),XPathConstants.NODE);
    }

    /**
     * Renders a Java string as an XPath 1.0 string literal. XPath 1.0 has no escape
     * sequence for quotes, so a value containing both quote kinds is assembled with
     * {@code concat()}.
     */
    private static String toXPathLiteral(String value){
        if(value.indexOf('\'') < 0){
            return "'"+value+"'";
        }
        if(value.indexOf('"') < 0){
            return "\""+value+"\"";
        }
        StringBuilder literal = new StringBuilder("concat(");
        String[] parts = value.split("'", -1);
        for(int i = 0;i<parts.length;i++){
            if(i > 0){
                // Re-insert the apostrophe that split() removed, quoted with double quotes.
                literal.append(",\"'\",");
            }
            literal.append('\'').append(parts[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Demo entry point: parses {@code install.xml}, locates one configuration and prints
     * the collected properties together with coarse wall-clock timings.
     */
    public static void main(String[] args) throws XPathExpressionException {
        // The first measurement also covers the one-time static initialisation of XMLUtil
        // (factory lookup and builder creation), because that class is first used here.
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: "+ (end-start) + " milliseconds");
        start = end;
        Element configuration = parser.getConfiguration("BHARATHIKANNAN");
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: "+ (end-start) + " milliseconds");
        if(configuration == null){
            // Nothing to iterate over: the file was unreadable or has no such configuration.
            LOGGER.log(Level.WARNING,"No matching configuration found in " + doc.getPath());
            return;
        }
        start = end;
        NodeList files = parser.getFiles(configuration);
        for(int i=0;i<files.getLength();i++){
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: "+ (end-start) + " milliseconds");
    }

    /**
     * Returns every {@code file} element below the given configuration element.
     *
     * @param configuration a configuration element; must not be {@code null}
     * @return the descendant {@code file} elements in document order
     */
    public NodeList getFiles(Element configuration){
        return configuration.getElementsByTagName("file");
    }
}
/**
 * Static helpers that parse XML files into DOM documents and evaluate XPath expressions
 * using one shared {@link DocumentBuilder} and one shared {@link XPath} instance.
 *
 * NOTE(review): both shared instances are used without synchronisation; confirm that
 * callers are single-threaded before using this class from several threads.
 */
class XMLUtil{
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());
    private static final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private static final XPathFactory xpathFactory = XPathFactory.newInstance();
    /** {@code null} only when the platform could not create a builder; see {@link #createBuilder()}. */
    private static final DocumentBuilder builder = createBuilder();
    // Created independently of the builder so that an XML parser configuration problem
    // does not also disable XPath evaluation.
    private static final XPath xpath = xpathFactory.newXPath();

    /** Creates the shared builder, logging the cause instead of hiding it when creation fails. */
    private static DocumentBuilder createBuilder(){
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LOGGER.log(Level.SEVERE,"Unable to create a DocumentBuilder; XML documents cannot be parsed",e);
            return null;
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} when the file cannot be read, is not
     *         well-formed XML, or no parser is available (the reason is logged)
     */
    public static Document getDocument(File f){
        if (builder == null) {
            LOGGER.log(Level.SEVERE,"No DocumentBuilder available; cannot parse " + f);
            return null;
        }
        Document doc = null;
        try {
            doc = builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING,"Invalid XML Document ",e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE,"No Document Found in the given path",e);
        }
        return doc;
    }

    /**
     * Evaluates an XPath expression with the given element as context node.
     *
     * @param xpathExpression the XPath expression to evaluate
     * @param ele the context element
     * @param dataType the desired result type, one of the {@code XPathConstants} values
     * @return the evaluation result converted to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression , Element ele ,QName dataType) throws XPathExpressionException{
        return xpath.evaluate(xpathExpression, ele,dataType);
    }
}
XML文件
<?xml version="1.0"?>
<!--
Note: The default configuration is loaded using your current branch name. You can extend configurations using the extends attribute on a configuration
node.
-->
<configurations>
<configuration name="default">
<files>
<file type="xml" path="conf/server.xml.orig">
<property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property>
<property regex="(port=).*" replace="\18080"></property>
</file>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
<configuration name="BHARATHIKANNAN" extends="default">
<files>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
</configurations>
输出:
Time Taken For Parsing :: 24 milliseconds
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds
{conf/system_properties.conf:text=[com.sun.org.apache.xerces.internal.dom.AttributeMap@75d9fd51]}
Time Taken For Setting Properties :: 0 milliseconds
答案 0 :(得分:0)
最近有人问过一个非常类似的问题,只是文件更大(2MB),我在这里给出了一些Saxon的耗时数据:
https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614
这些耗时是在大得多的文档上测得的,却仍比你看到的要短得多。由于您已经在使用Java,因此切换到Saxon应该非常简单。
但有一点需要注意:您在进入main()后立即开始计时,这意味着您测量的主要是类加载时间,而不是XML处理时间。我的测量则是在开始计时之前先对Java虚拟机进行了预热。
请注意,如果您使用的是Saxon,那么使用Saxon的原生树模型而不是DOM或其他替代方案是最好的。我们最近在这里发布了一些测量值:
http://dev.saxonica.com/blog/mike/2012/09/index.html#000194
DOM平均比Saxon的原生树差8倍,在最坏的情况下差了23倍。