我需要在XML文档中找到文本元素的确切XPath。我认为这样做的一种方法是将Document转换为字符串,在子字符串周围添加临时标记,将其转换回Document然后找到XPath。
这是我到目前为止所做的:
/**
 * Wraps the character range {@code [startIndex, endIndex)} of an XML string in a
 * temporary marker element, converts the result to a DOM document and returns the
 * path that {@code getXPath(Document, String)} reports for that marker.
 *
 * <p>The returned path still ends with the marker segment
 * ({@code /findXPathInXMLStringTemporaryTag}); no trimming step is applied here.
 *
 * @param startIndex index of the first character to wrap (inclusive)
 * @param endIndex   index just past the last character to wrap (exclusive)
 * @param string     the XML document as text
 * @return the path of the marker element, or {@code null} when the lookup returns {@code null}
 * @throws IOException                  declared for the string-to-document conversion
 * @throws ParserConfigurationException declared for the string-to-document conversion
 * @throws SAXException                 declared for the string-to-document conversion
 *                                      (presumably raised when the marked-up text is not
 *                                      well-formed XML - confirm against Conversion)
 */
public String findXPathInXMLString(int startIndex, int endIndex, String string) throws IOException, ParserConfigurationException, SAXException {
    final String markerTag = "findXPathInXMLStringTemporaryTag";
    Conversion conversion = new Conversion();

    // Step 1. Surround the selected range with the temporary marker element.
    StringBuilder stringBuilder = new StringBuilder(string);
    stringBuilder.replace(startIndex, endIndex,
            "<" + markerTag + ">" + string.substring(startIndex, endIndex) + "</" + markerTag + ">");

    // Step 2. Convert the marked-up text to a DOM document and look up the marker's path.
    // The helper's result already starts with "/", so nothing is prepended: an extra
    // slash would produce "//root/...", which XPath reads as a descendant search
    // rather than an absolute location path.
    return getXPath(conversion.stringToDocument(stringBuilder.toString()), markerTag);
}
/**
 * Finds the first element named {@code elementName} anywhere in the document and
 * returns its slash-separated element path.
 *
 * @param root        document to search
 * @param elementName tag name to look for; it is appended to {@code "//"} to form the query
 * @return the path of the first match, or {@code null} when nothing matches or the
 *         query cannot be compiled or evaluated
 */
public String getXPath(Document root, String elementName) {
    final String query = "//" + elementName;
    Node match;
    try {
        XPathExpression compiled = XPathFactory.newInstance().newXPath().compile(query);
        match = (Node) compiled.evaluate(root, XPathConstants.NODE);
    } catch (XPathExpressionException ignored) {
        // A query that cannot be compiled or evaluated is reported the same way as "not found".
        return null;
    }
    if (match == null) {
        return null;
    }
    return getXPath(match);
}
/**
 * Builds a slash-separated path of element names from the outermost element
 * ancestor down to {@code node}, e.g. {@code /root/section/title}.
 *
 * <p>No positional index is added, so same-named siblings share one path.
 *
 * @param node starting node; may be {@code null}
 * @return the element path, or an empty string when {@code node} is {@code null}
 *         or is not an element
 */
public String getXPath(Node node) {
    StringBuilder path = new StringBuilder();
    // Walk upwards and prepend each step; stop at the first non-element ancestor
    // (for a parsed document that is the Document node itself).
    for (Node current = node;
            current != null && current.getNodeType() == Node.ELEMENT_NODE;
            current = current.getParentNode()) {
        path.insert(0, current.getNodeName()).insert(0, '/');
    }
    return path.toString();
}
我目前遇到的问题是:方法 getXPath 没有在路径的各级节点后加上位置索引 [x],所以返回的 XPath 是错误的。例如,子字符串可能位于某个特定标签的第 3 个实例中,而这种情况下返回的 XPath 会匹配具有相同路径的所有节点。我想得到一条只指向某一个特定元素的精确路径。
答案 0 :(得分:2)
好吧,试试这个怎么样(ideone 示例):
我将 startIndex 和 endIndex 合并成了一个参数 index,因为临时节点只需插入到文本中的某一个位置即可。
/**
 * Returns the path of the element that encloses character position {@code index}
 * of an XML string.
 *
 * <p>A temporary empty marker element is inserted at {@code index}, the text is
 * parsed, the marker's path is looked up and the marker segment is then removed
 * from that path.
 *
 * @param index  position in {@code string} at which the marker is inserted; it must
 *               be a position where an element is allowed, otherwise parsing fails
 * @param string the XML document as text
 * @return the path of the element containing {@code index}
 * @throws XPathExpressionException if the marker lookup fails
 * @throws SAXException             if the text with the marker is not well-formed XML
 * @throws ParserConfigurationException if no DOM parser can be created
 * @throws IOException              if reading the text fails
 * @throws IllegalStateException    if the marker cannot be found after parsing
 */
public static String findXPathInXMLString(int index, String string) throws XPathExpressionException, SAXException, ParserConfigurationException, IOException {
    final String markerTag = "findXPathInXMLStringTemporaryTag";

    // Step 1. Insert the temporary marker element at the requested position.
    String marked = new StringBuilder(string).insert(index, "<" + markerTag + " />").toString();

    // Step 2. Parse from a character stream. Going through getBytes() would encode with
    // the platform default charset, which can corrupt non-ASCII content or disagree
    // with the encoding named in the XML declaration.
    // NOTE(review): if the XML can come from an untrusted source, harden the factory
    // against external entities before parsing.
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(
            new org.xml.sax.InputSource(new java.io.StringReader(marked)));

    // Step 3. Look up the marker's path in the DOM document.
    String xpath = getXPath(document, markerTag);
    if (xpath == null) {
        throw new IllegalStateException("Temporary marker <" + markerTag + "> not found after parsing");
    }

    // Step 4. Drop the marker segment so the path names the enclosing element.
    return xpath.replace("/" + markerTag, "");
}
/**
 * Finds the first element named {@code elementName} anywhere in the document and
 * returns its element path.
 *
 * @param root        document to search
 * @param elementName tag name to look for; it is appended to {@code "//"} to form the query
 * @return the path of the first match, or {@code null} when nothing matches
 * @throws XPathExpressionException if the query cannot be compiled or evaluated
 */
private static String getXPath(Document root, String elementName) throws XPathExpressionException {
    final String query = "//" + elementName;
    XPathExpression compiled = XPathFactory.newInstance().newXPath().compile(query);
    Node match = (Node) compiled.evaluate(root, XPathConstants.NODE);
    if (match == null) {
        return null;
    }
    return getXPath(match);
}
/**
 * Builds the element path for {@code node}: one {@code /name} step per element
 * ancestor, each followed by whatever suffix {@code getIndex} returns for it.
 *
 * @param node starting node; may be {@code null}
 * @return the path, or an empty string when {@code node} is {@code null} or is not an element
 * @throws XPathExpressionException propagated from {@code getIndex}
 */
private static String getXPath(Node node) throws XPathExpressionException {
    boolean isElement = node != null && node.getNodeType() == Node.ELEMENT_NODE;
    if (!isElement) {
        // Reached the top of the element chain (or was given a non-element node).
        return "";
    }
    String parentPath = getXPath(node.getParentNode());
    String step = node.getNodeName() + getIndex(node);
    return parentPath + "/" + step;
}
/**
 * Returns the positional predicate (for example {@code "[2]"}) that tells
 * {@code node} apart from sibling elements with the same node name.
 *
 * <p>The position is 1-based and counts only preceding sibling elements whose node
 * name equals this node's. An empty string is returned only when no sibling shares
 * the name, so the first of several same-named siblings yields {@code "[1]"}
 * rather than an ambiguous, unindexed step.
 *
 * <p>Siblings are compared by {@code getNodeName()} on both sides, so prefixed
 * names such as {@code p:item} are matched consistently.
 *
 * @param node element whose position is wanted; must not be {@code null}
 * @return {@code "[n]"} when a same-named sibling exists, otherwise {@code ""}
 * @throws XPathExpressionException never thrown by this implementation; kept in the
 *                                  signature so existing callers compile unchanged
 */
private static String getIndex(Node node) throws XPathExpressionException {
    final String name = node.getNodeName();

    // Count same-named element siblings that come before this node.
    int preceding = 0;
    for (Node sibling = node.getPreviousSibling(); sibling != null; sibling = sibling.getPreviousSibling()) {
        if (sibling.getNodeType() == Node.ELEMENT_NODE && name.equals(sibling.getNodeName())) {
            preceding++;
        }
    }

    // A later same-named sibling also makes an unindexed step ambiguous.
    boolean hasFollowing = false;
    for (Node sibling = node.getNextSibling(); sibling != null && !hasFollowing; sibling = sibling.getNextSibling()) {
        hasFollowing = sibling.getNodeType() == Node.ELEMENT_NODE && name.equals(sibling.getNodeName());
    }

    if (preceding == 0 && !hasFollowing) {
        return "";
    }
    return "[" + (preceding + 1) + "]";
}