我想使用 XPath 提取用单引号括起来的 HTML 属性的值。我使用 JTidy 来清理 HTML 文档，我的代码如下所示：
// Parses an HTML string into a DOM with JTidy, then evaluates an XPath
// expression against it and prints the number of matches followed by
// each matched node's value.
try {
// `string` is declared outside this fragment; its toString() form is the HTML to parse.
String data = string.toString();
// NOTE(review): getBytes() with no argument encodes with the platform default
// charset — confirm this agrees with the encoding JTidy is configured to read.
InputStream input = new ByteArrayInputStream(data.getBytes());
// Presumably the second argument is an optional output stream for the tidied
// markup and null means "do not write it" — TODO confirm against the JTidy API.
Document document = new Tidy().parseDOM(input, null);
XPathFactory factory = XPathFactory.newInstance();
XPath xPath = factory.newXPath();
// Selects the `color` attribute of every <a> whose class attribute is exactly
// "swatch-2011-link".
// NOTE(review): the sample markup shown with this code has a `colorName`
// attribute and no `color` attribute, so this expression would match nothing
// for that input — confirm the intended attribute name. JTidy may also alter
// or drop non-standard attributes while cleaning; verify on the parsed DOM.
XPathExpression expr = xPath.compile("//a[@class='swatch-2011-link']/@color");
// NODESET asks for all matches; the result is cast to NodeList below.
Object evaluate = expr.evaluate(document, XPathConstants.NODESET);
NodeList list = (NodeList) evaluate;
// Prints the match count first, then one attribute value per line.
System.out.println(list.getLength());
for (int i = 0; i < list.getLength(); i++) {
String name = list.item(i).getNodeValue();
System.out.println(name);
}
}
// Only XPath compile/evaluate failures are caught here; the stack trace is
// printed and execution continues after the block.
catch (XPathExpressionException e) {
e.printStackTrace();
}
<!-- Sample input markup: an anchor with class "swatch-2011-link" whose style,
     mainimageUrl and colorName attributes are written with single quotes.
     It carries no attribute named "color". -->
<a class="swatch-2011-link"
style='background:url(somelink); background-size:26px 26px; filter:progid:DXImageTransform.Microsoft.AlphaImageLoader(src=http://media.plussizetech.com/womanwithin/zs/0037_19561_zs_2835.jpg, sizingMethod=scale)' mainimageUrl='http://media.plussizetech.com/womanwithin/mc/0037_19561_mc_2835.jpg?wid=271&hei=388&qlt=95&op_sharpen=1' colorName='WILD LIME WHITE'/>