XMLBeam是一个很好的XML to POJO unmarshaler(通过XPath),但它只允许你配置DocumentBuilder或DocumentBuilderFactory。
TagSoup是一个很好的SAX解析器,它可以把不规范的HTML文档当作XML来解析。
我想使用TagSoup作为XMLBeam的XML解析器,这样就可以通过XPath把不规范的HTML解组(unmarshal)为POJO。
有没有办法转换或包装SAX解析器,以便我可以将它用作DocumentBuilder或DocumentBuilderFactory?
答案 0 :(得分:6)
您可以把SAX解析器包装在一个DocumentBuilder中。XMLBeam只会调用DocumentBuilder的parse(InputSource)方法,因此实现起来非常简单:
import org.ccil.cowan.tagsoup.Parser;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.xml.sax.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import java.io.IOException;
/**
 * A {@link DocumentBuilder} that parses its input with the TagSoup SAX
 * {@link Parser} and hands the result back as a DOM {@link Document}.
 *
 * <p>Only {@link #parse(InputSource)} is functional. The remaining abstract
 * methods are minimal stubs: the setters ignore their argument, and
 * {@link #newDocument()} and {@link #getDOMImplementation()} return
 * {@code null}.
 */
public class MyDocumentBuilder extends DocumentBuilder {

    /**
     * Parses the given source with TagSoup and converts the SAX event stream
     * into a DOM tree by running it through a default {@link Transformer}
     * into a {@link DOMResult}.
     *
     * @param inputSource the content to parse; must not be {@code null}
     * @return the root node of the transformation result, cast to {@link Document}
     * @throws IllegalArgumentException if {@code inputSource} is {@code null}
     * @throws SAXException if the TagSoup reader rejects one of the feature settings
     * @throws IOException declared by the overridden method
     * @throws RuntimeException if the transformation fails; the underlying
     *         exception is attached as the cause
     */
    @Override
    public Document parse(InputSource inputSource) throws SAXException, IOException {
        // DocumentBuilder.parse(InputSource) is specified to reject a null source.
        if (inputSource == null) {
            throw new IllegalArgumentException("InputSource cannot be null");
        }

        XMLReader xmlReader = new Parser();
        // Namespace reporting is switched off on the reader, matching
        // isNamespaceAware() below, which reports false.
        xmlReader.setFeature(Parser.namespacesFeature, false);
        xmlReader.setFeature(Parser.namespacePrefixesFeature, false);

        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            DOMResult domResult = new DOMResult();
            transformer.transform(new SAXSource(xmlReader, inputSource), domResult);
            return (Document) domResult.getNode();
        } catch (Exception exp) {
            // Keep the original failure as the cause so the real error and its
            // stack trace are not lost.
            throw new RuntimeException("Error parsing with Tagsoup", exp);
        }
    }

    /** No-op: the supplied handler is ignored. */
    @Override
    public void setErrorHandler(ErrorHandler errorHandler) {
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     */
    @Override
    public Document newDocument() {
        return null;
    }

    /** No-op: the supplied resolver is ignored. */
    @Override
    public void setEntityResolver(EntityResolver entityResolver) {
    }

    /** @return always {@code false} */
    @Override
    public boolean isValidating() {
        return false;
    }

    /**
     * Not implemented.
     *
     * @return always {@code null}
     */
    @Override
    public DOMImplementation getDOMImplementation() {
        return null;
    }

    /** @return always {@code false} */
    @Override
    public boolean isNamespaceAware() {
        return false;
    }
}
然后,在其他地方,您可以告诉XMLBeam使用您的DocumentBuilder:
// Anonymous subclass of DefaultXMLFactoriesConfig that overrides only
// createDocumentBuilder(), so that a MyDocumentBuilder is returned.
XMLFactoriesConfig xmlFactoriesConfig = new DefaultXMLFactoriesConfig(){
@Override
public DocumentBuilder createDocumentBuilder() {
// A fresh builder instance is created on every call.
return new MyDocumentBuilder();
}
};
// Pass the customised configuration to the projector's constructor.
XBProjector xbProjector = new XBProjector(xmlFactoriesConfig);