我编写了下面的程序,但是当它进入XPath类时它会给出 [致命错误]:1:1:prolog中不允许内容。我试图解决它但不能make it.is有解决这个问题的线索吗?
package xpath;
import com.sun.org.apache.xalan.internal.xsltc.trax.SAX2DOM;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.namespace.QName;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathVariableResolver;
import org.ccil.cowan.tagsoup.Parser;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
public class XPath {
private static int seg;
private static void check(Node node) throws XPathExpressionException {
if (node == null || node.getNodeName() == null)
return;
TFIDF( node.getNodeValue(),"java");
check(node.getFirstChild());
if(node.getFirstChild()==null &&node.getNextSibling()==null)
seg++;
System.out.println(node.getNodeValue() != null && node.getNodeValue().trim().length() == 0 ? "" : node);
check(node.getNextSibling());
}
public static void main(String[] args) throws MalformedURLException, SAXNotRecognizedException, SAXNotSupportedException, ParserConfigurationException, IOException, SAXException, XPathExpressionException {
Parser p = new Parser();
SAX2DOM sax2dom = null;
org.w3c.dom.Node doc = null;
URL url = new URL("http://stackoverflow.com/questions");
p.setFeature(Parser.namespacesFeature, false);
p.setFeature(Parser.namespacePrefixesFeature, false);
sax2dom = new SAX2DOM();
p.setContentHandler(sax2dom);
p.parse(new InputSource(new InputStreamReader(url.openStream())));
doc = sax2dom.getDOM();
Node html=doc.getFirstChild();
check(html);
}
private static void TFIDF(String segment, String keyword) throws XPathExpressionException {
if (segment!=null)
{
InputSource src = new InputSource(new StringReader(segment));
final String term = keyword;
String expression = "//*[contains(text(),$term)]";
final QName termVariableName = new QName("term");
class TermResolver implements XPathVariableResolver {
@Override
public Object resolveVariable(QName variableName) {
return termVariableName.equals(variableName) ? term : null;
}
}
javax.xml.xpath.XPath xpath = XPathFactory.newInstance().newXPath();
xpath.setXPathVariableResolver(new TermResolver());
Node node = (Node) xpath.evaluate(expression, src, XPathConstants.NODE);
}
}
}
答案 0 :(得分:1)
“prolog中不允许使用内容”错误通常意味着您在文档中的第一个XML元素之前有一些内容,通常是空格。由于你抓住http://stackoverflow.com/questions
,我猜它是导致问题的<!doctype>
之后的换行符。根据XML规范,在doctype之前和之后的prolog中都应该允许空格,但很多工具都无法正确处理。
尝试手动删除空白,看看是否有帮助。如果没有,请尝试完全删除doctype声明。