解析时遇到以下错误:[致命错误] customer_connect_posts.xml:39:209: 实体 "nbsp" 已被引用,但未声明。
这是xml文件:
<ROW>
<Id>19281</Id>
<Language_Id>0</Language_Id>
<content><p>I wanted to share an update on the <a href="http://communities.rightnow.com/resources/255b24b92b" target="_blank">Idea Lab</a>. I've actually <em>showing </em></p></content>
</ROW>
这是用于解析的代码。该代码对其他 XML 文件有效,但对上述文件无效。在下面的代码中,我只是尝试获取 "name" 和 "content" 元素的文本节点。
public static void main(String[] args) {
BlogParsing blogParsing = new BlogParsing();
String filePath = "D:\\customer_connect_posts.xml";
File xmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
try {
dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(xmlFile);
doc.getDocumentElement().normalize();
System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
NodeList nodeList = doc.getElementsByTagName("ROW");
//now XML is loaded as Document in memory, lets convert it to Object List
List<BlogPost> entry = new ArrayList<BlogPost>();
// int count = 0;
for (int i = 0; i < nodeList.getLength(); i++) {
entry.add(getEmployee(nodeList.item(i)));
}
//lets print Employee list information
for (BlogPost emp : entry) {
System.out.println(emp.getContent());
System.out.println(emp.toString());
}
} catch (SAXException | ParserConfigurationException | IOException e1) {
e1.printStackTrace();
}
}
private static BlogPost getEmployee(Node node) {
//XMLReaderDOM domReader = new XMLReaderDOM();
BlogPost emp = new BlogPost();
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element element = (Element) node;
emp.setTitle(getTagValue("name", element));
emp.setContent(getTagValue("content", element));
}
String test = emp.getContent();
List<String> listOfImg = (List<String>) BlogParsing.checkPostWithoutImage(emp.getContent());
if(!listOfImg.isEmpty()){
//writeToExcel(emp.getTitle(),listOfImg);
}
return emp;
}
private static String getTagValue(String tag, Element element) {
NodeList nodeList = element.getElementsByTagName(tag).item(0).getChildNodes();
Node node = (Node) nodeList.item(0);
return node.getNodeValue();
}