我需要获取子节点的父节点。我需要解析的HTML是biology
例如,对于上面的HTML,我需要输出诸如
之类的文本。因为我使用Java作为开发语言,所以打算使用jsoup。现在我通过统计这些节点之前的圆点(•)个数来解决这个问题,但不幸的是,有些圆点并不遵循规则。所以有人能帮我解析这个HTML吗?谢谢!
目前我已经写了一部分代码,但代码效果不好:
// Fetches the page at Constants.URL, selects every "span.hap" element and, for each one
// whose text contains "N1" or "N2", walks backwards through the preceding elements
// counting '•' characters. The element's text is remembered under that count, and the
// entry stored under (count - 1) is written out as its parent node.
try {
    String url = Constants.URL;
    System.out.println(url);
    // userAgent(...) already returns a Connection, so no cast is required.
    Connection connection = Jsoup.connect(url).userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36");
    Document document = connection.get();
    Elements lis = document.select("span.hap");
    // Bullet count -> text of the most recent node recorded with that count.
    HashMap<Integer, String> map = new HashMap<>();
    // Seed entry: a node with a bullet count of 1 resolves to "N" as its parent.
    map.put(0, "N");

    for (Element element : lis) {
        String label = element.text();
        if (!label.contains("N1") && !label.contains("N2")) {
            continue;
        }

        Element parent = element.parent();
        Element previous = element.previousElementSibling();
        if (previous == null && parent != null) {
            // No earlier sibling: continue from the last child of the parent's previous
            // sibling. Either step may yield null (no sibling / no children), in which
            // case there is nothing to scan and the count stays 0.
            // NOTE(review): `parent` is deliberately left unchanged here, as before, so
            // the scan below may visit that sibling's children a second time - confirm
            // whether `parent` should advance together with `previous`.
            Element parentPrevious = parent.previousElementSibling();
            previous = (parentPrevious == null) ? null : parentPrevious.children().last();
        }

        int count = 0;
        // Scan backwards until a <br> is reached or there is nothing left to visit.
        while (previous != null && !previous.tagName().equals("br")) {
            for (char point : previous.text().toCharArray()) {
                if (point == '•') {
                    count++;
                }
            }

            previous = previous.previousElementSibling();
            if (previous == null) {
                // Ran out of siblings: step up to the parent's previous sibling and
                // resume from its last child (null when it has no element children).
                Element parentPrevious = (parent == null) ? null : parent.previousElementSibling();
                if (parentPrevious == null) {
                    break;
                }
                previous = parentPrevious.children().last();
                parent = parentPrevious;
            }

            // Stop when the container we moved into is itself a <br> or is empty.
            if (previous == null || parent.tagName().equals("br")) {
                break;
            }

            // Stop before counting an element that has a <br> among its direct children.
            boolean containsLineBreak = false;
            for (Element child : previous.children()) {
                if (child.tagName().equals("br")) {
                    containsLineBreak = true;
                    break;
                }
            }
            if (containsLineBreak) {
                break;
            }
        }

        map.put(count, label);
        // map.get(count - 1) is null when no node with that count has been seen yet.
        JsoupUtil.getInstance().writeFile(label + " Parent Node: " + map.get(count - 1));
    }
} catch (Exception e) {
    e.printStackTrace();
}
}