请帮我解析一下这个HTML表格
转换成这样的文本表格:
我需要用Java来实现,所以选择了jsoup。现在我需要的东西都有了,但我无法以恰当的方式输出符号" - "。有人知道如何测量表格的宽度吗?或者也许还有别的办法。这是我第一次接触jsoup,我需要帮助。
我的代码:
public class SimpleParser {
/**
 * Parses the hard-coded sample HTML file with jsoup and prints every
 * {@code <table>} it contains as a text table on standard output.
 *
 * @throws IOException if jsoup cannot read the file
 */
public void parseTable() throws IOException {
    File file = new File("C:\\Idea\\IdeaProjects\\SimpleParser\\src\\main\\resources\\table.html");
    Document doc = Jsoup.parse(file, "UTF-8");
    Elements table = doc.select("table");
    // Start with an empty border prefix; a plain literal is enough because
    // reflection only concatenates onto it (new String("") was a redundant allocation).
    reflection(table, "");
}
/**
 * Prints the given tables to standard output as text, recursing into tables
 * nested inside cells.
 *
 * Each call appends one {@code *} to {@code star}, so the border marker grows
 * by one asterisk per recursion level. Cells of a row are separated by
 * {@code |} and each table is followed by a line of dashes wrapped in the
 * border marker.
 *
 * NOTE(review): the loop indices {@code i}, {@code j} and {@code k} are all
 * modified inside the innermost loop to skip over content handled by the
 * recursive call; the exact skip counts depend on what jsoup's
 * {@code select} returns for nested tables — confirm before changing.
 *
 * @param table the tables to print
 * @param star  border marker inherited from the caller; one {@code *} is appended here
 */
public void reflection(Elements table, String star) {
// One more asterisk per nesting level.
star += "*";
for (int i = 0; i < table.size(); i++) {
// NOTE(review): select("tr") presumably also matches rows of nested tables — verify against jsoup docs.
Elements trs = table.get(i).select("tr");
for (int j = 0; j < trs.size(); j++) {
// Rows that contain a nested table print no separator line (empty branch);
// every other row is preceded by a separator wrapped in the border marker.
if (tableChecker(trs.get(j).select("td"))) {
// FIXME: "!!!Should be something!!!" below is a placeholder, not valid Java; it must be
// replaced by the separator string (e.g. dashes matching the table width) before this compiles.
} else System.out.println(String.format(("%s%s%s"), star, !!!Should be something!!!, star));
Elements tds = trs.get(j).select("td");
// Set after a nested table was printed so the next plain cell re-opens the border.
boolean st = false;
for (int k = 0; k < tds.size(); k++) {
if (tds.get(k).select("table").size() != 0) {
// Advance the row index by the number of rows found in the nested table(s).
j += tds.get(k).select("table").select("tr").size();
// Print the nested table(s) with the current (already extended) border marker.
reflection(tds.get(k).select("table"), star);
// Advance the cell index by the number of cells found in the nested table(s).
k += tds.get(k).select("table").select("td").size();
if (k < tds.size() - 1) st = true;
// NOTE(review): also advances the outer table index — presumably to skip the nested
// table in the caller's list; confirm this matches the contents of `table`.
i++;
}
else {
// Opening border before the first cell, or before the first cell after a nested table.
if (k == 0 || st) System.out.print(star);
st = false;
System.out.print(String.format(" %s ", tds.get(k).text()));
// Closing border after the last cell, column separator otherwise.
if (k == tds.size() - 1) System.out.print(star);
else System.out.print("|");
}
}
// Terminate the current output row.
System.out.println();
}
// Fixed-length bottom separator printed after each table; it is not sized to the content.
System.out.println(String.format(("%s%s%s"), star, "-----------------------------", star));
}
}
/**
 * Reports whether any of the given cells has a {@code table} match.
 *
 * @param tds the cells to inspect
 * @return {@code true} as soon as one cell yields a non-empty result for
 *         {@code select("table")}; {@code false} if none does or the list is empty
 */
public boolean tableChecker(Elements tds) {
    int cellCount = tds.size();
    int index = 0;
    while (index < cellCount) {
        boolean holdsNestedTable = !tds.get(index).select("table").isEmpty();
        if (holdsNestedTable) {
            return true;
        }
        index++;
    }
    return false;
}
我的HTML:
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<title>Таблица размеров обуви</title>
</head>
<body>
<table border="1">
<tr>
<td>Привет</td>
<td>07.10.2016</td>
<tr>
<td colspan="2">
Объединение столбцов
</td>
</tr>
</tr>
<tr>
<td>
<table border="1">
<tr>
<td>01</td>
<td>qwerty</td>
<td>12345</td>
</tr>
<tr>
<td>02</td>
<td>asdf</td>
<td>789</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>