我有这个:
<a href="http://google.com">Google</a>
但我需要得到下面的结果。如您所见,我想在原网址前面加上我自己的域名:
<a href="http://HERE_IS_MY_DOMAIN?http://google.com">Google</a>
这是我在 Java 中用 Jsoup 抓取并解析页面、再把解析结果写入文件的代码:
// Fetch the page while presenting a desktop-browser User-Agent, and parse it into a DOM.
String sitePath = "http://google.com/";
Document doc = Jsoup.connect(sitePath)
        .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36")
        .get();
Elements elements = doc.select("body");

// Rewrite relative resource references to absolute URLs so the saved copy
// still resolves them when opened from disk.
// "abs:<attr>" yields "" when the attribute is missing or cannot be resolved;
// in that case the element is left untouched instead of being given an empty
// src/href attribute.
Elements imgElements = doc.select("img");
for (Element element : imgElements) {
    String absoluteSrc = element.attr("abs:src");
    if (!absoluteSrc.isEmpty()) {
        element.attr("src", absoluteSrc);
    }
}

Elements hrefElements = doc.select("a");
for (Element element : hrefElements) {
    String absoluteHref = element.attr("abs:href");
    if (!absoluteHref.isEmpty()) {
        element.attr("href", absoluteHref);
    }
}

Elements linkElements = doc.head().select("link");
for (Element element : linkElements) {
    String absoluteHref = element.attr("abs:href");
    if (!absoluteHref.isEmpty()) {
        element.attr("href", absoluteHref);
    }
}

// manipulateElements is defined outside this snippet; it receives the <body> selection.
manipulateElements(elements);

// Write the modified document to res.html. try-with-resources closes (and
// thereby flushes) the writer even if write() throws.
File fileDir = new File("res.html");
try (Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileDir), "UTF8"))) {
    out.write(doc.toString());
}
}
答案 0 :(得分:2)
// Prefix every resolvable URL with the proxy domain, producing
// "http://HERE_IS_MY_DOMAIN.com?<absolute-url>".
String proxyPrefix = "http://HERE_IS_MY_DOMAIN.com?";

// "abs:<attr>" yields "" when the attribute is missing or cannot be resolved.
// Such elements are skipped; otherwise they would end up pointing at the bare
// proxy URL with nothing after the '?'.
Elements imgElements = doc.select("img");
for (Element element : imgElements) {
    String absoluteSrc = element.attr("abs:src");
    if (!absoluteSrc.isEmpty()) {
        element.attr("src", proxyPrefix + absoluteSrc);
    }
}

Elements hrefElements = doc.select("a");
for (Element element : hrefElements) {
    String absoluteHref = element.attr("abs:href");
    if (!absoluteHref.isEmpty()) {
        element.attr("href", proxyPrefix + absoluteHref);
    }
}

Elements linkElements = doc.head().select("link");
for (Element element : linkElements) {
    String absoluteHref = element.attr("abs:href");
    if (!absoluteHref.isEmpty()) {
        element.attr("href", proxyPrefix + absoluteHref);
    }
}