我正在尝试修改页面中资源文件和图片链接的引用,以便能在本地查看该页面,效果类似浏览器的“另存为”。我已经把相关文件保存到名为“w3schools_files”的文件夹中,但输出的文件仍然与“另存为”得到的结果不一致。该 html 文件是通过 Wireshark 捕获的。
public static void main(String[] args) throws IOException {
String sub,i,imgaddr,linkaddr,website,impaddr;
website="http://www.w3schools.com";
File input = new File("E:/w3schools.html");
Document doc = Jsoup.parse(input, "UTF-8");
Elements images;
images = doc.select("img");
//extract images and links and css files
for (Element image : images) {
i = image.attr("src");
sub = i.substring(i.lastIndexOf("/"));
imgaddr = "./" + "w3schools_files" + sub;
image.attr("src", imgaddr);
}
Elements links = doc.select("a");
for (Element link : links) {
i = link.attr("href");
if (i.startsWith("/")) {
linkaddr = website + i;
link.attr("href", linkaddr);
}
}
Elements imports = doc.select("link[href]");
for (Element imp : imports) {
String relat = imp.attr("rel");
if (relat.equals("stylesheet")) {
i = imp.attr("href");
sub = i.substring(i.lastIndexOf("/"));
impaddr = "./" + "w3schools_files" + sub;
imp.attr("src", impaddr);
imp.attr("src", impaddr);
}
i = imp.attr("href");
sub = i.substring(i.lastIndexOf("/"));
if (i.endsWith(".ico")) {
String fav = website + sub;
imp.attr("href", fav);
} else {
impaddr = "./" + "w3schools_files" + sub;
imp.attr("src", impaddr);
}//writing back in files
BufferedWriter htmlwriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"));
htmlwriter.write(doc.toString())
}
}