/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package imagesget;
import java.io.StringWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.application.Application;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.concurrent.Worker;
import javafx.scene.layout.HBox;
import javafx.scene.layout.StackPane;
import javafx.scene.web.WebEngine;
import javafx.scene.web.WebView;
import javafx.stage.Stage;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
*
* @author biznis
*/
/**
*
* @author biznis
*/
public class ImagesGet extends Application {
/**
* @param args the command line arguments
*/
@Override
public void start(Stage primaryStage) throws Exception {
StackPane root = new StackPane();
// create a HBox to hold 2 vboxes
HBox hbox = new HBox(10);
// create a vbox with a textarea that grows vertically
// HBox vbox = new VBox(10);
//Label label1 = new Label("");
final WebView browser = new WebView();
final WebEngine wb = browser.getEngine();
//grid.add(new Label("Input Url: "), 0, 0);
// grid.add(notification, 1, 0);
wb.load("http://epaper.timesgroup.com/Olive/ODN/TheEconomicTimes/#");
wb.getLoadWorker().stateProperty().addListener(
new ChangeListener<Worker.State>() {
@Override
public void changed(ObservableValue ov, Worker.State oldState, Worker.State newState) {
if (newState == Worker.State.SUCCEEDED) {
Document doc = wb.getDocument();
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
StringWriter stringWriter = new StringWriter();
try {
transformer.transform(new DOMSource(doc),
new StreamResult(stringWriter));
} catch (TransformerException ex) {
Logger.getLogger(ImagesGet.class.getName()).log(Level.SEVERE, null, ex);
}
String xml1 = stringWriter.getBuffer().toString();
System.out.println(xml1);
NodeList anchors = doc.getElementsByTagName("img");
System.out.println(anchors);
}catch (TransformerConfigurationException ex) {
Logger.getLogger(ImagesGet.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
});
}
public static void main(String[] args) {
Application.launch(args);
}
}
这是我尝试但是由此
NodeList anchors = doc.getElementsByTagName("img");
System.out.println(anchors);
我的输出是
com.sun.webkit.dom.NodeListImpl@614d6ab6
如何获取img标签 所以任何人都可以告诉我如何从外部HTML中找到img标签并在本地下载所有图像,所以告诉我任何文档或任何方法,以便我可以实现这一点。
答案 0 :(得分:0)
您正在将org.w3c.dom.NodeList
类型的对象打印到System.out
。这就是调用和打印NodeList.toString()
的原因(恰好是对象的名称)。
我认为您希望使用节点本身。此代码应该为您提供正确的方法来处理列表中的节点:
NodeList anchors = doc.getElementsByTagName("img");
for (int index=0; index<anchors.getLength(); index++) {
Node node = anchors.item(index);
System.out.println(node.getNodeName());
// ....
}
请参阅full Javadoc,了解使用org.w3c.dom.Node
可以做些什么。