如何在JavaFX中遍历DOM并找到img标签及其src属性?

时间:2017-12-27 09:44:47

标签: java dom javafx

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package imagesget;

import java.io.StringWriter;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.application.Application;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.concurrent.Worker;
import javafx.scene.layout.HBox;
import javafx.scene.layout.StackPane;
import javafx.scene.web.WebEngine;
import javafx.scene.web.WebView;
import javafx.stage.Stage;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;




/**
 * JavaFX application that loads a remote page in a {@link WebEngine}, prints
 * the loaded DOM to standard output as indented XML, and then looks up the
 * page's {@code img} elements.
 *
 * @author biznis
 */
public class ImagesGet extends Application {

    /**
     * Starts loading the page and registers a listener that runs once the
     * load worker reports {@code SUCCEEDED}.
     *
     * @param primaryStage the primary stage supplied by JavaFX; this method
     *                     never attaches a scene to it or shows it
     * @throws Exception declared by the overridden {@code Application.start}
     */
    @Override
    public void start(Stage primaryStage) throws Exception {


        StackPane root = new StackPane();
        // HBox with a spacing of 10; like root above, it is not referenced again in this method.
          HBox hbox = new HBox(10);
        // Leftover commented-out layout experiments (not compiled):
       // HBox vbox = new VBox(10);
        //Label label1 = new Label("");
        final WebView browser = new WebView();
        final WebEngine wb = browser.getEngine();

    // Leftover commented-out grid layout code (not compiled):
    //grid.add(new Label("Input Url: "), 0, 0);
   // grid.add(notification, 1, 0);
        wb.load("http://epaper.timesgroup.com/Olive/ODN/TheEconomicTimes/#");
        // React to state changes of the engine's load worker.
        wb.getLoadWorker().stateProperty().addListener(
            new ChangeListener<Worker.State>() {
                @Override
                public void changed(ObservableValue ov, Worker.State oldState, Worker.State newState) {
                    // Only act once the load has completed successfully.
                    if (newState == Worker.State.SUCCEEDED) {
                        Document doc =   wb.getDocument();
                        try {
                            // No-argument newTransformer() yields an identity transform,
                            // used here purely to serialize the DOM as text.
                            Transformer transformer = TransformerFactory.newInstance().newTransformer();
                            // "no" keeps the XML declaration in the output.
                            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
                            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                            // Implementation-specific (Xalan) property for the indent width.
                            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
                            StringWriter stringWriter = new StringWriter();
                            try {
                                transformer.transform(new DOMSource(doc),
                                        new StreamResult(stringWriter));
                            } catch (TransformerException ex) {
                                // A failed transform is only logged; execution continues with
                                // whatever was written to the buffer so far.
                                Logger.getLogger(ImagesGet.class.getName()).log(Level.SEVERE, null, ex);
                            }
                            String xml1 = stringWriter.getBuffer().toString();
                            System.out.println(xml1);

                            // Look up the document's img elements and print the NodeList
                            // object itself; the individual elements are not iterated here.
NodeList anchors = doc.getElementsByTagName("img");
System.out.println(anchors);

                        }catch (TransformerConfigurationException ex) {
                            // The transformer could not be created: log it and skip both printouts.
                            Logger.getLogger(ImagesGet.class.getName()).log(Level.SEVERE, null, ex);
                        } 
                    }
                }
            });
    }

    /**
     * Launches the JavaFX application.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        Application.launch(args);
    }
}

这是我尝试的代码,但是执行下面这两行时:

// Look up every img element of the loaded document and print the result.
NodeList anchors = doc.getElementsByTagName("img"); 
System.out.println(anchors);

我的输出是

  

com.sun.webkit.dom.NodeListImpl@614d6ab6

如何获取img标签?有没有人可以告诉我,如何从外部HTML中找到img标签,并把所有图像下载到本地?请告诉我相关的文档或方法,以便我实现这一点。

1 个答案:

答案 0 :(得分:0)

您正在将org.w3c.dom.NodeList类型的对象直接打印到System.out,因此实际被调用并打印的是NodeList.toString()的返回值;您看到的输出采用默认的"类名@哈希码"形式(com.sun.webkit.dom.NodeListImpl@614d6ab6),而不是列表中的节点。

我认为您希望使用节点本身。此代码应该为您提供正确的方法来处理列表中的节点:

// Walk the NodeList by index and work with each node individually;
// printing the list object itself only shows its toString() value.
NodeList anchors = doc.getElementsByTagName("img");
for (int index = 0; index < anchors.getLength(); index++) {
    Node node = anchors.item(index);
    System.out.println(node.getNodeName());
    // The image location is stored in the element's "src" attribute;
    // getNamedItem returns null when the attribute is absent.
    Node src = node.getAttributes().getNamedItem("src");
    if (src != null) {
        System.out.println(src.getNodeValue());
    }
}

请参阅完整的Javadoc,了解org.w3c.dom.Node还提供了哪些功能。