在递归线程池结构中使用jsoup时出现问题

时间:2018-10-16 10:59:36

标签: java multithreading recursion thread-safety threadpool

我遇到的实际问题是:我想从许多 URL 中收集数据,但下一个需要解析的 URL 只能通过解析当前 URL 才能得到。由于服务器响应较慢,如果请求不是并发发送的,整个过程会耗时太久,因此我选择使用 Jsoup,并配合递归的 FixedThreadPool 结构。

但是似乎同时运行的线程越多,程序出的错误就越多,而我找不到问题所在。

import java.io.IOException;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class MyCallable implements Callable
{
   private final Tree ak;
   private final ThreadPoolExecutor executor;
   private final Node root;
   private long id;

   public MyCallable(Node Root, Tree aka, ThreadPoolExecutor tpe)
   {
       this.ak = aka;
       this.executor = tpe;
       this.root = Root;
   }

   @Override
    public Object call() throws Exception, IOException {
        Document doc = Jsoup.connect(this.root.getLink()).get();
        Elements links = doc.select("a[href]");
        int i = 0;
        for (Element link : links) {
            String adresse = link.attr("abs:href");
            if(adresse.contains("/keyword/"){
                Node neu = this.ak.addifnotexists(link.text(),adresse, this.id ,root , false); 
                if (neu != null){
                    i++;
                    this.executor.submit(new MyCallable(neu,this.ak,this.executor));
                }
            }
        }
        throw new UnsupportedOperationException("Not supported yet.");
    }

}

编辑:@ akshaya pandey

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Vector;
/**
 * Shared registry of all {@link Node}s discovered so far.
 *
 * Both methods are {@code synchronized} on the tree instance, so the
 * "check for duplicate, then add" sequence in {@link #addifnotexists} runs
 * as one critical section.
 */
public class Tree
{
    /** Every node added so far, in insertion order. */
    Vector<Node> allNodes = new Vector<Node> (25000, 1000);

    /**
     * Creates and stores a node unless an equivalent one is already known.
     *
     * @param Kategoriename category name of the new node
     * @param Link          link of the new node
     * @param id            id passed through to the node
     * @param Root          parent node passed through to the node
     * @param first         when {@code true} the duplicate check is skipped
     *                      and the flag is forwarded to the {@code Node} constructor
     * @return the newly created node, or {@code null} if a duplicate exists
     * @throws InterruptedException declared for callers; not raised by the visible code
     */
    public synchronized Node addifnotexists(String Kategoriename, String Link, long id, Node Root, boolean first) throws InterruptedException{
        if (!first && doesexist(Kategoriename, Link)) {
            return null;
        }
        Node created = new Node(Kategoriename, Link, id, Root, new String[10][101], first);
        allNodes.add(created);
        System.out.println(allNodes.size());
        return created;
    }

    /**
     * Tells whether a stored node has the same category name and the same
     * link key (see {@link #linkKey(String)}) as the given candidate.
     *
     * @param kategorie category name to look for
     * @param lilink    candidate link
     * @return {@code true} if a matching node is already stored
     */
    private synchronized boolean doesexist (String kategorie, String lilink) {
        // The candidate's key does not depend on the node, so compute it once.
        String wanted = linkKey(lilink);
        for (Node node : allNodes) {
            if (node.getKategorie().equals(kategorie)
                    && linkKey(node.getLink()).equals(wanted)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts the part of {@code url} from its 5th {@code '/'} up to and
     * including its 6th {@code '/'}.
     *
     * All scan state is local to this call. The previous inline version kept
     * its slash counter and offsets across loop iterations, so every
     * comparison after the first category match used stale values.
     *
     * @param url link to inspect
     * @return the delimited segment; the remainder from the 5th slash if
     *         there is no 6th; the whole {@code url} if it has fewer than
     *         five slashes
     */
    private static String linkKey(String url) {
        int start = -1;
        int slashes = 0;
        for (int pos = 0; pos < url.length(); pos++) {
            if (url.charAt(pos) != '/') {
                continue;
            }
            slashes++;
            if (slashes == 5) {
                start = pos;
            } else if (slashes == 6) {
                return url.substring(start, pos + 1);
            }
        }
        return start < 0 ? url : url.substring(start);
    }

}


import java.util.ArrayList;
import java.io.*;
/**
 * One entry of the crawl tree: a category name, its link, a product table,
 * an id, a reference to the parent node and a list of child nodes.
 *
 * All accessors are {@code synchronized} on the node instance.
 */
public class Node implements Serializable
{
    private final Node root;
    private final String kategorie;
    private final String link;
    private final String[][] bestsellerliste;
    private final long ID;
    private ArrayList <Node> naechster = new ArrayList <Node>();

    /**
     * Creates a node and, unless {@code erster} is set, appends it to the
     * child list of {@code Root}.
     *
     * @param Kategoriename category name
     * @param Link          link of this node
     * @param id            id of this node
     * @param Root          parent node; dereferenced only when {@code erster} is {@code false}
     * @param Produktliste  product table stored as-is (not copied)
     * @param erster        {@code true} to skip registration with the parent
     */
    public Node(String Kategoriename, String Link, long id, Node Root , String[][] Produktliste, boolean erster)
    {
        this.root = Root;
        this.kategorie = Kategoriename;
        this.link = Link;
        this.bestsellerliste = Produktliste;
        this.ID = id;
        // Register with the parent only after every field is assigned, so
        // anything that reaches this node through the parent sees its real id.
        if (!erster){
            this.root.setNaechster(this);
        }
    }

    /** @return the parent node passed to the constructor */
    public synchronized Node getRoot()
    {
        return this.root;
    }

    /** @return the category name */
    public synchronized String getKategorie()
    {
        return this.kategorie;
    }

    /** @return the link of this node */
    public synchronized String getLink()
    {
        return this.link;
    }

    /** @return the product table given at construction (same array, not a copy) */
    public synchronized  String[][] getBestsellerliste()
    {
        return this.bestsellerliste;
    }

    /**
     * Appends a child to this node.
     *
     * @param next child to add
     */
    public synchronized void setNaechster(Node next){
        naechster.add(next);
    }

    /**
     * Removes a child from this node, if present.
     *
     * @param next child to remove
     */
    public synchronized void deleteNaechster(Node next){
        naechster.remove(next);
    }

    /**
     * @return the internal child list itself; the lock is released on
     *         return, so callers iterating it are not protected against
     *         concurrent {@link #setNaechster}/{@link #deleteNaechster} calls
     */
    public synchronized ArrayList <Node> getNaechster(){
        return this.naechster;
    }

    /** @return the id given at construction */
    public synchronized long getID(){
        return this.ID;
    }
}

0 个答案:

没有答案