为什么以下执行程序服务java Thread程序没有关闭?

时间:2014-08-13 18:09:09

标签: java multithreading parallel-processing jsoup threadpool

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.config.SocketConfig;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class ThreadScrapResults {

    private static final int MYTHREADS = 10000000;
     HttpClient client = HttpClientBuilder.create().build();
     static Hashtable<String, String> subCodeSubName = null;
     static Hashtable<String, String> collCodeCollName = null;

    public static void main(String[] args) throws IOException, InterruptedException{
         BlockingQueue<Runnable> blockingQueue =   
                    new LinkedBlockingQueue<Runnable>(105);
    //ExecutorService executor = Executors.newFixedThreadPool(MYTHREADS);

//       RejectedExecutionHandler block = new RejectedExecutionHandler() {
//            void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
//               executor.getQueue().put( r );
//            }
//          };   

    ThreadPoolExecutor executor = new ThreadPoolExecutor(Integer.MAX_VALUE, Integer.MAX_VALUE, 20, TimeUnit.MILLISECONDS, blockingQueue);



        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        cm.setDefaultSocketConfig( 
                SocketConfig.custom().setSoKeepAlive(true)
                  .setSoReuseAddress(true)
                  .setSoTimeout(3000)
                  .build());


        Runnable worker = null;
        //Generating some register Numbers
        for(int year = 11; year <= 13; year++){
            for(int i = 1; i <= 350; i++){
                //generating 1050 URLs at one shot
                StringBuffer regNo = new StringBuffer("1111").append(year).append("111").append(String.format("%03d", i));


                String url = "magicUrl" + regNo;
                System.out.println(url);
                worker = new MyRunnable(url, regNo.toString());
                 executor.execute(worker); 
                }    
            }
            executor.shutdown();

            //I want to execute all those 1050 Urls at one shot and parse the result //web pages. But this actually gives me only one result.



        }






    }

    class MyRunnable implements Runnable{

        private final String url;
        private final String registerNumber;

        public MyRunnable(String url, String registerNumber) {
            // TODO Auto-generated constructor stub

            this.url = url;
            this.registerNumber = registerNumber;
        }


        public void run(){

            HttpClient client = HttpClientBuilder.create().build();
            HttpGet get  = new HttpGet(url); 
            boolean insertOrNot = true;
            HttpResponse response = null;
            try {
                response = client.execute(get);
            } catch (ClientProtocolException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }


            BufferedReader rd = null;
            try {
                rd = new BufferedReader(
                        new InputStreamReader(response.getEntity().getContent()));
            } catch (IllegalStateException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            //I get the result of each url here.
            StringBuffer result = new StringBuffer();
            String line = "";
            try {
                while ((line = rd.readLine()) != null) {
                    result.append(line);
                }
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            Document resultWebPage = Jsoup.parse(result.toString());

            Elements resultForm = resultWebPage.getElementsByTag("strong");     
            Elements error = resultWebPage.getElementsByTag("b");

            if(error.size() == 4){

                String inValidRegNo = error.get(3).html();



                if(inValidRegNo.startsWith("Sorry")){

                    //log here
                    insertOrNot = false;
                }

            }   
            System.out.println(resultForm);

            Iterator<Element> itr = resultForm.iterator();

            int count = 1;
            boolean set = true;


             List<List<String>>  resultDBOject = new ArrayList<List<String>>();
             String regNum = null;
             String name = null;
             String deptName = null;  
             String collName = null;   //TODO : Get collName and deptName from enum. 

             String key = "Super";
             while(itr.hasNext()){
                    // System.out.println(itr.next().html());
                  key = itr.next().html();

                     try {                  

                         if(key.equals("<font color=\"#0000cc\" size=\"3\">Subject Code</font>") || key.equals("<font color=\"#0000cc\" size=\"3\">Grade</font>")
                                 || key.equals("<font color=\"#0000cc\" size=\"3\">Result</font>")){
                             continue;
                         }
                         else if(key.isEmpty()){

                        //   System.out.println("N/A");


                         }else if(!key.isEmpty()){              
                                 if(set){ 
                                     if(count == 1){
                                         regNum = key;
                                    //   System.out.println(regNum);
                                         count++;
                                     }
                                     if(count == 2){
                                         name = itr.next().html();          
                                    //   System.out.println(name);
                                         count++;
                                     }
                                     if(count == 3){
                                         deptName = itr.next().html();
                                    //   System.out.println(deptName);
                                     }
                                } 
                                if(count == 4 || count == 1){
                                     count = 0;
                                     set = false;       
                                    // String temp = itr.next().html();
                                //   Result results = new Result();
                                    // System.out.println(temp);


                                     List<String> resultOfAStudent = new ArrayList<String>();
                                        resultOfAStudent.add( key);
                                        resultOfAStudent.add( itr.next().html());
                                        resultOfAStudent.add(itr.next().html());
                                    //    resultOfAStudent.add(results.getSubjName());
                                        resultDBOject.add(resultOfAStudent);
                                 }  
                         }
                         count++;
                        // System.out.println(count);


                     } finally{

                     }


             }  //end of while   

            //insert it in db
            if(insertOrNot){ 



             System.out.println("Successfully inserted" + registerNumber);
            }
        }
    }       

以下是我要做的事。

我生成1050个网址。 - 在main方法中两个for循环正常工作。 1)运行后我没有看到我的程序终止,但我得到了所有结果。 2)如何在执行500个Url并休眠10秒后让该程序进入休眠状态,然后继续处理下500个Url。

1 个答案:

答案 0 :(得分:2)

看看你的循环:

for(int year = 11; year <= 13; year++){
        for(int i = 1; i <= 350; i++){
            //generating 1050 URLs at one shot
            StringBuffer regNo = new StringBuffer("1111").append(year).append("111").append(String.format("%03d", i));


            String url = "magicUrl" + regNo;
            System.out.println(url);
            worker = new MyRunnable(url, regNo.toString());

        }    
}

您每次循环都会覆盖worker,所以当您到达executor.execute(worker);时,工作人员会保留您分配给它的最后一个值,这将是从中创建的可运行的值生成的最后一个网址。

尝试将worker = new MyRunnable(url, regNo.toString());行替换为executor.execute(new MyRunnable(url, regNo.toString()));,看看是否能解决问题。