同步块和方法不按预期工作

时间:2014-02-12 04:30:12

标签: java multithreading thread-safety synchronized java.util.concurrent

状态:向方法添加静态,变量队列和使synchronized(crawler.class)解决问题。谢谢所有!!

http://pastie.org/8724549#41-42,46,49,100-101,188-189,191

突出显示的方法/块是synchronized

该块/方法应该be accessed by one method at a particular time.

它应该是这样的=第一个线程进入方法,更新大小,所有其他人看到该大小。更新了一个。更新应该只由第一个线程进行。不是其他人

  1. 为什么它甚至被运行。它由所有11个线程运行。
  2. 它正在运行而不等待先前的线程完成。 “queue loaded, new size ------------”其创建/添加元素
  3. package crawler;
    
    import crawler.Main;
    import static crawler.Main.basicDAO;
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.util.*;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentLinkedQueue;
    import java.util.concurrent.ConcurrentMap;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    
    /**
     *
     * @author syncsys
     */
    public class Crawler implements Runnable, InterfaceForConstants {
    public static final String patternString = "[_A-Za-z0-9-]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})";
    public ConcurrentLinkedQueue<Link> queue =  new ConcurrentLinkedQueue<Link>();
    private volatile String url;
    
    
    
    
    
        private void crawl(String url) {
    
    
            synchronized (Crawler.class){
                System.out.println("queue size "+queue.size());
                if(queue.size() < (totalSizeOfEmailBucket / 3)){
                    updateQueue();
                }
            System.out.println("This is inside of sync block.   -----------    queue size "+queue.size());
            }
            System.out.println("This is at the end of sync block.   -----------    queue size "+queue.size());
    
    
    
    
    
            BufferedReader bf = null;
            try {
                url = queue.poll().getLink();
                URL target = new URL(url);
                bf = new BufferedReader(
                        new InputStreamReader(target.openStream())
                     );
                StringBuilder html = new StringBuilder();
                String inputLine;
                while ((inputLine = bf.readLine()) != null) {
                    html.append(inputLine);
                }
                List emailList = new ArrayList( getEmailList(html.toString()) );
               // List linkList = new ArrayList( getLinkList(html.toString(), url) );
                System.out.println("Just worked on --------- "+ url);
                Main.processedLinksCount++;
                if(emailList.size()>0){
                    putEmailsInDB(emailList);
                }
    
                // putLinksInDB(linkList);
            } catch (IOException ex) {
                Logging.logError(ex.toString());
                basicDAO.deleteLink(url);
            } catch (Exception ex) {
                Logging.logError(ex.toString()); 
                basicDAO.deleteLink(url);
            }finally{
                if(bf !=null){
                    try {
                    bf.close();
                    } catch (IOException ex) {
                        Logging.logError(ex.toString());
                    }
    
                }
                crawl(null);
            }
        }
    
        public synchronized void  updateQueue() {
    
    
    
                Queue<Link> tempQueue =  new PriorityQueue<Link>();
                tempQueue = getNonProcessedLinkFromDB() ;
    
                queue.addAll(tempQueue);
                BasicDAO.markLinkAsProcesed(tempQueue);
                System.out.println("queue loaded, new  size ------------------------------------ "+queue.size());
    
        }
    
        private  List getLinkList(String html, String url) {
            Document doc = Jsoup.parse(html);
            Elements bodies = doc.select("body");
            List linkList =  new ArrayList();
            for(Element body : bodies ){
                Elements aTags = body.getElementsByTag("a");
                for (Element a: aTags){
                   String link =  a.attr("href");
                   if ( !(link.startsWith("#")) 
                         && 
                        !(link.contains("()"))
                         && 
                        !(link.endsWith(".jpg")) 
                         && 
                        !(link.endsWith(".jpeg"))  
                         && 
                        !(link.endsWith(".png"))  
                         && 
                        !(link.endsWith(".gif"))     ){
    
                        if( link.startsWith("/") ){
                            link = url+link;
                        }
                     linkList.add(link);
                     //put link in db
                   }    
                }
            }
    
            return linkList;
        }
    
        private  List getEmailList(String html) {
            Pattern p = Pattern.compile(patternString);
            Matcher m = p.matcher(html);
            List emailList = new ArrayList();
            while(m.find()){
                emailList.add(m.group());
                Main.nonUniqueEmailsCount++;
            }
    
            return emailList;    
        }
    
    
    
        private Queue<Link> getNonProcessedLinkFromDB() {
            return ( basicDAO.getNonProcessedLink() );
        }
    
        private  void putEmailsInDB(List emailList) {
            basicDAO.insertEmail(emailList);
        }
    
        private  void putLinksInDB(List linkList) {
           basicDAO.insertLinks(linkList);
        }
    
        @Override
        public void run() {
            if(url != null){
                crawl(url);
            }else{
     //          crawl();
            }
    
        }
        public Crawler(String url){
            this.url = url;
        }
    
        public Crawler(){
            this.url =  null;
        }
    }
    

    启动线程的方式:非乐观。我知道。没有使用执行程序服务或池,但以下是有效代码:

    for (int i = 0; i < 11; i++) {
    
    
                        try {
    
    
    
                                 Thread thread = new Thread(new Crawler("https://www.google.com.pk/?gws_rd=cr&ei=-q8vUqqNDIny4QTLlYCwAQ#q=pakistan"/*new BasicDAO().getNonProcessedLink()*/)); 
                                 System.out.println("resume with saved link true");
    
    
                            thread.start();
    
                            System.out.println("thread stared");
                            threadList.add(thread);
                            System.out.println("thread added to arraylist");
    
                        } catch (Exception ex) {
                            new Logging().logError(ex.toString());
                        }
    
                   }
    

    调试:

    for 11 threads , its says in logs:
    
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    queue size 0
    This is at the end of sync block.   -----------    queue size 1000
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    queue size 0
    queue loaded, new  size ------------------------------------ 1000
    This is inside of sync block.   -----------    queue size 1000
    This is at the end of sync block.   -----------    queue size 1000
    Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/Advice/Freezers/Top-Tens/Top-Five-Freezers/flavel
    queue size 999
    Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/l/fridges-width_less_than_50_cm/1-26/29-30//zanussi
    queue loaded, new  size ------------------------------------ 1999
    This is inside of sync block.   -----------    queue size 1999
    This is at the end of sync block.   -----------    queue size 1999
    queue size 999
    queue loaded, new  size ------------------------------------ 1999
    This is inside of sync block.   -----------    queue size 1999
    This is at the end of sync block.   -----------    queue size 1999
    Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/Advice/Refrigerators/Top-Tens/Top-Five-Fridges/l/small_appliances-bodum/1-6/55/
    queue size 999
    queue loaded, new  size ------------------------------------ 1999
    This is inside of sync block.   -----------    queue size 1999
    This is at the end of sync block.   -----------    queue size 1999
    8692 characters / 254 lines
    Advertising from Carbon:
    Advertisement Braintree: 2.9% and 30¢ per transaction. No minimums, no monthly fees.
    

2 个答案:

答案 0 :(得分:2)

由于以下行,代码的行为不会如预期:         synchronized(Crawler.class) 你已经锁定了Crawler的'Class'对象,每个类加载器就有一个,但是你的同步方法是非静态的。用静态同步方法替换非静态同步方法,这应该可以解决问题。

答案 1 :(得分:1)

假设这个

  

第一个线程进入方法,更新大小,所有其他线程看到该大小。

是你的期望,这是根本原因。

您的queue字段是实例字段

public ConcurrentLinkedQueue<Link> queue =  new ConcurrentLinkedQueue<Link>();

你有11个实例

Thread thread = new Thread(new Crawler("https://www.google.com.pk/?gws_rd=cr&ei=-q8vUqqNDIny4QTLlYCwAQ#q=pakistan"/*new BasicDAO().getNonProcessedLink()*/)); 

所以每个Crawler都有自己的队列,因此每个线程都会进入这个块

if(queue.size() < (totalSizeOfEmailBucket / 3)){
    updateQueue();
}

因为每个线程都在更新不同的ConcurrentLinkedQueue对象。

  

它正在运行而不等待先前的线程完成。

事实并非如此。所有线程都将在

处阻止
synchronized (Crawler.class){

但除此之外的任何代码都是合理的游戏。