为什么我的帖子没有结束?

时间:2015-01-23 01:07:29

标签: java multithreading search-engine

我是Java的新手,特别是线程编程。这段代码主要出自一本非常古老的书(2001),其中包含搜索引擎的示例和示例。 但它只是不起作用 现在我不知道我是犯了错误,或者作者是否犯了错误,或者是否与不同版本的java不兼容......我真的不知道!关于它的最奇怪的事情是它在100次中有1次工作...... 经过几个小时的调试后,我将不胜感激!

SearchEngine.java:

import java.util.Vector;
import parsing.SourceElement;
import parsing.WebParserWrapper;
import query.Filter;

public class SearchEngine implements Runnable {

    private Vector linkHistory = new Vector();
    private int currentLink;
    private String beginAt = null;
    private SearchHandler searchHandler = null;
    private boolean searchInProgress = false;
    private boolean stopPending = false;
    boolean firstTime = true;

    public boolean searchInProgress() {
        return searchInProgress;
    }

    public boolean stopPending() {
        return stopPending;
    }

    @SuppressWarnings("unchecked")
    public void followLinks(String url) {

        if (stopPending)
            return;
        try {

            boolean drillDown = false;

            WebParserWrapper webParser = new WebParserWrapper();
            Vector sortedElements = webParser.getElements(url, "", "WITHGET");
            Vector contentElements = Filter.getFilteredElements(sortedElements, Filter.CONTENT, "matches", "*");

            for (int i = 0; i < contentElements.size(); i++) {
                SourceElement thisElement = (SourceElement) contentElements.elementAt(i);
                String thisKey = (String) thisElement.getKey();
                String thisContent = (String) thisElement.getContent();

                boolean goodHit = searchHandler.handleElement(url, thisKey, thisContent);

                if (goodHit) {
                    drillDown = true;
                }
            }
            System.out.println(url + " -- DrillDown " + ((drillDown) ? "positive" : "negative"));

            if (drillDown) {
                Vector linkElements = Filter.getFilteredElements(sortedElements, Filter.KEY, "matches",
                        "*a[*].@href[*]");

                for (int i = 0; i < linkElements.size(); i++) {
                    SourceElement thisElement = (SourceElement) linkElements.elementAt(i);
                    String thisContent = (String) thisElement.getContent();
                    if (!linkHistory.contains(thisContent)) {
                        linkHistory.add(thisContent);
                        System.out.println("Collected: " + thisContent);
                    }

                }

            }

        } 

        catch (Exception e) {}

        if (currentLink < linkHistory.size()) {
            String nextLink = (String) linkHistory.elementAt(currentLink++);
            if (nextLink != null) {
                followLinks(nextLink);
            }
        }

    }

    public boolean startSearch(String url, SearchHandler searchHandler) {
        if (searchInProgress)
            return false;

        beginAt = url;
        this.searchHandler = searchHandler;
        this.linkHistory = new Vector();
        this.currentLink = 0;

        Thread searchThread = new Thread(this);
        searchThread.start();
        return true;
    }

    public void stopSearch() {
        stopPending = true;
    }

    @Override
    public void run() {
        searchInProgress = true;
        followLinks(beginAt);
        searchInProgress = false;
        stopPending = false;
    }

}

SimpleSearcher.java

import java.util.Enumeration;
import java.util.Hashtable;

public class SimpleSearcher implements SearchHandler {
    private SearchEngine searchEngine;
    private String keyword;
    private String startURL;
    private Hashtable hits = new Hashtable();

    public boolean handleElement(String url, String key, String content) {
        boolean goodHit = false;
        int keywordCount = 0;
        int pos = -1;
        while ((pos = content.toLowerCase().indexOf(keyword, pos + 1)) >= 0){
            keywordCount++;

        }

        if (keywordCount > 0) {
            Integer count = (Integer) hits.get(url);
            if (count == null){
                hits.put(url, new Integer(1));
            }
            else {
                hits.remove(url);
                hits.put(url, new Integer(count.intValue() + keywordCount));
            }

            goodHit = true;
        }
        if (hits.size() >= 3)
            searchEngine.stopSearch();
        return goodHit;
    }

    public Hashtable search(String startURL, String keyword) {
        searchEngine = new SearchEngine();
        this.startURL = startURL;
        this.keyword = keyword;

        searchEngine.startSearch(startURL, this);
        try {Thread.sleep(1000);}catch (Exception e){e.printStackTrace();}

        while (searchEngine.searchInProgress());

        return this.hits;
    }

    public static void main(String[] args) {
        SimpleSearcher searcher = new SimpleSearcher();
        String url = "http://www.nzz.ch/";
        String compareWord = "der";

        Hashtable hits = searcher.search(url, compareWord);


        System.out.println("URLs=" + hits.size());
        for (Enumeration keys = hits.keys(); keys.hasMoreElements();) {
            String thisKey = (String) keys.nextElement();
            int thisCount = ((Integer) hits.get(thisKey)).intValue();
            System.out.println(thisCount + " hits at " + thisKey);
        }
    }
}

SearchHandler.java

public interface SearchHandler {
    public boolean handleElement(String url, String key, String content);
}

0 个答案:

没有答案