我是Java的新手,特别是线程编程。这段代码主要出自一本非常古老的书(2001),其中包含搜索引擎的示例和示例。 但它只是不起作用 现在我不知道我是犯了错误,或者作者是否犯了错误,或者是否与不同版本的java不兼容......我真的不知道!关于它的最奇怪的事情是它在100次中有1次工作...... 经过几个小时的调试后,我将不胜感激!
SearchEngine.java:
import java.util.Vector;
import parsing.SourceElement;
import parsing.WebParserWrapper;
import query.Filter;
public class SearchEngine implements Runnable {
private Vector linkHistory = new Vector();
private int currentLink;
private String beginAt = null;
private SearchHandler searchHandler = null;
private boolean searchInProgress = false;
private boolean stopPending = false;
boolean firstTime = true;
public boolean searchInProgress() {
return searchInProgress;
}
public boolean stopPending() {
return stopPending;
}
@SuppressWarnings("unchecked")
public void followLinks(String url) {
if (stopPending)
return;
try {
boolean drillDown = false;
WebParserWrapper webParser = new WebParserWrapper();
Vector sortedElements = webParser.getElements(url, "", "WITHGET");
Vector contentElements = Filter.getFilteredElements(sortedElements, Filter.CONTENT, "matches", "*");
for (int i = 0; i < contentElements.size(); i++) {
SourceElement thisElement = (SourceElement) contentElements.elementAt(i);
String thisKey = (String) thisElement.getKey();
String thisContent = (String) thisElement.getContent();
boolean goodHit = searchHandler.handleElement(url, thisKey, thisContent);
if (goodHit) {
drillDown = true;
}
}
System.out.println(url + " -- DrillDown " + ((drillDown) ? "positive" : "negative"));
if (drillDown) {
Vector linkElements = Filter.getFilteredElements(sortedElements, Filter.KEY, "matches",
"*a[*].@href[*]");
for (int i = 0; i < linkElements.size(); i++) {
SourceElement thisElement = (SourceElement) linkElements.elementAt(i);
String thisContent = (String) thisElement.getContent();
if (!linkHistory.contains(thisContent)) {
linkHistory.add(thisContent);
System.out.println("Collected: " + thisContent);
}
}
}
}
catch (Exception e) {}
if (currentLink < linkHistory.size()) {
String nextLink = (String) linkHistory.elementAt(currentLink++);
if (nextLink != null) {
followLinks(nextLink);
}
}
}
public boolean startSearch(String url, SearchHandler searchHandler) {
if (searchInProgress)
return false;
beginAt = url;
this.searchHandler = searchHandler;
this.linkHistory = new Vector();
this.currentLink = 0;
Thread searchThread = new Thread(this);
searchThread.start();
return true;
}
public void stopSearch() {
stopPending = true;
}
@Override
public void run() {
searchInProgress = true;
followLinks(beginAt);
searchInProgress = false;
stopPending = false;
}
}
SimpleSearcher.java
import java.util.Enumeration;
import java.util.Hashtable;
public class SimpleSearcher implements SearchHandler {
private SearchEngine searchEngine;
private String keyword;
private String startURL;
private Hashtable hits = new Hashtable();
public boolean handleElement(String url, String key, String content) {
boolean goodHit = false;
int keywordCount = 0;
int pos = -1;
while ((pos = content.toLowerCase().indexOf(keyword, pos + 1)) >= 0){
keywordCount++;
}
if (keywordCount > 0) {
Integer count = (Integer) hits.get(url);
if (count == null){
hits.put(url, new Integer(1));
}
else {
hits.remove(url);
hits.put(url, new Integer(count.intValue() + keywordCount));
}
goodHit = true;
}
if (hits.size() >= 3)
searchEngine.stopSearch();
return goodHit;
}
public Hashtable search(String startURL, String keyword) {
searchEngine = new SearchEngine();
this.startURL = startURL;
this.keyword = keyword;
searchEngine.startSearch(startURL, this);
try {Thread.sleep(1000);}catch (Exception e){e.printStackTrace();}
while (searchEngine.searchInProgress());
return this.hits;
}
public static void main(String[] args) {
SimpleSearcher searcher = new SimpleSearcher();
String url = "http://www.nzz.ch/";
String compareWord = "der";
Hashtable hits = searcher.search(url, compareWord);
System.out.println("URLs=" + hits.size());
for (Enumeration keys = hits.keys(); keys.hasMoreElements();) {
String thisKey = (String) keys.nextElement();
int thisCount = ((Integer) hits.get(thisKey)).intValue();
System.out.println(thisCount + " hits at " + thisKey);
}
}
}
SearchHandler.java
public interface SearchHandler {
public boolean handleElement(String url, String key, String content);
}