我有以下代码,它会被反复调用来抓取网页:
/**
 * Builds a result from the text of a downloaded page.
 *
 * <p>The given {@code htmlCode} is stored as-is and {@code htmlErrorCode} is always
 * set to {@code -1}. {@code excCode} is {@code 0} when {@code htmlCode} is non-null
 * and {@code UNKNOWN_ERROR} when it is {@code null}.
 *
 * @param htmlCode the page content, or {@code null} if none was obtained
 */
public GetWebPageResult(String htmlCode) //CONSTRUCTORS FOR GetWebPageResults
{
    this.htmlCode = htmlCode;
    this.htmlErrorCode = -1;
    // A missing page with no further information is recorded as UNKNOWN_ERROR.
    this.excCode = (htmlCode == null) ? UNKNOWN_ERROR : 0;
}
/**
 * Builds a result that carries no page content: {@code htmlCode} is set to
 * {@code null} and both codes are stored exactly as given.
 *
 * @param excCode       the value to store in {@code excCode}
 * @param htmlErrorCode the value to store in {@code htmlErrorCode}
 */
public GetWebPageResult(int excCode, int htmlErrorCode)
{
    this.excCode = excCode;
    this.htmlErrorCode = htmlErrorCode;
    this.htmlCode = null;
} //END CONSTRUCTORS FOR GetWebPageResults
/**
 * Downloads the page referenced by {@code pagenode} and returns its text.
 *
 * <p>The body is read line by line and each line is re-joined with {@code '\n'}.
 * No exception escapes this method; every failure is converted into a
 * {@link GetWebPageResult} carrying an error code:
 * <ul>
 *   <li>{@code ERR_NULL_URI} when {@code pagenode.getUri()} returns {@code null};</li>
 *   <li>{@code ERR_BAD_RESPONSE_CODE} (together with the HTTP status) when
 *       {@code checkResponseCode} rejects the status;</li>
 *   <li>{@code ERR_HTML_IOEXCEPTION} for any {@link IOException}. This includes
 *       {@code java.net.UnknownHostException} and {@code java.net.MalformedURLException},
 *       which are subclasses of {@code IOException}, so they are handled by the first
 *       catch block and never reach the generic one;</li>
 *   <li>{@code ERR_CLASS_CAST_EXC} when the opened connection is not of the expected
 *       HTTP/HTTPS connection type;</li>
 *   <li>{@code ERR_NOT_LISTED_EXC} for any other {@link Exception}.</li>
 * </ul>
 *
 * <p>The reader is closed and the connection is disconnected on every path,
 * including early returns and exceptions. When {@code Core.logGetWebPage()} returns
 * true, timing information is printed to standard output.
 *
 * @param pagenode the node whose URI is fetched
 * @return a result holding either the page text or the error codes described above
 */
static private GetWebPageResult getWebPage(PageNode pagenode)
{
    int cicliLettura = 0;
    long startTime = 0, openConnTime = 0, connTime = 0, readTime = 0;
    try
    {
        startTime = System.nanoTime();
        URI url = pagenode.getUri();
        if (Core.logGetWebPage())
            openConnTime = System.nanoTime();
        if (url == null)
            return new GetWebPageResult(GetWebPageResult.ERR_NULL_URI, -2);

        // Convert once and reuse; fully qualified so no additional import is required.
        java.net.URL pageUrl = url.toURL();
        String result;
        int responseCode;

        // A non-HTTP scheme makes this cast fail; that is reported as ERR_CLASS_CAST_EXC below.
        HttpURLConnection yc = (HttpURLConnection) pageUrl.openConnection();
        try
        {
            // Kept from the original: for https the connection must be an HttpsURLConnection,
            // otherwise the ClassCastException handler below reports the failure.
            if (pageUrl.getProtocol().equalsIgnoreCase("https"))
                yc = (HttpsURLConnection) yc;
            yc.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)");
            // connect() returns nothing; a failure (e.g. unknown host) surfaces as an IOException.
            yc.connect();

            // Read the status once and reuse it for the check, the error result and the log line.
            responseCode = yc.getResponseCode();
            if (!checkResponseCode(responseCode))
                return new GetWebPageResult(GetWebPageResult.ERR_BAD_RESPONSE_CODE, responseCode);
            if (Core.logGetWebPage())
                connTime = System.nanoTime();

            // NOTE(review): no charset is given, so the platform default is used to decode
            // the body - confirm whether the Content-Type charset should be honoured instead.
            // getInputStream() and readLine() may throw IOException.
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream()));
            try
            {
                StringBuilder buffer = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null)
                {
                    buffer.append(inputLine).append('\n');
                    cicliLettura++;
                }
                result = buffer.toString();
                if (Core.logGetWebPage())
                    readTime = System.nanoTime();
            }
            finally
            {
                // Close the reader even when reading fails part-way through.
                in.close();
            }
        }
        finally
        {
            // Release the connection on success, on a rejected status and on any exception.
            yc.disconnect();
        }

        if (Core.logGetWebPage())
        {
            long endTime = System.nanoTime();
            System.out.println("getWebPage eseguito in "+(endTime-startTime)/1000000+" ms. Size: "+result.length()+" Response Code="+responseCode+" Protocollo="+pageUrl.getProtocol()+" openConnTime: "+(openConnTime-startTime)/1000000+" connTime:"+(connTime-openConnTime)/1000000+" readTime:"+(readTime-connTime)/1000000+" cicliLettura="+cicliLettura+" pagina:"+pageUrl);
        }
        return new GetWebPageResult(result);
    }
    catch (IOException e)
    {
        // Also reached for UnknownHostException, which extends IOException.
        System.out.println("Eccezione1: "+e.toString());
        e.printStackTrace();
        return new GetWebPageResult(GetWebPageResult.ERR_HTML_IOEXCEPTION, -2);
    }
    catch (ClassCastException e)
    {
        System.out.println("Eccezione2: "+e.toString());
        e.printStackTrace();
        return new GetWebPageResult(GetWebPageResult.ERR_CLASS_CAST_EXC, -2);
    }
    catch (Exception e)
    {
        // Boundary catch-all so that no unexpected exception escapes this method.
        System.out.println("Eccezione ERR_NOT_LISTED_EXC: "+e.toString());
        return new GetWebPageResult(GetWebPageResult.ERR_NOT_LISTED_EXC, -2);
    }
}
线程因此异常而停止:
java.net.UnknownHostException: www.notjohnchow.com
at java.net.AbstractPlainSocketImpl.connect(Unknown Source)
at java.net.PlainSocketImpl.connect(Unknown Source)
at java.net.SocksSocketImpl.connect(Unknown Source)
at java.net.Socket.connect(Unknown Source)
at java.net.Socket.connect(Unknown Source)
at sun.net.NetworkClient.doConnect(Unknown Source)
at sun.net.www.http.HttpClient.openServer(Unknown Source)
at sun.net.www.http.HttpClient.openServer(Unknown Source)
at sun.net.www.http.HttpClient.<init>(Unknown Source)
at sun.net.www.http.HttpClient.New(Unknown Source)
at sun.net.www.http.HttpClient.New(Unknown Source)
at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(Unknown Source)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect(Unknown Source)
at sun.net.www.protocol.http.HttpURLConnection.connect(Unknown Source)
at WebsiteCrawler.getWebPage(WebsiteCrawler.java:315)
at WebsiteCrawler.crawlNextPage(WebsiteCrawler.java:71)
at Website.run(Website.java:51)
当尝试连接到 "www.notjohnchow.com" 时,yc.connect() 抛出了上述异常。为什么这个异常不是由最后一个 catch 块捕获的?我设置这个 try-catch 是为了捕获多种异常:如果当前异常不属于前面列出的任何一种类型,就应该在 catch(Exception e) 块中被当作"未知错误"处理。
答案 0 :(得分:2)
你确定你的线程真的终止了吗?如果我没有记错,UnknownHostException 是 IOException 的子类,因此它会被第一个 catch 块捕获,所以才会打印出堆栈跟踪(StackTrace)。