// 下面的代码会连续请求多个网页。这一次运行时我们得到了如下异常,异常发生在 getHtmlPage 方法中:
// java.util.ConcurrentModificationException
//     at java.util.ArrayList$Itr.checkForComodification(ArrayList.java:782)
//     at java.util.ArrayList$Itr.remove(ArrayList.java:768)
//     at com.gargoylesoftware.htmlunit.WebWindowImpl.destroyChildren(WebWindowImpl.java:185)
//     at com.gargoylesoftware.htmlunit.WebWindowImpl.setEnclosedPage(WebWindowImpl.java:110)
//     at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:209)
//     at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:187)
//     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:268)
//     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:156)
//     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:434)
//     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:309)
//     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:374)
//     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:359)
//     at sample.ShareScraper.getHtmlPage(ShareScraper.java:303)
//     at sample.ShareScraper.GetData(ShareScraper.java:116)
package sample;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.ConcurrentModificationException;
import java.util.Vector;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTableDataCell;
import com.jls.library.JLStructure;
/**
 * Scrapes share quotes from Google Finance through an HtmlUnit {@link WebClient}.
 *
 * <p>{@link #GetData()} opens the finance home page and then requests the detail page of
 * every code in {@code FETCHED_SHARE_CODES}; the last page loaded is kept in
 * {@code details_page1}. {@link #GetShareValues()} refreshes that page and reads one table
 * cell per code in {@code REPORTED_SHARE_CODES}.
 *
 * <p>Every method that touches the web client or the cached page is {@code synchronized} on
 * this instance, so the single client is never driven by two caller threads at once.
 */
public class ShareScraper {
    /** Exchange prefix that is stripped from a share code to build its report label. */
    private static final String EXCHANGE_PREFIX = "NSE:";

    /** Share codes whose detail pages are requested, in this order, by {@link #GetData()}. */
    private static final String[] FETCHED_SHARE_CODES = {
        "NSE:UNITECH", "NSE:RENUKA", "NSE:DISHMAN", "NSE:TATASTEEL", "NSE:TATAMOTORS",
        "NSE:NECLIFE", "NSE:RAIN", "NSE:ORIENTCEM", "NSE:JSWSTEEL", "NSE:JINDALSTEL"
    };

    /**
     * Share codes reported, in this order, by {@link #GetShareValues()}. The first entry is
     * also the probe used to decide whether the cached page still carries the quote table.
     */
    private static final String[] REPORTED_SHARE_CODES = {
        "NSE:UNITECH", "NSE:RENUKA", "NSE:DISHMAN", "NSE:TATASTEEL", "NSE:TATAMOTORS",
        "NSE:RAIN", "NSE:ORIENTCEM", "NSE:JSWSTEEL", "NSE:JINDALSTEL"
    };

    /** A page load is retried only while the attempt number is at most this value. */
    private static final int MAX_RETRY_COUNT = 6;

    /** Pause, in milliseconds, before a failed page load is retried. */
    private static final long RETRY_DELAY_MILLIS = 10000L;

    private WebClient wc1, wc2, wc3, wc4, wc5;
    private JLStructure JLSObj = null;
    HtmlPage details_page1 = null, details_page2 = null, details_page3 = null, details_page4 = null, details_page5 = null;
    /** Result of the last successful {@link #GetShareValues()} call; {@code null} until then. */
    Vector<String> share_values = null;

    /**
     * Creates a fresh web client, loads the finance home page and then the detail page of
     * each share code. The last detail page loaded is stored in {@code details_page1}.
     * Any exception is printed and swallowed, so the page field may be left {@code null}.
     */
    public synchronized void GetData() {
        System.out.println("it is getdata method");
        JLSObj = new JLStructure();
        try {
            wc1 = new WebClient();
            wc1.getOptions().setUseInsecureSSL(true);
            wc1.getOptions().setJavaScriptEnabled(true);
            wc1.getOptions().setCssEnabled(true);
            wc1.getOptions().setThrowExceptionOnScriptError(false);
            wc1.getOptions().setThrowExceptionOnFailingStatusCode(false);
            wc1.setAjaxController(new NicelyResynchronizingAjaxController());
            // Silence the HtmlUnit and Apache HTTP client loggers.
            java.util.logging.Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(java.util.logging.Level.OFF);
            java.util.logging.Logger.getLogger("org.apache.http").setLevel(java.util.logging.Level.OFF);

            HtmlPage homePage = wc1.getPage("http://www.google.com/finance");
            for (int share_code_no = 0; share_code_no < FETCHED_SHARE_CODES.length; share_code_no++) {
                // The "ei" query value is taken from the home page's "Finance" link.
                String ei = homePage.getAnchorByText("Finance").getHrefAttribute();
                ei = JLSObj.matchData("(?<=\\?ei=).*", ei);
                details_page1 = getHtmlPage(
                        "http://www.google.com/finance?q=" + FETCHED_SHARE_CODES[share_code_no] + "&ei=" + ei, wc1, 1);
                System.out.println(FETCHED_SHARE_CODES[share_code_no]);
            }
            System.out.println("finished");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Refreshes the cached detail page and reads the current value of every reported share.
     *
     * <p>If no page is cached yet, or the refreshed page no longer contains the first
     * reported share, all cached state is dropped and {@link #GetData()} is run again.
     *
     * @return a vector whose first element is a {@code dd-MM-yyyy HH:mm:ss} timestamp,
     *         followed by one {@code "LABEL :value"} entry per reported share; if the page
     *         cannot be loaded or an exception occurs, the result of the previous successful
     *         call is returned, which is {@code null} when there was none
     */
    public synchronized Vector<String> GetShareValues() {
        System.out.println("it is get sharae value method");
        try {
            boolean reloadNeeded = (details_page1 == null);
            if (!reloadNeeded) {
                details_page1 = (HtmlPage) details_page1.refresh();
                // Compare by content: "==" on strings only tests reference identity.
                reloadNeeded = GetTableDataCellText(REPORTED_SHARE_CODES[0], details_page1).isEmpty();
            }
            if (reloadNeeded) {
                // NOTE(review): the old WebClient is dropped here without being closed.
                details_page1 = null; details_page2 = null; details_page3 = null; details_page4 = null; details_page5 = null;
                wc1 = null; wc2 = null; wc3 = null; wc4 = null; wc5 = null;
                System.out.println("page is destroyed again getdata is called");
                System.gc();
                GetData();
            }
            if (details_page1 == null) {
                // The reload failed as well; keep whatever was collected previously.
                return share_values;
            }

            // Read every cell first so a failure leaves the previous result untouched.
            String[] cellTexts = new String[REPORTED_SHARE_CODES.length];
            for (int i = 0; i < REPORTED_SHARE_CODES.length; i++) {
                cellTexts[i] = GetTableDataCellText(REPORTED_SHARE_CODES[i], details_page1);
            }

            Vector<String> values = new Vector<String>();
            String timeStamp = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss").format(Calendar.getInstance().getTime());
            values.add(timeStamp);
            System.out.println("Now time is :" + timeStamp);
            for (int i = 0; i < REPORTED_SHARE_CODES.length; i++) {
                String label = REPORTED_SHARE_CODES[i].substring(EXCHANGE_PREFIX.length());
                values.add(label + " :" + cellTexts[i]);
            }
            share_values = values;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return share_values;
    }

    /**
     * Returns the text of the first table cell that follows a cell holding a link whose
     * {@code title} attribute contains {@code param}.
     *
     * @param param     share code to look for in the link title
     * @param data_page page to search; when {@code null}, {@code details_page1} is used
     * @return the cell text, or an empty string when no page is available or no cell matches
     */
    private String GetTableDataCellText(String param, HtmlPage data_page) {
        HtmlPage page = (data_page != null) ? data_page : details_page1;
        if (page == null) {
            return "";
        }
        HtmlTableDataCell cell = (HtmlTableDataCell) page.getFirstByXPath(
                "//td[preceding-sibling::td[a[contains(@title ,'" + param + "')]]]");
        return (cell != null) ? cell.asText() : "";
    }

    /**
     * Loads {@code url} with the given client. When the load fails with a
     * {@link ConcurrentModificationException}, it waits and tries again for as long as the
     * attempt number does not exceed the retry limit.
     *
     * @param url       address of the page to load
     * @param webClient client used for the request
     * @param count     number of the current attempt; callers start with 1
     * @return the loaded page, or {@code null} when the load failed
     */
    public synchronized HtmlPage getHtmlPage(String url, WebClient webClient, int count) {
        HtmlPage result_page = null;
        try {
            result_page = webClient.getPage(url);
        } catch (FailingHttpStatusCodeException e) {
            e.printStackTrace();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ConcurrentModificationException e) {
            e.printStackTrace();
            if (count <= MAX_RETRY_COUNT) {
                try {
                    Thread.sleep(RETRY_DELAY_MILLIS);
                    // Pass the incremented attempt number and keep the page the retry returns.
                    result_page = getHtmlPage(url, webClient, count + 1);
                } catch (InterruptedException e1) {
                    // Restore the interrupt flag so the caller can see the interruption.
                    Thread.currentThread().interrupt();
                    e1.printStackTrace();
                }
            }
        }
        return result_page;
    }
}
答案 0 :(得分:0)
WebClient 不是线程安全的:同一个 WebClient 实例只应在单个线程中使用,多个线程同时访问它就可能出现上面的 ConcurrentModificationException。请参考此处:
http://htmlunit.sourceforge.net/apidocs/com/gargoylesoftware/htmlunit/WebClient.html