我编写了以下代码,用于点击HTML页面中的"下一步"按钮来打开新页面,并获取每个页面的数据:
// Walks the paged report list: for each page, prints the text of the first
// cell of every row except row 0, then clicks the pager's "next" submit
// button and continues with the page returned by the click.
// Relies on number_of_codal_pages_for_going_back being defined by the
// surrounding code.
try (final WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
    webClient.getOptions().setJavaScriptEnabled(true);
    // Turn off logging for everything under the "com.gargoylesoftware" logger.
    java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
    // Do not abort on HTTP error statuses or on script errors in the page.
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    webClient.setAjaxController(new NicelyResynchronizingAjaxController());

    final String url = "http://codal.ir/ReportList.aspx?1=&2=&3=&4=-1&5=%d8%af%d8%b9%d9%88%d8%aa&6=&7=&8=-1&9=-1&10=-1&11=&12=False&13=0";
    final WebRequest webRequest = new WebRequest(new URL(url));
    webRequest.setCharset(StandardCharsets.UTF_8);

    // Load the page through the prepared request. Previously the bare URL
    // string was passed here, so the request object (and the charset set on
    // it) was never used.
    HtmlPage page = webClient.getPage(webRequest);

    for (int pageCounter = 0; pageCounter < number_of_codal_pages_for_going_back; pageCounter++) {
        final HtmlTable table = page.getHtmlElementById("ctl00_ContentPlaceHolder1_gvList");

        // Rows are visited from last to first; row 0 is skipped
        // (presumably the header row -- confirm against the page markup).
        for (int rowIndex = table.getRowCount() - 1; rowIndex > 0; rowIndex--) {
            final HtmlTableRow row = table.getRow(rowIndex);
            final String ticker = row.getCell(0).asText();
            System.out.println(ticker);
        }
        System.out.println("______________________________");

        // Submit the pager's "next" button; the click result becomes the
        // page processed by the next iteration.
        final HtmlForm form = page.getFormByName("aspnetForm");
        final HtmlSubmitInput nextButton = form.getInputByName("ctl00$ContentPlaceHolder1$ucPager1$btnNext");
        page = nextButton.click();
    }
}
但每次打印出来的内容都是重复的(与第一页相同),程序似乎无法进入下一页。
为什么呢?
感谢。
答案 0 :(得分:0)
使用最新的SNAPSHOT版本时,下面的代码可以正常工作:
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
String url = "http://codal.ir/ReportList.aspx?1=&2=&3=&4=-1&5=%d8%af%d8%b9%d9%88%d8%aa&6=&7=&8=-1&9=-1&10=-1&11=&12=False&13=0";
HtmlPage page = webClient.getPage(url);
System.out.println("______________________________");
System.out.println(page.getElementById("ctl00_ContentPlaceHolder1_ucPager1_gridpager").asXml());
System.out.println("______________________________");
HtmlForm form = page.getFormByName("aspnetForm");
HtmlSubmitInput button = form.getInputByName("ctl00$ContentPlaceHolder1$ucPager1$btnNext");
page = button.click();
System.out.println("______________________________");
System.out.println(page.getElementById("ctl00_ContentPlaceHolder1_ucPager1_gridpager").asXml());
System.out.println("______________________________");
}