如何在 Java 中从动态网页提取 HTML 字符串?我目前使用下面的代码,但它只适用于静态网页:
/**
 * Downloads the response body of {@code url} and returns it as a string.
 *
 * <p>Only the bytes sent back by the server are read; nothing in this method executes
 * JavaScript, so markup that a page builds on the client side is not part of the result.
 *
 * <p>The body is transparently decompressed when the response's {@code Content-Encoding}
 * is {@code gzip} or {@code deflate}. The charset name handed to the converter is whatever
 * {@code Converter.extractEncoding} derives from the {@code Content-Type} header.
 *
 * @param url     address to fetch; also passed to {@code createConverter} and used in the
 *                debug log line
 * @param timeout forwarded unchanged to {@code createUrlConnection}
 *                (presumably a connect/read timeout; unit not visible here, confirm there)
 * @param options forwarded unchanged to {@code createUrlConnection}
 * @return the response body as produced by the converter's {@code streamToString}
 * @throws MalformedURLException declared for callers; presumably raised by
 *                               {@code createUrlConnection} for an invalid {@code url}
 * @throws IOException           if opening, decompressing or reading the response fails
 */
public String DynamicExtractHtmlString(String url, int timeout, boolean options)
        throws MalformedURLException, IOException {
    HttpURLConnection hConn = createUrlConnection(url, timeout, options);
    hConn.setInstanceFollowRedirects(true);
    String encoding = hConn.getContentEncoding();

    String res;
    // Both the raw stream and its decompressing wrapper are closed on every path,
    // including when conversion throws. Closing an already-closed stream is a no-op,
    // so this stays safe even if streamToString closes its argument itself.
    try (InputStream raw = hConn.getInputStream();
         InputStream is = openDecodedStream(raw, encoding)) {
        String enc = Converter.extractEncoding(hConn.getContentType());
        res = createConverter(url).streamToString(is, enc);
    }

    if (logger.isDebugEnabled()) {
        logger.debug(res.length() + " FetchAsString:" + url);
    }
    return res;
}

/** Wraps {@code raw} in the decompressor matching {@code encoding}, or returns it as-is. */
private InputStream openDecodedStream(InputStream raw, String encoding) throws IOException {
    if ("gzip".equalsIgnoreCase(encoding)) {
        return new GZIPInputStream(raw);
    }
    if ("deflate".equalsIgnoreCase(encoding)) {
        // nowrap=true: expect raw deflate data without the zlib header/checksum.
        return new InflaterInputStream(raw, new Inflater(true));
    }
    return raw;
}