考虑以下 HTML 文件 a.html:
<!-- a.html: frameset document whose only frame loads frame_a.html. -->
<html>
<frameset>
<frame src="frame_a.html">
</frameset>
</html>
frame_a.html 的内容如下:
<!-- frame_a.html: the document referenced by the frame in a.html; its body holds only the text "aaaaaa". -->
<html>
<body>
aaaaaa
</body>
</html>
以下代码:
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.FrameWindow;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Small HtmlUnit experiment: loads a frameset page and compares the text returned by
 * {@code HtmlPage.asText()} on the top-level page with the text collected by walking
 * the page's frames recursively.
 */
public class TestFramset {

    /**
     * Loads the frameset page and prints its text twice: once via {@code asText()} on the
     * top-level page, and once via {@link #getText(HtmlPage)}, which also descends into frames.
     *
     * @param args unused
     * @throws FailingHttpStatusCodeException declared by {@code WebClient.getPage}
     * @throws MalformedURLException if the page URL is not a valid URL
     * @throws IOException if the page cannot be read
     * @throws InterruptedException kept in the signature for compatibility; nothing here waits
     */
    public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException, InterruptedException {
        WebClient client = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            client.getOptions().setJavaScriptEnabled(true);
            client.getOptions().setRedirectEnabled(true);
            client.getOptions().setThrowExceptionOnScriptError(true);
            client.getOptions().setCssEnabled(true);
            client.getOptions().setUseInsecureSSL(true);
            client.getOptions().setThrowExceptionOnFailingStatusCode(false);

            HtmlPage page = client.getPage("file:///...a.html");
            System.out.println("page as text will give nothing:" + page.asText());
            System.out.println("recursive function will give:" + getText(page));
        } finally {
            // Release the client even when loading or printing fails. closeAllWindows() is
            // the cleanup call in the HtmlUnit generation that ships FIREFOX_17; newer
            // releases expose close() / AutoCloseable instead.
            client.closeAllWindows();
        }
    }

    /**
     * Returns the text of {@code page} followed by the text of every HTML page nested in
     * its frames, depth-first, in the order {@code page.getFrames()} reports them. The
     * pieces are concatenated without any separator.
     *
     * <p>Frames whose enclosed page is not an {@link HtmlPage} (for example plain-text or
     * binary responses) are skipped rather than failing with a {@link ClassCastException}.
     *
     * @param page the page to read; must not be {@code null}
     * @return the concatenated text of the page and its nested HTML frames
     */
    public static String getText(HtmlPage page) {
        // StringBuilder avoids re-copying the accumulated text for every frame.
        StringBuilder text = new StringBuilder().append(page.asText());
        List<FrameWindow> frames = page.getFrames();
        for (FrameWindow frame : frames) {
            // Typed as Object so the instanceof check needs no extra import.
            Object enclosed = frame.getEnclosedPage();
            if (enclosed instanceof HtmlPage) {
                text.append(getText((HtmlPage) enclosed));
            }
        }
        return text.toString();
    }
}
运行后会产生如下输出:
page as text will give nothing:
recursive function will give:aaaaaa
我的问题是:page.asText() 不返回框架(frame)内的文本,这是否是预期的行为?另外,像上面这样用递归方式获取各个框架的文本,是否是最好的做法?