我需要访问由javascript生成的渲染html。像javascript生成的html可以通过inspect看到,但我将无法在页面的viewsource中使用。所以我需要得到那个HTML。我正在尝试使用htmlunit。
import java.io.IOException;
import java.net.MalformedURLException;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
*
* @author biznis
*/
public class JsoupImageEx {
public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException {
java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF); /* comment out to turn off annoying htmlunit warnings*/
WebClient webClient = new WebClient();
String url = "";
System.out.println("Loading page now: "+url);
HtmlPage page = webClient.getPage(url);
webClient.waitForBackgroundJavaScript(30 * 1000); /* will wait JavaScript to execute up to 30s */
String pageAsXml = page.asXml();
System.out.println(pageAsXml);
}
}
使用html unit 2.12 maven依赖
我正在异常
输出:
Loading page now:
Exception in thread "main" ======= EXCEPTION START ========
EcmaError: lineNumber=[2] column=[0] lineSource=[<no source>] name=[TypeError] sourceName=[https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content] message=[TypeError: Cannot find function addEventListener in object [object HTMLDocument]. (https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content#2)]
com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot find function addEventListener in object [object HTMLDocument]. (https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content#2)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:669)
at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:601)
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:507)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:555)
at com.gargoylesoftware.htmlunit.html.HtmlPage.loadExternalJavaScriptFile(HtmlPage.java:1082)
at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:399)
at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:260)
at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:276)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:676)
at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:635)
at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3074)
at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2041)
at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:918)
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:892)
at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:241)
at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:187)
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:268)
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:156)
at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:434)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:309)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:374)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:359)
at JsoupImageEx.main(JsoupImageEx.java:25)
Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot find function addEventListener in object [object HTMLDocument]. (https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content#2)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3603)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3587)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3608)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3624)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.notFunctionError(ScriptRuntime.java:3688)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThisHelper(ScriptRuntime.java:2207)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThis(ScriptRuntime.java:2189)
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1333)
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:405)
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:275)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3031)
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:546)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:654)
... 30 more
Enclosed exception:
net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot find function addEventListener in object [object HTMLDocument]. (https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content#2)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThisHelper(ScriptRuntime.java:2207)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getPropFunctionAndThis(ScriptRuntime.java:2189)
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1333)
at script(https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content:2)
at script(https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content:2)
at script(https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content:2)
at script(https://inapi.ankiti.com/api/mms.bulksms101.com/www/parts/jquery-2.2.4.min.js?field=content:2)
at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:405)
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:275)
at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3031)
at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:546)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:654)
at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:601)
at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:507)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:555)
at com.gargoylesoftware.htmlunit.html.HtmlPage.loadExternalJavaScriptFile(HtmlPage.java:1082)
at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:399)
at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:260)
at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:276)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:676)
at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:635)
at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3074)
at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2041)
at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:918)
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:892)
at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:241)
at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:187)
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:268)
at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:156)
at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:434)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:309)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:374)
at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:359)
at JsoupImageEx.main(JsoupImageEx.java:25)
======= EXCEPTION END ========
我需要获得渲染的outerhtml。
答案 0 :(得分:0)
HtmlUnit 2.12于2013年3月6日发布。你真的希望这个版本仍然受支持吗? 我们现在在2.28和2.29正在路上。据我所见,Changes可以在2.14中修复此错误,但不确定。
所以请更新到最新版本。如果你仍然遇到这样的问题,请为HtmlUnit打开一个问题。
最后复制像Stackoverflow一样思考
java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF); /* comment out to turn off annoying htmlunit warnings*/
根本不是最好的主意。您正在寻找一个错误并禁用整个日志。通常,日志可以帮助您和其他人提供有用的信息来查找问题。