我正在编写一个解析器。有一个页面无法完整加载(并非所有脚本都被执行)。如果我用浏览器加载该页面(https://hh.ru/employer/negotiations/change_topic?r=5598e4e9000318fe590000bde1526e666d5968),一切正常;但用 HtmlUnit 加载时,(我认为)有些脚本没有被加载。在 Firefox 中,页面上的按钮处于启用状态;而用 HtmlUnit 加载的同一页面中,该按钮带有 disabled 属性,因此我无法提交(即使我删除了这个属性,请求虽然发出了,但并没有正常生效)。所以,我不明白为什么用 HtmlUnit 加载的页面不能正常工作。
我的设置:
// Build an HtmlUnit client that emulates Firefox 60, with cookies, JavaScript,
// CSS and redirects enabled, then issue a GET for `url` with hand-set headers.
WebClient webClient = new WebClient(BrowserVersion.FIREFOX_60);
webClient.getCookieManager().setCookiesEnabled(true);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setTimeout(35000);
webClient.getOptions().setUseInsecureSSL(true);
webClient.getOptions().setRedirectEnabled(true);
// Tolerate page-side problems: do not throw on script errors or failing HTTP
// status codes, and silence CSS error reporting.
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setPrintContentOnFailingStatusCode(false);
webClient.setCssErrorHandler(new SilentCssErrorHandler());
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
// NOTE(review): both waits run before getPage() below, i.e. before any page has
// been requested by this snippet. Presumably there are no background JavaScript
// jobs to wait for at this point and the waits were meant to follow getPage();
// confirm against the HtmlUnit documentation.
webClient.waitForBackgroundJavaScript(100000);
webClient.waitForBackgroundJavaScriptStartingBefore(100000);
// NOTE(review): this line uses getWebClient() while every other line uses the
// local `webClient`; whether both refer to the same instance is not visible here.
getWebClient().setAlertHandler(new CollectingAlertHandler(new ArrayList<>()));
webClient.getOptions().setCssEnabled(true);
// Explicit GET request for `url` (declared outside this snippet) with
// browser-like headers set by hand.
WebRequest requestSettings = new WebRequest(url, HttpMethod.GET);
requestSettings.setAdditionalHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
// NOTE(review): advertises "br" (brotli); verify that the HtmlUnit version in
// use can decode brotli responses, otherwise the body may arrive unreadable.
requestSettings.setAdditionalHeader("Accept-Encoding", "gzip, deflate, br");
requestSettings.setAdditionalHeader("Accept-Language", "en-US,en;q=0.9");
requestSettings.setAdditionalHeader("Connection", "keep-alive");
requestSettings.setAdditionalHeader("Host", "hh.ru");
requestSettings.setAdditionalHeader("TE", "Trailers");
requestSettings.setAdditionalHeader("Upgrade-Insecure-Requests", "1");
// NOTE(review): this User-Agent identifies Chrome 65, while the client above is
// created with BrowserVersion.FIREFOX_60; the server may therefore serve markup
// or scripts for a different browser than the one being emulated. Confirm intent.
requestSettings.setAdditionalHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36");
// The page returned by getPage() is not assigned to anything in this snippet.
webClient.getPage(requestSettings);
当我尝试调用myButton.click()时,出现此错误:
12:57:25,905 [Thread-7] ERROR com.gargoylesoftware.htmlunit.javascript.DefaultJavaScriptErrorListener - Error during JavaScript execution
======= EXCEPTION START ========
Exception class=[net.sourceforge.htmlunit.corejs.javascript.EvaluatorException]
com.gargoylesoftware.htmlunit.ScriptException: syntax error (script in https://hh.ru/employer/negotiations/change_topic from (2, 454) to (39, 18)#34)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:885)
...
at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:859)
at com.headhunter.hhelper.Headhunter.invite(Headhunter.java:233)
at com.headhunter.hhelper.Headhunter.doInvite(Headhunter.java:150)
at com.headhunter.hhelper.SearchController$2.run(SearchController.java:126)
at java.lang.Thread.run(Thread.java:745)
Caused by: net.sourceforge.htmlunit.corejs.javascript.EvaluatorException: syntax error (script in https://hh.ru/employer/negotiations/change_topic from (2, 454) to (39, 18)#34)
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory$HtmlUnitErrorReporter.error(HtmlUnitContextFactory.java:420)
at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:259)
...
at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1584)
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory$TimeoutContext.compileString(HtmlUnitContextFactory.java:222)
at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1573)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$1.doRun(JavaScriptEngine.java:707)
at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:870)
... 42 more
Enclosed exception:
net.sourceforge.htmlunit.corejs.javascript.EvaluatorException: syntax error (script in https://hh.ru/employer/negotiations/change_topic from (2, 454) to (39, 18)#34)
at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory$HtmlUnitErrorReporter.error(HtmlUnitContextFactory.java:420)
at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:259)
...
at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:859)
at com.headhunter.hhelper.Headhunter.invite(Headhunter.java:233)
at com.headhunter.hhelper.Headhunter.doInvite(Headhunter.java:150)
at com.headhunter.hhelper.SearchController$2.run(SearchController.java:126)
at java.lang.Thread.run(Thread.java:745)
== CALLING JAVASCRIPT ==
window.bloko = {
fontUrl: '/'
};
window.globalVars = {
locale: '',
country: '',
area: '',
build: '',
lang: '' || 'RU',
requestId: '',
sentryDSN: '',
siteId: '' || '1',
staticHost: '',
hhcdnHost: '',
apiHost: '',
timeStamp: '',
userType: '' || 'anonymous',
cryptedUserId: '',
employerState: '',
vishnuIframeSrc: '',
login: "",
userId: '',
hhid: '',
autotestsComponentsInitEnd: false,
performanceObserverEnabled: true,
features: {"hide_resume_photo_from_untrusted_users": true, "disable_counters": false, "sentry_logging": true, "fingerprinting_enable": true, "secure_portal_enabled": true, "secure_portal_employer_registration_only": false, "employer_extensions_to_detect": "{\"vera\": \"veraBar\", \"friendwork\": \"fwi-popup\", \"potok\": \"potok_io__chrome_extension_iframe\", \"extrasaur\": \"custom-table-iframe-div\"}", "anonymous_resume_enabled": true, "sentry_js_config": "{\r\n \"ignorePaths\": {\r\n \"regexps\": [\r\n \"[\\\\da-f]+/[\\\\da-f-]+/main\\\\.js\",\r\n \".*akamaihd\\\\.net.+$\",\r\n \"\\\\/inj_js\\\\/common\\\\.js\",\r\n \"fingerprintjs\",\r\n \"ckeditor4.5\",\r\n \"axios/lib/core/createError\"\r\n ]\r\n },\r\n \"ignoreErrors\": {\r\n \"strings\": [\r\n \"'e.data.indexOf' is not a function\",\r\n \"Load timeout for modules:\",\r\n \"__gCrWeb.autofill.extractForms\",\r\n \"HTML Parsing Error: Unable to modify the parent container element before the child element is closed\",\r\n \"Uncaught exception: TypeError: Cannot convert 'd.body' to object\",\r\n \"Node cannot be inserted at the specified point in the hierarchy\",\r\n \"TypeError: \u041d\u0435\u0434\u043e\u043f\u0443\u0441\u0442\u0438\u043c\u044b\u0439 \u0432\u044b\u0437\u044b\u0432\u0430\u044e\u0449\u0438\u0439 \u043e\u0431\u044a\u0435\u043a\u0442\",\r\n \"TypeError: Invalid calling object\",\r\n \"TypeError: 'undefined' is not an object (evaluating 'doc.forms')\",\r\n \"Uncaught exception: TypeError: Cannot convert 'a.mini' to object\",\r\n \"window.zAdv\",\r\n \"backbone in Function.e.Router [as extend]\",\r\n \"this._doc.documentElement\",\r\n \"Can't find variable: inf\",\r\n \"SkypeClick2Call\",\r\n \"\u0421\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0447\u0435\u0441\u043a\u0430\u044f \u043e\u0448\u0438\u0431\u043a\u0430\",\r\n \"Invalid or unexpected token\",\r\n \"Unexpected token <\",\r\n \"Blocked a frame with origin\",\r\n \"__show__deepen\",\r\n \"expected expression, got '<'\",\r\n \"Cannot read 
property 'forms' of undefined\",\r\n \"GM_addStyle is not defined\",\r\n \"can't redefine non-configurable property \\\"userAgent\\\"\",\r\n \"Can't find varfiable: auto\",\r\n \"only one instance of babel-polyfill is allowed\",\r\n \"this.matches is not a function\",\r\n \"NS_ERROR_NOT_INITIALIZED\",\r\n \"NS_ERROR_UNEXPECTED\",\r\n \"jQuery(...).size is not a function\",\r\n \"Unexpected token ILLEGAL\",\r\n \"Unexpected identifier\",\r\n \"Unexpected end of input\",\r\n \"yndx_svtn_e\",\r\n \"TypeError: Cannot set property 'destroySlots' of undefined\",\r\n \"Non-Error exception captured with keys: status, statusText\",\r\n \"SyntaxError: The string did not match the expected pattern.\",\r\n \"The operation is insecure\",\r\n \"No identifiers allowed directly after numeric literal\",\r\n \"wmrzz_time2 is not defined\",\r\n \"Request failed with status code 403\",\r\n \"SYNTAX_ERR: DOM Exception 12\",\r\n \"maxthon\",\r\n \"Request aborted\"\r\n ],\r\n \"regexps\": [\r\n \"^undefined$\",\r\n \"^Syntax error$\",\r\n \"^\u041d\u0435\u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u0430\u044f \u043e\u0448\u0438\u0431\u043a\u0430\\\\.$\",\r\n \"^\u041d\u0435\u0434\u043e\u043f\u0443\u0441\u0442\u0438\u043c\u044b\u0439 \u0437\u043d\u0430\u043a$\",\r\n \"^\\\\[object Event\\\\]$\",\r\n \"\\\\bgST\\\\b\",\r\n \"pixelPositionVal\",\r\n \"\u041d\u0435\u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u043f\u0430\u043c\u044f\u0442\u0438 \u0434\u043b\u044f \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u0438\u044f \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u0438[\\\\s\\\\S]+?fingerprint2\",\r\n \"^illegal character$\",\r\n \"^Access is denied\\\\.\\\\s*$\",\r\n \"^Timeout$\",\r\n \"^Unexpected token else$\",\r\n \"^\u041d\u0435\u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u043f\u0430\u043c\u044f\u0442\u0438$\",\r\n \"^\\\\[CKEDITOR.resourceManager.load\\\\] Resource name \\\"default\\\" was not found at\",\r\n \"can't redefine 
non-configurable property \\\"AceScript\\\"\",\r\n \"\u041e\u043f\u0435\u0440\u0430\u0446\u0438\u044f \u0431\u044b\u043b\u0430 \u043e\u0442\u043c\u0435\u043d\u0435\u043d\u0430 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435\u043c.\",\r\n \"out of memory\",\r\n \"Network Error\",\r\n \"Loading chunk\",\r\n \"^No error message$\",\r\n \"^\\\"Timeout\\\"$\"\r\n ]\r\n }\r\n}", "vishnu_webim_integration": true, "iframe_fix_size_banners": "504,514,500,502,260,348,674,675,370,369,368,345,346", "personal_manager_rating_enabled": true, "fp_pro_enabled": true},
variables: ,
cssMaping: ,
firebaseMessagingSenderId: '',
google_dfp_sandbox: '',
};
======= EXCEPTION END ========
12:57:25,919 [Thread-7] WARN com.gargoylesoftware.htmlunit.html.HtmlScript - Script is not JavaScript (type: text/html, language: ). Skipping execution.