我想用 HtmlUnit 抓取 Google 搜索结果。我的代码正常运行了一段时间,但现在会抛出下面这个异常:
com.gargoylesoftware.htmlunit.ScriptException: 503 Service Unavailable for https://ipv4.google.com/sorry/IndexRedirect?continue=...
它还会打印出一段 JavaScript,我认为这段代码是无效的:
/**
 * State-change callback quoted from the exception output.
 *
 * It reads `readyState`, `status`, `responseText` and `getResponseHeader`
 * from `c` (presumably an XMLHttpRequest -- confirm against the enclosing
 * code) and then either reports a numeric code through `QS_gea` or hands
 * the response text to `QS_hea`.
 *
 * `a`, `b`, `c` and `d` are free variables from an enclosing scope that is
 * not part of this printout, and the `QS_*` helpers are defined elsewhere.
 *
 * NOTE(review): a `function` without a name is only valid as an expression.
 * Pasted on its own as a statement this text is a syntax error, but that is
 * presumably an artifact of how the function was printed, not a defect in
 * the page's script.
 */
function () {
// Nothing is done while readyState is 1.
if (1 != c.readyState) {
// e starts out false; it becomes true when the request finished
// (readyState 4) with status 0, or when reading those properties throws.
var e = !1;
try {
e = 0 == c.status && 4 == c.readyState;
}
catch (k) {
e = !0;
}
// f stays undefined unless one of the branches below assigns a code;
// g is the payload passed along with that code and defaults to d.
var f, g = d;
// Code selection, first match wins:
//   e is true                                             -> f = 21
//   QS_kea(readyState, status) is truthy and the
//   Content-Type header lacks "application/json"          -> f = 12, g = {response, url}
//   QS_lea(status, 400, 500) is truthy                    -> f = 25
//   QS_lea(status, 500, 600) is truthy                    -> f = 1
// NOTE(review): QS_lea looks like a range check on the HTTP status, in which
// case the 503 from the exception would land in the last branch -- confirm.
e ? f = 21 : QS_kea(c.readyState, c.status) && 0 > (c.getResponseHeader("Content-Type") || "").indexOf("application/json") ? (f = 12, g = {response: c.responseText, url: d}) : QS_lea(c.status, 400, 500) ? f = 25 : QS_lea(c.status, 500, 600) && (f = 1);
if (void 0 !== f) {
// A code was selected: pass it with the payload to QS_gea, then call QS_mea.
QS_gea(a, f, null, g), QS_mea(a, b);
} else {
// readyState 3 with a.ya truthy and a.ka falsy: feed the response text
// received so far to QS_hea and store its return value in b.RJ.
if (3 == c.readyState && a.ya && !a.ka) {
b.RJ = QS_hea(a, c.responseText, b.RJ, d);
} else {
// readyState 4, handled at most once thanks to the b.complete flag.
if (4 == c.readyState && !b.complete) {
b.complete = !0;
// h is built by QS_d from the function below plus a and b.
// NOTE(review): QS_d presumably binds `this`/arguments -- confirm.
// Inside the inner function, the parameter `b` and the local `var c`
// shadow the outer `b` and `c`.
var h = QS_d(function (b) {
// Unless a.ka is set, pass the response text to QS_hea with a trailing
// `true` argument and store the result in b.RJ.
a.ka || (b.RJ = QS_hea(a, b.ov.responseText, b.RJ, b.url, !0));
if (a.ka) {
// a.ka is set: schedule a callback built from a.Aa, a, h and 0 via
// requestAnimationFrame and keep the returned handle in a.ra.
var c = QS_d(a.Aa, a, h, 0);
a.ra.push(window.requestAnimationFrame(c));
} else {
QS_mea(a, b);
}
}, a, b);
// Only a 200 response runs h; any other status goes straight to QS_mea.
200 == c.status ? h() : QS_mea(a, b);
}
}
}
}
}
如果我屏蔽脚本错误和失败状态码错误,就得不到任何结果。我猜是因为页面没有被正确加载?
我也想到,也许 Google 检测到了抓取行为并屏蔽了我的代码。但这很奇怪,因为那就意味着它在第一次请求时就立刻检测到了。