我正在尝试使用 XMLHTTP 从 https://www.businesstimes.com.sg/keywords/singapore-parliament 获取新闻文章列表,但返回的似乎是一个只包含脚本、没有实际内容的文档。
我尝试了一个基本请求:
' Sends a synchronous GET request for the Business Times keyword page.
' The response is never read here; the request object is released
' immediately after the call to Send returns.
Private Sub Test()
    Const PAGE_URL As String = "https://www.businesstimes.com.sg/keywords/singapore-parliament"
    Dim request As MSXML2.XMLHTTP60

    Set request = New MSXML2.XMLHTTP60
    With request
        ' Third argument False requests a synchronous (blocking) call.
        .Open "GET", PAGE_URL, False
        .Send
    End With
    Set request = Nothing
End Sub
它返回的文档除了一堆压缩(minified)过的脚本之外,还包含下面这段脚本:
<script>
// Self-invoking wrapper: re-requests the current URL through XMLHttpRequest,
// sending the header values listed in originalHeaders, and then either
// redirects or overwrites the current document with the response text.
(function() {
'use strict';
// Guard flag: afterReadyCb is wired to both an event and a timer (see the
// bottom of this function) and must only run once.
var afterReadyCbCalled = false;
// Flat array of alternating header names and values; the loop below walks it
// two entries at a time.
var originalHeaders = ["X-Host", "www.businesstimes.com.sg","X-EC-Hot-Hash", "7790000207959645976","x-ec-pop", "sgb","X-Forwarded-For", "103.252.200.88, 165.225.112.130, 152.195.199.174, 35.201.102.132","X-EC-Session-ID", "12399570086198404337867903748881746029","Accept", "*/*","Accept-Language", "en-US,en-GB;q=0.8,en;q=0.5,ja;q=0.3","True-Client-IP", "165.225.112.130","X-Cloud-Trace-Context", "ff9cf2795015e71e68e65cd11ad81a87/6844774677298190324","X-EC-Uuid", "12399570086198404337867903748881746029","X-Forwarded-Proto", "https","UA-CPU", "AMD64",];
// Request body passed to xhr.send(); empty in this response.
var originalBody = "";
// Issues the follow-up request and installs the handler that applies its result.
function afterReadyCb() {
if (afterReadyCbCalled) return;
afterReadyCbCalled = true;
var xhr = new XMLHttpRequest();
xhr.onload = function() {
// A non-empty ISTL-INFINITE-LOOP response header makes the handler stop
// without changing the page.
var isValid = xhr.getResponseHeader("ISTL-INFINITE-LOOP");
if (isValid != null && isValid != '') return;
// A non-empty ISTL-REDIRECT-TO response header makes the page navigate to
// that URL instead of rendering the response body.
var a = xhr.getResponseHeader("ISTL-REDIRECT-TO");
if (a != null && a != '') {
location.replace(a);
} else {
// When history.replaceState is available, point the address bar at the
// response URL, falling back to the ISTL-RESPONSE-URL header when
// xhr.responseURL is null or undefined.
if (window.history != null && typeof history.replaceState === 'function') {
var responseURL = xhr.responseURL != null ? xhr.responseURL : xhr.getResponseHeader("ISTL-RESPONSE-URL");
if (responseURL != null && responseURL != '') {
history.replaceState(null, '', responseURL);
}
}
// DO NOT INLINE. There is a bug specific to IE/Edge.
var responseText = xhr.responseText;
// Replace the whole current document with the fetched response text.
document.open();
document.write(responseText);
document.close();
}
};
// Asynchronous GET to the URL of the current page.
xhr.open("get", location.href, true);
// Copy every name/value pair from originalHeaders onto the request.
for (var i = 0; i < originalHeaders.length; i += 2) {
var headerName = originalHeaders[i];
try {
xhr.setRequestHeader(headerName, originalHeaders[i + 1]);
// Exceptions from setRequestHeader are ignored so the remaining headers are
// still attempted (presumably for names the browser refuses to set).
} catch (e) {}
}
// Marks this follow-up request with ISTL-INFINITE-LOOP: 1.
// NOTE(review): presumably lets the server distinguish the replayed request
// from the initial one; not verifiable from this snippet.
xhr.setRequestHeader("ISTL-INFINITE-LOOP", '1');
xhr.send(originalBody);
// Dispatch a bubbling, cancelable 'BJNyvohAx' event on the global object;
// nothing in this snippet listens for it.
var evt = document.createEvent('Event');
evt.initEvent('BJNyvohAx', true, true);
dispatchEvent(evt);
}
// Run afterReadyCb on the 'afterReady' event or after 200 ms, whichever
// happens first; the guard flag prevents a second run.
addEventListener('afterReady', afterReadyCb, false);
setTimeout(afterReadyCb, 200);
}());
</script>
我还尝试用 .setRequestHeader
设置脚本中声明的 originalHeaders 里的所有值,
并加上 .setRequestHeader "ISTL-INFINITE-LOOP", "1",
但却得到了 403 Forbidden 错误。
谁能告诉我,要获取文档内容我还缺少什么?(如果可行的话)
提前致谢!