我正在尝试使用 PhantomJS 获取 http://gibdd.ru 的屏幕截图。
然而,到目前为止我的所有尝试都没有成功。
我的代码如下:
var page = require('webpage').create();
var system = require('system');
// Trace every outgoing resource request: a banner line followed by the
// request object pretty-printed as JSON (4-space indent), both on stderr.
page.onResourceRequested = function (request) {
  var err = system.stderr;
  err.writeLine('= onResourceRequested()');
  var dump = JSON.stringify(request, undefined, 4);
  err.writeLine(' request: ' + dump);
};
// Trace every received resource chunk: a banner line, then one line holding
// the response id, its stage and the whole response object as compact JSON.
page.onResourceReceived = function (response) {
  var err = system.stderr;
  err.writeLine('= onResourceReceived()');
  var idPart = ' id: ' + response.id;
  var stagePart = ', stage: "' + response.stage + '"';
  var bodyPart = ', response: ' + JSON.stringify(response);
  err.writeLine(idPart + stagePart + bodyPart);
};
// When a load starts, log a banner and the URL the page is navigating away
// from. The URL is read inside the page context via page.evaluate().
page.onLoadStarted = function () {
  var err = system.stderr;
  err.writeLine('= onLoadStarted()');
  var leavingUrl = page.evaluate(function () {
    return window.location.href;
  });
  err.writeLine(' leaving url: ' + leavingUrl);
};
// When a load finishes, log a banner and the status value passed to the
// handler.
page.onLoadFinished = function (status) {
  var report = ['= onLoadFinished()', ' status: ' + status];
  report.forEach(function (line) {
    system.stderr.writeLine(line);
  });
};
// Log each navigation request: destination URL, the cause reported by the
// engine, whether navigation will happen, and whether it came from the main
// frame. One stderr line per item, preceded by a banner.
page.onNavigationRequested = function (url, type, willNavigate, main) {
  var report = [
    '= onNavigationRequested',
    ' destination_url: ' + url,
    ' type (cause): ' + type,
    ' will navigate: ' + willNavigate,
    " from page's main frame: " + main
  ];
  for (var i = 0; i < report.length; i += 1) {
    system.stderr.writeLine(report[i]);
  }
};
// Log resources that failed to load: the URL on one line, then the error
// code and description on the next, preceded by a banner.
page.onResourceError = function (resourceError) {
  var err = system.stderr;
  var target = ' - unable to load url: "' + resourceError.url + '"';
  var reason = ' - error code: ' + resourceError.errorCode +
    ', description: ' + resourceError.errorString;
  err.writeLine('= onResourceError()');
  err.writeLine(target);
  err.writeLine(reason);
};
// Report JavaScript errors raised inside the page to stderr.
//   msg   - the error message.
//   trace - optional array of stack frames; each frame's `file`, `line` and
//           (when present) `function` properties are printed.
page.onError = function (msg, trace) {
  system.stderr.writeLine('= onError()');
  var msgStack = [' ERROR: ' + msg];
  // Only emit the TRACE header when there is at least one frame to list;
  // an empty array previously produced a dangling " TRACE:" line.
  if (trace && trace.length) {
    msgStack.push(' TRACE:');
    trace.forEach(function (t) {
      msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
    });
  }
  // The message and all frames go out in a single write.
  system.stderr.writeLine(msgStack.join('\n'));
};
// Load the target page and save a screenshot to g.png.
// The callback receives the load status ('success' or 'fail', as seen in the
// console output). A screenshot is written only when the load succeeded;
// otherwise a note goes to stderr and the process exits with a non-zero code
// so callers can detect the failure.
page.open('http://gibdd.ru/', function (status) {
  var loaded = status === 'success';
  if (loaded) {
    page.render('g.png');
  } else {
    system.stderr.writeLine('Page failed to load; no screenshot written.');
  }
  console.log("DONE WITH ", status, page);
  phantom.exit(loaded ? 0 : 1);
});
以上代码的控制台输出:
= onNavigationRequested
destination_url: http://gibdd.ru/
type (cause): Other
will navigate: true
from page's main frame: true
= onResourceRequested()
request: {
"headers": [
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1"
}
],
"id": 1,
"method": "GET",
"time": "2016-05-18T16:35:37.982Z",
"url": "http://gibdd.ru/"
}
= onLoadStarted()
leaving url: about:blank
= onResourceReceived()
id: 1, stage: "start", response: {"bodySize":302,"contentType":"text/html; charset=iso-8859-1","headers":[{"name":"Date","value":"Wed, 18 May 2016 16:33:07 GMT"},{"name":"Server","value":"Apache/2.2.22 (Ubuntu)"},{"name":"Location","value":"http://www.gibdd.ru/"},{"name":"Vary","value":"Accept-Encoding"},{"name":"Content-Encoding","value":"gzip"},{"name":"Keep-Alive","value":"timeout=5, max=100"},{"name":"Connection","value":"Keep-Alive"},{"name":"Content-Type","value":"text/html; charset=iso-8859-1"}],"id":1,"redirectURL":"http://www.gibdd.ru/","stage":"start","status":301,"statusText":"Moved Permanently","time":"2016-05-18T16:35:38.176Z","url":"http://gibdd.ru/"}
= onNavigationRequested
destination_url: http://www.gibdd.ru/
type (cause): Other
will navigate: true
from page's main frame: true
= onResourceRequested()
request: {
"headers": [
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1"
}
],
"id": 2,
"method": "GET",
"time": "2016-05-18T16:35:38.182Z",
"url": "http://www.gibdd.ru/"
}
= onResourceReceived()
id: 1, stage: "end", response: {"contentType":"text/html; charset=iso-8859-1","headers":[{"name":"Date","value":"Wed, 18 May 2016 16:33:07 GMT"},{"name":"Server","value":"Apache/2.2.22 (Ubuntu)"},{"name":"Location","value":"http://www.gibdd.ru/"},{"name":"Vary","value":"Accept-Encoding"},{"name":"Content-Encoding","value":"gzip"},{"name":"Keep-Alive","value":"timeout=5, max=100"},{"name":"Connection","value":"Keep-Alive"},{"name":"Content-Type","value":"text/html; charset=iso-8859-1"}],"id":1,"redirectURL":"http://www.gibdd.ru/","stage":"end","status":301,"statusText":"Moved Permanently","time":"2016-05-18T16:35:38.185Z","url":"http://gibdd.ru/"}
= onResourceError()
- unable to load url: "http://www.gibdd.ru/"
- error code: 4, description: Socket operation timed out
= onResourceReceived()
id: 2, stage: "end", response: {"contentType":null,"headers":[],"id":2,"redirectURL":null,"stage":"end","status":null,"statusText":null,"time":"2016-05-18T16:36:38.586Z","url":"http://www.gibdd.ru/"}
= onLoadFinished()
status: fail
DONE WITH fail WebPage(name = "WebPage")
= onNavigationRequested
destination_url: about:blank
type (cause): Other
will navigate: true
from page's main frame: true
这个网站似乎会阻止来自基于 PhantomJS 的工具的连接,因为 CasperJS 同样无法正常工作。
我尝试将 User-Agent 请求头更改为:
// Replace the User-Agent string sent by the page with a desktop Chrome one
// (must be set before page.open() to affect the initial request — TODO confirm).
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36';
但这并没有起作用。
有没有办法绕过这一限制?这个脚本在其他任何网页上都能正常工作。