我正在使用 CasperJS 下载一个 15 MB 的文件。在浏览器中,下载大约需要 3 分钟才能完成。而使用 Casper 时,对同一 URL 调用 .download 函数会在 30 秒后返回,写入磁盘的文件为 0 字节。我尝试过设置更长的超时时间:
// Create the CasperJS instance used by the download attempt, with enlarged timeouts.
var casper = require("casper").create({
pageSettings: {
// NOTE(review): presumably relaxes the page's same-origin checks so the
// download URL can be fetched cross-domain; confirm against the PhantomJS settings docs.
webSecurityEnabled: false
},
// Both timeouts were raised to 500000 while trying to stop .download from
// returning early; the surrounding report says they made no difference.
waitTimeout: 500000,
stepTimeout: 500000
});
但这些设置没有效果。这是我的下载代码:
// Watch every resource the page receives and, when its URL carries the
// download marker, save that URL to disk with casper.download().
//
// NOTE(review): PhantomJS may report the same resource more than once (for
// example in separate "start" and "end" stages), in which case this handler
// would start the download again; confirm and de-duplicate if needed.
casper.on('resource.received', function (resource) {
    var url = resource.url;
    var file = "downloaded_file.wav"; // reported to end up as 0 bytes

    // Guard clause: ignore everything that is not the download we are after.
    if (url.indexOf("myDownloadUniqueString=") === -1) {
        return;
    }

    this.echo(url); // the echo'ed url can be downloaded in a web browser
    try {
        casper.download(url, file);
    } catch (e) {
        // Surface any failure instead of dropping it; per the report no
        // error is actually thrown in the failing scenario.
        this.echo(e);
    }
});
有什么想法吗?也许是 PhantomJS 的 fs 方法存在问题,但相关文档并不完整……
答案 0 :(得分:2)
我通过在 evaluate 函数中手动发起异步 XMLHttpRequest(xhr),然后将结果写入全局(window)变量,解决了这个问题(对于一个约 6 MB 的 Excel .xls 二进制文件,下载大约需要 30 秒)。随后使用 waitFor 等待该全局变量被设置。
在 xhr 对象上可以设置自定义超时;下面的示例中为 15 * 60 * 1000 毫秒 = 15 分钟。
需要注意的是,二进制下载内容要先编码为 ASCII / base64,之后再解码并写入二进制文件。对于文本下载,这一步可以相应调整或简化。
var fs = require('fs');
var casper = require('casper').create({
    //options here
});

// NOTE(review): this snippet contains no casper.start()/casper.run() calls;
// it presumably has to be embedded in a script that starts Casper and loads a
// page first. Confirm before running it as-is.

// Start an asynchronous XHR inside the page context. The page-side callback
// publishes its outcome through two window globals:
//   window.xhrstatus       - HTTP status, set once the request has finished
//   window.xhrResponseText - response body, base64 encoded
// The call is made on `casper` explicitly: at script top level `this` is not
// the Casper instance, so `this.evaluate` is only valid inside a step callback.
casper.evaluate(function (url) {
    var xhr = new XMLHttpRequest();
    xhr.timeout = 15 * 60 * 1000; // 15 minutes
    // Keep the raw bytes intact by reading the body as an x-user-defined string.
    xhr.overrideMimeType("text/plain; charset=x-user-defined");
    xhr.open("GET", url); // synchronous request banned, so use waitfor to wait on a global variable
    xhr.onreadystatechange = function () {
        if (xhr.readyState == 4) {
            // The status is published unconditionally (no 200 check) so the
            // waiting side is released on failures too.
            window.xhrstatus = xhr.status; //write to a global (window) variable
            window.xhrResponseText = __utils__.encode(xhr.responseText); //base64 encode using casper functions (btoa fails)
        }
    };
    xhr.send(null);
    return true;
}, 'http://example.com/download.xls');

// Poll until the page-side callback has published a status, then decode the
// payload and write it out as a binary file.
casper.waitFor(function () {
    // Loose comparison on purpose: treats both null and undefined as "not set yet".
    return this.getGlobal('xhrstatus') != null;
}, function () {
    var payload = this.getGlobal('xhrResponseText');
    this.echo('XHR status: ' + this.getGlobal('xhrstatus'));
    // If encoding failed in the page, the payload global was never set;
    // decoding a non-string would throw, so report and stop instead.
    if (typeof payload !== 'string') {
        this.echo('No response body was captured; nothing written.');
        return;
    }
    this.echo('Saving report...');
    //http://phantomjs.org/api/fs/method/is-writable.html to check if file writable first
    //decode using casper clientutil function and then write binary file
    fs.write('saveFileName.xls', decode(payload), 'wb');
}, null, 15 * 60 * 1000);
casper.js clientutils 库中的编码/解码函数如下所示。在 JavaScript 自带的 atob() 和 btoa() 失败的情况下,这些函数似乎仍能正常工作。
/*
* encode / decode function from casper.js clientutils
* https://github.com/casperjs/casperjs/blob/master/modules/clientutils.js
* Included here for reference - you could just reference the file in your code
*/
// The 64-character base64 alphabet; a sextet value is the index of its character.
var BASE64_ENCODE_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "abcdefghijklmnopqrstuvwxyz" +
    "0123456789+/";

// Reverse lookup for char codes 0-127: the sextet value of each base64
// character, or -1 for every character outside the alphabet.
var BASE64_DECODE_CHARS = (function buildDecodeTable() {
    var table = [];
    var code;
    for (code = 0; code < 128; code++) {
        // indexOf yields -1 for characters that are not in the alphabet.
        table.push(BASE64_ENCODE_CHARS.indexOf(String.fromCharCode(code)));
    }
    return table;
})();
/**
 * Decodes a base64 encoded string. Succeeds where window.atob() fails.
 *
 * Characters that are not part of the base64 alphabet (whitespace, line
 * breaks, any non-ASCII character) are skipped rather than decoded.
 * Decoding stops at the first "=" found in the third or fourth position of
 * a 4-character group.
 *
 * @param String str The base64 encoded contents (null/undefined gives "")
 * @return string One character per decoded byte (char codes 0-255)
 */
var decode = function decode(str) {
    /*eslint max-statements:0, complexity:0 */
    if (str === null || str === undefined) {
        return "";
    }
    var PAD = 61; // char code of "="
    var c1, c2, c3, c4, i = 0, len = str.length, out = "";

    // Maps a char code to its sextet value, or -1 when the code is not a
    // base64 character. Codes beyond the lookup table (and the NaN produced
    // by reading past the end of the string) are reported as -1 instead of
    // leaking `undefined` into the bit arithmetic below.
    var sextetOf = function sextetOf(code) {
        var value = BASE64_DECODE_CHARS[code];
        return typeof value === "number" ? value : -1;
    };

    while (i < len) {
        // 1st sextet: skip forward to the next valid base64 character.
        do {
            c1 = sextetOf(str.charCodeAt(i++));
        } while (i < len && c1 === -1);
        if (c1 === -1) {
            break;
        }
        // 2nd sextet.
        do {
            c2 = sextetOf(str.charCodeAt(i++));
        } while (i < len && c2 === -1);
        if (c2 === -1) {
            break;
        }
        out += String.fromCharCode(c1 << 2 | (c2 & 0x30) >> 4);
        // 3rd sextet, or padding that ends the data.
        do {
            c3 = str.charCodeAt(i++);
            if (c3 === PAD) {
                return out;
            }
            c3 = sextetOf(c3);
        } while (i < len && c3 === -1);
        if (c3 === -1) {
            break;
        }
        out += String.fromCharCode((c2 & 0XF) << 4 | (c3 & 0x3C) >> 2);
        // 4th sextet, or padding that ends the data.
        do {
            c4 = str.charCodeAt(i++);
            if (c4 === PAD) {
                return out;
            }
            c4 = sextetOf(c4);
        } while (i < len && c4 === -1);
        if (c4 === -1) {
            break;
        }
        out += String.fromCharCode((c3 & 0x03) << 6 | c4);
    }
    return out;
};
/**
 * Base64 encodes a string, including binary content. Works in cases where
 * window.btoa() fails.
 *
 * Only the low 8 bits of each character code contribute to the output, and
 * the result is padded with "=" to a multiple of four characters.
 *
 * @param String str The string content to encode
 * @return string
 */
var encode = function encode(str) {
    /*eslint max-statements:0 */
    var total = str.length;
    var pieces = [];
    var pos, remaining, b1, b2, b3;
    // Consume the input three bytes at a time; a short final group is padded.
    for (pos = 0; pos < total; pos += 3) {
        remaining = total - pos;
        b1 = str.charCodeAt(pos) & 0xff;
        pieces.push(BASE64_ENCODE_CHARS.charAt(b1 >> 2));
        if (remaining === 1) {
            // One trailing byte: two data characters plus "==".
            pieces.push(BASE64_ENCODE_CHARS.charAt((b1 & 0x3) << 4), "==");
            break;
        }
        b2 = str.charCodeAt(pos + 1);
        pieces.push(BASE64_ENCODE_CHARS.charAt((b1 & 0x3) << 4 | (b2 & 0xF0) >> 4));
        if (remaining === 2) {
            // Two trailing bytes: three data characters plus "=".
            pieces.push(BASE64_ENCODE_CHARS.charAt((b2 & 0xF) << 2), "=");
            break;
        }
        b3 = str.charCodeAt(pos + 2);
        pieces.push(BASE64_ENCODE_CHARS.charAt((b2 & 0xF) << 2 | (b3 & 0xC0) >> 6));
        pieces.push(BASE64_ENCODE_CHARS.charAt(b3 & 0x3F));
    }
    return pieces.join("");
};
答案 1 :(得分:0)
试试添加 resourceTimeout 设置怎么样:
pageSettings: {
webSecurityEnabled: false,
resourceTimeout: 240000 // 240 s, expressed in milliseconds
},
这个回答表示,该设置已在 PhantomJS 1.9 中加入,但尚未写入文档。