我正在使用while循环从csv文件中逐行读取用户名列表。对于每一个用户名,我都需要打开一个URL并将页面内容转储到一个文件中。
然而,casper.thenOpen总是只运行一次。我从“Asynchronous Process inside a javascript for loop”这个问题中了解到,这是因为它是一个异步过程。我需要对下面的代码做同样的处理:
// Reads usernames.csv line by line and, for each username, queues a CasperJS
// step that opens that user's URL and dumps the page title and plain text
// into a file named after the username.
casper.then(function(){
// `stream`, `targetusername` and `i` are assigned without `var`, so they are
// implicit globals shared with the callbacks below.
stream = fs.open('usernames.csv', 'r');
targetusername = stream.readLine();
i = 0;
// Loop until readLine() returns a falsy value.
while(targetusername) {
var url = "http://blablalb" + targetusername;
console.log("current url is " + url);
// NOTE(review): thenOpen only queues a step here; the callback presumably
// runs after this whole loop has finished, which would explain why every
// "current url is" line is printed before the first "I am here".
casper.thenOpen(url, function() {
console.log ("I am here");
// NOTE(review): `targetusername` is read when the callback runs, not when it
// was queued. By then it likely holds the falsy value that ended the loop,
// so these writes would target an invalid file name — confirm whether that
// is what stops the remaining steps.
fs.write(targetusername,this.getTitle() + "\n",'w');
fs.write(targetusername,this.page.plainText,'a');
});
targetusername = stream.readLine();
// `i` is incremented but never read within this snippet.
i++;
}
});
casper.thenOpen总是只运行一次,给我这个输出:
current url is first_url
current url is second_url
current url is third_url
I am here
我需要的是这样的
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
为了让这个循环正常运行,我已经快抓狂了!
答案 0 :(得分:2)
我认为该代码没有任何问题。我编写此代码进行测试(基本上,它与您的代码相同):
// Minimal CasperJS script: from inside a single step, queue one thenOpen step
// per address and report the current URL once each page has been opened.
var casper = require('casper').create();

// Addresses to visit, in order.
var url_list = [
    'http://phantomjs.org/',
    'https://github.com/',
    'https://nodejs.org/'
];

// Step callback: print the URL the browser is currently on.
function reportCurrentUrl() {
    casper.echo("current url is " + casper.getCurrentUrl());
}

// Announce, then queue, a thenOpen step for a single address.
function queueVisit(address) {
    casper.echo('assign a then step for ' + address);
    casper.thenOpen(address, reportCurrentUrl);
}

casper.start();
casper.then(function () {
    url_list.forEach(queueVisit);
});
casper.run();
输出:
assign a then step for http://phantomjs.org/
assign a then step for https://github.com/
assign a then step for https://nodejs.org/
current url is http://phantomjs.org/
current url is https://github.com/
current url is https://nodejs.org/en/
如您所见,它打开了每个网址。
所以让我们回答你的问题:
Q1:为什么它不输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
A1:因为CasperJS会先分配步骤——更准确地说,是先把所有步骤推入堆栈,然后再从该堆栈中逐个取出步骤并运行。因此,循环中所有的“current url is …”都会先于任何一个“I am here”输出。请查看这个很棒的回答(great answer)以获取更多信息。
Q2:为什么它不输出(为什么循环只运行一次):
current url is first_url
current url is second_url
current url is third_url
I am here
I am here
I am here
A2:很可能是在打开第二个网址时抛出了某个异常,导致PhantomJS崩溃。下面这段代码可以帮助您了解究竟发生了什么:
// Create the casper instance with verbose, debug-level logging so that more
// log output is shown.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

// Format one backtrace frame as " -> <file>: <line>", followed by
// " (in function <name>)" when the frame carries a function name.
function describeFrame(frame) {
    var origin = frame.file || frame.sourceURL;
    var suffix = '';
    if (frame.function) {
        suffix = ' (in function ' + frame.function + ')';
    }
    return ' -> ' + origin + ': ' + frame.line + suffix;
}

// Watch the error event which PhantomJS emits and log the message together
// with its backtrace (when one is supplied) at "error" level.
casper.on('error', function (msg, backtrace) {
    var report = ['PHANTOM ERROR: ' + msg];
    if (backtrace && backtrace.length) {
        report = report.concat(['TRACE:'], backtrace.map(describeFrame));
    }
    this.log(report.join('\n'), "error");
});
答案 1 :(得分:1)
我可以达到我需要的确切输出:
current url is first_url
I am here
current url is second_url
I am here
current url is third_url
I am here
使用repeat函数,如下:
// Dumps each user's page to a file named after the user, one user at a time.
//
// casper.repeat queues one step per username. Each of those steps queues its
// own thenOpen sub-step, so the output alternates:
//   current url is <url>
//   I am here
//
// Expects `casper` and the PhantomJS `fs` module to be in scope.
casper.then(function () {
    // Read the whole list up front so the repetition count comes from the
    // file instead of being hard-coded, and so the stream can be closed
    // before any page is opened.
    var stream = fs.open('usernames.csv', 'r');
    var usernames = [];
    try {
        while (!stream.atEnd()) {
            var line = stream.readLine().trim();
            // Skip blank lines (e.g. a trailing newline at the end of the file).
            if (line) {
                usernames.push(line);
            }
        }
    } finally {
        stream.close();
    }

    var nextIndex = 0;
    casper.repeat(usernames.length, function () {
        // Keep the username in a local so the thenOpen callback below writes
        // to the file for the user it was queued for.
        var username = usernames[nextIndex++];
        var url = "http://blablalb" + username;
        console.log("current url is " + url);
        casper.thenOpen(url, function () {
            console.log("I am here");
            // First write truncates/creates the file, second one appends.
            fs.write(username, this.getTitle() + "\n", 'w');
            fs.write(username, this.page.plainText, 'a');
        });
    });
});