试图用casperjs迭代一些链接

时间:2015-07-02 15:20:31

标签: javascript web-scraping casperjs

这就是我目前所拥有的:

transition

我正在尝试迭代一些链接来抓取一些数据。我没有收到任何错误,但 linkData 为空,没有任何内容写入文件。代码如下:

var casper = require('casper').create();
var fs = require('fs');
var folderName = 'CARD_DATA';
var fileName = 'allChampionDecks.txt';
var save = fs.pathJoin(fs.workingDirectory, folderName, fileName);

// init jquery
var casper = require('casper').create({
    clientScripts: ['jquery.min.js']
});

casper.start(URL, function() {
});

var links = ["http://magic.wizards.com/en/events/coverage/mtgochamp14","http://magic.wizards.com/node/335986","http://magic.wizards.com/en/events/coverage/2014WC"];
var i = -1;
var linkData = '';

// iterate
casper.then(function() {
    this.each(links, function() {
        i++;
        this.thenOpen((links[i]), function() {
            linkData += this.evaluate(getLinkDeckData);
        });
    });
    fs.write(save, linkData + '\n', 'w');
});

// scrape
function getLinkDeckData() {
    var meta = $('.deck-meta h4');
    var event = $('.deck-meta h5');
    var allDecks = $('.toggle-text .deck-list-text');
    var json = '{';

    for(var i = 0; i < meta.length; i++) {
        json += '"event": "'+$(event[i]).text().trim()+'",'
            +'"deckName": "'+$(meta[i]).text()+'",'
            +'"deck": [';
        var cardCount = $(allDecks[i]).find('.sorted-by-overview-container .row .card-count');
        var cardName = $(allDecks[i]).find('.sorted-by-overview-container .row .card-name');
        for(var j = 0; j < cardCount.length; j++) {
            if(j < cardCount.length-1)
                json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"},';
            else
                json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"}';
        }
        json += '],'
            +'"sideboard": [';
        var cardCount = $(allDecks[i]).find('.sorted-by-sideboard-container .row .card-count');
        var cardName = $(allDecks[i]).find('.sorted-by-sideboard-container .row .card-name');
        for(var j = 0; j < cardCount.length; j++) {
            if(j < cardCount.length-1)
                json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"},';
            else
                json += '{"quantity":"'+$(cardCount[j]).text()+'", "name":"'+$(cardName[j]).text()+'"}';
        }
        if(i < meta.length-1)
            json += '],'
        else
            json += ']}'
        /**/
    }
    return json;
}

casper.run();

对于单个页面,我使用了以下内容并且工作正常:

linkData

1 个答案:

答案 0 :(得分:2)

所有 then*(以及 wait*)函数都是异步的步骤函数。当你在循环内部调用 casper.thenOpen() 时,每次调用只是调度(排队)一个打开页面的步骤,而随附的 then 回调会作为另一个单独的步骤,稍后才执行。

问题在于,写入 linkData 的代码并不在一个单独的步骤中,因此它会在那些已调度的打开步骤执行之前就运行,此时 linkData 仍为空。只需将写入操作包裹在 casper.then() 中即可。

修正片段:

// Queue one navigation step per link, then a final step that writes the
// accumulated data. Relies on `casper`, `links`, `linkData`, `fs`, `save`
// and `getLinkDeckData` being defined earlier in the script.
casper.then(function() {
    // Array.prototype.forEach takes the callback as its first argument.
    // `this` is not the casper instance inside a plain forEach callback,
    // so the steps are scheduled through `casper` explicitly.
    links.forEach(function(link) {
        casper.thenOpen(link, function() {
            // Append whatever the in-page scraper returns for this link.
            linkData += this.evaluate(getLinkDeckData);
        });
    });

    // Scheduled after the thenOpen steps above, so it runs once every link
    // has been visited and linkData has been filled in.
    this.then(function() {
        fs.write(save, linkData + '\n', 'w');
    });
});

您应该使用 Array.prototype.forEach,而不是 CasperJS 的 each。这样,您就不需要全局计数器变量。