我的目标是获取作业网站的每个作业链接,通过以下作业链接转到每个作业详细信息页面,通过CASPERJS下载并保存html中的详细信息。 每当我们回来时,每个工作链接的ID都会改变。在作业链接和作业详细信息页面之间,我需要每次在casper.repeat下获取所有作业ID。但NoOfLink数组在重复函数之外变为空[我在代码中注释该部分]。问题是什么?
var casper = require('casper').create();
var noOfRecordsToLoop = 0;
var TotalNoofNullElement = 0;
var NoOfLink = [];
var x = require('casper').selectXPath;
casper.echo('\nStart loding site......');
//---------------------------------------------Load and Scroll the site---------------------------------------//
casper.start('https://........../...../.......Careers/');
casper.wait(10000, function () {
//---------Total no of Job posting------//
var noOfRecords = this.fetchText(x('//*[@id="...........................jobProfile......"]'));
noOfRecordsToLoop = noOfRecords.replace(/[^0-9]/g, "");
var totalNoOfPage = Math.ceil(parseInt(noOfRecords) / 50);
casper.echo('\nStart scrolling site......');
casper.repeat(totalNoOfPage, function () {
this.scrollToBottom(); //-----------------------Scroll down
casper.wait(10000, function () {})
})
})
//------------------------------------------------Load and Scroll the site---------------------------------------//
casper.then(function () {
//-----------------------------------------Get all the link elements --------------------------//
var countForLink = 0;
var numTimesForRpt = noOfRecordsToLoop;
var numTimes = noOfRecordsToLoop;
casper.repeat(numTimesForRpt, function () {
RetElement = this.evaluate(function () {
var startingRow = '//*[contains(@id, "...-uid-")]'
var element = __utils__.getElementByXPath(startingRow).getAttribute('id');
return element;
});
var count = RetElement.replace(/[^0-9]/g, "");
casper.repeat(numTimes, function () {
var MatchElements = this.evaluate(function (count) {
var xp = '//*[contains(@id, "...-uid-' + count + '")]'
var element = __utils__.getElementByXPath(xp).getAttribute('id');
return element;
}, count++);
if (!MatchElements) {
TotalNoofNullElement = TotalNoofNullElement + 1
} else {
NoOfLink.push(MatchElements);
}
//**Here array elements are accessible**
for (var k = 0; k < NoOfLink.length; k++) {
this.echo(NoOfLink[k]);
}
});
//**But here array elements are not accessible outside of repeat** function
this.echo("Size of array is" + NoOfLink.length);
for (var q = 0; q < NoOfLink.length; q++) {
this.echo(NoOfLink[q]);
}
//-----------------------------------------Get all the link elements----------------------------//
//------------------------------------Go to the Job Detail Page Extract HTML and Save---------------------------//
this.echo("\n Inside repeat to Generate HTML");
var num = NoOfLink[countForLink];
this.echo("\nLink id is " + NoOfLink[countForLink]);
num = parseInt(num.replace(/[^0-9]/g, ""));
this.echo("\nNum is " + num);
//-----------------Click to go to the Job Detail Page------------------//
casper.thenClick(x('//*[@id="..-uid-' + num + '"]/div/div'));
casper.wait(5000, function getJobDetail() {
var content = this.getElementInfo(x(".//*[contains(@id,'......t-uid-')]")).html;
var divStart = '<div id="extrdHtml">'
var divEnd = '</div>'
var body = divStart + content + divEnd
this.echo("\nContent of Job detail :" + body);
var fs = require('fs');
fs.write('extractedJob' + NoOfLink[countForLink] + '.html', body, 'w');
this.echo("\nFile saved");
//------------------------------------Go to the Job Detail Page Extract HTML and Save---------------------------//
}); //casper.wait
casper.back();
casper.wait(5000);
countForLink++
}); //casper.repeat
}); //casper.then
//-------------------------------------------Get all the link elements------------------------------//
casper.run();
有两个重复循环。
casper.repeat(numTimesForRpt, function () {
- 这是主要的外循环,第二个循环所在的位置。casper.repeat(numTimes, function ()
- 我在哪里获取链接并填充NoOfLink
数组。我试图在第二个循环之外(在主外循环内)获取数组元素值,但它不起作用。答案 0 :(得分:0)
所有then*
和wait*
函数都是异步步骤函数。如果您调用它们,则需要安排在当前步骤结束时执行的步骤。 casper.repeat()
是uses casper.then()
的函数,因此也是异步的。 casper.repeat()
之后的每个同步代码都将在 repeat
回调的内容之前执行。
您有两种选择:
casper.repeat()
中casper.then()
之后的所有内容换行,以使其异步或repeat
,如果repeat
的回调不需要像您的情况那样异步进行评估。顺便说一句,通过利用CasperJS提供的辅助函数,您可以经常减少代码。例如,您不需要仅使用evaluate()
来获取XPath的某些元素的id属性。您可以使用casper.getElementsAttribute()
执行此操作。
示例:
var count = RetElement.replace(/[^0-9]/g, "");
for(var i = count; i < (numTimes + count); i++) {
var MatchElements = this.getElementsAttribute(x('//*[contains(@id, "...-uid-' + i + '")]'), 'id');
if (!MatchElements) {
TotalNoofNullElement = TotalNoofNullElement + 1
} else {
NoOfLink.push(MatchElements);
}
//**Here array elements are accessible**
for (var k = 0; k < NoOfLink.length; k++) {
this.echo(NoOfLink[k]);
}
}