当我通过命令行运行它时,它会持续一两个小时,然后命令行吐出“Memory Exhausted”。我无法弄清楚发生了什么。
此外,我还希望得到一些关于如何让这段代码更易读、更易修改的一般性建议,因为我将在一个月内把这个项目移交给别人。
// File-system module; the script uses its exists/open/readLine/write calls to
// read back and append to the CSV output.
var fs = require('fs');
// Fields of the physician row currently being assembled; reset to [] after
// each row has been written.
var currentPhysician = [];
// Never populated in the visible code; only its length is echoed when the run ends.
var physicianData = [];
// Highest permit number to look up; no search is submitted once permitNumber
// exceeds it.
var permitMax = 99999;
// Permit numbers (first CSV column) read back from an existing output file.
var alreadyParsed = [];
// Path of the CSV file that scraped rows are appended to.
var targetFile = "CMQphysicians.csv";
// Timestamp (ms since epoch) captured when the script starts.
var startTime = new Date().getTime();
// Permit number currently being looked up; incremented on every visit to the
// search (index) page before the search is submitted.
var permitNumber = -1;
// Permit number recorded as the starting point after an existing output file
// has been read.
var firstLicense = 0;
// NOTE(review): `utils` is not referenced anywhere in the visible code.
var utils = require('utils');
/**
 * Reports whether this string includes the given substring.
 *
 * @param {string} needle - Text to look for.
 * @returns {boolean} true when `needle` occurs anywhere in this string.
 */
String.prototype.contains = function (needle) {
    var position = this.indexOf(needle);
    return position !== -1;
};
// Build the CasperJS instance that drives the whole scrape: verbose logging
// at "info" level, with images and plugins disabled for every page it loads.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'info',
    pageSettings: {
        // Do not download images.
        loadImages: false,
        // Do not load NPAPI plugins (Flash, Silverlight, ...).
        loadPlugins: false
    }
});
/**
 * Formats a permit number as a string left-padded with zeros to at least
 * five characters (e.g. 42 -> "00042"). Values that are already five or more
 * characters long are returned unpadded.
 *
 * @param {number} [number] - Permit number to format. When omitted, the
 *     global `permitNumber` is used, which keeps existing zero-argument
 *     callers working unchanged.
 * @returns {string} The zero-padded permit number.
 */
function getPermitNumberString(number) {
    var digits = String(number === undefined ? permitNumber : number);
    var missing = 5 - digits.length;
    if (missing <= 0) {
        return digits;
    }
    // new Array(n + 1).join('0') yields exactly n zeros and works in ES5
    // engines such as PhantomJS, which lack String.prototype.padStart.
    return new Array(missing + 1).join('0') + digits;
}
/**
 * Collects the text of every cell in the physician details table.
 * Uses only the page's `document`, so it can be handed to casper.evaluate
 * (as the details stage does).
 *
 * @returns {string[]} The innerText of each matched <td>, in document order.
 */
function getDetailsData() {
    var cells = document.querySelectorAll('#content-html > table.griddetails > tbody > tr > td');
    var texts = [];
    var index;
    for (index = 0; index < cells.length; index++) {
        texts.push(cells[index].innerText);
    }
    return texts;
}
/**
 * Counts the physician links found in the second row of the results grid.
 * Uses only the page's `document`, so it can be handed to casper.evaluate
 * (as the list stage does).
 *
 * @returns {number} Number of matching links (0 when the search found nothing).
 */
function getPhysicianCount() {
    var links = document.querySelectorAll("#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a");
    return links.length;
}
// Logs failed resource requests (except those for Google-hosted URLs) and
// aborts the run when the error string contains "undefined".
//
// The original stopped the run with an empty `while` loop, which never
// terminates: it pins a CPU core and freezes the script instead of ending it.
// Exiting through die() gives the same "stop here" outcome with an error
// message and a non-zero exit status.
casper.on("resource.error", function (resourceError) {
    // Fall back to empty strings so a missing field cannot throw here.
    var url = resourceError.url || '';
    var errorString = resourceError.errorString || '';

    if (url.indexOf('google') === -1) {
        this.echo("Resource error: " + "Error code: " + resourceError.errorCode + " ErrorString: " + resourceError.errorString + " url: " + resourceError.url + " id: " + resourceError.id, "ERROR");
    }
    if (errorString.indexOf('undefined') !== -1) {
        this.die("Aborting after resource error: " + errorString + " url: " + url, 1);
    }
});
// Listener for 'load.started'. It has no active statements: its only line is
// a commented-out debug echo.
casper.on('load.started', function () {
//casper.echo('load started');
});
// Listener for 'navigation.requested'. It receives the requested url, the
// navigation type, the navigationLocked flag and the isMainFrame flag, but
// has no active statements: its debug echoes are commented out.
casper.on('navigation.requested', function (url, navigationType, navigationLocked, isMainFrame) {
//casper.echo('navigation requested');
//casper.echo(navigationType);
});
// Relay messages emitted by the remote page (the 'remote.message' event) to
// the script's own output, tagged with a fixed prefix.
casper.on('remote.message', function onRemoteMessage(message) {
    var line = 'from within remote page DOM' + message;
    this.echo(line);
});
// Begin the run by loading the initial page; the callback only announces the
// start. The actual work is driven by the 'load.finished' listener.
casper.start('https://www.google.ca/?gws_rd=ssl', function announceStart() {
    casper.echo('Starting!');
});
// Element the script waits for on every cmq.org page before acting on it.
var pageReadySelector = '#___gcse_0 > div > form > table.gsc-search-box > tbody > tr > td.gsc-search-button > input';
// Start page; also the stage assumed for any URL that does not contain ".aspx".
var startUrl = 'https://www.google.ca/?gws_rd=ssl';

/**
 * Converts a scraped value into a single CSV field: commas and newlines are
 * replaced with ';' so the value cannot break the row. Missing values become
 * an empty field instead of throwing.
 */
function toCsvField(value) {
    var text = (value === undefined || value === null) ? '' : String(value);
    return text.replace(/,/g, ';').replace(/\n/g, ';');
}

/**
 * Start-page stage, part 1: creates the CSV with its header row, or, when the
 * file already exists, reads the permit numbers it contains and resumes after
 * the highest one.
 */
function prepareOutputFile() {
    var stream;
    var line;
    var lineIndex = 0;
    var permit;
    var highest = null;

    if (!fs.exists(targetFile)) {
        fs.write(targetFile, "\uFEFF" + 'Permit Number,Last Name,First Name,Gender,Permit,Status,Specialty,Activity,Authorization,Address,Phone\n', 'a');
        return;
    }

    // This stage can run more than once (any URL without ".aspx" lands
    // here), so start from an empty list rather than accumulating duplicates.
    alreadyParsed.length = 0;
    stream = fs.open(targetFile, 'r');
    try {
        line = stream.readLine();
        while (line) {
            // Line 0 is the header row.
            if (lineIndex > 0) {
                permit = Number(line.substring(0, line.indexOf(',')));
                if (!isNaN(permit)) {
                    alreadyParsed.push(permit);
                    if (highest === null || permit > highest) {
                        highest = permit;
                    }
                }
            }
            line = stream.readLine();
            lineIndex++;
        }
    } finally {
        stream.close();
    }

    // The index stage increments permitNumber BEFORE searching, so resuming
    // at the highest saved permit makes the next search highest + 1. (The
    // previous "max + 1" silently skipped one permit on every restart, and an
    // existing file with no data rows produced -Infinity.)
    if (highest !== null) {
        permitNumber = highest;
    }
    firstLicense = permitNumber;
    casper.echo(permitNumber);
}

/** Index stage: advances to the next permit number and submits the search form. */
function submitNextPermit() {
    casper.waitForSelector(pageReadySelector, function () {
        casper.echo('index stage');
        permitNumber++;
        if (permitNumber > permitMax) {
            casper.echo('Permit number maxed out');
            return;
        }
        casper.echo('going to list');
        casper.sendKeys('#txbNoPermis', getPermitNumberString());
        casper.echo('sent keys, now clicking');
        casper.thenClick('#btSubmit');
        casper.echo('after the click');
    });
}

/**
 * List stage: no result -> back to the search; one result -> open its
 * details; anything else is unexpected and aborts the run.
 */
function handleSearchResults() {
    casper.waitForSelector(pageReadySelector, function () {
        var permit = getPermitNumberString();
        var physicianCount;

        casper.echo('list stage');
        physicianCount = casper.evaluate(getPhysicianCount);

        if (physicianCount === 0) {
            casper.echo('No physicians for license ' + permit);
            casper.echo('going to index');
            casper.thenClick('#btSubmit');
        } else if (physicianCount === 1) {
            casper.echo('Physician exists for license ' + permit);
            casper.echo('going to details');
            casper.thenClick('#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a');
        } else if (physicianCount > 1) {
            // Previously the permit number was concatenated outside echo()
            // (so never logged) and an endless empty loop hung the process.
            casper.die('a > 1 at ' + permit, 1);
        } else {
            casper.die('negative a at ' + permit, 1);
        }
    });
}

/** Details stage: scrapes one physician, appends the CSV row, starts a new search. */
function savePhysicianDetails() {
    casper.waitForSelector(pageReadySelector, function () {
        var headerHtml;
        var parenAt;
        var name;
        var tableData;
        var i;

        casper.echo('details stage');

        // Header cell text is "Last, First (...)": keep what precedes "(".
        headerHtml = casper.getHTML('#content-html > table.griddetails > tbody > tr:nth-child(1) > th');
        parenAt = headerHtml.indexOf('(');
        name = (parenAt === -1 ? headerHtml : headerHtml.substring(0, parenAt)).trim().split(',');
        tableData = casper.evaluate(getDetailsData) || [];

        // Start clean so a failure on an earlier page cannot leak fields
        // into this row.
        currentPhysician = [];
        currentPhysician.push(tableData[4]);
        currentPhysician.push((name[0] || '').trim());
        currentPhysician.push((name[1] || '').trim());
        // Remaining columns are the even-indexed cells from index 2 on;
        // cell 4 was already written as the first column.
        for (i = 2; i < tableData.length; i += 2) {
            if (i !== 4) {
                currentPhysician.push(tableData[i]);
            }
        }
        for (i = 0; i < currentPhysician.length; i++) {
            currentPhysician[i] = toCsvField(currentPhysician[i]);
        }

        casper.echo('writing to file!');
        fs.write(targetFile, currentPhysician.join(',') + '\n', 'a');
        currentPhysician = [];

        casper.echo(casper.exists('#btNewsearch'));
        casper.echo('going to index');
        casper.thenClick('#btNewsearch');
    });
}

// Drives the scrape as a small state machine: after every page load the part
// of the current URL before ".aspx" selects the stage to run.
//
// NOTE(review): every page load queues further work on the same casper
// instance (waitForSelector / thenClick / thenOpen) for the whole run.
// Presumably that ever-growing queue, together with the long-lived browser
// page, contributes to the reported "Memory Exhausted" failure; confirm by
// profiling. Because the CSV makes the run resumable, restarting the process
// in batches is a practical workaround.
casper.on('load.finished', function (status) {
    var currentUrl = this.getCurrentUrl();
    var urlPrefix = currentUrl.substring(0, currentUrl.indexOf('.aspx'));

    if (urlPrefix.length === 0) {
        casper.echo('undefined');
        urlPrefix = startUrl;
    }

    switch (urlPrefix.toUpperCase()) {
    case startUrl.toUpperCase():
        casper.echo('on google');
        prepareOutputFile();
        casper.thenOpen('http://www.cmq.org/bottin/index.aspx?lang=en&a=1');
        break;
    case 'http://www.cmq.org/bottin/index'.toUpperCase():
        submitNextPermit();
        break;
    case 'http://www.cmq.org/bottin/list'.toUpperCase():
        handleSearchResults();
        break;
    case 'http://www.cmq.org/bottin/details'.toUpperCase():
        savePhysicianDetails();
        break;
    default:
        casper.echo("Wrong URL!");
        casper.back();
        break;
    }
});
// Execute the queued steps; once they have all completed, announce the end
// and print how many entries physicianData holds.
casper.run(function onRunComplete() {
    var collectedCount = physicianData.length;
    casper.echo('ending!');
    casper.echo(collectedCount);
});
答案 0 :(得分:0)
这是由 WebKit 的一个已知错误导致的:
https://bugs.webkit.org/show_bug.cgi?id=154452
关闭图像加载后,问题得以解决。
编辑:问题似乎仍然存在。我猜测是因为 CasperJS 已经过时,所以我放弃了它,改用 Python。