情况:在我的crontab中,我有一个运行.sh脚本的作业。这个脚本反过来执行我的,比如casperjs casper.js - 一切正常。当我从命令行运行脚本时,脚本会执行。从命令行调用时,shell脚本执行正常。我从crontab运行作业并输出到日志文件,我可以看到它正在工作。
然而,我从未得到预期的输出:即。 casper_output.json,这是运行的完整 casper.js 文件:
var casper = require('casper').create({
waitTimeout: 10000,
stepTimeout: 10000,
verbose: true,
//debug, write out results, errors
logLevel: "debug",
loadImages: true,
loadPlugins: true,
pageSettings: {
javascriptEnabled: true,
webSecurityEnabled: false
},
onWaitTimeout: function() {
this.echo('** Wait-TimeOut **');
},
onStepTimeout: function() {
this.echo('** Step-TimeOut **');
}
});
//vars for this page
var url = 'http://urltoscrape.com';
var results = [];
var menuResults = [];
var fs = require('fs');
var terminate = function() {
this.echo("Exiting..").exit();
};
var getContent = function() {
//grab the values that we need for json file, store in empty results array
var results = [];
var URL = document.querySelectorAll('#results .event-details-link');
var title = document.querySelectorAll('#results h3');
var date = document.querySelectorAll('#results span');
var desc = document.querySelectorAll('#results p');
for (var i = 0;
//count through URL, title, date and description
i < URL.length
&& i < title.length
&& i < date.length
&& i < desc.length;
i++) {
var innerURL = URL[i].getAttribute("href");;
var innerTitle = title[i].innerText;
var innerDate = date[i].innerText;
//messy regex, TODO: make more efficient
var newDate = innerDate.replace(/\s[at].*$/g,"");
var strip = newDate.replace(/[on]/g,"");
var nows = strip.replace(/\s/g, "");
var innerDesc = desc[i].innerText;
var dept = "Dept";
//push into json, TODO: order
results.push({ URL: innerURL, title: innerTitle, date: nows, desc: innerDesc, dept: dept});
}
return results;
}
var processPage = function() {
//crude but waits for page to load
this.wait(2000, function() {
//run the get content function
results = this.evaluate(getContent);
require('utils').dump(results);
//write to file
fs.write("casper_output.json", JSON.stringify(results, null, ' '), 'w');
});
}
casper.start(url, function() {
this.waitForSelector('#results', processPage, terminate);
});
casper.run();
调用它的shell脚本 casper_run.sh :
#!/bin/bash
PATH=/output/of/path/MAMP/Library/bin
PHANTOMJS_EXECUTABLE=/usr/local/bin/phantomjs ; /usr/local/bin/casperjs /Applications/AMPPS/www/test/casper.js 2>&1
最后,在我的crontab中:
* * * * * /Applications/AMPPS/www/test/casper_run.sh
任何洞察casper_output.json为什么不从我的cron作业写入(但是从命令行运行时)都是如此受欢迎。我一直在研究这个问题太久了。
谢谢!
答案 0 :(得分:1)
Cron(或更确切地说是CasperJS)可能没有写入当前工作目录的写入权限。
你可以change the working directory通过PhantomJS'文件系统模块,或者你可以使用完整路径:
fs.write("/Applications/AMPPS/www/test/casper_output.json", JSON.stringify(results, null, ' '), 'w');