我正在使用casperJS 1.1.2和phantomJS 2.1.1从网页中检索一些链接。我感兴趣的链接都在href属性中有字符串“javascript”,如下所示:
<td>
<a href="javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$CenterContent$ctl01",
"", true, "", "", false,
true))">Species A
</a></td>
<td>
<a href="javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$CenterContent$ctl02",
"", true, "", "", false,
true))">Species B </a></td>
<td><a href="javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$CenterContent$ctl03",
"", true, "", "", false,
true))">Sepcies C </a></td>
<td>
<a href="javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$CenterContent$ctl04",
"", true, "", "", false,
true))">Species D</a></td>
<td>
<a href="javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$CenterContent$ctl05",
"", true, "", "", false,
true))">Species E </a></td>
我用casperJS写了如下脚本,用来抓取所有href属性中包含“javascript”字符串的链接,并将它们写入文件。
// Script-level holder for the matched links; it is assigned inside the
// casper.start callback and passed to fs.write further down.
var links=[];
// Create the CasperJS instance with a waitTimeout of 10000, verbose
// debug-level logging, and image/plugin loading disabled in pageSettings.
var casper = require('casper').create({
waitTimeout: 10000,
verbose: true,
logLevel: 'debug',
pageSettings: {
loadImages: false,
loadPlugins: false
}
});
var fs = require('fs');
// Register the start URL together with a callback that stores the result of
// the XPath lookup in `links`.
casper.start("https://apps.ams.usda.gov/CMS/", function()
{
// The XPath selects every <a> that is a child of a <td> and whose href
// contains "javascript".
// NOTE(review): `_utils_` is not defined anywhere in this script; CasperJS
// documents its client-side helper as `__utils__` — confirm.
links = _utils_.getElementsByXPath('.//td/a[contains(@href,"javascript")]');
});
// NOTE(review): this write is issued before casper.run() below, so `links`
// presumably still holds the initial empty array here — confirm against
// CasperJS step scheduling.
fs.write("plantVarietyResults.json", links, 'w');
casper.run();
我无法弄清楚为什么我的脚本没有把这些链接正确写入文件。
答案 0 :(得分:0)
你对CasperJS存在一些误解,代码中也存在错误:
- 正确的名称是 `__utils__`,而不是 `_utils_`。
- `__utils__` 只在页面上下文中可用(即 `casper.evaluate` 内部)。
- 如果您对 `href` 感兴趣,那么您必须从DOM元素中读取 `href` 属性。
- `casper.start` 是异步的,但 `fs.write` 不是,因此您必须将 `fs.write` 调用放在 `casper.start` 之后执行的另一个异步函数中。

以下是一个应该有效的示例(未经测试):
// Open the page, gather the href of every matching link inside the page
// context, then write the resulting list to disk as JSON.
casper.start("https://apps.ams.usda.gov/CMS/", function() {
    var xpath = './/td/a[contains(@href,"javascript")]';
    // The callback given to evaluate() executes in the page context, which is
    // where __utils__ is used; only plain href strings are returned from it.
    var links = this.evaluate(function(expression) {
        var anchors = __utils__.getElementsByXPath(expression);
        var hrefs = [];
        for (var i = 0; i < anchors.length; i++) {
            hrefs.push(anchors[i].href);
        }
        return hrefs;
    }, xpath);
    var payload = JSON.stringify(links);
    fs.write("plantVarietyResults.json", payload, 'w');
});
casper.run();
这是一个略短的方式:
// Shorter variant: build an XPath selector and let CasperJS read the href
// attribute of every matching element, then store the list as JSON.
var x = require('casper').selectXPath;
casper.start("https://apps.ams.usda.gov/CMS/", function() {
    var selector = x('.//td/a[contains(@href,"javascript")]');
    var hrefs = this.getElementsAttribute(selector, 'href');
    var serialized = JSON.stringify(hrefs);
    fs.write("plantVarietyResults.json", serialized, 'w');
});
casper.run();