我正在按照此处列出的教程进行操作:
http://code.tutsplus.com/tutorials/screen-scraping-with-nodejs--net-25560
当我运行代码时:
// Screen-scraping script: opens the SHOUTcast listing page in PhantomJS (through
// the callback-style `phantom` bridge), tries to inject jQuery, waits for the
// page's AJAX content, then reads the first element with class "transition".
var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134';
var phantom = require('phantom');
phantom.create(function(ph) {
return ph.createPage(function(page) {
return page.open(host, function(status) {
// `status` is logged as-is; the script continues regardless of its value.
console.log("opened site? ", status);
// NOTE(review): injectJs is called with a remote URL here. PhantomJS documents
// injectJs for local script files and includeJs for URLs -- confirm which one
// is intended before relying on jQuery being present in the page.
page.injectJs('http://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js', function() {
// Callback of the injectJs call; the author expects jQuery to be loaded at this point.
// Wait for AJAX content to load on the page. Here, we wait 5 seconds (see the 5000 ms timeout below).
setTimeout(function() {
return page.evaluate(function() {
// This function is handed to page.evaluate; it only touches `document`.
// Intended use: collect the wanted data from the page and return it as an object.
console.log(document.getElementsByClassName('transition')[0]);
// NOTE(review): this returns a DOM element rather than plain data -- confirm
// that page.evaluate can hand such a value back to the result callback.
return document.getElementsByClassName('transition')[0];
}, function(result) {
// Receives whatever page.evaluate passes back, prints it, then shuts PhantomJS down.
console.log(result);
ph.exit();
});
}, 5000);
});
});
});
});
我收到以下错误:
phantom stdout: ReferenceError: Can't find variable: $
phantom stdout: phantomjs://webpage.evaluate():7
phantomjs://webpage.evaluate():10
phantomjs://webpage.evaluate():10
我不知道这是什么意思,也没有找到任何有助于解决它的信息……该如何解决这个问题?
基本上,我想从正在抓取的网站中获取所有带有 transition 类的 'a' 标签。这些标签都是在站点上异步加载的。
答案 0 :(得分:3)
关于 $ 的报错与 jQuery 以及可能存在的冲突有关。仅仅为了抓取带有 transition 类的 'a' 标签,几乎没有必要注入 jQuery。您随时可以使用 document.querySelector 或 document.querySelectorAll。
// Scrape every <a class="transition"> link from the SHOUTcast listing page
// using the callback-style `phantom` bridge. jQuery is not injected: the
// page's own DOM API is enough for a simple class selector.
var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134';
var phantom = require('phantom');
phantom.create(function(ph) {
  ph.createPage(function(page) {
    page.open(host, function(status) {
      console.log("opened site? ", status);
      // Nothing to scrape if the page did not load; shut PhantomJS down
      // instead of waiting and evaluating against an empty document.
      if (status !== 'success') {
        ph.exit();
        return;
      }
      //Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
      setTimeout(function() {
        page.evaluate(function() {
          // This function runs inside the page, not in Node: it must be
          // self-contained and stick to ES5. Only JSON-serializable values
          // survive the trip back, so plain objects are returned instead of
          // the DOM nodes themselves.
          var anchors = document.querySelectorAll('a.transition');
          return Array.prototype.map.call(anchors, function(anchor) {
            return {
              href: anchor.href,
              text: anchor.textContent
            };
          });
        },
        function(result) {
          // `result` is an array of { href, text } objects (empty when no
          // matching anchors were present after the wait).
          console.log(result);
          ph.exit();
        });
      }, 5000);
    });
  });
});
不过,我不太理解 function (result) { console.log(result); ... } 这种写法。我不确定 page.evaluate 是否接受回调函数作为第二个参数,请查阅文档确认。