有没有办法先用 PhantomJS 抓取数据,再用 Node.js 通过 XPath 和 DOM 解析结果,最后保存到 MySQL 中?
我已经安装了 PhantomJS 核心和 PhantomJS 的 Node 模块,现在想在 shell 中直接用 Node 运行抓取脚本(scraper),然后设置 cron 任务让它们按计划执行。
答案 0 :(得分:2)
我尝试过 Node 和 PhantomJS 之间的各种桥接库,最后自己又写了一个 ;)。它叫 phridge,提供了一种把函数传给 PhantomJS 执行、并把结果返回给 Node 的方法:
// node
// Loads a page inside PhantomJS, reads the text of the first element matching
// the given selector, and hands that text back to node through a promise.
// `phantom` is assumed to be a phridge instance created earlier — TODO confirm.
phantom.run("h1", function (selector, resolve, reject) {
    // this code runs inside PhantomJS, so it sticks to the ES5 syntax
    // (`var`, function expressions) the original snippet uses
    // NOTE(review): PhantomJS documents phantom.addCookie as taking a single
    // cookie object ({ name, value, domain }); confirm this three-string form works.
    phantom.addCookie("cookie_name", "cookie_value", "localhost");
    var page = webpage.create();
    page.customHeaders = {
        Referer: "http://google.com"
    };
    // Set only the user agent; replacing the whole settings object would
    // discard every other setting that is not listed in the new object.
    page.settings.userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)";
    page.open("http://www.google.com", function (status) {
        var text;
        if (status !== "success") {
            // release the page and report the failure to node instead of
            // evaluating against a page that never loaded
            page.close();
            reject(new Error("Failed to load the page, status: " + status));
            return;
        }
        text = page.evaluate(function (selector) {
            // querySelector returns null when nothing matches; return null
            // rather than throwing inside the page context
            var element = document.querySelector(selector);
            return element ? element.innerText : null;
        }, selector);
        page.close();
        // resolve the promise and pass 'text' back to node
        resolve(text);
    });
}).then(function (text) {
    // inside node again; text is null when no element matched the selector
    console.log("The element contains the following text: "+ text);
}).catch(function (err) {
    // surface failures from PhantomJS instead of leaving the rejection unhandled
    console.error("Scraping failed: " + err.message);
});