PhantomJS 的 includeJs 无法正常工作

时间:2018-05-18 09:14:23

标签: node.js web-scraping phantomjs

我尝试使用PhantomJS来从这样的网站获取数据:

// Route handler from the question: opens https://banc.nl/ in a PhantomJS page,
// then tries to inject jQuery and log prices found on the page.
// NOTE(review): nothing in this handler ever writes to `res`.
app.get('/scrape', function(req, res) {
var _ph, _page;
phantom.create()
.then(function (ph) {
    _ph = ph;
    return ph.createPage();
})
.then(function (page) {
    _page = page; 
    var url = "https://banc.nl/";
    return page.open(url);
})
// NOTE(review): the argument here is whatever page.open(url) resolved with, not
// the page object saved in _page. Presumably that is a status value, which would
// explain the reported "page.includeJs is not a function" — confirm against the
// phantomjs-node docs.
.then(function(page) {
    page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js', function() {
    page.evaluate(function() {
        // NOTE(review): as written this selector targets <kaMmif> elements nested
        // inside .sc-dnqmqq; '.sc-dnqmqq.kaMmif' (two classes) was probably intended.
        $('.sc-dnqmqq kaMmif').filter(function () {
            var data = $(this);
            // `price` is assigned without a declaration, so it becomes an implicit global.
            price = data.children().first().text();
            console.log("Price: " + price);
        });
    });
    });
})
.catch(function(err) {
    console.log("Error: " , err);
    // NOTE(review): _page is still undefined if the failure happened before the
    // second .then() ran, in which case this line would throw as well.
    _page.close();
    //_ph.exit();
});
});

问题是我收到以下错误:Error: TypeError: page.includeJs is not a function

如果我取消注释 _ph.exit(),我也会收到警告:warn: exit() was called before waiting for commands to finish. Make sure you are not calling exit() prematurely.

我在SO中发现了几个关于此的问题,但没有解答我的问题。

1 个答案:

答案 0 :(得分:1)

最新版本的phantomjs-node模块使用Promises,因此我们可以使用async函数和await运算符重写脚本,使其更具可读性:

const phantom = require('phantom');

const url = 'https://www.huobipro.com';
// Pause after the page opens so that its own scripts have time to run.
const RENDER_DELAY_MS = 3000;

/**
 * Opens `url` in a PhantomJS instance, injects jQuery into the page and logs
 * the text of every `span[price]` element through the page's console, which is
 * forwarded to Node by the `onConsoleMessage` handler.
 *
 * The PhantomJS instance is always shut down, even when a step fails, so no
 * orphaned phantomjs process is left behind.
 */
(async function () {
    const instance = await phantom.create();

    try {
        const page = await instance.createPage();

        // Relay console output and script errors from the page to Node.
        await page.on('onConsoleMessage', function (msg) {
            console.info(msg);
        });
        await page.on('onError', function (msg) {
            console.info(msg);
        });

        const status = await page.open(url);
        console.log('STATUS:', status);
        if (status !== 'success') {
            throw new Error('Failed to open ' + url + ' (status: ' + status + ')');
        }

        // Wait a bit for javascript to load and run
        await new Promise(resolve => setTimeout(resolve, RENDER_DELAY_MS));

        await page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js');

        // This function is serialized and executed inside PhantomJS, so it must
        // stay plain ES5 and cannot reference variables from the Node side.
        await page.evaluate(function () {
            $('span[price]').each(function () {
                console.log("Price: " + $(this).text());
            });
        });
    } finally {
        // Runs on success and on failure alike, after all commands above settled.
        await instance.exit();
    }
})().catch(function (err) {
    console.error('Error:', err);
    process.exitCode = 1;
});