Node.js应用程序可在本地运行,但是当部署到heroku时出现应用程序错误

时间:2019-12-30 07:35:36

标签: javascript node.js heroku web-scraping

我为实时体育比分制作了一个网络抓取工具。我有2个文件index.js充当服务器,还有一个scraper.js文件负责抓取。这是我的index.js文件代码:

const express = require('express');

const scraper = require('./util/scraper');

const app = express();

app.get('/scores', (req, res) => {
    const scores = new Promise((resolve, reject) => {
        scraper
         .scrapeLiveScores()
         .then(data => {
             resolve(data)
        }).catch(err => reject('scores scrape failed'))
    });

    // res.send(JSON.stringify(scores));
    // Use promise.all([]) if more than one scraper is used and list each of them
    Promise.resolve(scores)
     .then(data => {
         res.send(JSON.stringify(data));
     }).catch(function() {
         console.log("promise rejected");
     });
});

app.set( 'port', ( process.env.PORT || 5000 ));

// Start node server
app.listen( app.get( 'port' ), function() {
  console.log( 'Node server is running on port ' + app.get( 'port' ));
  });
module.exports = app;

当我在本地运行heroku时,它返回所需数据的json。但是,当我将其部署到heroku时,出现控制台错误“承诺被拒绝”的应用程序错误。

const puppeteer = require('puppeteer');


const scrapeLiveScores = async () => {
    try{
        //Open the browser
        var browser = await puppeteer.launch({ headless: true });

        //Open a new page
        var page = await browser.newPage();

        //Enter url in the page
        await page.goto('https://www.flashscore.com.au/basketball/usa/nba/');

        //wait for selector to load in
        await page.waitForSelector('div.event__score.event__score--home');

        var scores = await page.evaluate(() => {
            var basePath = 'div.leagues--live > div > div.event__match';
            var homeScore = document.querySelectorAll('div.leagues--live > div > div.event__match > div.event__score--home');
            var awayScore = document.querySelectorAll('div.leagues--live > div > div.event__match > div.event__score--away');
            var homeTeam = document.querySelectorAll('div.leagues--live > div > div.event__match > div.event__participant.event__participant--home');
            var awayTeam = document.querySelectorAll('div.leagues--live > div > div.event__match > div.event__participant.event__participant--away');
            var stages = document.querySelectorAll('div.leagues--live > div > div.event__match > div.event__stage > div');


            //#g_3_6LLArB7N > div.event__stage > div
            var scoresArray = [];

            for(var i = 0; i < homeScore.length; i++){

                if(stages[i] != null){
                    scoresArray[i] = {
                        homeTeam: homeTeam[i].innerText.trim(),
                        homeScore: homeScore[i].innerText.trim(),
                        awayTeam: awayTeam[i].innerText.trim(),
                        awayScore: awayScore[i].innerText.trim(),
                        stage: stages[i].innerText.trim()
                    };
                }else{
                    scoresArray[i] = {
                        homeTeam: homeTeam[i].innerText.trim(),
                        homeScore: homeScore[i].innerText.trim(),
                        awayTeam: awayTeam[i].innerText.trim(),
                        awayScore: awayScore[i].innerText.trim(),
                        stage: "-"
                    };
                }

            }

            return scoresArray;
        });

        console.log(scores);
        await browser.close();
        return scores;
    }catch(err){

        await browser.close();
    }
}

module.exports.scrapeLiveScores = scrapeLiveScores;

我已经坚持了几个小时。这是日志:

2019-12-30T08:06:23.178178+00:00 app[web.1]: scraper error TypeError: Cannot read property 'close' of undefined
2019-12-30T08:06:23.178216+00:00 app[web.1]:     at Object.scrapeLiveScores (/app/util/scraper.js:60:23)
2019-12-30T08:06:23.178218+00:00 app[web.1]:     at processTicksAndRejections (internal/process/task_queues.js:93:5)
2019-12-30T08:06:23.182971+00:00 heroku[router]: at=info method=GET path="/scores" host=nbalive-api.herokuapp.com request_id=7f5b79cc-dd2e-433b-a4db-56052f8a5cdd fwd="99.247.208.27" dyno=web.1 connect=1ms service=23ms status=500 bytes=231 protocol=https

第60行是catch(err)中的{await browswer.close()}

2 个答案:

答案 0 :(得分:0)

要调试此问题,您需要记录所得到的实际错误。而且,在使用它时,请简化代码并删除将现有承诺包装在另一个不必要的承诺中的承诺反模式。

将代码更改为此:

app.get('/scores', (req, res) => {
    scraper.scrapeLiveScores().then(data => {
         res.send(data);
    }).catch(function(e) {
         console.log("scraper error", e);
         res.status(500).send("scraper error");
     });
});

然后,console.log()中的.catch()语句将向您显示您得到的确切错误。

除了删除promise反模式,简化代码并记录实际获得的错误并消除JSON.stringify()自动完成的res.send()之外,这还会在存在以下情况时发送对请求的响应错误,而不仅仅是让浏览器挂起。

答案 1 :(得分:0)

我没有使用刮板,但是我已经在Heroku上成功使用了Cheerio和Puppeteer:

const puppeteer = require('puppeteer');

const $ = require('cheerio');

exports.scraper = (req, res) => {
  const url = 'https://www.my-target-url.com';
  puppeteer
    .launch()
    .then(function(browser) {
      return browser.newPage();
    })
    .then(function(page) {
      return page.goto(url).then(function() {
        return page.content();
      });
    })
    .then(function(html) {
      // target a css selector
      $('#my_selector', html).each(function() {

        console.log($(this).text());
        res.send($(this).text());
      });
    })
    .catch(function(err) {
      //handle error
    });
};

相关问题