使用Cheerio.js抓取IMDb剧集-仅返回电视剧集的第一页

时间:2018-11-20 22:05:25

标签: javascript node.js cheerio imdb

我正在从IMDb抓取电视剧的剧集信息(下面的示例是《Breaking Bad》)。问题是:用for循环遍历各季时,只返回了j第一次迭代(即第1季)的结果。

我猜测是return语句导致循环提前退出,但我不确定该如何解决这个问题。

const fetch = require('node-fetch');
const cheerio = require('cheerio');

// IMDb title-search URL ending in an empty `q=` parameter.
// NOTE(review): presumably the search term is appended by getID(), which is not shown here — confirm.
const searchUrl = 'https://www.imdb.com/find?s=tt&ttype=tv&ref_=fn_tv&q=';
// Base URL for a title page; getEpisodes appends `<imdbID>/episodes?season=<n>` to it.
const movieUrl = 'https://www.imdb.com/title/';

/**
 * Scrapes episode data (title, air date, vote count, rating) from the IMDb
 * episode-list pages of a show.
 *
 * NOTE(review): as written, only season 1 is ever fetched. The `return` in the
 * `for` body leaves the whole function during the first iteration, so the loop
 * never reaches j = 2.
 *
 * @param {string} searchTerm - Currently unused; the ID lookup is commented out below.
 * @returns {Promise<Array<Object>>} Resolves to the episodes scraped from the
 *   season 1 page only (see note above).
 */
async function getEpisodes(searchTerm) {

  // Lookup by search term is disabled; the show ID and season count are hard-coded below.
  //const imdbID = await getID(searchTerm);
  //const numSeasons = await getSeasons(imdbID);

  const imdbID = 'tt0903747';
  const numSeasons = 5;
  // Shared accumulator that the `.then` callback pushes every scraped episode into.
  const episodes = [];

  for (let j = 1; j <= numSeasons; j++) {
    // BUG: `return` exits getEpisodes immediately, so only the first request is made.
    return fetch(`${movieUrl}${imdbID}/episodes?season=${j}`)
      .then(response => response.text())
      .then(body => {
        const $ = cheerio.load(body);

        // Each matched element is treated as one episode entry.
        $('div[itemProp="episodes"]').each(function (i, element) {
          const airdate = $(element).find('.airdate').text().trim();
          const episodeTitle = $(element).find('a[itemProp="name"]').text().trim();
          // Keeps the text between the parentheses; `match` returns null when there
          // is no "(...)" in the text, in which case the `[1]` access throws a TypeError.
          const votes = $(element).find('.ipl-rating-star__total-votes').text().trim().match(/\(([^)]+)\)/)[1];
          // Keeps only the first three characters of the rating text.
          const rating = $(element).find('.ipl-rating-star ').find('.ipl-rating-star__rating').text().trim().slice(0, 3);

          // NOTE: `episode` is assigned without const/let, so it becomes an implicit
          // global in sloppy mode and a ReferenceError in strict mode.
          episode = {
            season: j,
            episodeTitle,
            airdate,
            votes,
            rating
          };
          episodes.push(episode);
        }); 
        return episodes; // Resolved value of the returned promise: season 1 episodes only.
      }); 
  }
}

1 个答案:

答案 0 :(得分:0)

让我们用 async/await 风格重写这个函数。这样可以确保先发起 numSeasons 次 fetch 请求,等待它们全部完成,然后再逐个处理每个响应。

/**
 * Parses one IMDb season page into a list of episode records.
 *
 * @param {{text: function(): Promise<string>}} response - Fetch response whose
 *   body is the HTML of a season's episode list.
 * @param {number} season - Season number stored on every returned record.
 * @returns {Promise<Array<{season: number, episodeTitle: string, airdate: string,
 *   votes: string, rating: string}>>} One record per `div[itemProp="episodes"]`
 *   element, in document order. Fields that are missing from the page are
 *   empty strings.
 */
async function processResponse(response, season) {
    const body = await response.text();
    const $ = cheerio.load(body);

    const episodes = [];
    $('div[itemProp="episodes"]').each((i, element) => {
        const row = $(element);
        const airdate = row.find('.airdate').text().trim();
        const episodeTitle = row.find('a[itemProp="name"]').text().trim();

        // The vote count is rendered as "(12,345)". Entries without votes have no
        // parenthesised text, so `match` returns null and must not be indexed blindly.
        const votesMatch = row.find('.ipl-rating-star__total-votes').text().trim().match(/\(([^)]+)\)/);
        const votes = votesMatch ? votesMatch[1] : '';

        // Keep only the first three characters of the rating text (e.g. "9.0").
        const rating = row.find('.ipl-rating-star ').find('.ipl-rating-star__rating').text().trim().slice(0, 3);

        // Build the record inline instead of assigning to an undeclared `episode`,
        // which leaked a global in sloppy mode and threw in strict mode.
        episodes.push({
            season,
            episodeTitle,
            airdate,
            votes,
            rating
        });
    });

    return episodes;
}

/**
 * Fetches the episode-list page of every season and returns all episodes as
 * one flat array ordered by season.
 *
 * All season pages are requested up front and awaited together; each response
 * is then handed to `processResponse` along with its 1-based season number.
 *
 * @param {string} searchTerm - Currently unused; the show ID and season count
 *   are hard-coded until the lookup helpers are wired in.
 * @returns {Promise<Array<Object>>} Episode records from `processResponse`,
 *   season 1 first. Rejects if any request or any page parse fails.
 */
async function getEpisodes(searchTerm) {

    // TODO: derive these from searchTerm once the lookup helpers exist:
    //const imdbID = await getID(searchTerm);
    //const numSeasons = await getSeasons(imdbID);

    const imdbID = 'tt0903747';
    const numSeasons = 5;

    const promises = [];

    for (let j = 1; j <= numSeasons; j++) {
        promises.push(fetch(`${movieUrl}${imdbID}/episodes?season=${j}`));
    }

    const responses = await Promise.all(promises);

    // `await` is not allowed inside a non-async reduce callback (SyntaxError), so
    // map each response to its parsing promise and await them together instead.
    // Promise.all keeps input order, so index + 1 is the season number.
    const episodesBySeason = await Promise.all(
        responses.map((response, index) => processResponse(response, index + 1))
    );

    // Flatten the per-season arrays into a single list.
    return [].concat(...episodesBySeason);
}