Question

我正在尝试使用 Node.js 和 Axios 抓取此网站，以获取大学名称。我注意到该网站使用分页（Paginated）API，因此要获取所有大学名称，我必须发送多个请求。

const url = 'https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table&_page=1';
const url = 'https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table&_page=2';
const url = 'https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table&_page=3';
...
const url = 'https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table&_page=55';

我编写的代码只能抓取一页，我不知道如何抓取多页的内容。这是我的代码：

const axios = require('axios');
const cheerio = require('cheerio');
const request = require('request');
const fs = require('fs');

// Stray fragment of the search URL without `_mode=table`, kept as a comment
// because it is not valid code on its own:
//   ..._sort=rank&_sortDirection=asc&study=Engineering
// table view
const page = 1;
const url = `https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table&_page=${page}`;

// Download one result page, pull the university names out of the ranking
// table, print them, and append them to universityRanking.txt.
fetchData(url)
    .then((res) => {
        // Guard against fetchData resolving without a response.
        if (!res) {
            return;
        }

        const html = res.data;
        const $ = cheerio.load(html);
        const unilist = $('.TableTabular__TableContainer-febmbj-0.guaRKP > tbody > tr >td ');

        unilist.each(function () {
            // Only cells containing a <div name="..."> carry a university name.
            const title = $(this).find('div').attr('name');

            if (typeof title === 'string') {
                console.log(title);
                // appendFileSync is synchronous: it takes no callback and
                // throws on failure, which the .catch() below reports.
                fs.appendFileSync('universityRanking.txt', `${title}\n`);
            }
        });
    })
    .catch((err) => console.log(err));


/**
 * Download a page with axios.
 *
 * @param {string} url - Address of the page to request.
 * @returns {Promise<object>} The axios response; callers read the body from `response.data`.
 * @throws {Error} If the request fails (original error attached as `cause`)
 *   or the response status is not 200.
 */
async function fetchData(url) {
    console.log("Crawling data...");

    let response;
    try {
        // make http call to url
        response = await axios(url);
    } catch (err) {
        // Surface the failure to the caller instead of continuing with an
        // undefined response and failing later on `response.status`.
        console.log(err);
        throw new Error(`Failed to fetch ${url}`, { cause: err });
    }

    if (response.status !== 200) {
        console.log("Error occurred while fetching data");
        throw new Error(`Unexpected status ${response.status} for ${url}`);
    }
    return response;
}

我需要有关如何发出 55 个 Axios 请求的帮助。我检查过，该网站共有 55 页。我需要将每一页的所有大学名称追加到文本文件中。

Answer 1

axios.all()方法可以帮助您解决用例。

// axios.all() resolves once every request in the array has completed.
// NOTE: the axios docs mark axios.all as deprecated in favor of Promise.all,
// which accepts the same array of promises.
axios.all([])   // Pass the array of axios requests for all the 55 pages here
  .then((responses) => {
    // Multiple requests complete; `responses` holds one response per request,
    // in the same order as the array passed in.
  })
  .catch((err) => console.log(err));

使用Axios发送多个HTTP请求

1 个答案: