我正在尝试获取页面中的所有 URL 问题。但我遇到了一个问题,我从其他地方得到了所有无用的 URL。谢谢你,快乐编码。
const request = require("request");
const cheerio = require("cheerio");
const fs = require("fs");
let answersJson = fs.readFileSync("answersScrapes.json", "utf-8");
let url = "https://www.answers.com/t/literature-and-language/new?page=0";
request(url, (error, response, html) => {
if (!error && response.statusCode == 200) {
let $ = cheerio.load(html);
var anchors = [];
var links = $("a");
links.each(function (i, link) {
anchors[i] = $(link).attr("href");
console.log(link);
});
// Logging the Data
console.log("Scraping Done...");
} else {
console.log(error);
}
});