我遇到麻烦的代码区域位于get(“ /”)路由中。我正试图从NYT网站上抓取。我正在尝试记录标题和链接。当我这样做时,我得到了两次记录的文章和一堆未定义的文章。我正在尝试包含if语句,该语句仅使用属性刮取H2标签。
var express = require("express");
var exphbs = require("express-handlebars");
var cheerio = require("cheerio");
var axios = require("axios");
var mongoose = require("mongoose");
var app = express();
var PORT = process.env.PORT || 3000;
var db = require("./models/Article");
//ignore before publishing
mongoose.connect(
"mongodb://localhost/scrapper",
{ useNewUrlParser: true }
);
//works with express for template engine"
app.engine("handlebars", exphbs({ defaultLayout: "main" }));
app.set("view engine", "handlebars");
app.use(express.static("public"));
app.use(express.urlencoded({ extended: true }));
app.use(express.json());
app.set("port", process.env.PORT || 3000);
app.get("/", function(req, res) {
axios
.get("https://www.nytimes.com/section/technology")
.then(function(response) {
var $ = cheerio.load(response.data);
var result = {};
var title = $(element)
.find("h2.headline")
.text()
.trim();
var link = $(element)
.find("a")
.attr("href");
if (title && link) {
result.title = $(element)
.find("h2.headline")
.text()
.trim();
result.link = $(element)
.find("a")
.attr("href");
console.log(result.title);
console.log("https://www.nytimes.com/section/technology" +
result.link);
}
});
res.render("index");
});
app.use(function(req, res) {
res.type("text/plain");
res.status(404);
res.send("404 - not found");
});
var MONGODB_URI =
process.env.MONGODB_URI || "mongodb://localhost/mongoHeadlines";
mongoose.connect(MONGODB_URI);
app.listen(PORT, function() {
console.log("Express Server Started on http://localhost:3000");
});