这是我第一次使用 Node.js 和 Express。我想创建一个网页抓取程序。这是我的项目结构:
WebScrape:
|_ bin
|_ node_modules
|_ public
|_ routes
|_ view
|_ app.js
|_ package.json
|_ package-lock.json
我在 routes 目录中创建了一个 scrape.js 文件:
const express = require('express');
const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');

const app = express();

/**
 * GET /scrape
 *
 * Downloads the IMDB page and replies with the JSON skeleton of the fields
 * to capture. If the download fails, the error is handed to Express via
 * next() instead of being dropped (which would leave the client hanging).
 */
app.get('/scrape', function(req, res, next) {
  // the URL we will scrape from - in our example Anchorman 2
  // (declared locally: a bare `url = ...` assignment creates an implicit global)
  const url = 'http://www.imdb.com/title/tt1229340/';

  /**
   * The structure of our request call.
   * The first parameter is our URL.
   * The callback function takes 3 parameters: an error, the response object and the html.
   */
  request(url, function(error, response, html) {
    // bail out early when the request itself failed
    if (error) {
      return next(error);
    }

    // utilize the cheerio library on the returned html which will essentially give us jQuery functionality
    const $ = cheerio.load(html);

    // the fields we're going to capture; populate them from `$` selectors here
    const json = { title : "", release : "", rating : ""};

    // always answer the client, otherwise the HTTP request never completes
    res.json(json);
  }); // end request
}); // end get

app.listen('8081');
console.log('Magic happens on port 8081');

exports = module.exports = app;
我该如何测试?这是正确的地方吗?
答案 0 :(得分:0)
const express = require('express');
const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');

const router = express.Router();

/**
 * GET /scrape
 *
 * Downloads the IMDB page and replies with the JSON skeleton of the fields
 * to capture. If the download fails, the error is handed to Express via
 * next() instead of being dropped (which would leave the client hanging).
 */
router.get('/scrape', function(req, res, next) {
  // the URL we will scrape from - in our example Anchorman 2
  // (declared locally: a bare `url = ...` assignment creates an implicit global)
  const url = 'http://www.imdb.com/title/tt1229340/';

  /**
   * The structure of our request call.
   * The first parameter is our URL.
   * The callback function takes 3 parameters: an error, the response object and the html.
   */
  request(url, function(error, response, html) {
    // bail out early when the request itself failed
    if (error) {
      return next(error);
    }

    // utilize the cheerio library on the returned html which will essentially give us jQuery functionality
    const $ = cheerio.load(html);

    // the fields we're going to capture; populate them from `$` selectors here
    const json = { title : "", release : "", rating : ""};

    // always answer the client, otherwise the HTTP request never completes
    res.json(json);
  }); // end request
}); // end get

// The router does not listen on a port itself; the main app mounts it.
exports = module.exports = router;
通常由 app.js 负责监听端口并接收请求。您可以使用 express.Router 在单独的路由文件中进一步扩展和添加路由。
在 app.js 中,你必须这样做才能真正挂载这些路由:
// Load the router exported by the route file; the path must match the real
// file name (routes/scrape.js), otherwise require() throws MODULE_NOT_FOUND.
const routes = require('./routes/scrape.js');
// app is the express() app
app.use(routes);