我有一个需要提取的标签列表,该列表名为 `list`。
我想在抓取到的 HTML 中找出与该列表对应的所有 'og:*' 元数据,然后把包含这些元标签的哈希以 JSON 形式返回给用户。但是 `process` 方法返回的是 undefined,而不是这个哈希。
var http = require('http');
var url = require('url');
var request = require('request');
var jsdom = require("jsdom");
var fs = require('fs');
var cssom = require('cssom');
var list = ['title', 'description']; // Open Graph tag names to extract; compared below as 'og:' + name
// Module-level accumulator written by process(); because it lives at module scope, every call shares it.
var meta = {};
// Fetches `url` and copies the content of each og:<name> meta tag whose name is in `list`
// into the module-level `meta` object.
// NOTE: the work happens inside the request/jsdom callbacks, while `return meta` at the
// bottom executes as soon as request() has been started. The function also declares only
// `url`, so any extra argument a caller passes (such as a callback) is ignored.
function process(url) {
request(url, function (error, response, body) {
// Errors and non-200 responses are dropped silently: nothing is reported to the caller.
if (!error && response.statusCode == 200) {
jsdom.env({
html: body,
scripts: [
'http://code.jquery.com/jquery-1.5.min.js'
],
done: function(errors, window) {
var $ = window.$;
// Visit every <meta> whose property attribute starts with "og:".
$('meta[property^="og:"]').each(function() {
// `element` is an index into `list` (for...in over an array yields its keys).
for (var element in list) {
if ($(this).attr('property') == 'og:' + list[element]) {
meta[list[element]] = $(this).attr('content');
// Per the author, meta is filled correctly at this point (console.log(meta) here shows the expected hash).
}
}
});
}
});
}
});
return meta; // The reported problem: this line runs before the callbacks above have filled `meta`.
}
// HTTP entry point: reads the target URL from the `content` query parameter and passes it to process().
http.createServer(function (request, response) {
request.setEncoding('utf8');
response.writeHead(200, {'Content-Type': 'text/plain'});
// process() is declared with a single `url` parameter, so this callback is never invoked.
process(url.parse(request.url, true).query['content'], function(result) {
console.log(result); // prints no result
});
// The response is ended right away, without any of the extracted tags being written to it.
response.end();
}).listen(8124);
console.log('Server running at http://0.0.0.0:8124');
答案 0 :(得分:1)
由于 `request` 是异步的,因此您也需要让 `process` 变为异步。这意味着让 `process` 接受一个回调参数,在 `meta` 可用之后调用它。就目前的写法而言,`process` 会在 `request` 的回调填充 `meta` 之前就返回 `meta`。
/**
 * Fetches a page and collects the Open Graph meta tags whose names appear in `list`.
 *
 * The callback is invoked exactly once per call, after every matching <meta> tag has
 * been inspected, so the caller always receives the complete set of tags (an empty
 * object when the page has none of them).
 *
 * @param {string} url - Address of the page to fetch.
 * @param {function(?Error, Object=)} callback - Node-style callback. Receives
 *     `(error)` when the fetch fails, the status is not 200, or jsdom cannot provide
 *     a window with jQuery; otherwise `(null, tags)` where `tags` maps each found
 *     name from `list` to the tag's `content` attribute.
 */
function process(url, callback) {
  request(url, function (error, response, body) {
    if (error) {
      callback(error);
      return;
    }
    if (response.statusCode !== 200) {
      // `error` is null on this path, so build one; otherwise the caller cannot tell failure from success.
      callback(new Error('Unexpected HTTP status ' + response.statusCode + ' for ' + url));
      return;
    }
    jsdom.env({
      html: body,
      scripts: [
        'http://code.jquery.com/jquery-1.5.min.js'
      ],
      done: function (errors, window) {
        if (!window || typeof window.$ !== 'function') {
          callback(errors instanceof Error ? errors : new Error('Could not load the page or jQuery into jsdom'));
          return;
        }
        var $ = window.$;
        // Fresh object per call: a shared module-level object would leak tags between requests.
        var found = {};
        $('meta[property^="og:"]').each(function () {
          var name = $(this).attr('property').slice('og:'.length);
          if (list.indexOf(name) !== -1) {
            found[name] = $(this).attr('content');
          }
        });
        // Report once, after the whole document has been scanned.
        callback(null, found);
      }
    });
  });
}
// HTTP entry point: GET /?content=<page url> responds with the page's selected
// Open Graph tags as a JSON object.
// NOTE(review): the server fetches whatever URL the client supplies; restrict the
// allowed targets before exposing this beyond a trusted network.
http.createServer(function (request, response) {
  request.setEncoding('utf8');
  var target = url.parse(request.url, true).query['content'];
  if (!target) {
    response.writeHead(400, {'Content-Type': 'text/plain'});
    response.end('Missing "content" query parameter');
    return;
  }
  // The response is written and ended inside the callback: process() is asynchronous,
  // so ending the response right after calling it would send an empty body.
  process(target, function (error, result) {
    if (error) {
      response.writeHead(502, {'Content-Type': 'text/plain'});
      response.end(String(error.message || error));
      return;
    }
    response.writeHead(200, {'Content-Type': 'application/json'});
    response.end(JSON.stringify(result));
  });
}).listen(8124);