以下脚本在我的 Node.js 服务器上运行良好,但在抓取某些西里尔文(例如俄语)网站时,偶尔会返回如下所示的乱码响应。
脚本
// Scrape page metadata from `url`. Each key maps to a selector string,
// presumably in x-ray's `selector@attribute` form — confirm against the library docs.
x(url, {
name: 'title',
ogDescription: 'meta[property="og:description"]@content',
metaDescription: 'meta[name="description"]@content',
ogImage: 'meta[property="og:image"]@content',
// NOTE(review): this selector repeats `name="` and looks malformed;
// presumably 'meta[name="twitter:image:src"]@content' was intended — confirm.
twitterImage: 'meta[name="name="twitter:image:src""]@content',
metaImage: 'meta[name="image"]@content',
headImage: 'head img@src',
contentImage_1: '.content img@src',
contentImage_2: '.image img@src'
})
(function (err, obj) {
// NOTE(review): `err` is never checked before `obj` is read below.
// Group the scraped fields into candidate lists, one list per kind of data.
var firstData = {
name: [
obj.name
],
description: [
obj.metaDescription,
obj.ogDescription,
],
image: [
obj.ogImage,
obj.twitterImage,
obj.metaImage,
obj.headImage,
obj.contentImage_1,
obj.contentImage_2
]
}
编码错误的回复示例
firstData { name: [ '(Rock, Pop) [15LP] [24/96] Queen - Studio Collection - 2015,
FLAC (tracks) :: RuTracker.org' ],
description:
[ 'RuTracker.org » ���������� ��� (����������� ���������) »
������� ������� (Rock, Pop) [15LP] [24/96] Queen -
Studio Collection - 2015, FLAC (tracks)',
undefined ],
image: [ undefined, undefined, undefined, undefined, undefined, undefined ] }
我该如何解决这个问题?
答案 0 :(得分:0)
您可以将 request 用作 x-ray 的驱动程序(driver),并在其中使用 iconv 对响应正文进行编码转换:
// Options passed to `request` when the HTTP client is created.
var options = {};
// Body converter; starts as null and is assigned a function further down.
var conv = null;
// Ask `request` for the body as a 'binary' string so the raw bytes survive
// until they are re-decoded with the page's real charset.
options.encoding = 'binary';
// Parenthesised so that `new` applies to the Iconv constructor rather than to
// `require`, and declared with `var` so no implicit global is created.
var iconv = new (require('iconv').Iconv)('Windows-1251', 'utf8');
/**
 * Re-encode a response body from Windows-1251 to UTF-8.
 *
 * NOTE(review): the source charset is hard-coded, so pages served in another
 * encoding would presumably be mis-decoded — confirm whether that matters here.
 *
 * @param {string} body - Response body as a 'binary'-encoded string.
 * @returns {string} The converted text; a falsy body is returned unchanged.
 *   Any error thrown by `iconv.convert` propagates to the caller.
 */
conv = function(body) {
  if (!body) return body;
  // Buffer.from is a factory function, so it is called without `new`.
  var bytes = Buffer.from(body, 'binary');
  return iconv.convert(bytes).toString();
};
var request = require('request').defaults(options);
/**
 * Driver that fetches a page with `request` and, when a converter is
 * configured, re-encodes the body before handing it back.
 *
 * @param {Object} context - Crawl context; only `context.url` is read.
 * @param {Function} callback - Node-style callback invoked as `(err, body)`.
 */
var driver = function driver(context, callback) {
  var url = context.url;
  request(url, function(err, response, body) {
    // Nothing to convert on failure or when no converter is configured.
    if (err || !conv) return callback(err, body);
    var converted;
    try {
      converted = conv(body);
    } catch (convErr) {
      // A conversion failure is reported through the callback instead of
      // escaping from this asynchronous handler as an uncaught exception.
      return callback(convErr);
    }
    // The callback is invoked outside the try block so that errors thrown by
    // the callback itself are not mistaken for conversion failures.
    return callback(err, converted);
  });
};
x.driver(driver);
// Scrape page metadata from `url`; each key maps to a selector string whose
// `@content` / `@src` suffix names the attribute to read.
x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // Fixed: the original selector repeated `name="` and was not a valid
  // attribute selector.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src'
})
(function (err, obj) {
  // Bail out on failure: `obj` cannot be relied on when `err` is set.
  if (err) {
    console.error(err);
    return;
  }
  // Group the scraped fields into candidate lists, one list per kind of data.
  var firstData = {
    name: [
      obj.name
    ],
    description: [
      obj.metaDescription,
      obj.ogDescription,
    ],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2
    ]
  };
  console.log(firstData);
});