我使用request模块的get方法来获取外部网站的内容。如果外部站点的编码是utf-8,一切正常;但对于其他编码(例如shift-jis),内容会显示错误。
/**
 * Fetches `url` with the `request` module and relays the upstream body to
 * `response`, then ends the response.
 *
 * The body is requested with `encoding: null`, so it arrives as a raw Buffer
 * instead of a string decoded as UTF-8. Decoding as UTF-8 (the module's
 * default) irreversibly corrupts pages served in other charsets such as
 * Shift-JIS; relaying the untouched bytes keeps them intact.
 *
 * @param {object} request  Incoming request; not used by this function.
 * @param {object} response Outgoing response; must provide `write` and `end`.
 * @param {string} url      Address of the external page to fetch.
 */
function getExternalUrl(request, response, url){
    mod_request.get({uri: url, encoding: null}, function (err, res, body) {
        if (err){
            // Best-effort: the failure is only logged and the response is
            // still closed below so the client is not left hanging.
            console.log("\terr=" + err);
        }else{
            // `body` is a Buffer holding the upstream bytes exactly as sent.
            // Any text processing must first decode it with the page's real
            // charset; writing it as-is preserves the original encoding.
            response.write(body);
        }
        response.end();
    });
}
如何使用正确的编码获取外部网站的内容?
答案 0 :(得分:0)
我找到了办法:
以binary编码获取响应内容:
// Load the `request` HTTP client.
var mod_request = require('request');
// Ask for the body as a 'binary' string so that no UTF-8 decoding is applied
// to the upstream bytes.
var requestOptions = {
    uri: url,
    encoding: 'binary',
    headers: headers
};
mod_request.get(requestOptions, function (err, res, body) {});
将binary格式的内容转换为Buffer:
// Rebuild the raw bytes from the 'binary' string. Buffer.from replaces the
// deprecated `new Buffer(string, encoding)` constructor.
var contentBuffer = Buffer.from(res.body, 'binary');
使用npm包detect-character-encoding检测字符编码:
// Load the charset detector from the `detect-character-encoding` package.
var mod_detect_character_encoding = require('detect-character-encoding');
// Run detection on the raw bytes; the result's `encoding` property is what
// the conversion step reads as the source charset.
// NOTE(review): the detector may return no match for some inputs; confirm
// against the package docs and guard before reading `.encoding`.
var charsetMatch = mod_detect_character_encoding(contentBuffer);
使用npm包iconv将内容转换为utf-8:
// Load the Iconv converter class. The member access must be an ASCII '.';
// a full-width '。' here is a syntax error.
var mod_iconv = require('iconv').Iconv;
// Build a converter from the detected source charset to UTF-8.
var iconv = new mod_iconv(charsetMatch.encoding,'utf-8');
// Convert the raw bytes and read the converted bytes back as a string.
var result = iconv.convert(contentBuffer).toString();
P.S.: This approach only applies to text files (HTML, CSS, JS). Do not use it for images or any other non-text content.