Node.js服务器:图像上传/损坏问题

时间:2014-10-19 22:59:35

标签: javascript node.js utf-8

我正在尝试用Node.js编写一个基本的文件服务器,但我尝试上传和存储的所有图像都会损坏。问题似乎与Node的Buffer在转换为UTF-8再转换回来时的处理方式有关(我必须这样做,才能把POST主体中的头部信息与二进制数据分离开)。

这是一个简单的节点服务器,它显示了我当前的方法以及我遇到的问题:

// Minimal upload "echo" server used to reproduce the corruption problem:
//   GET  -> serves an HTML page with a multipart/form-data file upload form
//   POST -> buffers the request body, strips the multipart headers and the
//           closing boundary, and writes what is left back to the client.
var http = require('http');

var server = http.createServer(function(request, response) {
    if (request.method === "GET") {
        // on GET request, output a simple web page with a file upload form
        var mypage = '<!doctype html><html><head><meta charset="utf-8">' + 
                        '<title>Submit POST Form</title></head>\r\n<body>' +
                        '<form action="http://127.0.0.1:8008" method="POST" ' + 
                        'enctype="multipart/form-data"> <input name="upload" ' + 
                        'type="file"><p><button type="submit">Submit</button>' + 
                        '</p></form></body></html>\r\n';
        response.writeHead(200, {
            "Content-Type": "text/html",
            // NOTE(review): .length counts UTF-16 code units, not bytes; the two
            // only agree here because the page above is ASCII-only.
            "Content-Length": mypage.length
        });
        response.end(mypage);

    } else if (request.method === "POST") {
        // if we have a return post request, let's capture it
        // Starts empty; every incoming chunk is appended below.
        var upload = new Buffer([]);

        // get the data
        request.on('data', function(chunk) {
            // copy post data (at this point the bytes are still intact)
            upload = Buffer.concat([upload, chunk]);
        });

        // when we have all the data
        request.on('end', function() {
            // convert to UTF8 so we can pull out the post headers
            // NOTE(review): this is where the image gets damaged. Decoding
            // arbitrary binary data as UTF-8 is lossy: byte sequences that are
            // not valid UTF-8 are replaced during decoding, so the original
            // bytes cannot be recovered from `str` afterwards.
            var str = upload.toString('utf8');
            // get post headers with a regular expression
            // Capture group 1: the boundary line preceding Content-Disposition.
            // Capture group 2: the value of the part's Content-Type header.
            var re = /(\S+)\r\nContent-Disposition:\s*form-data;\s*name="\w+";\s*filename="[^"]*"\r\nContent-Type: (\S+)\r\n\r\n/i,
                reMatch = str.match(re);
            // NOTE(review): str.match returns null when the pattern does not
            // match, in which case the property reads below throw a TypeError.
            var lengthOfHeaders = reMatch[0].length,
                boundary = reMatch[1],
                mimeType = reMatch[2];
            // slice headers off top of post body
            // NOTE(review): the regex is not anchored, so this assumes the match
            // begins at index 0 of the body.
            str = str.slice(lengthOfHeaders);
            // remove the end boundary
            str = str.replace("\r\n" + boundary + "--\r\n", '');
            // convert back to buffer
            // NOTE(review): re-encoding the already-decoded string does not
            // restore the bytes that were replaced by the toString('utf8') call.
            var rawdata = new Buffer(str, 'utf8');
            // echo back to client
            response.writeHead(200, {
                "Content-Type": mimeType
            });
            response.end(rawdata);
        });
    }
});

server.listen(8008);
console.log("server running on port 8008");

要测试它,请用Node运行该脚本,然后在浏览器中打开127.0.0.1:8008。尝试上传一张图片并提交表单。每次图像都会损坏——即使脚本本应直接把图像数据原样回显给浏览器。

那么,有人知道我在这里做错了什么吗?有没有我还没想到的、更好的方法来处理Node中POST主体里的头部? (在任何人开口之前先说明:不,我不想使用Express。我想弄清楚并理解这个问题。)

3 个答案:

答案 0 :(得分:0)

真的不应该用这样的正则表达式来解析multipart有效负载,因为正则表达式很容易去匹配你的图像数据本身,这非常不可靠。npm上有可以为您解析表单的模块,例如busboy、multiparty、formidable。它们都不使用正则表达式,也不需要Express。

答案 1 :(得分:0)

  

问题似乎与Node Buffers处理转换为UTF-8并再次返回的方式有关

我想你是对的,转换为UTF-8是个坏主意。不过你可以只在数据中查找头部和边界的位置,而保持缓冲区本身不变;当你得到文件头和边界的所有位置之后,只需把缓冲区中相应的部分复制到一个新的缓冲区,例如:

originalBuffer.copy(newBuffer,0,positionHeader,positionEndBoundary)

var http = require('http');
var fs = require('fs');
// Number of requests handled so far (logged on every request).
var connections = 0;

/**
 * Extracts the multipart boundary token from a Content-Type header value.
 *
 * @param {string|undefined} contentType e.g. 'multipart/form-data; boundary=----abc'
 * @returns {string|null} the boundary token, or null when none is present
 */
function getBoundary(contentType) {
    var match = /boundary=(?:"([^"]+)"|([^;\s]+))/i.exec(contentType || '');
    return match ? (match[1] || match[2]) : null;
}

/**
 * Locates the first part of a multipart/form-data body and returns its headers
 * and payload. All offsets are found by searching the Buffer itself, so the
 * binary payload is never decoded to a string and cannot be altered.
 *
 * @param {Buffer} body     the complete request body
 * @param {string} boundary the boundary token from the Content-Type header
 * @returns {{headers: string, data: Buffer}|null} null when the body is not a
 *          well-formed multipart body for this boundary
 */
function extractFirstPart(body, boundary) {
    var delimiter = '--' + boundary;

    var partStart = body.indexOf(delimiter + '\r\n');
    if (partStart === -1) return null;

    // The part headers end at the first empty line after the delimiter line.
    var lineEnd = partStart + Buffer.byteLength(delimiter);
    var headersEnd = body.indexOf('\r\n\r\n', lineEnd);
    if (headersEnd === -1) return null;

    var headersStart = lineEnd + 2;
    var dataStart = headersEnd + 4;

    // The CRLF in front of the next delimiter belongs to the delimiter,
    // not to the payload, so the payload stops right before it.
    var dataEnd = body.indexOf('\r\n' + delimiter, dataStart);
    if (dataEnd === -1) return null;

    // Copy exactly the payload bytes into a buffer of exactly that size
    // (an oversized target would leave trailing zero bytes in the file).
    var data = Buffer.alloc(dataEnd - dataStart);
    body.copy(data, 0, dataStart, dataEnd);

    return {
        // Only the textual headers are decoded; a part without headers yields ''.
        headers: headersEnd > headersStart
            ? body.toString('utf8', headersStart, headersEnd)
            : '',
        data: data
    };
}

// Upload server:
//   /        -> serves the upload form
//   /upload  -> stores the first uploaded file as nameFile.<subtype> if its
//               part headers name a supported image type
//   other    -> 404
var server = http.createServer(function (req, res) {
    connections++;
    console.log(req.url, "connections: " + connections);

    if (req.url === '/') {
        res.writeHead(200, { 'content-type': 'text/html' });
        res.end(
            '<form action="/upload" enctype="multipart/form-data" method="post">' +
            '<input type="file" name="upload" multiple="multiple"><br>' +
            '<input type="submit" value="Upload">' +
            '</form>'
        );
        return;
    }

    if (req.url !== '/upload') {
        // Always answer, otherwise the client waits until it times out.
        res.writeHead(404);
        res.end();
        return;
    }

    // Collect the chunks and concatenate once at the end instead of
    // re-concatenating and re-decoding the whole body on every chunk.
    var chunks = [];
    req.on('data', function (chunk) {
        chunks.push(chunk);
    });

    req.on('end', function () {
        var body = Buffer.concat(chunks);
        var boundary = getBoundary(req.headers['content-type']);
        var part = boundary ? extractFirstPart(body, boundary) : null;

        if (!part) {
            res.writeHead(400);
            res.end();
            return;
        }

        // Inspect only the part headers; the payload may contain any bytes.
        var imageType = isImage(part.headers);
        if (!imageType) {
            console.log("Não é imagem");
            res.writeHead(415, { 'content-type': 'text/plain; charset=utf-8' });
            res.end("Não é imagem");
            return;
        }
        console.log("é imagem do tipo " + imageType);

        // File extension is the MIME subtype, e.g. "png" for "image/png".
        var type = imageType.slice("image/".length);
        console.log("END");
        fs.writeFile("nameFile." + type, part.data, function (err) {
            if (err) {
                console.log(err);
                res.writeHead(500);
                res.end();
                return;
            }
            res.end();
        });
    });
});

/**
 * Reports which supported image MIME type is mentioned in a string.
 *
 * The types are checked in a fixed order (png, jpeg, bmp, gif) and the first
 * one found wins.
 *
 * @param {string} str text to search, e.g. the headers of a multipart part
 * @returns {string|false} the matching MIME type, or false when none is found
 */
function isImage(str) {
    var supportedTypes = ['image/png', 'image/jpeg', 'image/bmp', 'image/gif'];
    for (var i = 0; i < supportedTypes.length; i++) {
        // Compare the result of indexOf with -1; the comparison must sit
        // outside the call, otherwise indexOf is asked to look for "true".
        if (str.indexOf(supportedTypes[i]) !== -1) {
            return supportedTypes[i];
        }
    }
    return false;
}

// Listen on the port given by the PORT environment variable, or 8080 if unset.
var port = process.env.PORT || 8080;

// Logs the port once the server has started listening.
function onListening() {
    console.log('Recording connections on port %s', port);
}

server.listen(port, onListening);

答案 2 :(得分:-1)

我遇到了完全相同的问题,后来我看到了这篇关于将二进制数据转换为utf8的危险性的文章……链接在这里:link

作者演示了这种转换是如何破坏原始数据的。我自己也试过,它确实改变了图像数据。