Node.js-传输大文件而不消耗大量内存

时间:2018-08-22 08:16:06

标签: javascript node.js sockets

动机:使用节点将大约10gb到50gb的文件从客户端传输到服务器

问题:有时消耗大量内存大约6gb并且系统挂起

我想要的是::我的代码使用的RAM不应超过200mb。

我在做什么:当内存使用量达到200 mb时,我暂停了流

预期::当内存使用量达到200 mb时,我将暂停流数据,并在使用量下降时恢复数据。

发生了什么事:当使用率超过200 mb时我会暂停,但即使脚本暂停了ram使用率也不会降低,它也不会恢复cos。

Client.js

  var net = require('net'),
   fs = require('fs'),
  path = require('path');

  socket = new net.Socket();
  socket.connect(6000, 127.0.0.1);
  socket.on('connect',function(){

 // I am reading around 10 gb of file in chunks

 var readStream = fs.createReadStream("File Name", {highWaterMark: 16384});

  // Checking ram usage every second to ensure it does not consume more than 200 mb of ram, If i do not write this check it even uses 4gb+ ram for this much big file and hangs my node script.

  setInterval(function(){
    if(process.memoryUsage().rss > 209715200){
        // if ram consumtion is more that 200 mb
        console.log("Pause");
        global.gc();
        readStream.pause();
    }else{
        readStream.on('pause',function(){
          readStream.resume();
        });
    }
  },1000);

 readStream.on('data', function(chunk){

   console.log("Used Mem "+process.memoryUsage().rss);
   var head = new Buffer.from("FILE");
   var sizeHex = chunk.length.toString(16);
   while(sizeHex.length < 4){
     sizeHex = "0" + sizeHex;
   }
   var size = new Buffer.from(sizeHex);
   var delimiter = new Buffer.from("@");
   var pack = Buffer.concat([head, size, chunk, delimiter]);
     // sending data to server
     // This sending part start consuming ram 
     socket.write(pack,function(){
     });
 });

 readStream.on('close', function(){
   socket.end();
   global.gc();
 });


 });

Server.js

  var net = require('net'),
  fs = require('fs'),
  path = require('path');

  var server = net.createServer(function(socket){
    var packets = 0;
    var buffer = new Buffer.alloc(0);
// Receiving Data
    socket.on('data', function(chunk){
      buffer = Buffer.concat([buffer, chunk]);
    });


// when Client socket ends write file on server 
    socket.on('close', function(){

      var writeStream = fs.createWriteStream("New File Name");
      while(buffer.length){
        var head = buffer.slice(0, 4);

        if(head.toString() != "FILE"){
          console.log("ERROR!!!!");
          process.exit(1);
        }

        var sizeHex = buffer.slice(4, 8);
        var size = parseInt(sizeHex, 16);


        var content = buffer.slice(8, size + 8);
        var delimiter = buffer.slice(size + 8, size + 9);

        if(delimiter != "@"){
          console.log("wrong delimiter!!!");
          process.exit(1);
        }
        writeStream.write(content);
        buffer = buffer.slice(size + 9);
      }

      setTimeout(function(){
        writeStream.end();
      }, 2000);

    });   

  });

   server.listen(6000);

系统监控器中的Ram使用情况

  Before Running Above Script : 1.6gb of 6 gb 
  After Running Above Script : 1.8 gb of 6gb

1 个答案:

答案 0 :(得分:3)

问题是您没有等待socket.write完成... socket.write中的回调函数在那里表明写入已完成,您可以发送另一个块。

使用pipepipeline来手动管理套接字,而不是手动写入套接字。

这是我的看法

client.js

const net = require('net');
const fs = require('fs');
const { pipeline } = require('stream');

const socket = new net.Socket();
socket.connect(6000, '127.0.0.1');
socket.on('connect', function () {
  const fileStream = fs.createReadStream('/dev/zero', { highWaterMark: 16384, end: 2 * 1024 * 1024 * 1024 }); // read 2GB of zeros, replace with real file
  console.log('New file transfer');

  pipeline(
    fileStream,
    socket,
    (error) => {
      if (error) { console.error(error) }
      console.log('File transfer done');
    }
  );
});

server.js

const net = require('net');
const fs = require('fs');
const { pipeline } = require('stream');

const server = net.createServer(function (socket) {
  const fileStream = fs.createWriteStream('/dev/null');
  console.log('New file transfer');

  pipeline(
    socket,
    fileStream,
    (error) => {
      if (error) { console.error(error) }
      console.log('File transfer done');
    }
  )
});

server.listen(6000);

根据我的测试,它的RAM永远不会超过100MB,并且总体而言,代码的行为合理-因此不需要gc和内存检查。

以上代码使用pipeline函数,该函数仅在最新的Node.js 10中可用-如果您使用的是较旧的Node,请使用工作原理相同的pump软件包。