Gunzipping / untarring streams in Node

Date: 2018-01-26 11:38:58

Tags: node.js amazon-web-services amazon-s3 gzip tar

I'm trying to write an AWS Lambda that will take a tar.gz from an S3 bucket, inflate it, and then untar the files while piping them out to another S3 bucket.

I have this code:

var AWS = require('aws-sdk');
var fs = require('fs');
var zlib = require('zlib');
var uuid = require('uuid/v4');
var tar = require('tar-stream')
var pack = tar.pack()
var s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  var bucket = event.Records[0].s3.bucket.name;
  var key = event.Records[0].s3.object.key;

  var file = 'S3://' + bucket + '/' + key;

  console.log(bucket)
  console.log(key)

  var readParams = {
    Bucket: bucket,
    Key: key
  };

  var dataStream = s3.getObject(readParams).createReadStream();

  var extract = tar.extract()

  extract.on('entry', function(header, stream, next) {
    console.log(header.name)
    var writeParams = {
      Bucket: process.env.JOB_PROCESSING_BUCKET,
      Key: uuid() + '-' + header.name,
      Body: stream
    };

    s3.upload(writeParams).
    on('httpUploadProgress', function(evt) {
      console.log('Progress:', evt.loaded, '/', evt.total);
    }).
    send(function(err, data) {
      if (err) console.log("An error occurred", err);
      console.log("Uploaded the file at", data.Location);
    });
    stream.on('end', function() {
      next() // ready for next entry
    })
    stream.resume() // just auto drain the stream
  })

  extract.on('finish', function() {
    // all entries read
  })

  dataStream.pipe(zlib.createGunzip()).pipe(extract);

  callback(null, 'Gunzip Lambda Function');
};

It pulls the file down and sorts out the gunzipping, and I can see each file being extracted on entry. The code then tries to pipe each file to S3, which creates a 0 KB file, as if it had skimmed the stream and then moved on to the next entry.

Why does it seem unable to read/process the stream body? Is there a better way of doing this?

Thanks

2 answers:

Answer 0 (score: 2)

I don't know if this is the best solution, but the following code works for me.

const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const tar = require('tar-stream');
const zlib = require('zlib');
const stream = require('stream');
const uuid = require('uuid');

exports.get = (event, context) => {

  var params = {
      Bucket: event.Records[0].s3.bucket.name,
      Key: event.Records[0].s3.object.key
  };

  var dataStream = s3.getObject(params).createReadStream();

  var extract = tar.extract();

  extract.on('entry', function(header, inputStream, next) {

      inputStream.pipe(uploadFromStream(s3, header, context)); // pass context along so the helper can end the invocation

      inputStream.on('end', function() {
          next(); // ready for next entry
      });

      inputStream.resume(); // just auto drain the stream
  });

  extract.on('finish', function() {
      // all entries read
  });

  dataStream.pipe(zlib.createGunzip()).pipe(extract);

}

function uploadFromStream(s3, header, context) {
    // Returns a PassThrough stream; piping the tar entry into it feeds s3.upload.
    var pass = new stream.PassThrough();

    var writeParams = {
        Bucket: process.env.JOB_PROCESSING_BUCKET,
        Key: uuid.v1() + '-' + header.name,
        Body: pass
    };

    s3.upload(writeParams, function(err, data) {
        // Note: context.done ends the Lambda invocation once this upload finishes.
        context.done(err, data);
    });

    return pass;
}
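
The key here is the PassThrough stream: rather than handing s3.upload the tar entry stream directly, the entry is piped into a PassThrough that the SDK consumes as the object body. One caveat with the code above: context.done ends the invocation as soon as the first upload finishes, and next() fires when the entry stream ends rather than when the upload completes. A minimal sketch of a variant that waits for each upload before requesting the next entry (bucket name and key scheme carried over from the code above; untested, so treat as a starting point):

extract.on('entry', function(header, inputStream, next) {
    // Decouple the tar entry from the upload via a PassThrough.
    var pass = new stream.PassThrough();
    inputStream.pipe(pass);

    s3.upload({
        Bucket: process.env.JOB_PROCESSING_BUCKET,
        Key: uuid.v1() + '-' + header.name,
        Body: pass
    }, function(err, data) {
        if (err) {
            console.log('Upload failed', err);
        } else {
            console.log('Uploaded', data.Location);
        }
        next(); // only ask tar-stream for the next entry once this upload is done
    });
});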

Answer 1 (score: 0)

Tried for a couple of hours to get this working, and it turns out the 'end' event has been replaced by 'finish'. So the answer above works great, with just one small change:

inputStream.on('end', function() {
  next(); // ready for next entry
});

- Should be - 

inputStream.on('finish', function() {
  next(); // ready for next entry
});
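
Worth noting: in Node's stream API, 'end' is emitted by readable streams when their data is exhausted, while 'finish' is emitted by writable streams once everything has been flushed, so which event actually fires can depend on the stream implementation and Node version; if one never fires, try the other. For completeness, here is the entry handler from the answer above with that change applied (a sketch under the same assumptions as before):

extract.on('entry', function(header, inputStream, next) {
    inputStream.pipe(uploadFromStream(s3, header, context));

    inputStream.on('finish', function() {
        next(); // ready for next entry
    });

    inputStream.resume(); // just auto drain the stream
});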