从S3下载xlsx并解析它

时间:2016-11-10 19:43:07

标签: node.js excel express

我需要一项服务从Amazon S3下载excel文件,然后用node-xlsx解析

问题是我无法通过xlsx解析文件。当我尝试回读我刚才写的文件时,代码找不到它。

我不太确定这是否是最佳方法,但这是我到目前为止所得到的:

// Express route as posted in the question: downloads an .xlsx over HTTPS,
// writes it to disk, then tries to read it back and parse it with xlsx.parse.
router.get('/process', (req, res) => {
    var fileName = 'https://some-bucket.s3.amazonaws.com/some-excel-file.xlsx'
    https.get(fileName, response => {
        // NOTE(review): chunks are appended to a string, so the binary payload is
        // implicitly converted to text -- presumably this corrupts the xlsx bytes; confirm.
        var body = ''
        response.on('data', chunk => body += chunk)
        response.on('end', () => {

            //fs is being imported early on this file
            // NOTE(review): fs.writeFile is asynchronous and no callback is passed,
            // so the statements below run without waiting for the write to complete.
            fs.writeFile(__dirname + '/test.xlsx', body)

            // A read stream (not a Buffer) is what gets handed to xlsx.parse below.
            var f = fs.createReadStream(__dirname + '/test.xlsx')

            var book = xlsx.parse(f)
            book.forEach(sheet => console.log('sheet', sheet.name) )

            // NOTE(review): only the status code is set here; nothing in this handler
            // calls send()/end(), so the response presumably never completes -- confirm.
            res.status(200)          
        })
        // This listener is attached to the response object (return value of response.on),
        // not to the request returned by https.get.
        .on('error', e => {
            res.status(500)
        })
    })
    return
})

5 个答案:

答案 0 :(得分:4)

下面演示如何在 Node.js 中从 S3 读取文件并将其保留在内存中,而无需先把文件写入磁盘。这种方式适用于 S3 与 AWS Lambda 配合使用的场景,因此不必在 Lambda 上把文件写到某个位置。

请记住,此过程是异步的。

   // Stream an object from S3 and collect it in memory; nothing is written to disk.
   // Bucket and Key are placeholders to be filled in by the reader.
   var params = {
       Bucket: "",
       Key: ""
   };

   var file = s3.getObject(params).createReadStream();
   var buffers = [];

   // Collect every chunk as a Buffer so the binary content stays intact.
   file.on('data', function (data) {
       buffers.push(data);
   });

   // Without an 'error' listener a failed download is emitted as an unhandled
   // 'error' event, which throws; report it instead.
   file.on('error', function (err) {
       console.error("failed to read workbook from S3", err);
   });

   // The whole file is needed before parsing, so parse only once the stream ends.
   file.on('end', function () {
       const buffer = Buffer.concat(buffers);
       const workbook = xlsx.parse(buffer);
       console.log("workbook", workbook)
   });

答案 1 :(得分:2)

fs.writeFile是异步的。在回调被调用之前,文件不会存在。

https://nodejs.org/api/fs.html#fs_fs_writefile_file_data_options_callback

// Completion callback: runs after the write attempt has finished.
// Rethrows a write error, otherwise logs a confirmation.
const onWritten = (err) => {
  if (err) {
    throw err;
  }
  console.log("It's saved!");
};

// The file can only be relied on once onWritten has been invoked.
fs.writeFile('message.txt', 'Hello Node.js', onWritten);

答案 2 :(得分:2)

node-xlsx模块要求整个xlsx缓冲区可用。所以你不能像你目前那样传递ReadStream。试试这种完全避免写入磁盘的方法:

// GET /process: downloads the workbook over HTTPS, parses it fully in memory
// (no temporary file) and answers 200 on success or 500 on any failure.
router.get('/process', (req, res) => {
    const fileUrl = 'https://some-bucket.s3.amazonaws.com/some-excel-file.xlsx';

    // Single failure path so the client always receives exactly one response.
    const fail = (err) => {
        console.error('failed to process xlsx', err);
        if (!res.headersSent) {
            res.sendStatus(500);
        }
    };

    https.get(fileUrl, (response) => {
        if (response.statusCode !== 200) {
            // Discard the body so the socket is released, then report the failure.
            response.resume();
            fail(new Error(`unexpected status code ${response.statusCode}`));
            return;
        }

        // Keep the chunks as Buffers; concatenating them as strings would corrupt the binary data.
        const chunks = [];
        response
            .on('data', (chunk) => {
                chunks.push(chunk);
            })
            .on('end', () => {
                try {
                    const book = xlsx.parse(Buffer.concat(chunks));
                    book.forEach((sheet) => console.log('sheet', sheet.name));
                    // res.status() alone only sets the code; sendStatus also ends the response.
                    res.sendStatus(200);
                } catch (err) {
                    fail(err);
                }
            })
            .on('error', fail);
    }).on('error', fail); // connection-level errors are emitted on the request, not the response
});

答案 3 :(得分:1)

如果要使用 async/await,可以参考下面的解决方案:

const AWS = require('aws-sdk');
const XLSX = require('xlsx');

// Set the credentials on the global AWS SDK configuration.
// AMAZON_ACCESS_KEY and AMAZON_SECRET_ACCESS_KEY are not defined in this snippet
// and must be provided by the surrounding code.
AWS.config.update({
  accessKeyId: AMAZON_ACCESS_KEY,
  secretAccessKey: AMAZON_SECRET_ACCESS_KEY,
});

/**
 * Read an object from the 'yor_buket' bucket and pass its complete contents
 * to a Node-style callback.
 *
 * @param {string} file - Key of the object to read.
 * @param {Function} callback - Invoked as callback(error) when the stream emits
 *   'error', or as callback(null, buffer) when the stream ends.
 */
function getBufferFromS3(file, callback){
  const chunks = [];
  const request = new AWS.S3().getObject({ Bucket: 'yor_buket', Key: file});

  request
    .createReadStream()
    .on('data', (chunk) => {
      chunks.push(chunk);
    })
    .on('end', () => {
      // All chunks have arrived: join them into one Buffer for the caller.
      callback(null, Buffer.concat(chunks));
    })
    .on('error', (error) => {
      callback(error);
    });
}

/**
 * Promise wrapper around the callback-based getBufferFromS3.
 *
 * @param {string} file - Key of the S3 object to read.
 * @returns {Promise<Buffer>} Resolves with the buffer passed to the callback;
 *   rejects with the error passed to the callback.
 */
function getBufferFromS3Promise(file) {
  return new Promise((resolve, reject) => {
    getBufferFromS3(file, (error, s3buffer) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(s3buffer);
    });
  });
}

// create workbook from buffer
// NOTE(review): `file` is not defined in this snippet -- presumably the S3 object key
// supplied by the caller.
// NOTE(review): `await` appears outside any function here; since this snippet loads its
// modules with require(), it presumably has to run inside an async function -- confirm.
const buffer = await getBufferFromS3Promise(file);
// Parse the downloaded bytes into a workbook object.
const workbook = XLSX.read(buffer);

答案 4 :(得分:0)

另一种实现方法是使用exceljs

const AWS = require('aws-sdk');
const Excel = require('exceljs');

/**
 * Apply the AWS credentials and open a read stream for the object
 * 'file_name' in the bucket 'yor_buket'.
 *
 * @returns {Promise<*>} Resolves with whatever createReadStream() returns.
 */
async function downloadFile(){
    AWS.config.update({ accessKeyId: AMAZON_ACCESS_KEY, secretAccessKey: AMAZON_SECRET_ACCESS_KEY });

    const objectLocation = { Bucket: 'yor_buket', Key: 'file_name'};
    const client = new AWS.S3();
    return await client.getObject(objectLocation).createReadStream();
}


/**
 * Read an xlsx workbook from a stream and collect the values of every
 * non-empty row of the worksheet named 'sheet_name'.
 *
 * @param {*} stream - Readable stream with the xlsx content (e.g. from S3).
 * @returns {Promise<Array>} Resolves with one `row.values` entry per row, only
 *   after the workbook has been read completely.
 * @throws {Error} Rejects if reading fails or the worksheet does not exist.
 */
async function loadWorkbook(stream){
    const workbook = new Excel.Workbook();

    // Wait for the read to finish before touching the sheets; resolving earlier
    // would always hand back an empty list. A read failure rejects this promise.
    const loaded = await workbook.xlsx.read(stream);

    const worksheet = loaded.getWorksheet('sheet_name');
    if (!worksheet) {
        throw new Error("worksheet 'sheet_name' not found in workbook");
    }

    const rows = [];
    worksheet.eachRow({ includeEmpty: false}, (row) => {
        rows.push(row.values);
    });
    return rows;
}

/**
 * Fetch the stream via downloadFile(), turn it into rows via loadWorkbook(),
 * and print the rows to the console.
 *
 * @returns {Promise<void>}
 */
async function loadFromS3(){
    const dataRows = await loadWorkbook(await downloadFile());
    console.log(dataRows);
}