我正在尝试通过 HTTP 下载一个 PDF 文件,并将其上传到 S3 存储桶。
HTTP 响应的 Content-Type 为 application/pdf,我想把该文件原样保存/上传到 S3。
// Download the PDF and hand its raw bytes to awsS3.uploadBinaryData.
http.get(options, function (resp) {
    // Keep the body as raw Buffer chunks. Do NOT call resp.setEncoding():
    // decoding the stream into a JS string and concatenating it means the
    // payload is no longer a byte sequence, and bytes >= 0x80 are not
    // preserved when that string is measured/sent as UTF-8 later on.
    var chunks = [];
    resp.on('data', function (chunk) {
        chunks.push(chunk);
    });
    resp.on("end", function () {
        var fileName = documentId + "___test.pdf";
        // Single contiguous Buffer holding exactly the bytes received.
        var pdf = Buffer.concat(chunks);
        awsS3.uploadBinaryData(fileName, pdf, function (err, path) {
            // NOTE(review): `err` is ignored here, as in the original
            // snippet, so a failed upload still answers 'done' — confirm
            // how errors should be reported to the client.
            return res.json(new CustomResponse('done'));
        })
    });
});
uploadBinaryData 方法如下所示:
// Upload `data` to S3 under `fileName` and report the resulting URL (or the
// error) through `callback(err, result)`.
//
// A string is treated as a legacy 'binary' (latin1) string, i.e. one char per
// byte, and converted back to the original bytes. Without this, the string
// would be measured as UTF-8 and every byte >= 0x80 would count (and be
// sent) as two bytes. Buffers and other binary inputs pass through untouched.
var body = typeof data === 'string' ? Buffer.from(data, 'binary') : data;
var params = {
    Bucket: config.get("S3.BUCKET_NAME"),
    Key: fileName,
    Body: body,
    ACL: "public-read",
    // Byte length of what is actually sent, not a string's character count.
    ContentLength: Buffer.byteLength(body),
    // The MIME type belongs in ContentType; ContentEncoding is for values
    // such as "gzip" and must not carry "application/pdf".
    ContentType: 'application/pdf'
};
awsS3.putObject(params, function (err) {
    if (err) {
        callback(err, err.stack);
        return;
    }
    callback(null, buildS3Url(fileName));
});
上传有效,但pdf内容搞砸了(我只看到12页中的3页,有些是空的,有些包含奇怪的字符)。
它看起来像编码问题,但我不知道出了什么问题。
答案 0 :(得分:0)
我刚刚用 aws-sdk v2.329.0 解决了这个问题。
我的Lambda:
const storage = require('./utils/save-to-s3');
/**
 * Lambda entry point: logs the request and delegates the download/upload to
 * the storage utility.
 *
 * @param {{downloadUrl: string, fileName: string}} event - Invocation payload;
 *   `downloadUrl` is the source URL and `fileName` the key to store it under.
 * @returns {*} Whatever `storage.saveUrl` returns.
 */
module.exports.handler = (event) => {
  const downloadUrl = event.downloadUrl;
  const fileName = event.fileName;

  console.info(`Downloading URL ${downloadUrl} to storage`, event);

  const pendingUpload = storage.saveUrl(downloadUrl, fileName);
  return pendingUpload;
};
我的实用程序:
const AWS = require('aws-sdk');
const https = require('https');
const http = require('http');
// Set the region on the SDK-wide configuration object from the AWS_REGION
// environment variable; this runs before the S3 client below is constructed.
AWS.config.update({ region: process.env.AWS_REGION });
// S3 client used by saveUrl, pinned to the 2006-03-01 API version.
const storage = new AWS.S3({ apiVersion: '2006-03-01' });
/**
 * Downloads `url` into memory and stores the bytes in the S3 bucket named by
 * the BUCKET environment variable under the key `fileName`.
 *
 * The body is accumulated as raw Buffers (never decoded to a string), so
 * binary payloads such as PDFs are uploaded byte-for-byte.
 *
 * @param {string} url - Source URL; `https` is used when it starts with
 *   "https", plain `http` otherwise.
 * @param {string} fileName - Object key to write in the bucket.
 * @returns {Promise<*>} Resolves with the result of `putObject`; rejects on a
 *   request/response stream error, a non-2xx HTTP status, or an upload error.
 */
function saveUrl(url, fileName) {
  return new Promise((resolve, reject) => {
    const protocol = url.startsWith('https') ? https : http;
    const request = protocol.get(url, (response) => {
      const { statusCode } = response;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, and refuse to upload an
        // error page or redirect stub as if it were the requested file.
        response.resume();
        reject(new Error(`Download of ${url} failed with HTTP status ${statusCode}`));
        return;
      }

      const buffers = [];
      response.on('error', (err) => { reject(err); });
      response.on('data', (buffer) => {
        buffers.push(buffer);
      });
      response.on('end', () => {
        const uploadParams = {
          Bucket: process.env.BUCKET,
          Key: fileName,
          Body: Buffer.concat(buffers),
        };
        // Carry the source MIME type over so the stored object is served
        // with the right Content-Type (e.g. application/pdf).
        const contentType = response.headers['content-type'];
        if (contentType) {
          uploadParams.ContentType = contentType;
        }
        resolve(storage.putObject(uploadParams).promise());
      });
    });
    // Connection-level failures (DNS, refused connection, TLS) are emitted on
    // the request, not the response; without this listener they would be
    // thrown as an unhandled 'error' event and the promise would never settle.
    request.on('error', (err) => { reject(err); });
  });
}
// Public API of this utility module.
module.exports = { saveUrl: saveUrl };
注意:记得为 Lambda 的执行角色授予对目标存储桶的 s3:PutObject 权限。