我正在尝试使用以下代码读取 AWS S3 存储桶中的文件:
fs.readFile(file, function (err, contents) {
var myLines = contents.Body.toString().split('\n')
})
我已经能够使用 Node 的 aws-sdk 下载和上传文件,但我不知道如何简单地读取文件并解析其内容。
以下是我如何从s3读取文件的示例:
// Build an S3 client and describe which object to fetch.
const s3 = new AWS.S3();
const params = {
  Bucket: 'myBucket',
  Key: 'myKey.csv',
};
// No callback is passed here, so nothing has been read yet at this point;
// `s3file` only holds whatever getObject() returns.
const s3file = s3.getObject(params);
答案 0 :(得分:74)
你有几个选择。您可以将回调包含为第二个参数,将使用任何错误消息和对象调用该参数。此example直接来自AWS文档:
// Callback form: the second argument is invoked with either an error or the
// response object once the request has completed.
s3.getObject(params, (err, data) => {
  if (err) {
    console.log(err, err.stack); // an error occurred
    return;
  }
  console.log(data); // successful response
});
或者,您可以将输出转换为流。 AWS文档中还有example:
// Stream form: pipe the object straight into a local file instead of
// receiving it through a callback.
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const params = {
  Bucket: 'myBucket',
  Key: 'myImageFile.jpg',
};
const file = require('fs').createWriteStream('/path/to/file.jpg');
const objectStream = s3.getObject(params).createReadStream();
objectStream.pipe(file);
答案 1 :(得分:30)
这样做:
Option Explicit
' CopyCells: copies the used part of column C on "Sheet1", from row 5 down to
' the last non-empty row, to the clipboard. Shows a message box instead when
' the last used row lies above the starting row.
' NOTE(review): this is Excel VBA and does not appear related to the
' surrounding JavaScript/S3 snippets - confirm it belongs here.
Sub CopyCells()
Dim ws As Worksheet
Dim rng As Range
Dim sRow As Long, lRow As Long
'~~> Change this to the relevant worksheet
Set ws = ThisWorkbook.Sheets("Sheet1")
sRow = 5 '<~~ Starting row
With ws
'~~> Find last row in Col C
' (starts at the bottom cell of column C and jumps up to the last used cell)
lRow = .Range("C" & .Rows.Count).End(xlUp).Row
'~~> If the last row < Start Row
If lRow < sRow Then
MsgBox "Start Row cannot be greater then last row"
Else
'~~> Create your range
' (column C from the starting row through the last used row)
Set rng = .Range("C" & sRow & ":C" & lRow)
'~~> Copy
rng.Copy
'
' Do what you want with copied data
'
End If
End With
End Sub
答案 2 :(得分:19)
因为您似乎想逐行处理 S3 文本文件,这里给出一个 Node 版本,它使用标准的 readline 模块和 AWS 的 createReadStream():
const readline = require('readline');

// Feed the S3 object stream into readline so it is delivered one line at a time.
const s3Stream = s3.getObject(params).createReadStream();
const rl = readline.createInterface({ input: s3Stream });

// Called once per line of the object.
rl.on('line', (line) => {
  console.log(line);
});

// Called when the input has been fully consumed; intentionally a no-op here.
rl.on('close', () => {});
答案 3 :(得分:7)
我无法理解为什么,但 createReadStream / pipe 方法对我不起作用。我试图下载一个大的 CSV 文件(300MB+),结果得到了重复的行。这似乎是一个随机出现的问题:每次尝试下载时,最终文件的大小都不一样。
我最终使用另一种方式,基于AWS JS SDK examples:
// Download by listening to the request's HTTP events directly instead of
// piping a read stream.
const s3 = new AWS.S3();
const params = {
  Bucket: 'myBucket',
  Key: 'myImageFile.jpg',
};
const file = require('fs').createWriteStream('/path/to/file.jpg');

const request = s3.getObject(params);
// Append every received chunk to the local file.
request.on('httpData', (chunk) => {
  file.write(chunk);
});
// Close the local file once the HTTP transfer is done.
request.on('httpDone', () => {
  file.end();
});
// Nothing is sent until send() is called explicitly.
request.send();
这样就运行得非常顺利。
答案 4 :(得分:4)
这是我用来从s3中检索和解析json数据的例子。
// Fetch an object from S3 and parse its body as JSON.
var params = {Bucket: BUCKET_NAME, Key: KEY_NAME};
new AWS.S3().getObject(params, function(err, json_data)
{
    // Report the failure instead of silently doing nothing.
    if (err) {
        console.error(err);
        return;
    }
    var json;
    try {
        // Buffer.from() replaces the deprecated `new Buffer()` constructor.
        json = JSON.parse(Buffer.from(json_data.Body).toString("utf8"));
    } catch (parseError) {
        // The object exists but does not contain valid JSON.
        console.error(parseError);
        return;
    }
    // PROCESS JSON DATA (available in `json`)
});
答案 5 :(得分:3)
从 S3 下载非常大的文件时,我遇到了完全相同的问题。
来自AWS文档的示例解决方案不起作用:
// Stream the S3 object into a local file and report the outcome via `callback`.
var file = fs.createWriteStream(options.filePath);
// 'close' on the write stream marks the end of a successful download.
file.on('close', function(){
    if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
    return callback(null,done);
});
// The handler parameter must be named `error`: the body logs and forwards
// `error`, and the previous name `err` made this handler throw a ReferenceError.
s3.getObject({ Key: documentKey }).createReadStream().on('error', function(error) {
    if(self.logger) self.logger.error("S3Dataset download error key:%s error:%@", options.fileName, error);
    return callback(error);
}).pipe(file);
虽然这个解决方案可行:
// Download the S3 object by writing each HTTP chunk to a local file, then
// report the outcome via `callback`.
var file = fs.createWriteStream(options.filePath);
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
    // The handler parameter must be named `error`: the body logs and forwards
    // `error`, and the previous name `err` made this handler throw a ReferenceError.
    .on('error', function(error) {
        if(self.logger) self.logger.error("S3Dataset download error key:%s error:%@", options.fileName, error);
        return callback(error);
    })
    // Append every received chunk to the local file.
    .on('httpData', function(chunk) { file.write(chunk); })
    // The HTTP transfer is finished: close the file and signal success.
    .on('httpDone', function() {
        file.end();
        if(self.logger) self.logger.info("S3Dataset file download saved to %s", options.filePath );
        return callback(null,done);
    })
    .send();
由于某种原因,createReadStream 方式无法触发 end、close 或 error 回调。有关此内容,请参阅 here。
我正在使用该解决方案将档案写入gzip,因为第一个(AWS示例)在这种情况下也不起作用:
// Download a gzip archive from S3 to options.filePath, then decompress it to
// options.fileDest. `callback(error)` is called on failure and
// `callback(null, options.fileDest)` once the decompressed file is written.
var gunzip = zlib.createGunzip();
var file = fs.createWriteStream( options.filePath );
// Several streams can fail independently; make sure `callback` fires only once.
var settled = false;
function settle(error, result) {
    if (settled) return;
    settled = true;
    return callback(error, result);
}
// Surface disk errors while the archive is being written.
file.on('error', function (error) {
    if(self.logger) self.logger.error("%@",error);
    settle(error);
});
s3.getObject({ Bucket: this._options.s3.Bucket, Key: documentKey })
    .on('error', function (error) {
        if(self.logger) self.logger.error("%@",error);
        return settle(error);
    })
    .on('httpData', function (chunk) {
        file.write(chunk);
    })
    .on('httpDone', function () {
        // Wait until every buffered chunk has been flushed before reading the
        // archive back; reading right after end() can see a truncated file.
        file.end(function () {
            // A failure was already reported; do not start decompressing.
            if (settled) return;
            if(self.logger) self.logger.info("downloadArchive downloaded %s", options.filePath);
            fs.createReadStream( options.filePath )
                .on('error', settle)
                .pipe(gunzip)
                // Corrupt archives make gunzip emit 'error'.
                .on('error', settle)
                .pipe(fs.createWriteStream(options.fileDest))
                .on('error', settle)
                // 'finish' on the destination means the decompressed data has
                // been written; the reader's 'end' fires before that.
                .on('finish', function () {
                    if(self.logger) self.logger.info("downloadArchive unarchived %s", options.fileDest);
                    return settle(null, options.fileDest);
                });
        });
    })
    .send();
答案 6 :(得分:2)
如果要节省内存并希望将每一行作为 json 对象获取,则可以使用 fast-csv 创建 readstream,并将每行读取为 json 对象,如下所示:
const csv = require('fast-csv');
const AWS = require('aws-sdk');

// Explicit credentials are only needed when running outside AWS (local execution).
const credentials = new AWS.Credentials("ACCESSKEY", "SECRETEKEY", "SESSIONTOKEN");
AWS.config.update({
  credentials, // credentials required for local execution
  region: 'your_region',
});

const dynamoS3Bucket = new AWS.S3();
const objectLocation = { Bucket: 'your_bucket', Key: 'example.csv' };
const stream = dynamoS3Bucket.getObject(objectLocation).createReadStream();

// With `headers: true` each row arrives as an object keyed by the header row.
const parser = csv.fromStream(stream, { headers: true });

parser.on("data", (data) => {
  parser.pause(); // can pause reading using this at a particular row
  parser.resume(); // to continue reading
  console.log(data);
});

parser.on("end", () => {
  console.log('process finished');
});
答案 7 :(得分:2)
对于新版本的sdk,接受的答案不起作用-它不等待对象被下载。以下代码段将对新版本有所帮助:
// dependencies
const AWS = require('aws-sdk');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
var bucket = "TestBucket"
var key = "TestKey"
try {
const params = {
Bucket: Bucket,
Key: Key
};
var theObject = await s3.getObject(params).promise();
} catch (error) {
console.log(error);
return;
}
}
答案 8 :(得分:0)
我更喜欢 Buffer.from(data.Body).toString('utf8')。它支持编码参数。对于其他 AWS 服务(例如 Kinesis Streams),可能有人希望将 'utf8' 编码替换为 'base64'。
// Fetch an object and print its body decoded as UTF-8 text.
new AWS.S3().getObject(
  { Bucket: this.awsBucketName, Key: keyName },
  function(err, data) {
    // Report the failure instead of silently ignoring it.
    if (err) {
      console.error(err);
      return;
    }
    // The encoding argument can be changed (e.g. 'base64') when needed.
    const body = Buffer.from(data.Body).toString('utf8');
    console.log(body);
  }
);
答案 9 :(得分:0)
如果您希望避免回调,则可以利用sdk .promise()函数,如下所示:
// Promise form: must run inside an async function (or a module with top-level await).
const s3 = new AWS.S3();
const params = {Bucket: 'myBucket', Key: 'myKey.csv'};
const response = await s3.getObject(params).promise(); // await the promise
// Read the body from `response`, the variable the result was stored in;
// the previous code read it from an undefined `getObjectResult`.
const fileContent = response.Body.toString('utf-8'); // can also do 'base64' here if desired
我敢肯定,这里提到的其他方法也有其优点,但这对我很有用。源自此线程(请参阅AWS的最新回复):https://forums.aws.amazon.com/thread.jspa?threadID=116788
答案 10 :(得分:0)
// Pipe an S3 object into a local file, with separate error handling for the
// download side and the file-writing side.
const fileStream = fs.createWriteStream('/path/to/file.jpg');
const s3Stream = s3
  .getObject({ Bucket: 'myBucket', Key: 'myImageFile.jpg' })
  .createReadStream();

// Listen for errors returned by the service
s3Stream.on('error', (err) => {
  // NoSuchKey: The specified key does not exist
  console.error(err);
});

// pipe() returns its destination, so the handlers below are attached to the
// file stream.
const destination = s3Stream.pipe(fileStream);

destination.on('error', (err) => {
  // capture any errors that occur when writing data to the file
  console.error('File Stream:', err);
});

destination.on('close', () => {
  console.log('Done.');
});
参考:https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/requests-using-stream-objects.html