我有一个AWS S3存储桶，里面有一个日志文件。我还有一个使用Node.js 8.10运行时的Lambda函数，用来逐行读取该日志文件。以下是我目前的代码：
const readline = require('readline');
exports.handler = async (event) => {
try {
let bucket = event.Records[0].s3.bucket.name;
let key = event.Records[0].s3.object.key;
// documentation for this method:
// https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property
let readStream = S3.getObject({ Bucket: bucket, Key: key }).createReadStream(); // 1
const rl = readline.createInterface({
input: readStream
});
rl.on('line', async (line) => {
console.log(line);
// process line as needed
});
} catch (err) {
console.log(err);
return err;
}
};
在上面的代码段中，我只是为了测试而把日志文件中的每一行打印到控制台，但没有看到任何输出。
但是，如果我像下面这样重构代码，它就可以正常工作：
const stream = require('stream');
const bufferStream = new stream.PassThrough();
const readline = require('readline');
exports.handler = async (event) => {
try {
// retrieving first record only just for
// testing
let bucket = event.Records[0].s3.bucket.name;
let key = event.Records[0].s3.object.key;
// data.Body is a Buffer
let data = await S3.getObject({ Bucket: bucket, Key: key }).promise();
bufferStream.end(data.Body); // 2
const rl = readline.createInterface({
input: bufferStream
});
rl.on('line', (line) => {
console.log(line);
// process line as needed
});
} catch (err) {
console.log(err);
return err;
}
};
在标记为2的那一行，getObject返回的data.Body是一个缓冲区（Buffer），我把它写入一个PassThrough流，从而将其转换为流。
是否可以在不使用缓冲区的情况下做到这一点？我的想法是，如果日志文件很大，先把整个文件读入缓冲区再转换为流的做法效率很低。我想知道是否可以像标记为1的那一行那样直接使用流。
编辑：我做了更多测试，并让它正常运行了，但用的不是异步（async）lambda函数。代码如下：
exports.handler = function (event, context, callback) {
// for testing I'm looking at the first record
let bucket = event.Records[0].s3.bucket.name;
let key = event.Records[0].s3.object.key;
const readStream = S3.getObject({ Bucket: bucket, Key: key }).createReadStream();
const rl = readline.createInterface({
input: readStream,
crlfDelay: Infinity
});
rl.on('line', (line) => {
console.log(line);
});
}
有人知道为什么这段重构后的代码可以工作，而使用异步（async）lambda的版本却不行吗？