我正在为数据处理管道编写一个简单的Lambda函数,该函数由Kinesis Firehose调用。此函数从URL下载图像,将其上传到S3,使用OpenCV检查图像中是否包含人脸,如果有,则将图像提交到Rekognition中建立索引。
在本地运行时,我没有遇到任何问题:下载、上传和处理全部正常完成,没有报错。但是,在Lambda上运行时,它会报错,提示进程在完成请求之前就退出了(Process exited before completing request)。
'use strict';
const AWS = require('aws-sdk');
const request = require('request');
const cv = require('opencv');
// S3 client; uploadImage uses it to store each downloaded image.
const s3 = new AWS.S3();
// Rekognition client; indexFacesJackal uses it to index images in which OpenCV found faces.
const rek = new AWS.Rekognition();
/**
 * Downloads the image referenced by a record and stores it in S3.
 *
 * @param {Object} data - Record payload. Reads `data.images.standard` (the
 *   image URL) and `data._id` (used to build the S3 key `<_id>.jpg`).
 * @returns {Promise<{record: Object, buffer: Buffer}>} Resolves with the
 *   original record and the downloaded bytes once the S3 upload succeeds.
 *   Rejects with `{ type: 'err', err }` when the HTTP request or the S3
 *   upload fails, or with `{ type: 'fail', code }` when the HTTP response
 *   status is not 200.
 */
const uploadImage = data => {
  return new Promise((resolve, reject) => {
    request({
      url: data.images.standard,
      encoding: null // makes `body` a Buffer instead of a decoded string
    }, (fetchErr, res, body) => {
      if (fetchErr) return reject({ type: 'err', err: fetchErr });
      if (res.statusCode !== 200) return reject({ type: 'fail', code: res.statusCode });
      console.log(`Fetched ${data._id}`);
      // Upload to S3
      s3.putObject({
        Bucket: 'mybucket',
        Key: `${data._id}.jpg`,
        ContentType: res.headers['content-type'],
        // Take the size from the bytes actually received; the
        // Content-Length response header is optional and may be missing.
        ContentLength: body.length,
        Body: body
      }, (uploadErr) => {
        // `type` must be the string 'err' so callers can branch on it.
        if (uploadErr) return reject({ type: 'err', err: uploadErr });
        resolve({ record: data, buffer: body });
      });
    });
  });
};
/**
 * Runs OpenCV face detection on a downloaded image and flags the result.
 *
 * @param {{record: Object, buffer: Buffer}} data - Output of uploadImage;
 *   `data.buffer` is passed to `cv.readImage` and `data.record._id` is logged.
 * @returns {Promise<Object>} Resolves with the same `data` object after
 *   setting `data._jackal` to `true` when at least one face was detected and
 *   `false` otherwise. Rejects with `{ type: 'err', err }` when OpenCV
 *   reports an error or when anything throws while handling its callbacks.
 */
const indexFacesLocal = data => {
  return new Promise((resolve, reject) => {
    // An exception thrown inside a callback is outside the Promise executor,
    // so it would not reject this Promise and could surface as an uncaught
    // exception instead. Route it into a rejection.
    const guarded = fn => (...args) => {
      try {
        return fn(...args);
      } catch (thrown) {
        return reject({ type: 'err', err: thrown });
      }
    };

    cv.readImage(data.buffer, guarded((readErr, image) => {
      if (readErr) return reject({ type: 'err', err: readErr });
      image.detectObject(cv.FACE_CASCADE, {}, guarded((detectErr, faces) => {
        if (detectErr) return reject({ type: 'err', err: detectErr });
        // Treat a missing result as "no faces".
        const found = faces || [];
        console.log(`Completed OpenCV ${data.record._id}, should process = ${!!found.length}`);
        data._jackal = !!found.length;
        resolve(data);
      }));
    }));
  });
};
/**
 * Indexes an image in Rekognition when the local OpenCV pass flagged it.
 *
 * @param {{record: Object, _jackal: boolean}} data - Output of
 *   indexFacesLocal. When `_jackal` is falsy Rekognition is not called.
 * @returns {Promise<Object>} Resolves with `data.record` (the buffer and the
 *   flag are dropped). Rejects with `{ type: 'err', err }` when
 *   `rek.indexFaces` reports an error.
 */
const indexFacesJackal = data => new Promise((resolve, reject) => {
  if (!data._jackal) {
    resolve(data.record);
    return;
  }

  // Only the record is needed from here on; the buffer is no longer used.
  const record = data.record;
  const s3Object = {
    Bucket: `mybucket`,
    Name: `${record._id}.jpg`,
  };
  const indexRequest = {
    CollectionId: process.env.REK_COLLECTION,
    Image: { S3Object: s3Object },
    DetectionAttributes: ['ALL'],
    ExternalImageId: record._id,
  };

  rek.indexFaces(indexRequest, (err, response) => {
    if (err) {
      reject({ type: 'err', err });
      return;
    }
    console.log(`Indexed on Rek ${record._id}`);

    if (response.FaceRecords.length) {
      // Do some data manipulation stuff here, nothing big
      // just used to reformat AWS response
      console.log(`Mapped ${record._id}`);
    }
    resolve(record);
  });
});
exports.handler = function(event, ctx, callback) {
/* Process the list of records and transform them */
Promise.all(event.records.map(record => {
return uploadImage(record.data)
.then(indexFacesLocal)
.then(indexFacesJackal)
.then(data => {
return {
recordId: record.recordId,
result: 'Ok',
data: data,
}
}).catch(res => {
if (res.type == 'fail') {
// Unable to fetch media from Instagram
console.log(`[${res.code}] - ${record.recordId}`);
return {
recordId: record.recordId,
result: 'Dropped'
}
}
console.log(`Processing failed for ${record.recordId}`);
console.log(res.err.stack);
return {
recordId: record.recordId,
result: 'ProcessingFailed'
}
})
})).then(output => {
console.log(`Processing completed, handled ${output.length} items`)
callback(null, { records: output })
})
};
在本地运行时,我的输出是:
Fetched 1392753031552166622
Fetched 1379923796962022364
Fetched 1392750801239391628
Fetched 1392748163315653017
Completed OpenCV 1379923796962022364, should process = true
Completed OpenCV 1392748163315653017, should process = false
Completed OpenCV 1392750801239391628, should process = true
Completed OpenCV 1392753031552166622, should process = true
Indexed on Rek 1379923796962022364
Mapped 1379923796962022364
Indexed on Rek 1392750801239391628
Mapped 1392750801239391628
Indexed on Rek 1392753031552166622
Mapped 1392753031552166622
Processing completed, handled 4 items
{ records:
[ { recordId: '1379923796962022364', result: 'Ok', data: [Object] },
{ recordId: '1392748163315653017', result: 'Ok', data: [Object] },
{ recordId: '1392750801239391628', result: 'Ok', data: [Object] },
{ recordId: '1392753031552166622', result: 'Ok', data: [Object] } ] }
在AWS上运行时,我得到:
START Version: $LATEST
Fetched 1392753031552166622
Fetched 1392748163315653017
Fetched 1392750801239391628
Fetched 1379923796962022364
Completed OpenCV 1379923796962022364, should process = true
Completed OpenCV 1392748163315653017, should process = false
Completed OpenCV 1392750801239391628, should process = true
END
Process exited before completing request
我已经检查了内存分配:在分配的512MB中只使用了约130MB。没有抛出其他错误。针对Amazon Linux构建的OpenCV版本可以成功导入,所以也不是二进制文件的问题。我还检查了超时设置,设置为5分钟(每次运行约8秒)。