Kinesis Firehose with Lambda decorator getting killed

Date: 2018-02-24 14:53:37

Tags: python-2.7 aws-lambda amazon-redshift amazon-vpc amazon-kinesis-firehose

I am using Firehose with a Lambda decorator to ingest VPC Flow Logs into Redshift (VPC Flow Logs -> Kinesis Data Stream -> Kinesis Firehose -> Lambda decorator -> Redshift). When unprocessed records are re-ingested back into Firehose, the volume is large enough that the Lambda errors out with task timeouts. The Lambda is already configured with the maximum timeout and 3 GB of memory.
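
For reference, each Firehose record the Lambda receives carries a CloudWatch Logs subscription payload: once base64-decoded and gunzipped it is a JSON document of roughly this shape (all values below are illustrative; extractedFields is populated by the subscription filter pattern naming the flow-log fields):

    # Illustrative decoded payload (all values made up); processRecords()
    # below expects this shape for DATA_MESSAGE records.
    decoded_payload = {
        "messageType": "DATA_MESSAGE",
        "owner": "123456789012",
        "logGroup": "vpc-flow-logs",
        "logStream": "eni-0123456789abcdef0-all",
        "subscriptionFilters": ["vpc-flow-logs-filter"],
        "logEvents": [
            {
                "id": "31953106606966983378809025079804211143289615424298221568",
                "timestamp": 1519483200000,
                "message": "2 123456789012 eni-0123456789abcdef0 10.0.0.1 10.0.0.2 443 49152 6 10 840 1519483200 1519483260 ACCEPT OK",
                "extractedFields": {
                    "version": "2", "account_id": "123456789012",
                    "interface_id": "eni-0123456789abcdef0",
                    "srcaddr": "10.0.0.1", "dstaddr": "10.0.0.2",
                    "srcport": "443", "dstport": "49152",
                    "protocol": "6", "packets": "10", "bytes": "840",
                    "start": "1519483200", "end": "1519483260",
                    "action": "ACCEPT", "log_status": "OK"
                }
            }
        ]
    }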

I think the problem is related to Lambda's 6 MB payload limit. Is there a way to batch or otherwise reduce the payload so the function doesn't error out? Thanks in advance. (A rough batching sketch follows the code below.)

    import base64
    import json
    import gzip
    import StringIO
    import boto3
    import datetime

    def transformLogEvent(log_event):
        version     = log_event['extractedFields']['version']
        accountid   = log_event['extractedFields']['account_id']
        interfaceid = log_event['extractedFields']['interface_id']
        srcaddr     = log_event['extractedFields']['srcaddr']
        dstaddr     = log_event['extractedFields']['dstaddr']
        srcport     = log_event['extractedFields']['srcport']
        dstport     = log_event['extractedFields']['dstport']
        protocol    = log_event['extractedFields']['protocol']
        packets     = log_event['extractedFields']['packets']
        bytes       = log_event['extractedFields']['bytes']
        starttime   = datetime.datetime.fromtimestamp(int(log_event['extractedFields']['start'])).strftime('%Y-%m-%d %H:%M:%S')
        endtime     = datetime.datetime.fromtimestamp(int(log_event['extractedFields']['end'])).strftime('%Y-%m-%d %H:%M:%S')
        action      = log_event['extractedFields']['action']
        logstatus   = log_event['extractedFields']['log_status']

        row = '"' + str(version) + '"' + "," + '"' + str(accountid) + '"' + "," + '"' + str(interfaceid) + '"' + "," + '"' + str(srcaddr) + '"' + "," + '"' + str(dstaddr) + '"' + "," + '"' + str(srcport) + '"' + "," + '"' + str(dstport) + '"' + "," + '"' + str(protocol) + '"' + "," + '"' + str(packets) + '"' + "," + '"' + str(bytes) + '"' + "," + '"' + str(starttime) + '"' + "," + '"' + str(endtime) + '"' + "," + '"' + str(action) + '"' + "," + '"' + str(logstatus) + '"' + "\n"
        #print(row)
        return row

    def processRecords(records):
        for r in records:
            data = base64.b64decode(r['data'])
            striodata = StringIO.StringIO(data)
            try:
                with gzip.GzipFile(fileobj=striodata, mode='r') as f:
                    data = json.loads(f.read())
            except IOError:
                # likely the data was re-ingested into firehose
                pass

            recId = r['recordId']
            # re-ingested data into firehose
            if type(data) == str:
                yield {
                    'data': data,
                    'result': 'Ok',
                    'recordId': recId
                }
            elif data['messageType'] != 'DATA_MESSAGE':
                yield {
                    'result': 'ProcessingFailed',
                    'recordId': recId
                }
            else:
                data = ''.join([transformLogEvent(e) for e in data['logEvents']])
                #print(data)
                data = base64.b64encode(data)
                yield {
                    'data': data,
                    'result': 'Ok',
                    'recordId': recId
                }


    def putRecords(streamName, records, client, attemptsMade, maxAttempts):
        failedRecords = []
        codes = []
        errMsg = ''
        try:
            response = client.put_record_batch(DeliveryStreamName=streamName, Records=records)
        except Exception as e:
            failedRecords = records
            errMsg = str(e)

        # if there are no failedRecords (put_record_batch succeeded), iterate over the response to gather results
        if not failedRecords and response['FailedPutCount'] > 0:
            for idx, res in enumerate(response['RequestResponses']):
                # successful entries do not necessarily include an ErrorCode key
                if not res.get('ErrorCode'):
                    continue

                codes.append(res['ErrorCode'])
                failedRecords.append(records[idx])

            errMsg = 'Individual error codes: ' + ','.join(codes)

        if len(failedRecords) > 0:
            if attemptsMade + 1 < maxAttempts:
                print('Some records failed while calling PutRecords, retrying. %s' % (errMsg))
                putRecords(streamName, failedRecords, client, attemptsMade + 1, maxAttempts)
            else:
                raise RuntimeError('Could not put records after %s attempts. %s' % (str(maxAttempts), errMsg))


    def handler(event, context):
        streamARN = ''
        region = ''
        streamName = ''

        records = list(processRecords(event['records']))
        projectedSize = 0
        recordsToReingest = []
        for idx, rec in enumerate(records):
            if rec['result'] == 'ProcessingFailed':
                continue
            projectedSize += len(rec['data']) + len(rec['recordId'])
            # 4000000 instead of 6291456 to leave ample headroom for the stuff we didn't account for
            if projectedSize > 4000000:
                recordsToReingest.append({
                    'Data': rec['data']
                })
                records[idx]['result'] = 'Dropped'
                del(records[idx]['data'])

        if len(recordsToReingest) > 0:
            client = boto3.client('firehose', region_name=region)
            putRecords(streamName, recordsToReingest, client, attemptsMade=0, maxAttempts=20)
            print('Reingested %d records out of %d' % (len(recordsToReingest), len(event['records'])))
        else:
            print('No records to be reingested')

        return {"records": records}

0 Answers:

No answers