Writing data from Kinesis to S3

Asked: 2017-03-16 10:44:12

Tags: java aws-lambda amazon-kinesis amazon-kinesis-firehose

I am using the AWS SDK to write data from a Java application that publishes to a Kinesis stream. Records are sent in batches of 10 using the following code:

// Convert to JSON object, and then to bytes...
ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();
String json = ow.writeValueAsString(transaction);

// Add byte array to PutRecordsRequestEntry
PutRecordsRequestEntry record = new PutRecordsRequestEntry();
record.setPartitionKey(String.valueOf(java.util.UUID.randomUUID()));
record.setData(ByteBuffer.wrap(json.getBytes()));

// Add to list...
batch.add(record);

// Check and send batches
if (counter >= batchLimit) {

    logger.info("Sending batch of " + batchLimit + " rows.");

    putRecordsRequest.setRecords(batch);
    PutRecordsResult result = amazonKinesisClient.putRecords(putRecordsRequest);
    batch = new ArrayList<>();
    counter = 0;

} else {
    counter++;
}

I then have a Node.js Lambda function that is triggered for each transaction received on the Kinesis stream. The idea is that it takes the transactions coming from Kinesis and puts them onto a Firehose delivery stream so that they get saved to S3.

var AWS = require('aws-sdk');
var firehose = new AWS.Firehose();

exports.handler = function(event, context) {

    console.log(event);

    var params = {
        DeliveryStreamName: "transaction-postings",
        Record: { 
            Data:  decodeURIComponent(event)
        }
    };
    firehose.putRecord(params, function(err, data) {
        if (err) console.log(err, err.stack); // an error occurred
        else    {  
            console.log(data);           // successful response
        }

        context.done();
    });
};

However, when looking at the data on S3, all I see is the following, rather than the list of JSON objects I was expecting...

[object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object]

Can someone point out what I am missing to get the data from Kinesis into S3 as JSON objects?

2 Answers:

Answer 0 (score: 1)

Data:  decodeURIComponent(event)

You need to serialize the event, because Lambda automatically deserializes the parameter, i.e.:

Data: JSON.stringify(decodeURIComponent(event))
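
To illustrate why the S3 files contained [object Object], here is a small standalone sketch using a hypothetical event payload (not the real Kinesis event):

// Illustration only: "event" here is a hypothetical payload, not a real Kinesis event.
// decodeURIComponent() coerces its argument to a string, so passing the raw event
// object produces the literal text "[object Object]" that ended up in S3.
var event = { transactionId: 123, amount: 9.99 };

console.log(decodeURIComponent(event));  // "[object Object]"  <- what was delivered
console.log(JSON.stringify(event));      // {"transactionId":123,"amount":9.99}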

Answer 1 (score: 0)

For those who just want the code change required: to get the actual message from the producer into S3, the data property of the PutRecordsRequestEntry needs to be decoded. In other words, the code blocks below show the dependency used and a Lambda function that parses the data arriving on the Kinesis stream...

var AWS = require('aws-sdk');
var firehose = new AWS.Firehose();
var firehoseStreamName = "transaction-postings";

exports.handler = function(event, context) {

    // This is the actual transaction, encapsulated with Kinesis Put properties
    var transaction = event;

    // Decode the base64-encoded data property - this is all we need
    var buf = new Buffer(transaction.data, "base64");

    // Convert it to a readable UTF-8 string
    var jsonString = buf.toString("utf8");

    // Prepare the record for the postings Firehose delivery stream...
    var params = { 
        DeliveryStreamName: firehoseStreamName, 
        Record: { 
            Data:  jsonString
        }
    };

    // Store data!
    firehose.putRecord(params, function(err, data) {
        if (err) {

            // This needs to be fired to Kinesis in the future...
            console.log(err, err.stack); 
        }
        else{  
            console.log(data);            
        }

        context.done();
    });
};
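
A side note that is not part of the original answer: Firehose concatenates records exactly as given when it writes them to S3, so to end up with one JSON object per line it is common to append a newline delimiter to each record, for example:

// Optional tweak (assumption, not in the answer above): newline-delimit each record
// so the JSON objects are separated in the files Firehose writes to S3.
var params = {
    DeliveryStreamName: firehoseStreamName,
    Record: {
        Data: jsonString + "\n"
    }
};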

The base64 decoding is needed because a record sent using the AWS producer dependency

<dependency>
    <groupId>com.amazonaws</groupId>
    <artifactId>amazon-kinesis-producer</artifactId>
    <version>0.12.3</version>
</dependency>

looks like this:

{
  "kinesisSchemaVersion": "1.0",
  "partitionKey": "cb3ff3cd-769e-4d48-969d-918b5378e81b",
  "sequenceNumber": "49571132156681255058105982949134963643939775644952428546",
  "data": "[base64 string]",
  "approximateArrivalTimestamp": 1490191017.614
}
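
For reference, when the function is invoked through a standard Kinesis event source mapping, records of this shape usually arrive wrapped under event.Records[i].kinesis rather than as the event itself. A minimal sketch of a handler for that wrapped shape (assuming the same delivery stream name, and newline-delimiting each record as noted above) might look like this:

var AWS = require('aws-sdk');
var firehose = new AWS.Firehose();

exports.handler = function(event, context) {

    // Each element of event.Records carries the Kinesis payload under the "kinesis" key
    var records = (event.Records || []).map(function(record) {
        var jsonString = new Buffer(record.kinesis.data, "base64").toString("utf8");
        return { Data: jsonString + "\n" };
    });

    // Forward the whole batch to Firehose in one call
    firehose.putRecordBatch({
        DeliveryStreamName: "transaction-postings",
        Records: records
    }, function(err, data) {
        if (err) console.log(err, err.stack);
        else console.log(data);

        context.done();
    });
};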