跳过第一行-使用get_object API读取对象时

时间:2018-10-11 17:05:26

标签: python python-3.x amazon-web-services api lambda

如何跳过第一行-使用get_object API读取对象时

import os
import boto3
import json
import logging

def lambda_handler(event, context):
    """Copy the lines of the S3 object named in ``event`` into DynamoDB.

    The bucket name and object key are taken from the first entry of
    ``event['Records']``. The object body is decoded as UTF-8, split on
    newlines, and each line is split on commas; the first four fields are
    written as one item with the attributes ``x``, ``c``, ``w`` and ``f``.
    The target table name comes from the ``DB_TABLE_NAME`` environment
    variable. ``context`` is not used.

    Returns an error string when a KeyError is raised inside the outer
    ``try`` (reported as a missing ``DB_TABLE_NAME``), an error tuple when
    the table is not found, and None otherwise.
    """

    # Fetch the bucket name and the file
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']


    # Generate record in DynamoDB
    try :
        # Declare S3 bucket and DynamoDB Boto3 Clients
        s3_client = boto3.client('s3')
        dynamodb = boto3.resource('dynamodb')

        # Read the Object using get_object API
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        # The whole body is read at once; rows[0] is the first line of the
        # file (the header line, per the question this snippet belongs to).
        rows = obj['Body'].read().decode("utf-8").split('\n')

        tableName = os.environ['DB_TABLE_NAME']
        table = dynamodb.Table(tableName)

        # NOTE(review): no name `log` is defined in the visible code (only the
        # `logging` module is imported) -- confirm it exists elsewhere,
        # otherwise this line raises NameError.
        log.info("TableName: " + tableName)

        # Need client just to access the Exception
        dynamodb_client = boto3.client('dynamodb')

        try :
            # Write the CSV file to the DynamoDB Table
            with table.batch_writer() as batch:
                # Every element of rows is written, including rows[0]; nothing
                # here skips the first line.
                for row in rows:       
                    # A row with fewer than four comma-separated fields
                    # raises IndexError on the missing index.
                    batch.put_item(Item={
                        'x': row.split(',')[0],
                        'c': row.split(',')[1],
                        'w': row.split(',')[2],
                        'f': row.split(',')[3]
                        })


            print('Finished Inserting into TableName: ' + tableName)
        except dynamodb_client.exceptions.ResourceNotFoundException as tableNotFoundEx:
            # Returns a 2-tuple (message, table name), not a single string.
            return ('ERROR: Unable to locate DynamoDB table: ', tableName)


    # Catches any KeyError raised in the outer try body; the message assumes
    # it came from the os.environ lookup.
    except KeyError as dynamoDBKeyError:
        msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
        print(dynamoDBKeyError)
        return msg;

以上代码读取CSV文件并将其插入DynamoDB表。问题在于:标题行(列名)也被插入到了表中。如何跳过第一行,从第二行开始解析?`next` 对我不起作用。

1 个答案:

答案 0 :(得分:3)

也许不是最好的解决方案,但这应该可以解决问题:

import os
import boto3
import json
import logging

def lambda_handler(event, context):
    """Load a CSV object from S3 into DynamoDB, skipping the header row.

    The bucket name and object key are taken from the first entry of
    ``event['Records']``. The object is downloaded, decoded as UTF-8 and
    split into lines. The first line (the column names) is skipped, and
    every remaining non-empty line is split on commas, with its first four
    fields written as one item with the attributes ``x``, ``c``, ``w`` and
    ``f``.

    Args:
        event: S3 event notification payload.
        context: Lambda context object (unused).

    Returns:
        An error string when the ``DB_TABLE_NAME`` environment variable is
        missing, the tuple ``('ERROR: Unable to locate DynamoDB table: ',
        table_name)`` when the table does not exist, and None on success.

    Raises:
        IndexError: if a non-empty data row has fewer than four fields.
    """
    # Local import so the snippet stays self-contained.
    from urllib.parse import unquote_plus

    # The original referenced an undefined name `log`; bind a real logger.
    log = logging.getLogger(__name__)

    # Fetch the bucket name and the file
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys are URL-encoded in S3 event notifications (a space arrives
    # as '+'), so decode before passing the key to get_object.
    key = unquote_plus(s3_info['object']['key'])

    # Check the configuration first, and keep the KeyError handler limited to
    # this lookup so an unrelated KeyError is not misreported.
    try:
        tableName = os.environ['DB_TABLE_NAME']
    except KeyError as dynamoDBKeyError:
        msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
        print(dynamoDBKeyError)
        return msg

    # Declare S3 bucket and DynamoDB Boto3 Clients
    s3_client = boto3.client('s3')
    dynamodb = boto3.resource('dynamodb')

    # Read the Object using get_object API
    obj = s3_client.get_object(Bucket=bucket, Key=key)
    rows = obj['Body'].read().decode("utf-8").split('\n')

    table = dynamodb.Table(tableName)
    log.info("TableName: %s", tableName)

    try:
        # Write the CSV file to the DynamoDB Table
        with table.batch_writer() as batch:
            # rows[1:] drops the header line.
            for row in rows[1:]:
                # Tolerate CRLF line endings.
                row = row.rstrip('\r')
                # Skip blank lines, e.g. the empty string left after the
                # file's final newline.
                if not row:
                    continue
                fields = row.split(',')
                batch.put_item(Item={
                    'x': fields[0],
                    'c': fields[1],
                    'w': fields[2],
                    'f': fields[3],
                })
    # The resource's underlying client exposes the modeled exceptions, so no
    # second client is needed.
    except dynamodb.meta.client.exceptions.ResourceNotFoundException:
        return ('ERROR: Unable to locate DynamoDB table: ', tableName)

    print('Finished Inserting into TableName: ' + tableName)

使用 `for i in range(1, len(rows))` 循环可能会更好,但上面的写法对原有代码的改动更少。