如何使用boto3将数据从csv上传到aws kinesis
我尝试了三种方法,并且都对我有用。
1)要将csv中的数据分块上传到kinesis 2)将本地生成的随机数据上传到运动学 3)使用boto3从本地将ksv数据逐行上传到运动学
此外,如何从kinesis到python sdk使用数据
答案 0 :(得分:0)
import csv
import json
import boto3
from random import randint
def chunkit(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
kinesis = boto3.client("kinesis")
with open("flights.csv") as f:
#Creating the ordered Dict
reader = csv.DictReader(f)
#putting the json as per the number of chunk we will give in below function
#Create the list of json and push like a chunk. I am sending 100 rows together
records = chunkit([{"PartitionKey": 'sau', "Data": json.dumps(row)} for row in reader], 100)
for chunk in records:
kinesis.put_records(StreamName="Flight-Simulator", Records=chunk)
答案 1 :(得分:0)
Method2- Random generated JSON to Kinesis
#Generating the random number of record and sendint to Kinesis data stream
import boto3
import json
from datetime import datetime
import calendar
import random
import time
my_stream_name = 'Flight-Simulator'
kinesis_client = boto3.client('kinesis', region_name='us-east-1')
def put_to_stream(thing_id, property_value, property_timestamp):
payload = {
'prop': str(property_value),
'timestamp': str(property_timestamp),
'thing_id': thing_id
}
print(payload)
put_response = kinesis_client.put_record(
StreamName=my_stream_name,
Data=json.dumps(payload),
PartitionKey=thing_id)
while True:
property_value = random.randint(40, 120)
property_timestamp = calendar.timegm(datetime.utcnow().timetuple())
thing_id = 'aa-bb'
put_to_stream(thing_id, property_value, property_timestamp)
# wait for 5 second
time.sleep(5)
答案 2 :(得分:0)
Method3 - Row by row from csv to Kinesis
#Sending the data from CSV to Kinesis data stream row by row
my_stream_name = 'Flight-Simulator'
thing_id ='XYZ'
kinesis_client = boto3.client('kinesis', region_name='us-east-1')
with open("flights_Test.csv") as f:
#Creating the ordered Dict
reader = csv.DictReader(f)
for row in reader:
put_response = kinesis_client.put_record(
StreamName=my_stream_name,
Data=json.dumps(row),
PartitionKey=thing_id)
答案 3 :(得分:0)
# Consumer SDK using python3
import boto3
import json
from datetime import datetime
import time
my_stream_name = 'Flight-Simulator'
kinesis_client = boto3.client('kinesis', region_name='us-east-1')
#Get the description of kinesis shard, it is json from which we will get the the
shard ID
response = kinesis_client.describe_stream(StreamName=my_stream_name)
my_shard_id = response['StreamDescription']['Shards'][0]['ShardId']
shard_iterator = kinesis_client.get_shard_iterator(StreamName=my_stream_name,
ShardId=my_shard_id,
ShardIteratorType='LATEST')
my_shard_iterator = shard_iterator['ShardIterator']
record_response = kinesis_client.get_records(ShardIterator=my_shard_iterator,
Limit=2)
while 'NextShardIterator' in record_response:
record_response =
kinesis_client.get_records(ShardIterator=record_response['NextShardIterator'],
Limit=2)
if len(record_response['Records'])>0:
print(json.loads(record_response['Records'][0]['Data']))
time.sleep(5)