我正在用 Python 的 google-cloud-datastore 模块编写脚本,把 CSV 文件中的数据上传到 Datastore。脚本看起来运行正常,但有一个问题让我卡住了:CSV 中的整数值被存储成了浮点数。这是向 Datastore 发送数据时的默认行为,还是我哪里做错了?
这是我的代码:
import sys
import getopt
import pandas as pd
from google.cloud import datastore
def write_dict_chunks(data, SIZE=100):
    """Upload row dicts to Cloud Datastore in batches of ``SIZE``.

    Each dict in ``data`` becomes one entity. The entity kind is read from
    the module-level name ``kind``, which must be set before this function
    is called. Every entity is created from the same incomplete key built
    by ``datastore_client.key(kind)``.

    Values are cleaned before upload:
      * float NaN (how pandas marks an empty cell) is stored as ``None``;
      * byte strings are decoded as UTF-8 text;
      * everything else is passed through unchanged.

    Args:
        data: list of dicts mapping property name to value, e.g. the result
            of ``DataFrame.to_dict(orient='records')``.
        SIZE: number of entities sent per ``put_multi`` call. Must be a
            positive integer.
            NOTE(review): Datastore caps the number of entities per commit
            (500 at the time of writing -- confirm against current limits).

    Prints a running total after every batch.
    """
    def clean_value(value):
        # NaN is the only float that is not equal to itself. Checking it
        # this way avoids str() on the value, which on Python 2 raises
        # UnicodeEncodeError for non-ASCII unicode text and would also
        # wrongly drop a cell whose text is literally "nan".
        if isinstance(value, float) and value != value:
            return None
        # On Python 2 ``bytes`` is ``str``; on Python 3 text is already str.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    log_count = 0
    datastore_client = datastore.Client()
    task_key = datastore_client.key(kind)

    # range() rather than xrange() so the code runs on Python 2 and 3; the
    # sequence only holds one index per batch, so its size is negligible.
    for start in range(0, len(data), SIZE):
        entities = []
        for each_entry in data[start:start + SIZE]:
            entity = datastore.Entity(key=task_key)
            entity.update({k: clean_value(v) for k, v in each_entry.items()})
            entities.append(entity)
        datastore_client.put_multi(entities)
        log_count += len(entities)
        print('Wrote {} entities to datastore'.format(log_count))
# Command-line driver: parse --kind / --filepath, load the CSV with pandas
# and upload its rows through write_dict_chunks().
usage = 'Usage: python parse_csv.py --kind=kind_name --filepath=path_to_csv'

try:
    # The short options -h, -o <arg> and -v are accepted but not acted upon.
    opts, args = getopt.getopt(sys.argv[1:], "ho:v", ["kind=", "filepath="])
except getopt.GetoptError as err:
    print(str(err))  # will print something like "option -a not recognized"
    print(usage)
    # Exit here: without it, execution falls through to the loop over
    # ``opts`` below, which was never assigned, and dies with a NameError.
    sys.exit(2)

if args:
    for each in args:
        print('Unrecognized argument: ' + each)
    sys.exit(2)

# ``kind`` must remain a module-level name: write_dict_chunks() reads it.
kind = None
filepath = None
for option, argument in opts:
    # Compare with ==; ``option in '--kind'`` is a substring test.
    if option == '--kind':
        kind = argument
    elif option == '--filepath':
        filepath = argument

if not kind or not filepath:
    # Fail early with the usage line instead of an obscure error from
    # pd.read_csv(None) or from building a key with no kind.
    print(usage)
    sys.exit(2)

df = pd.read_csv(filepath)
# Cast to object before building the records so each cell keeps its own
# Python type. NOTE(review): older pandas releases appear to build
# to_dict(orient='records') from DataFrame.values, which upcasts a frame
# mixing int and float columns to float -- likely why integers arrive in
# Datastore as floats; confirm against the installed pandas version.
# An integer column that contains empty cells is still parsed as float64
# by read_csv itself; this cast does not change that.
df = df.astype(object).to_dict(orient='records')
write_dict_chunks(df)