我目前正在努力解决这个错误试图告诉我的问题。我无法判断它是指我输入的一些数据,还是它是否是我创建的数据字典的引用。我甚至很难制定一个问题,这对我来说太困惑了。
Traceback (most recent call last):
File "push data.py", line 78, in <module>
uploadFile(filename, uri, dbname)
File "push data.py", line 69, in uploadFile
docs = upload(db,docs)
File "push data.py", line 27, in upload
db.bulk_save(docs)
File "C:\Python27\lib\site-packages\couchdbkit\client.py", line 564, in save_docs
payload=payload, **params).json_body
File "C:\Python27\lib\site-packages\restkit\resource.py", line 144, in post
headers=headers, params_dict=params_dict, **params)
File "C:\Python27\lib\site-packages\couchdbkit\resource.py", line 105, in request
payload = json.dumps(payload).encode('utf-8')
File "C:\Python27\lib\json\__init__.py", line 244, in dumps
return _default_encoder.encode(obj)
File "C:\Python27\lib\json\encoder.py", line 207, in encode
chunks = self.iterencode(o, _one_shot=True)
File "C:\Python27\lib\json\encoder.py", line 270, in iterencode
return _iterencode(o, 0)
UnicodeDecodeError: 'utf8' codec can't decode byte 0x92 in position 3: invalid start byte
修改
这是我的代码,我实际上从这个问题的第一个答案中偷了它 - &gt; How to import CSV/TSV data to Couch DB?。我的困难是试图找到文档以确保我没有在我的方面做错任何事。如果有人知道我在哪里可以找到couchdbkit文档,那就太好了!他们的官方网站已关闭。
#!/usr/bin/env python
from couchdbkit import Server, Database
from couchdbkit.loaders import FileSystemDocsLoader
from csv import DictReader
import sys, subprocess, math, os
def parseDoc(doc):
for k,v in doc.items():
if (isinstance(v,str)):
print k, v, v.isdigit()
# #see if this string is really an int or a float
if v.isdigit()==True: #int
doc[k] = int(v)
else: #try a float
try:
if math.isnan(float(v))==False:
doc[k] = float(v)
except:
pass
return doc
def upload(db, docs):
db.bulk_save(docs)
del docs
return list()
def uploadFile(fname, uri, dbname):
print 'Upload contents of %s to %s/%s' % (fname, uri, dbname)
# #connect to the db
theServer = Server(uri)
db = theServer.get_or_create_db(dbname)
#loop on file for upload
reader = DictReader(open(fname, 'rU'), dialect = 'excel') #see the python csv module
#for other options, such as using the tab delimeter. The first line in your csv
#file should contain all of the "key" and all subsequent lines hold the values
#for those keys.
#used for bulk uploading
docs = list()
checkpoint = 100
for doc in reader:
newdoc = parseDoc(doc) #this just converts strings that are really numbers into ints and floats
#Here I check to see if the doc is already on the database. If it is, then I assign
#the _rev key so that it updates the doc on the db.
print('\n')
print(newdoc)
print('\n')
if db.doc_exist(newdoc.get('_id')):
newdoc['_rev'] = db.get_rev(newdoc.get('_id'))
docs.append(newdoc)
if len(docs)%checkpoint==0:
docs = upload(db,docs)
#don't forget the last batch
docs = upload(db,docs)
if __name__=='__main__':
filename = "Kardiology Data.csv"
uri = "http://localhost:5984"
dbname = "kardiology"
uploadFile(filename, uri, dbname)
答案 0 :(得分:0)
存在问题:
reader = DictReader(open(fname, 'rU'), dialect = 'excel')
尝试添加UTF-8编码:
reader = DictReader(open(fname, 'rU'),encoding='utf-8', dialect = 'excel')