CSV导入sqlite3数据库:将列表从UTF-8转换为unicode

时间:2011-10-27 16:11:05

标签: python sqlite

import csv, sqlite3

# Open the SQLite database file and obtain a cursor for issuing SQL.
conn = sqlite3.connect("mycustomers9.sql")
curs =  conn.cursor()
try:
    # Create the 26-column destination table "t".
    curs.execute("CREATE TABLE t (unknown1 TEXT, county TEXT, businessName TEXT, address1 TEXT, city1 TEXT, zip1 INTEGER, phone1 INTEGER,Email1 TEXT, approvalstatus TEXT, date1 TEXT, date2 TEXT, typeofConstruct TEXT, typeofBiz TEXT, unknown2 TEXT, unknown3 TEXT, unknown4 TEXT, unknown5 TEXT, unknown6 TEXT,BizName2 TEXT,Address2 TEXT, City2 TEXT,Zip2 TEXT,Country2 TEXT,Phone2 TEXT,Email2 TEXT,Phone3 TEXT);")
except sqlite3.OperationalError:
    # NOTE(review): every OperationalError is reported as "table exists";
    # presumably that is the only failure expected here -- confirm.
    print "Table already exist"
# The CSV is opened in binary mode ('rb'); the accompanying write-up reports
# that the values read this way are UTF-8 encoded byte strings.
with open('HR_plan_review.csv', 'rb') as infile:
    dr = csv.DictReader(infile, delimiter = ',')
    # One 26-item tuple per CSV row, in the same order as the INSERT column
    # list used below. A missing header name raises KeyError.
    to_db = [(i["unknown1"], i['county'], i['businessName'], i['address1'], i['city1'], i['zip1'], i['phone1'], i['Email1'], i['approvalstatus'], i['date1'],i['date2'], i['typeofConstruct'], i['typeofBiz'], i['unknown2'], i['unknown3'], i['unknown4'], i['unknown5'], i['unknown6'], i['BizName2'], i['Address2'], i['City2'], i['Zip2'], i['Country2'], i['Phone2'], i['Email2'], i['Phone3']) for i in dr]

# Bulk insert with a parameterized statement: one "?" placeholder per column.
# NOTE(review): no conn.commit() follows in this snippet.
curs.executemany("INSERT INTO t (unknown1, county, businessName, address1, city1,zip1, phone1, Email1, approvalstatus, date1, date2,typeofConstruct, typeofBiz, unknown2, unknown3, unknown4,unknown5, unknown6,BizName2,Address2, City2,Zip2,Country2,Phone2,Email2,Phone3) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);", to_db)

to_db是一个以utf-8编码的字节串组成的列表,而sqlite数据库似乎要求使用unicode字符串。如何在运行上面的sql语句之前将“to_db”列表转换为unicode?以下是我运行上述代码时收到的错误消息。

  

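sqlite3.ProgrammingError:除非您使用能够解释8位字节串的text_factory(例如text_factory = str),否则不得使用8位字节串。强烈建议您改为将应用程序切换到Unicode字符串。(英文原文:You must not use 8-bit bytestrings unless you use a text_factory that can interpret 8-bit bytestrings (like text_factory = str). It is highly recommended that you instead just switch your application to Unicode strings.)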

编辑(根据回答中的建议):

修订后的代码(如下)现在成功执行,但它没有将从csv获取的值插入数据库。

import csv, sqlite3

# Open the SQLite database file and obtain a cursor for issuing SQL.
conn = sqlite3.connect("mycustomers12.sql")
curs =  conn.cursor()
try:
    # Create the 26-column destination table "t". The statement is a
    # triple-quoted string, so it already spans several lines.
    curs.execute(""" CREATE TABLE t (unknown1 TEXT, county TEXT, businessName TEXT, address1 TEXT, city1 TEXT, zip1 INTEGER, \n
    phone1 INTEGER,Email1 TEXT, approvalstatus TEXT, date1 TEXT, date2 TEXT, typeofConstruct TEXT, typeofBiz TEXT, unknown2 TEXT, \n
    unknown3 TEXT, unknown4 TEXT, unknown5 TEXT, unknown6 TEXT,BizName2 TEXT,Address2 TEXT, City2 TEXT,Zip2 TEXT,Country2 TEXT,\n
    Phone2 TEXT,Email2 TEXT,Phone3 TEXT);""")
except sqlite3.OperationalError:
    # NOTE(review): every OperationalError is reported as "table exists";
    # presumably that is the only failure expected here -- confirm.
    print "Table already exist"


# NOTE(review): the file is opened without a "with" block and is never closed
# in this snippet.
infile = open('HR_plan_review.csv', 'rb') 
dr = csv.DictReader(infile, delimiter = ',')
# Column names, used both for the INSERT column list and for the tuples below.
keys=("unknown1", 'county', 'businessName', 'address1',
    'city1', 'zip1', 'phone1', 'Email1', 'approvalstatus',
    'date1','date2', 'typeofConstruct', 'typeofBiz', 'unknown2',
    'unknown3', 'unknown4', 'unknown5', 'unknown6', 'BizName2',
    'Address2', 'City2', 'Zip2', 'Country2', 'Phone2',
    'Email2', 'Phone3')
# NOTE(review): the inner generator decodes each *key name*; `row` is never
# used, so every tuple contains the 26 column names rather than the values of
# that CSV row.
args=[tuple(key.decode('utf-8') for key in keys) for row in dr]
# Build the INSERT statement from the key tuple: comma-joined column names and
# one "?" placeholder per key.
sql='INSERT INTO t ({f}) VALUES ({p})'.format(
    f=','.join(keys),
    p=','.join(['?']*len(keys)))
# NOTE(review): no conn.commit() follows in this snippet.
curs.executemany(sql, args)

3 个答案:

答案 0 :(得分:1)

可悲的是,csv无法处理unicode(至少在Python 2.7中)。但是你可以通过将DictReader包装在生成器中来解决这个问题:

with open('HR_plan_review.csv', 'rb') as infile:
    dr = csv.DictReader(infile, delimiter=',')

    def unicoded_data():
        """Yield each CSV row as a dict whose values are unicode objects.

        The byte-string values produced by the reader are assumed to be
        UTF-8 encoded.
        """
        for raw_row in dr:
            yield {
                field: unicode(raw, encoding='utf-8')
                for field, raw in raw_row.iteritems()
            }

    # Fields are read from every row in this fixed order; a missing header
    # name raises KeyError.
    columns = (
        "unknown1", 'county', 'businessName', 'address1', 'city1', 'zip1',
        'phone1', 'Email1', 'approvalstatus', 'date1', 'date2',
        'typeofConstruct', 'typeofBiz', 'unknown2', 'unknown3', 'unknown4',
        'unknown5', 'unknown6', 'BizName2', 'Address2', 'City2', 'Zip2',
        'Country2', 'Phone2', 'Email2', 'Phone3',
    )
    to_db = [tuple(i[column] for column in columns) for i in unicoded_data()]

答案 1 :(得分:0)

# Column names, in the order used by both the INSERT statement and each
# parameter tuple.
keys = ("unknown1", 'county', 'businessName', 'address1',
        'city1', 'zip1', 'phone1', 'Email1', 'approvalstatus',
        'date1', 'date2', 'typeofConstruct', 'typeofBiz', 'unknown2',
        'unknown3', 'unknown4', 'unknown5', 'unknown6', 'BizName2',
        'Address2', 'City2', 'Zip2', 'Country2', 'Phone2',
        'Email2', 'Phone3')
# One tuple of unicode values per CSV row. Each value is looked up on the
# current `row` (the loop variable) and decoded from UTF-8 bytes.
args = [tuple(row[key].decode('utf-8') for key in keys) for row in dr]
# Column names come from the fixed tuple above; the row values are bound
# through "?" placeholders, one per column.
sql = 'INSERT INTO t ({f}) VALUES ({p})'.format(
    f=','.join(keys),
    p=','.join(['?'] * len(keys)))
curs.executemany(sql, args)

或者,作为更健壮的解决方案,您可以使用UnicodeDictReader——它是csv文档中UnicodeReader的略微修改版本,以unicode值的形式返回各行:

import codecs


class UTF8Recoder:
    """Iterator that reads an encoded stream and re-encodes it to UTF-8.

    Each call to ``next()`` returns the next line of the wrapped stream,
    decoded from ``encoding`` and encoded again as a UTF-8 byte string.
    Raises StopIteration when the underlying reader is exhausted.
    """

    def __init__(self, f, encoding):
        """Wrap the byte stream ``f``, whose content is in ``encoding``."""
        # codecs.getreader() returns the StreamReader class for the codec;
        # instantiating it around f gives an iterator of decoded lines.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next line of the stream as UTF-8 encoded bytes."""
        # Inside the method body `next` is the builtin, not this method.
        return next(self.reader).encode("utf-8")

class UnicodeDictReader:
    """Row iterator built on csv.DictReader that returns unicode values.

    The input stream is passed through UTF8Recoder before it reaches
    csv.DictReader; each row's values are then decoded from UTF-8.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Wrap stream ``f``; extra keyword arguments go to csv.DictReader."""
        recoded = UTF8Recoder(f, encoding)
        self.reader = csv.DictReader(recoded, dialect=dialect, **kwds)

    def __iter__(self):
        return self

    def next(self):
        """Return the next row as a dict with unicode values."""
        raw_row = self.reader.next()
        return {
            field: unicode(cell, "utf-8")
            for field, cell in raw_row.iteritems()
        }

# Open the CSV in binary mode and wrap it in a UnicodeDictReader.
with open('HR_plan_review.csv', 'rb') as infile:
    dr = UnicodeDictReader(infile, delimiter = ',')
    # NOTE(review): infile is closed when this block exits, so rows have to be
    # read from dr while still inside the "with" block.

我上面发布的代码仍然可以使用,只需将下面的第一行代码改为第二行即可:
# Before: values read from a plain csv.DictReader are decoded by hand.
args=[tuple(row[key].decode('utf-8') for key in keys) for row in dr]

# After: no decode step, for use with a reader that already returns unicode.
args=[tuple(row[key] for key in keys) for row in dr]

答案 2 :(得分:0)

您需要在执行后提交事务,否则插入的数据不会保存到数据库:

conn.commit()

你的executemany语句也应放在“with”块中:

import csv, sqlite3

# Path of the input CSV file without its ".csv" extension (placeholder value).
myfile = 'CSV FILE PATH'
conn = sqlite3.connect("DBNAME.sqlite3")
curs =  conn.cursor()
try:
    # "term" is the TEXT primary key. A comma between the column name and its
    # type would instead declare an untyped "term" column plus a separate
    # column literally named TEXT.
    # NOTE(review): with a real primary key, duplicate terms in the CSV will
    # raise sqlite3.IntegrityError on insert.
    curs.execute("CREATE TABLE t (webrank INTEGER, term TEXT PRIMARY KEY, gloss TEXT);")
except sqlite3.OperationalError:
    # Parenthesized so the line parses on Python 2 and 3; output is unchanged.
    print("Table already exist")
with open('{}.csv'.format(myfile), 'rb') as infile:
    dr = csv.DictReader(infile, delimiter = ',')
    def unicoded_data():
        """Yield one (webrank, term, gloss) tuple per CSV row."""
        for row in dr:
            # Assuming infile encoding is utf-8.
            yield int(row['WebRank']), unicode(row['term'], encoding='utf-8'), unicode(row['gloss'], encoding='utf-8')

    # The generator reads lazily from infile, so the insert has to run while
    # the file is still open, i.e. inside this "with" block.
    curs.executemany("INSERT INTO t (webrank, term, gloss) VALUES (?,?,?);", unicoded_data())
    # Without a commit the inserted rows are not saved to the database file.
    conn.commit()