我正在尝试根据CSV文件中的信息插入或更新一些数据库行。我能够将CSV文件读取到dict()对象的列表中,并将这些dict处理为类对象。但是,尝试在这些对象上使用Session.merge()
会导致以下错误:
sqlalchemy.exc.IntegrityError: (MySQLdb._exceptions.IntegrityError) (1062, "Duplicate entry '27445-5810' for key 'SiteId'")
以前使用sqlalchemy时,merge()
将在UPDATE
失败时自动执行INSERT
。为什么在这种情况下会出现完整性错误?
这是我的剧本:
from sqlalchemy import create_engine,and_,or_,func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.automap import automap_base
from io import StringIO
from csv import reader
import xmltodict
from requests import get
from json import loads
from sys import argv
from datetime import datetime, timedelta
from traceback import print_exc
from tqdm import tqdm
#Set-Up DB connection/classes
engine = create_engine(
"<DATABSE_URI_HERE>"
)
Base = automap_base()
Base.prepare(engine, reflect=True)
ProductSource = Base.classes.ProductSource
Products = Base.classes.Products
Session = sessionmaker()
siteid = argv[2] if 'site' in argv else input('SiteId: ')
def _findhead(obj, key):
if key in obj: return obj[key]
for _, v in obj.items():
if isinstance(v,dict):
item = _findhead(v, key)
if item is not None:
return item
conn = Session(bind=engine)
source = conn.query(ProductSource).filter(ProductSource.SiteId == siteid).first()
feed_file = get(source.Url).text
mapping = loads(source.CsvMap)
try:
if source.FileType == 'csv':
delimiters = {
'comma' : ',',
'tab' : '\t',
'pipe' : '|',
'semicolon' : ';'
}
with StringIO(feed_file) as f:
products = list(reader(f, delimiter=delimiters[source.Other]))
else:
parsed = xmltodict.parse(feed_file)
products = _findhead(parsed, source.HeadElement)
except:
conn.close()
print('Malformed file, cannot read this feed.')
exit(1)
updated = 0
failed = 0
valid_keys =[]
started = datetime.now()
source.Started = started
source.Updated = updated
conn.commit()
for key in mapping.keys():
if mapping[key] != None:
valid_keys.append(key)
if 'Description' not in valid_keys or 'ProductId' not in valid_keys:
print('Invalid mapping, nothing to do')
conn.close()
exit(1)
for product in tqdm(products):
try:
new_product_row = {'SiteId' : siteid}
for key in valid_keys:
new_product_row[key] = product[mapping[key]]
conn.merge(Products(**new_product_row))
conn.commit()
updated += 1
except:
conn.rollback()
failed += 1
finished = datetime.now()
next_read = started + timedelta(days=source.ReadInterval)
source.LastRead = finished
source.NextRead = next_read
source.Added = added
source.Updated = updated
conn.commit()
print(f'{source.Updated} products updated.')
print(f'{failed} products failed.')
conn.close()