大家好,我在使用 Django 时遇到了一个难题:我需要把 CSV 文件的内容导入到使用 InnoDB 引擎的 MySQL 关系型数据库中,这些文件最大约为 2.2 MB。我在本站和其他网页上查阅了很多相关资料,许多人都认为批量(bulk)函数非常适合这类任务,但 Django 文档中的说明对带有外键的关系表并没有什么帮助。有没有人知道如何在 InnoDB 引擎下让 bulk 操作配合外键使用?以下是我的代码。
def _parse_report_period(header_row):
    """Return ``(month, year)`` read from the second header row of a report.

    The third column of the row is split on whitespace and its second token
    is read as a ``/``-separated date whose first part is the year and whose
    second part is the month number.

    ``month`` is whatever ``month_dict`` maps the month number to (``None``
    when the number is not a key); ``year`` is the string taken from the file.
    """
    report_date = header_row[2].split()[1]
    date_parts = report_date.split('/')
    year = date_parts[0]
    # int() already accepts a leading zero ("08" -> 8).  The previous
    # strip('0') also removed *trailing* zeros, so "10" was read as month 1.
    month_key = int(date_parts[1])
    return month_dict.get(month_key), year


def _import_row(data, month, year):
    """Create (or fetch) every record described by one CSV data row."""
    global get_customer_id
    get_customer_id = Customers.objects.filter(pk=data[26])
    # Indexing an unevaluated queryset hits the database on every access, so
    # resolve the id once per row.  As before, an unknown customer raises
    # IndexError, which aborts the import via the handler in csv_to_db().
    customer_id = get_customer_id[0].id

    retails = Retails.objects.get_or_create(
        customers_id=customer_id,
        code=data[8],
        name=data[9], phone=data[25],
        address=data[20], address2=data[21],
        city=data[22],
        state=data[23], zip=data[24],
    )[0]
    ff_vendor = Vendor.objects.get_or_create(code=data[0], description=data[1])[0]
    ff_type = Type.objects.get_or_create(code=data[2], description=data[3])[0]
    ff_material = Material.objects.get_or_create(code=data[4], description=data[5])[0]
    ff_color = Color.objects.get_or_create(code=data[6], description=data[7])[0]
    ff_product = Ffdesign.objects.get_or_create(
        customers_id=customer_id,
        designvendor=ff_vendor, material=ff_material,
        designcolor=ff_color, type=ff_type,
    )[0]
    CustomersHasFfdesign.objects.get_or_create(
        customers_id=customer_id,
        ffdesign=ff_product,
        month=month, year=year, docnum=data[10],
        eye=data[11], lenses=data[12], remake_nc=data[13],
        remake=data[14], credit=data[15], coupon=data[16],
        outsourced=data[19], no_charges=data[17],
        recalcs=data[18], retails=retails,
    )


def _assign_categories(model, code_lookup, values_field):
    """Set ``categories`` on ``model`` rows according to their type code.

    Nothing is done unless at least one row of ``model`` still has
    ``categories=None``.  ``code_lookup`` is the ``__in`` filter keyword that
    reaches the type code from ``model``; ``values_field`` is the field name
    passed to ``values()`` before the update.
    """
    # exists() asks the database for a single row instead of loading every
    # uncategorised object just to test the queryset for truthiness.
    if not model.objects.filter(categories=None).exists():
        return
    # Built at call time because DesignCatRanges is defined elsewhere.
    # The order is kept from the original statements: if two code ranges
    # overlap, the later update wins.
    # NOTE(review): the integers are presumably Categories primary keys.
    assignments = (
        (DesignCatRanges.adv, 1),
        (DesignCatRanges.basic, 2),
        (DesignCatRanges.bifocal, 3),
        (DesignCatRanges.camber, 4),
        (DesignCatRanges.sv, 6),
        (DesignCatRanges.svlen, 7),
        (DesignCatRanges.slimsv, 8),
        (DesignCatRanges.ultimate, 9),
        (DesignCatRanges.office, 5),
    )
    for codes, category_id in assignments:
        # NOTE(review): values() looks redundant in front of update(); it is
        # kept so the generated query stays exactly what it was.
        model.objects.filter(**{code_lookup: codes}).values(
            values_field).update(categories=category_id)


def csv_to_db():
    """Load every ``.csv`` file found in ``media_path`` into the database.

    For each file: the last line is dropped, the first three rows are treated
    as headers (the report month/year is read from the second one), the
    entries of ``CategoriesContent.cat`` are ensured to exist, each remaining
    row is imported, and finally designs and customer/design relations that
    lack a category are categorised by type code.

    Side effects: sets the module globals ``check_files`` and (per row)
    ``get_customer_id``.  Any exception is logged with its traceback and
    swallowed, so the function always returns ``None``.
    """
    # NOTE(review): the original indentation was lost.  The category backfill
    # is assumed to run once per file after its rows, and the "End" banner to
    # be logged whether or not any file was found -- confirm.
    global check_files
    check_files = glob.glob(media_path + filematch)
    try:
        logger.info('\n')
        logger.info('========================[ Starting CSV to Database Process ]======================')
        logger.info('\n')
        logger.info('Checking Csv Files in csvfiles folder....')
        if check_files:
            logger.info('%s Found %s', len(check_files), check_files)
            for filename in os.listdir(media_path):
                if not filename.endswith('.csv'):
                    continue
                # The whole file is read into memory, so it can be closed
                # (by the with block) before any database work starts.
                with open(media_path + filename, 'rb') as csv_file:
                    csv_lines = csv_file.readlines()
                # The last line is discarded (presumably a footer -- confirm).
                reader = csv.reader(csv_lines[:-1])
                next(reader)  # first header row, unused
                header_row = next(reader)
                next(reader)  # third header row, unused
                month, year = _parse_report_period(header_row)

                for description, fee in CategoriesContent.cat.iteritems():
                    Categories.objects.get_or_create(description=description, fee=fee)

                # Everything left in the reader is a data row.
                for data in reader:
                    _import_row(data, month, year)

                _assign_categories(Ffdesign, 'type_id__code__in', 'type__code')
                _assign_categories(
                    CustomersHasFfdesign,
                    'ffdesign_id__type_id__code__in', 'ffdesign')
        logger.info('\n')
        logger.info('========================[ End CSV to Database Process ]======================')
        logger.info('\n')
    except Exception as e:
        # Top-level boundary: keep the best-effort behaviour, but record the
        # traceback so a failed import can actually be diagnosed.
        logger.exception(str(e))
        logger.info('\n')
        logger.info('========================End CSV to Database Process======================')
        logger.info('\n')
答案 0 :(得分:0)
嗯,我找到问题所在了:代码中有两个瓶颈,第一个在下面这部分代码中。
# First bottleneck: get_or_create() looks the record up (and inserts it when
# nothing matches) once for every CSV row.
retails, created = Retails.objects.get_or_create(
    customers_id=get_customer_id[0].id,
    code=data[8],
    name=data[9],
    phone=data[25],
    address=data[20],
    address2=data[21],
    city=data[22],
    state=data[23],
    zip=data[24],
)
另一个瓶颈在代码的另一部分:
# Second bottleneck: another get_or_create() per CSV row, this time matching
# on every column of the customer/design relation.
relation, created = CustomersHasFfdesign.objects.get_or_create(
    customers_id=get_customer_id[0].id,
    ffdesign=ff_product,
    month=month,
    year=year,
    docnum=data[10],
    eye=data[11],
    lenses=data[12],
    remake_nc=data[13],
    remake=data[14],
    credit=data[15],
    coupon=data[16],
    outsourced=data[19],
    no_charges=data[17],
    recalcs=data[18],
    retails=retails,
)
使用 get_or_create 时需要小心:插入少量数据时它可能没有问题,但像本例这样有大量记录时,更好的办法是使用 bulk_create。bulk_create 有一些限制,可以在 Django 文档中查阅,但它的效果很好。
if not checking_results.exists():
    # Build one unsaved model instance per remaining CSV row and hand them
    # all to bulk_create() in a single call.
    # NOTE(review): retails, ff_product and get_customer_id do not depend on
    # `row`, so every object created here shares the same values -- confirm
    # that this is intended.
    new_relations = [
        CustomersHasFfdesign(
            customers_id=get_customer_id[0].id,
            ffdesign=ff_product,
            month=month,
            year=year,
            docnum=row[10],
            eye=row[11],
            lenses=row[12],
            remake_nc=row[13],
            remake=row[14],
            credit=row[15],
            coupon=row[16],
            outsourced=row[19],
            no_charges=row[17],
            recalcs=row[18],
            retails=retails,
        )
        for row in reader
    ]
    CustomersHasFfdesign.objects.bulk_create(new_relations)