Django bulk_Create,大型csv文件

时间:2016-03-08 02:38:08

标签: python mysql django csv

大家好,我正在和 Django 较劲:我需要把 CSV 文件的内容上传到使用 InnoDB 引擎的 MySQL 关系型数据库中,其中最大的文件大约有 2.2 MB。我在这里和其他网站上查了不少相关资料,很多人都认为批量(bulk)函数很适合这类任务,但按照 Django 文档的说明,它对带外键的关系表并不适用。有没有人知道如何让 bulk_create 在 InnoDB 引擎下配合外键使用?这是我的代码。

def csv_to_db():
    """Import every ``.csv`` file found in ``media_path`` into the database.

    The function first globs ``media_path + filematch`` and stores the result
    in the module-level ``check_files``.  When that list is non-empty, each
    file in ``media_path`` whose name ends with ``.csv`` is imported with
    ``_import_csv_file``.

    Any exception raised during the run is logged (with its traceback) and
    swallowed, so one bad file or row aborts the remaining work without
    propagating to the caller.  Nothing is returned.
    """
    # Kept as a module global because the original code published it that way.
    global check_files

    check_files = glob.glob(media_path + filematch)

    try:
        logger.info('\n')
        logger.info('========================[ Starting CSV to Database Process ]======================')
        logger.info('\n')
        logger.info('Checking Csv Files in csvfiles folder....')

        if check_files:
            logger.info('%s Found %s', len(check_files), check_files)

            for filename in os.listdir(media_path):
                if filename.endswith('.csv'):
                    _import_csv_file(media_path + filename)

        logger.info('\n')
        logger.info('========================[ End CSV to Database Process ]======================')
        logger.info('\n')

    except Exception as e:
        # logger.exception logs at ERROR level like before, but keeps the
        # traceback so the failing row or file can actually be located.
        logger.exception(str(e))
        logger.info('\n')
        logger.info('========================End CSV to Database Process======================')
        logger.info('\n')


def _parse_report_period(header_row):
    """Return ``(year, month)`` extracted from the second line of a CSV file.

    The third cell of ``header_row`` is split on whitespace and its second
    token is split on ``/``; the first part is returned unchanged as the
    year and the second part is converted to an int and looked up in
    ``month_dict``.
    """
    report_date = header_row[2].split()[1]
    date_parts = report_date.split('/')
    year = date_parts[0]
    # int() already copes with a leading zero ('08' -> 8).  The previous
    # ``.strip('0')`` also removed trailing zeros, turning '10' into 1.
    month = month_dict.get(int(date_parts[1]))
    return year, month


def _assign_categories():
    """Set ``categories`` on design rows according to ``DesignCatRanges``.

    Runs only when a table still has rows with ``categories=None``.  The
    updates are applied in the listed order, so if two ranges share a code
    the later entry wins.
    """
    category_by_range = (
        (DesignCatRanges.adv, 1),
        (DesignCatRanges.basic, 2),
        (DesignCatRanges.bifocal, 3),
        (DesignCatRanges.camber, 4),
        (DesignCatRanges.sv, 6),
        (DesignCatRanges.svlen, 7),
        (DesignCatRanges.slimsv, 8),
        (DesignCatRanges.ultimate, 9),
        (DesignCatRanges.office, 5),
    )

    # .exists() issues a cheap existence query instead of loading every
    # uncategorised row just to test the queryset for truthiness.
    if Ffdesign.objects.filter(categories=None).exists():
        for codes, category_id in category_by_range:
            Ffdesign.objects.filter(
                type_id__code__in=codes).update(categories=category_id)

    if CustomersHasFfdesign.objects.filter(categories=None).exists():
        for codes, category_id in category_by_range:
            CustomersHasFfdesign.objects.filter(
                ffdesign_id__type_id__code__in=codes).update(categories=category_id)


def _import_csv_file(path):
    """Load one CSV file at ``path`` into the database.

    The last line of the file is discarded, the first three parsed lines are
    treated as a preamble (only the second one is read, for the report
    date), and every remaining row is stored through ``get_or_create``
    calls.  Category assignment runs once after all rows of the file.

    Raises ``IndexError`` when a row is shorter than 27 cells or when no
    ``Customers`` row matches ``data[26]``.
    """
    # Kept as a module global because the original code published it that way.
    global get_customer_id

    # 'rb' is what the Python 2 csv module expects.  The file is only needed
    # for reading the lines, so it is closed before the database work starts.
    with open(path, 'rb') as csv_file:
        csv_lines = csv_file.readlines()[:-1]

    reader = csv.reader(csv_lines)
    next(reader)
    date_row = next(reader)
    next(reader)

    year, month = _parse_report_period(date_row)

    for description, fee in CategoriesContent.cat.items():
        Categories.objects.get_or_create(description=description, fee=fee)

    for data in reader:
        get_customer_id = Customers.objects.filter(pk=data[26])
        # Indexing an unevaluated queryset runs a query each time, so resolve
        # the customer id once per row instead of three times.
        customer_id = get_customer_id[0].id

        retails, _ = Retails.objects.get_or_create(
            customers_id=customer_id,
            code=data[8],
            name=data[9], phone=data[25],
            address=data[20], address2=data[21],
            city=data[22],
            state=data[23], zip=data[24])

        ff_vendor, _ = Vendor.objects.get_or_create(code=data[0], description=data[1])
        ff_type, _ = Type.objects.get_or_create(code=data[2], description=data[3])
        ff_material, _ = Material.objects.get_or_create(code=data[4], description=data[5])
        ff_color, _ = Color.objects.get_or_create(code=data[6], description=data[7])

        ff_product, _ = Ffdesign.objects.get_or_create(
            customers_id=customer_id,
            designvendor=ff_vendor, material=ff_material,
            designcolor=ff_color, type=ff_type)

        CustomersHasFfdesign.objects.get_or_create(
            customers_id=customer_id,
            ffdesign=ff_product,
            month=month, year=year, docnum=data[10],
            eye=data[11], lenses=data[12], remake_nc=data[13],
            remake=data[14], credit=data[15], coupon=data[16],
            outsourced=data[19], no_charges=data[17],
            recalcs=data[18], retails=retails)

    # The category updates touch whole tables and are idempotent, so running
    # them once per file gives the same final state as running them after
    # every single row, at a fraction of the query count.
    _assign_categories()

1 个答案:

答案 0 :(得分:0)

嗯,我看出问题所在了:代码中有两个瓶颈,第一个在下面这部分代码中。

 retails, created = Retails.objects.get_or_create(customers_id=get_customer_id[0].id,
                                                                             code=data[8],
                                                                             name=data[9], phone=data[25],
                                                                             address=data[20], address2=data[21],
                                                                             city=data[22],
                                                                             state=data[23], zip=data[24])

另一个瓶颈在代码的另一部分

relation, created = CustomersHasFfdesign.objects.get_or_create(
                                customers_id=get_customer_id[0].id,
                                ffdesign=ff_product,
                                month=month, year=year, docnum=data[10],
                                eye=data[11], lenses=data[12], remake_nc=data[13],
                                remake=data[14], credit=data[15], coupon=data[16],
                                outsourced=data[19], no_charges=data[17],
                                recalcs=data[18], retails=retails

                            )

使用 get_or_create 需要小心:插入少量数据时它没有问题,但在这种记录很多的情况下,更好的办法是使用 bulk_create。bulk_create 有一些限制,你可以在 Django 文档中查阅,但它运行良好。

if not checking_results.exists():
                                    CustomersHasFfdesign.objects.bulk_create([CustomersHasFfdesign(
                                        customers_id=get_customer_id[0].id,
                                        ffdesign=ff_product,
                                        month=month, year=year, docnum=row[10],
                                        eye=row[11], lenses=row[12], remake_nc=row[13],
                                        remake=row[14], credit=row[15], coupon=row[16],
                                        outsourced=row[19], no_charges=row[17],
                                        recalcs=row[18], retails=retails)for row in reader]

                                    )