我已经完成了关于Django's bulk_create()的一些阅读,我正在尝试实现它,但它没有像我期望的那样运作。最初我的代码没有使用bulk_create,导入6074行数据大约需要33秒左右。慢但它有效。
型号:
class Building(models.Model):
community = models.ForeignKey('Community', related_name='Building Community Name')
physical_location = models.CharField(max_length=80, null=True, blank=True)
data_source = models.CharField(max_length=50, null=True, blank=True)
facility_name = models.CharField(max_length=120, null=True, blank=True)
facility_type = models.CharField(max_length=80, null=True, blank=True)
size = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
audited = models.NullBooleanField(blank=True)
audit_notes = models.TextField(blank=True)
class RetrofitData(models.Model):
building_id = models.ForeignKey('Building')
retrofits_done = models.NullBooleanField(blank=True)
retrofit_notes = models.TextField(blank=True)
fuel_oil_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
district_heating_oil_usage_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
electricity_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
natural_gas_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
propane_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
biomass_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
fuel_oil_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
district_heating_oil_usage_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
electricity_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
natural_gas_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
propane_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
biomass_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
retrofit_cost = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
biomass_heat = models.NullBooleanField(blank=True)
heat_recovery = models.NullBooleanField(blank=True)
原始代码:
class BuildingInventoryImporter(dataimport.DataFileImporter):
def toTrueFalse(self, val):
if val == "Yes":
return True
elif val == "No":
return False
else:
return None
def decCleaner(self, val):
if val == '':
return None
else:
return val2dec(val)
models = [Building, RetrofitData]
@transaction.commit_manually
@rollback_on_exception
def do_import(self):
book = xlrd.open_workbook(self.data_file.file.path,
encoding_override='cp1252')
sheet = book.sheet_by_index(2)
for row_index in range(1,sheet.nrows):
row = sheet.row_values(row_index)
temp_id= row_index
community_name = row[0]
gnis = row[1]
physical_location = row[2]
data_source = row[3]
facility_type = row[5]
if row[4] == '':
if facility_type =="Other" or facility_type == "Office" or facility_type == "Public Assembly" or facility_type == "Public Safety":
facility_name = "Unavailable"
elif facility_type =="Health Care - Hospitals":
facility_name = community_name + " Clinic"
elif facility_type == "Education - K - 12":
facility_name = community_name + " School(s)"
else:
facility_name = row[4]
size = self.decCleaner(row[6])
audited = self.toTrueFalse(row[7])
audit_notes = row[8]
building, created = self.get_or_new(Building, id=temp_id)
try:
community = Community.objects.get(gnis_feature_id=gnis)
except Community.DoesNotExist:
self.warning("The value entered for the Community GNIS: {0} does not exist.".format(row[1]))
try:
community = Community.objects.get(name=community_name)
self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS").format(row[0])
except Community.DoesNotExist:
self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(row[0], row[1]))
continue
building.community = community
building.physical_location = physical_location
building.data_source = data_source
building.facility_name = facility_name
building.facility_type = facility_type
building.size = size
building.audited = audited
building.audit_notes = audit_notes
building.save()
retrofit_data, created = self.get_or_new(RetrofitData, building_id=building)
retrofit_data.retrofits_done = self.toTrueFalse(row[9])
retrofit_data.retrofit_notes = row[10]
retrofit_data.fuel_oil_preretrofit = self.decCleaner(row[11])
if row[12] == 999999999: #They decided that a unknown value would be represented as 999999999 in the dataset.
retrofit_data.district_heating_oil_usage_preretrofit = None
else:
retrofit_data.district_heating_oil_usage_preretrofit = self.decCleaner(row[12])
retrofit_data.electricity_preretrofit = self.decCleaner(row[13])
retrofit_data.natural_gas_preretrofit = self.decCleaner(row[14])
retrofit_data.propane_preretrofit = self.decCleaner(row[15])
retrofit_data.biomass_preretrofit = self.decCleaner(row[16])
retrofit_data.fuel_oil_postretrofit = self.decCleaner(row[17])
retrofit_data.district_heating_oil_usage_postretrofit = self.decCleaner(row[18])
retrofit_data.electricity_postretrofit = self.decCleaner(row[19])
retrofit_data.natural_gas_postretrofit = self.decCleaner(row[20])
retrofit_data.propane_postretrofit = self.decCleaner(row[21])
retrofit_data.biomass_postretrofit = self.decCleaner(row[22])
retrofit_data.retrofit_cost = self.decCleaner(row[23])
retrofit_data.biomass_heat = self.toTrueFalse(row[24])
retrofit_data.heat_recovery = self.toTrueFalse(row[25])
retrofit_data.save()
if self.dry_run:
transaction.rollback()
else:
transaction.commit()
dataimport.register(BuildingInventoryImporter)
在整个数据导入过程中,它必须像〜1200次一样打到数据库,导致导入缓慢。所以为了解决这个问题,我研究了使用bulk_create()
修改后的代码:
class BuildingInventoryImporterV2(dataimport.DataFileImporter):
models = [Building, RetrofitData]
def do_import(self, dry_run=True):
book = xlrd.open_workbook(self.data_file.file.path,
encoding_override='cp1252')
sheet = book.sheet_by_index(2)
building_bulk_list = []
retrofit_bulk_list = []
for row_index in range(1,sheet.nrows):
row = sheet.row_values(row_index)
temp_id= row_index
community_name = row[0]
gnis = row[1]
facility_type = row[5]
try:
community = Community.objects.get(gnis_feature_id=gnis)
except Community.DoesNotExist:
self.warning("The value entered for the Community GNIS: {0} does not exist.".format(row[1]))
try:
community = Community.objects.get(name=community_name)
self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS").format(row[0])
except Community.DoesNotExist:
self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(row[0], row[1]))
continue
if row[4] == '':
if facility_type =="Other" or facility_type == "Office" or facility_type == "Public Assembly" or facility_type == "Public Safety":
facility_name = "Unavailable"
elif facility_type =="Health Care - Hospitals":
facility_name = community_name + " Clinic"
elif facility_type == "Education - K - 12":
facility_name = community_name + " School(s)"
else:
facility_name = row[4]
building_to_add = Building(
community=community,
physical_location=row[2],
data_source=row[3],
facility_name=facility_name,
facility_type=facility_type,
size=self.decCleaner(row[6]),
audited=self.toTrueFalse(row[7]),
audit_notes=row[8]
)
building_bulk_list.append(building_to_add)
if self.dry_run is False:
Building.objects.bulk_create(building_bulk_list)
for row_index in range(1,sheet.nrows):
row = sheet.row_values(row_index)
#They decided that a unknown value would be represented as 999999999 in the dataset.
if row[12] == 999999999:
district_heating_oil_usage_preretrofit = None
else:
district_heating_oil_usage_preretrofit = self.decCleaner(row[12])
retrofit_data_to_add = RetrofitData(
building_id=Building.objects.get(id=temp_id),
retrofits_done=self.toTrueFalse(row[9]),
retrofit_notes=row[10],
fuel_oil_preretrofit=self.decCleaner(row[11]),
district_heating_oil_usage_preretrofit=district_heating_oil_usage_preretrofit,
electricity_preretrofit=self.decCleaner(row[13]),
natural_gas_preretrofit=self.decCleaner(row[14]),
propane_preretrofit=self.decCleaner(row[15]),
biomass_preretrofit=self.decCleaner(row[16]),
fuel_oil_postretrofit=self.decCleaner(row[17]),
district_heating_oil_usage_postretrofit=self.decCleaner(row[18]),
electricity_postretrofit=self.decCleaner(row[19]),
natural_gas_postretrofit=self.decCleaner(row[20]),
propane_postretrofit=self.decCleaner(row[21]),
biomass_postretrofit=self.decCleaner(row[22]),
retrofit_cost=self.decCleaner(row[23]),
biomass_heat=self.toTrueFalse(row[24]),
heat_recovery=self.toTrueFalse(row[25])
)
retrofit_bulk_list.append(retrofit_data_to_add)
if self.dry_run is False:
Building.objects.bulk_create(retrofit_bulk_list)
dataimport.register(BuildingInventoryImporterV2)
当我到第二个代码块批量导入RetroFitData时出现问题。据我所知,bulk_create()在调用时不会分配AutoField pk因此你需要在分配AutoField pk之前将bulk_create()数据放在数据库中。但似乎这也不准确。运行导入后,我收到以下错误:
Traceback:
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response
111. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapped_view
105. response = view_func(request, *args, **kwargs)
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/views/decorators/cache.py" in _wrapped_view_func
52. response = view_func(request, *args, **kwargs)
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/admin/sites.py" in inner
206. return view(request, *args, **kwargs)
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/auth/decorators.py" in _wrapped_view
21. return view_func(request, *args, **kwargs)
File "/home/bhernandez/ISER/aedg/core/adminviews.py" in data_import
465. results = importer.run()
File "/home/bhernandez/ISER/aedg/core/dataimport/__init__.py" in run
114. self.do_import()
File "/home/bhernandez/ISER/aedg/akw/dataimport/etc.py" in do_import
656. building_id=Building.objects.get(id=temp_id),
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/manager.py" in manager_method
92. return getattr(self.get_queryset(), name)(*args, **kwargs)
File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/query.py" in get
357. self.model._meta.object_name)
Exception Type: DoesNotExist at /admin/core/datafile/174/import/
Exception Value: Building matching query does not exist.
但是,当我查看我的Buildings table it's been populated...时,我们非常感谢任何帮助或建议。
答案 0 :(得分:0)
因此,当您开始创建RetrofitData
时,您需要知道您刚刚创建的Building
个对象的ID。
您可能正在使用ID字段设置为自动增量的数据库,因此您使用bulk_create
创建的对象无法获得PK分配。
我想使用building_bulk_list
您可以使用它的长度来从数据库中获取最后一组Building
个对象,但是为什么不创建building_bulk_list
更多的save()
创建对象的传统方法,调用bulk_create
,允许您创建ID列表?
然后使用该ID列表,您可以为RetrofitData
运行Building
,迭代该ID列表以设置与<Ip_Adress> <User_Id> <User_Name>
的关系?