我正在为另一个项目对 bulk_create 做基准测试,发现它比我期望的要慢。我运行了下面的代码,发现每一天的数据 bulk_create 可能要花上一分钟。这个示例只用了 1 万个对象的一半,所以数据量变为 5 倍、耗时却变为 20 倍,这让我很惊讶。有没有办法让它更快?
import requests
import random
from fake_model.models import FakeNode, FakeStation, FakePrice, FakeWeather
from datetime import datetime, timedelta
from random import randint
# Download a newline-separated word list used to build random names.
word_site = "http://www.cs.duke.edu/~ola/ap/linuxwords"
# A timeout keeps the script from hanging forever on an unresponsive host.
response = requests.get(word_site, timeout=30)
# Fail loudly on an HTTP error instead of treating an error page as words.
response.raise_for_status()
WORDS = response.content.splitlines()
def random_name(WORDS):
    """Return a random "Word-Word" name shorter than 100 characters.

    Two entries are drawn independently from ``WORDS`` with
    ``random.choice``, title-cased and joined with a hyphen. A candidate of
    100 characters or more is discarded and a new pair is drawn.

    Args:
        WORDS: Non-empty sequence of words to draw from.

    Returns:
        The first generated name whose length is below 100.

    Raises:
        IndexError: If ``WORDS`` is empty (raised by ``random.choice``).
    """
    # Retry in a loop rather than recursing, so a long run of rejected
    # candidates cannot exhaust the interpreter's recursion limit.
    while True:
        name = "{0}-{1}".format(
            random.choice(WORDS).title(),
            random.choice(WORDS).title(),
        )
        if len(name) < 100:
            return name
# Remove every existing row of the four fake models.
print "Deleting"
FakeNode.objects.all().delete()
FakeStation.objects.all().delete()
FakePrice.objects.all().delete()
FakeWeather.objects.all().delete()
# Build 1000 FakeNode and 1000 FakeStation instances in memory, each with a
# random name.
print "Prepping"
node_list = [FakeNode(name=random_name(WORDS)) for x in range(0, 1000)]
station_list = [FakeStation(name=random_name(WORDS)) for x in range(0, 1000)]
# Insert both lists, one bulk_create call per model.
print "Making"
FakeNode.objects.bulk_create(node_list)
FakeStation.objects.bulk_create(station_list)
# Date window that make() passes to daterange(). daterange() stops before
# end_date, so this window covers the single day 2012-01-01.
start_date = datetime(2012, 1, 1)
end_date = datetime(2012, 1, 2)
def daterange(start_date, end_date):
    """Yield one value per whole day from ``start_date`` up to ``end_date``.

    The sequence starts at ``start_date`` and advances in one-day steps;
    ``end_date`` itself is never yielded. Only whole days of the difference
    count, and nothing is yielded when ``end_date`` is not at least one full
    day after ``start_date``.

    Args:
        start_date: First value to yield (``date`` or ``datetime``).
        end_date: Exclusive upper bound, of the same type as ``start_date``.

    Yields:
        ``start_date + timedelta(days=n)`` for ``n`` in ``0 .. days - 1``.
    """
    # ``timedelta.days`` is already an int, so no conversion is needed.
    day_count = (end_date - start_date).days
    for offset in range(day_count):
        yield start_date + timedelta(days=offset)
# Progress marker printed just before the profiled function is defined.
print "@profile"
# NOTE(review): ``profile`` is neither defined nor imported in this file;
# presumably it is injected by the line profiler that runs the script --
# confirm before running the file directly.
@profile
def make():
    """Build and bulk-insert hourly prices and temperatures, one day at a time.

    For every date produced by ``daterange(start_date, end_date)`` this
    creates one ``FakePrice`` per (node, hour) and one ``FakeWeather`` per
    (station, hour) for hours 1 through 24, each with a random integer value
    between 0 and 1000 inclusive, prints the date, and then writes each list
    with a single ``bulk_create`` call.

    Reads the module-level ``start_date`` and ``end_date``. Returns ``None``.
    """
    nodes = FakeNode.objects.all()
    stations = FakeStation.objects.all()
    # Hours are numbered 1..24; the same sequence serves both comprehensions.
    hours = range(1, 25)
    for single_date in daterange(start_date, end_date):
        price_list = [
            FakePrice(
                node=node,
                dt=single_date,
                hr=hr,
                price=randint(0, 1000),
            )
            for node in nodes
            for hr in hours
        ]
        temp_list = [
            FakeWeather(
                station=station,
                dt=single_date,
                hr=hr,
                temp=randint(0, 1000),
            )
            for station in stations
            for hr in hours
        ]
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print(single_date)
        # The return values of bulk_create are not needed here.
        FakePrice.objects.bulk_create(price_list)
        FakeWeather.objects.bulk_create(temp_list)
# Run the profiled generation step.
make()
from django.db.models import Count
# Print the number of FakePrice rows, computed as a Count over 'id' across
# the whole table.
print FakePrice.objects.all().aggregate(Count('id'))
计时结果:
Total time: 13.8182 s
File: generate.py
Function: make at line 49
Line # Hits Time Per Hit % Time Line Contents
==============================================================
49 @profile
50 def make():
51 1 4 4.0 0.0 price_list = []
52 1 2 2.0 0.0 temp_list = []
53 1 221 221.0 0.0 nodes = FakeNode.objects.all()
54 1 146 146.0 0.0 stations = FakeStation.objects.all()
55 2 65 32.5 0.0 for single_date in daterange(start_date, end_date):
56 price_list = [
57 1 3 3.0 0.0 FakePrice(
58 node=node,
59 dt=single_date,
60 hr=hr,
61 price=randint(0, 1000)
62 )
63 25001 3373017 134.9 24.4 for node in nodes for hr in range(1, 25)
64 ]
65 temp_list = [
66 1 2 2.0 0.0 FakeWeather(
67 station=station,
68 dt=single_date,
69 hr=hr,
70 temp=randint(0, 1000)
71 )
72 25001 3463471 138.5 25.1 for station in stations for hr in range(1, 25)
73 ]
74
75 1 70 70.0 0.0 print single_date
76 1 3526035 3526035.0 25.5 c = FakePrice.objects.bulk_create(price_list)
77 1 3455148 3455148.0 25.0 c = FakeWeather.objects.bulk_create(temp_list)