Django CSV导入速度越来越慢

时间:2015-04-27 10:14:56

标签: python django postgresql csv postgresql-copy

我正在将一份CSV城市列表导入我的Django应用程序。我是Django和Python的新手。导入一开始运行得相当快:前25,000行大约需要5分钟,但接下来的25,000行却花了2小时。我停止了导入,并从中断的地方重新开始,接下来的25,000行只花了大约4分钟。显然我哪里做错了,因为看起来每次插入都会让速度变得更慢。

任何帮助都将不胜感激。我这样做主要是为了学习,而不仅仅是为了导入数据;目前直接导入PostgreSQL更快,这样我可以继续推进我的项目,但我想知道自己哪里做错了,以便今后能更好地使用Django / Python。

TIA

from myapp import Country, State, City

def add_country(isocode, name):
    """Fetch or create the ``Country`` row for one CSV record.

    The name has surrounding whitespace and all double-quote characters
    removed; the ISO code only has surrounding whitespace removed.

    Returns the matching ``Country`` instance, whether it already existed
    or was created by this call.
    """
    clean_name = name.strip().replace('"', '')
    clean_isocode = isocode.strip()
    # get_or_create() hands back (instance, created); only the instance
    # is of interest to callers.
    country, _created = Country.objects.get_or_create(
        name=clean_name,
        isocode=clean_isocode,
    )
    return country


def add_state(country, isocode, name, statetype):
    """Fetch or create the ``State`` row for one CSV record.

    ``country`` is the ISO code of the parent country; it is stripped and
    lower-cased before the ``Country`` lookup. A failed country lookup is
    not handled here and propagates to the caller.

    ``name`` and ``statetype`` are stripped and have double quotes removed;
    ``isocode`` is additionally lower-cased.

    Returns the matching ``State`` instance, whether it already existed
    or was created by this call.
    """
    parent_country = Country.objects.get(isocode=country.strip().lower())

    clean_name = name.strip().replace('"', '')
    clean_isocode = isocode.strip().lower().replace('"', '')
    clean_statetype = statetype.strip().replace('"', '')

    # get_or_create() hands back (instance, created); only the instance
    # is of interest to callers.
    state, _created = State.objects.get_or_create(
        name=clean_name,
        isocode=clean_isocode,
        country=parent_country,
        statetype=clean_statetype,
    )
    return state


def add_city(country, state, name):
    """Fetch or create the ``City`` row for one CSV record.

    ``country`` is an ISO code (stripped, lower-cased, double quotes
    removed) used to look up the ``Country``; a failed country lookup is
    not handled here and propagates to the caller.

    ``state`` is the state name (stripped, double quotes removed) looked up
    within that country. When no such state exists the city is stored with
    ``state=None``.

    The city is always matched/created with an empty ``postcode``.

    Returns the matching ``City`` instance, whether it already existed
    or was created by this call.
    """
    country_code = country.strip().lower().replace('"', '')
    parent_country = Country.objects.get(isocode=country_code)

    state_name = state.strip().replace('"', '')
    try:
        found_state = State.objects.get(name=state_name, country=parent_country)
    except State.DoesNotExist:
        # Unknown state: the city is still recorded, just without a state.
        parent_state = None
    else:
        parent_state = found_state

    city_name = name.strip().replace('"', '')
    city, _created = City.objects.get_or_create(
        name=city_name,
        state=parent_state,
        postcode='',
    )
    return city


# Countries go in first, then states, then cities: the state and city
# helpers look up rows inserted by the earlier steps.
with open('country.csv', 'rb') as source:
    for row in csv.reader(source, delimiter=',', quotechar='"'):
        add_country(row[0], row[1])


with open('state.csv', 'rb') as source:
    for row in csv.reader(source, delimiter=',', quotechar='"'):
        add_state(row[0], row[1], row[2], row[3])


# The city data is split over three files that are processed identically,
# one after the other.
for city_filename in ('city1.csv', 'city2.csv', 'city3.csv'):
    with open(city_filename, 'rb') as source:
        for row in csv.reader(source, delimiter=',', quotechar='"'):
            add_city(row[0], row[1], row[2])



更新
于是我把代码改成了使用批量插入:第一组城市现在只需两分钟多一点,第二组需要10分钟,而第三组运行了几个小时仍未完成。一定是存在某种垃圾回收过程,或者我遗漏了什么,因为我甚至调换过文件的顺序,而每个文件只要是第一个运行,花费的时间都差不多。

新代码如下所示:

def add_country(isocode, name, created_by, changed_by, country_list):
    """Queue an unsaved ``Country`` built from one CSV record.

    The name has surrounding whitespace and all double-quote characters
    removed; the ISO code only has surrounding whitespace removed. The new
    instance is appended to ``country_list``; no save call is made here.

    ``created_by`` and ``changed_by`` are accepted but not used.
    """
    pending = Country(
        name=name.strip().replace('"', ''),
        isocode=isocode.strip(),
    )
    country_list.append(pending)

def add_state(country, isocode, name, statetype, created_by, changed_by, state_list):
    """Queue an unsaved ``State`` built from one CSV record.

    ``country`` is the parent country's ISO code; it is stripped and
    lower-cased, then used for one ``Country`` query per call. A failed
    country lookup is not handled here and propagates to the caller.

    ``name`` and ``statetype`` are stripped and have double quotes removed;
    ``isocode`` is additionally lower-cased. The new instance is appended
    to ``state_list``; no save call is made here.

    ``created_by`` and ``changed_by`` are accepted but not used.
    """
    parent_country = Country.objects.get(isocode=country.strip().lower())
    pending = State(
        name=name.strip().replace('"', ''),
        isocode=isocode.strip().lower().replace('"', ''),
        country=parent_country,
        statetype=statetype.strip().replace('"', ''),
    )
    state_list.append(pending)

def add_city(country, state, name, created_by, changed_by, city_list):
    """Queue an unsaved ``City`` built from one CSV record.

    ``country`` is an ISO code (stripped, lower-cased, double quotes
    removed) used for one ``Country`` query per call; a failed country
    lookup is not handled here and propagates to the caller.

    ``state`` is the state name (stripped, double quotes removed) looked up
    within that country with a second query. When no such state exists the
    city is queued with ``state=None``.

    The new instance, always with an empty ``postcode``, is appended to
    ``city_list``; no save call is made here.

    ``created_by`` and ``changed_by`` are accepted but not used.
    """
    country_code = country.strip().lower().replace('"', '')
    parent_country = Country.objects.get(isocode=country_code)

    state_name = state.strip().replace('"', '')
    try:
        found_state = State.objects.get(name=state_name, country=parent_country)
    except State.DoesNotExist:
        # Unknown state: the city is still queued, just without a state.
        parent_state = None
    else:
        parent_state = found_state

    pending = City(name=name.strip().replace('"', ''), state=parent_state, postcode='')
    city_list.append(pending)

    country_list = []
    state_list = []
    city_list = []

    print "Countries"
    print time.strftime("%H:%M:%S")
    with open('country.csv', 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for counrow in myreader:
            add_country(counrow[0], counrow[1], adminuser, adminuser, country_list)

    Country.objects.bulk_create(country_list)

    print "States"
    print time.strftime("%H:%M:%S")
    with open('state.csv', 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for counrow in myreader:
            add_state(counrow[0], counrow[1], counrow[2], counrow[3], adminuser, adminuser, state_list)

    State.objects.bulk_create(state_list)

    print "Cities 1"
    print time.strftime("%H:%M:%S")
    with open('city1.csv', 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for counrow in myreader:
            add_city(counrow[0], counrow[1], counrow[2], adminuser, adminuser, city_list)

    City.objects.bulk_create(city_list)

    print "Cities 2"
    print time.strftime("%H:%M:%S")
    city_list = []
    with open('city2.csv', 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for counrow in myreader:
            add_city(counrow[0], counrow[1], counrow[2], adminuser, adminuser, city_list)

    City.objects.bulk_create(city_list)

    print "Cities 3"
    print time.strftime("%H:%M:%S")
    city_list = []
    with open('city3.csv', 'rb') as csvfile:
        myreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for counrow in myreader:
            add_city(counrow[0], counrow[1], counrow[2], adminuser, adminuser, city_list)

    City.objects.bulk_create(city_list)

1 个答案:

答案 0 :(得分:1)

您应该修改代码,在更新过程的最后使用批量操作——bulk_create。