使用多线程进行数据迁移

时间:2018-05-12 14:20:45

标签: django python-multithreading data-migration

我需要使用csv文件中的初始数据填充数据库。由于csv文件中的数据非常大,我试图在数据迁移中使用多线程。但是这样做我经常遇到错误。我试过了阻挡和锁定,但似乎没有任何东西对我有利。这是我的迁移文件。

from django.db import migrations
import csv
from django.db import transaction
# from multiprocessing import Pool
import threading
import time


def process_data(data,State,District,Region,Pincode):
    for row in data:
        state=row['statename']
        district=row['districtname']
        region=row['officename'].split(' '+row['officetype'])[0]
        pincode=row['pincode']
        try:
            state_object = State.objects.get(name=state)
        except State.DoesNotExist:
            state_object = State.objects.create(name=state)
        try:
            district_object = District.objects.get(
                name=district,
                state=state_object
            )
        except District.DoesNotExist:
            district_object = District.objects.create(
                name=district,
                state=state_object
            )
        try:
            pincode_object = Pincode.objects.get(
                value=pincode,
                district=district_object
            )
        except Pincode.DoesNotExist:
            pincode_object = Pincode.objects.create(
                value=pincode,
                district=district_object
            )
        try:
            Region.objects.get(
                name=region,
                district=district_object,
                pincode=pincode_object
            )
        except Region.DoesNotExist:
            Region.objects.create(
                name=region,
                district=district_object,
                pincode=pincode_object
            )


def add_data(apps, schema_editor):
    State = apps.get_model('locations', 'State')
    District = apps.get_model('locations', 'District')
    Region = apps.get_model('locations', 'Region')
    Pincode = apps.get_model('locations', 'Pincode')
    thread_list = []
    filename = 'All_India_pincode_data_26022018.csv'
    file_data = csv.DictReader(open(filename,'r',errors='ignore'))
    count=0
    rows=[]
    for row in file_data:
        rows.append(row)
        count+=1
        if count==10000:
            thread_list.append(threading.Thread(target=process_data, 
            args=(rows,State,District,Region,Pincode)))
            rows=[]
            print(rows,len(thread_list))
            count=0
    if count>0:
        thread_list.append(threading.Thread(target=process_data, 
        args=(rows,State,District,Region,Pincode)))
    for thread in thread_list:
        thread.start()
    for thread in thread_list:
        thread.join()
    print('Number of threads: ',len(thread_list))


class Migration(migrations.Migration):

    dependencies = [
        ('locations', '0001_initial'),
    ]

    operations = [
        migrations.RunPython(add_data)
    ]

这是我得到的堆栈跟踪的一小部分。

Exception in thread Thread-15:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(232101) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(232101) already exists.


Exception in thread Thread-16:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(713150) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(713150) already exists.


Exception in thread Thread-12:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 60, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Region matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Rayagiri, 49594) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 66, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Rayagiri, 49594) already exists.


Exception in thread Thread-10:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(755019) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(755019) already exists.


Exception in thread Thread-11:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 60, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Region matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Jajod, 49630) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 66, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Jajod, 49630) already exists.

0 个答案:

没有答案