Django 中插入多层嵌套模型数据的性能问题

时间:2018-10-11 19:13:59

标签: django performance django-serializer

我有以下模型定义:

class Workflow(models.Model):
    """Root of the nesting chain Workflow -> Step -> Section -> Question -> Option -> Action."""
    name = models.CharField(max_length=255)

class Step(models.Model):
    """A step of a workflow; reachable from the parent as ``workflow.steps``."""

    # The parent of a Step is a Workflow (Workflow -> Step -> Section -> ...).
    # Pointing this key at Section was wrong and also referenced a class that
    # is not defined yet at this point in the module.
    workflow = models.ForeignKey(Workflow, on_delete=models.CASCADE, related_name='steps')
    title = models.CharField(max_length=255)

class Section(models.Model):
    """A section of a step; reachable from the parent as ``step.sections``."""

    body = models.CharField(max_length=255)
    # The parent of a Section is a Step. The previous target was Section
    # itself, a name that is not bound yet inside its own class body.
    step = models.ForeignKey(Step, on_delete=models.CASCADE, related_name='sections')

class Question(models.Model):
    """A question inside a section; reachable from the parent as ``section.questions``."""

    description = models.CharField(max_length=255)
    section = models.ForeignKey(
        Section,
        on_delete=models.CASCADE,
        related_name='questions',
    )

class Option(models.Model):
    """An option of a question; reachable from the parent as ``question.options``."""

    set_fail = models.BooleanField()
    question = models.ForeignKey(
        Question,
        on_delete=models.CASCADE,
        related_name='options',
    )

class Action(models.Model):
    """An action attached to an option; reachable from the parent as ``option.actions``."""

    yes_no = models.BooleanField()
    option = models.ForeignKey(
        Option,
        on_delete=models.CASCADE,
        related_name='actions',
    )

# Workflow -> Step -> Section -> Question -> Option -> Action

我从客户端发送了以下用于插入数据的正文(请求正文很大,无法在此处粘贴)

https://jsoneditoronline.org/?id=e970abc01b2a489c9933464867d11eaf

可以看到数据量很大,每一层级都有多条记录,因此执行插入操作非常耗时。

我目前正在采用这种插入方法:

class WorkflowUpdateSerializer(serializers.Serializer):
    """Insert a nested workflow payload level by level.

    Every parent row triggers its own ``bulk_create`` for its children, so the
    number of queries grows with the number of parents at each level.
    """

    @staticmethod
    def _without(mapping, excluded_key):
        """Return a shallow copy of ``mapping`` that lacks ``excluded_key``."""
        return {key: value for key, value in mapping.items() if key != excluded_key}

    def update(self, workflow, data):
        """Create all nested rows below ``workflow`` and return ``workflow``.

        ``data`` is the request JSON body (a list of step dicts).
        """
        self.update_steps(workflow, data)
        # DRF's save() asserts that update() returns the instance.
        return workflow

    def update_steps(self, workflow, steps):
        """Bulk-create the steps of ``workflow``, then descend into their sections.

        Raises ``KeyError`` if a step dict has no ``'sections'`` entry.
        """
        step_instances = Step.objects.bulk_create(
            [Step(workflow=workflow, **self._without(step, 'sections')) for step in steps])

        # Read the children with [] instead of pop() so the caller's payload
        # is left untouched.
        for step_instance, step in zip(step_instances, steps):
            self.update_sections(step_instance, step['sections'])

    def update_sections(self, step, sections):
        """Bulk-create the sections of ``step``, then descend into their questions.

        Raises ``KeyError`` if a section dict has no ``'questions'`` entry.
        """
        section_instances = Section.objects.bulk_create(
            [Section(step=step, **self._without(section, 'questions')) for section in sections])

        for section_instance, section in zip(section_instances, sections):
            self.update_questions(section=section_instance, questions=section['questions'])

    def update_questions(self, section, questions):
        # code (body not shown in this excerpt)
        ...

    def update_options(self, question, options):
        # code (body not shown in this excerpt)
        ...

    def update_actions(self, option, actions):
        # code (body not shown in this excerpt)
        ...

您是否有任何改进的想法?

谢谢。

1 个答案:

答案 0 :(得分:2)

这是我的解决方案。它对每个模型只用一次数据库调用来批量创建该模型的全部实例,因此总共只需要 5 次批量插入。

class WorkflowUpdateSerializer(serializers.Serializer):
    """Persist a nested workflow payload using one bulk insert per model."""

    steps = serializers.JSONField()

    def update(self, workflow, validated_data):
        """Create every Step, Section, Question, Option and Action below ``workflow``.

        Each level is flattened into a single list and written with one
        ``bulk_create`` call. The flattened payload list of a level is built
        in the same order as the instances of that level, so the two lists can
        be zipped to give every child its parent.

        Returns the ``workflow`` instance that was passed in.
        """

        def strip_key(payload, unwanted):
            # Shallow copy of the payload without its nested-children entry.
            trimmed = dict(payload)
            trimmed.pop(unwanted, None)
            return trimmed

        step_payloads = [entry['step'] for entry in validated_data['steps']]

        # Level 1: steps (also collect the section payloads, flattened).
        section_payloads = []
        new_steps = []
        for step_payload in step_payloads:
            section_payloads += step_payload['section']
            new_steps.append(
                Step(workflow=workflow, **strip_key(step_payload, 'section')))
        saved_steps = Step.objects.bulk_create(new_steps)

        # Level 2: sections (also collect the question payloads, flattened).
        question_payloads = []
        new_sections = []
        for parent_step, step_payload in zip(saved_steps, step_payloads):
            for section_payload in step_payload['section']:
                question_payloads += section_payload['questions']
                new_sections.append(
                    Section(step=parent_step, **strip_key(section_payload, 'questions')))
        saved_sections = Section.objects.bulk_create(new_sections)

        # Level 3: questions (also collect the option payloads, flattened).
        option_payloads = []
        new_questions = []
        for parent_section, section_payload in zip(saved_sections, section_payloads):
            for question_payload in section_payload['questions']:
                option_payloads += question_payload['options']
                new_questions.append(
                    Question(section=parent_section, **strip_key(question_payload, 'options')))
        saved_questions = Question.objects.bulk_create(new_questions)

        # Level 4: options. The action payloads are not needed as a flat list
        # afterwards, but each option's 'actions' entry is still read here so a
        # missing key is reported before the options are inserted.
        action_payloads = []
        new_options = []
        for parent_question, question_payload in zip(saved_questions, question_payloads):
            for option_payload in question_payload['options']:
                action_payloads += option_payload['actions']
                new_options.append(
                    Option(question=parent_question, **strip_key(option_payload, 'actions')))
        saved_options = Option.objects.bulk_create(new_options)

        # Level 5: actions are leaves, so their payloads are passed through whole.
        new_actions = [
            Action(option=parent_option, **action_payload)
            for parent_option, option_payload in zip(saved_options, option_payloads)
            for action_payload in option_payload['actions']
        ]
        Action.objects.bulk_create(new_actions)

        return workflow

请注意,要让 bulk_create 返回的实例带有主键 ID,模型的主键不能是 AutoField(对于 AutoField,只有部分数据库后端(例如 PostgreSQL)才会回填主键)。因此我创建了如下的抽象 BaseModel:

# models.py
class BaseModel(models.Model):
    """Abstract base whose primary key is a UUID generated in Python."""

    # uuid.uuid4 is passed as a callable, so each new instance gets its own value.
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
    )

    class Meta:
        abstract = True

class Step(BaseModel):
    """A step of a workflow; reachable from the parent as ``workflow.steps``."""

    workflow = models.ForeignKey(
        Workflow,
        on_delete=models.CASCADE,
        related_name='steps',
    )
    title = models.CharField(max_length=255)

...

这是我测试序列化器的方式:

# tests.py   
# Load the JSON fixture once at import time. The encoding is given explicitly
# so parsing does not depend on the platform's default locale encoding.
with open('./data.json', encoding='utf-8') as f:
    data = json.load(f)


class TestSerializer(TestCase):
    """Checks that saving the fixture payload creates the expected row counts."""

    def test_serializer(self):
        """Run the serializer on the loaded fixture and count the rows per model."""
        workflow = Workflow.objects.create(name='test')
        payload = {'steps': data}

        serializer = WorkflowUpdateSerializer(instance=workflow, data=payload)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        # Checked from the top of the hierarchy down to the leaves.
        expected_counts = (
            (Step, 3),
            (Section, 9),
            (Question, 18),
            (Option, 54),
            (Action, 162),
        )
        for model, expected in expected_counts:
            self.assertEqual(model.objects.count(), expected)