数据提取和转换效率

时间:2014-04-16 04:30:33

标签: python mysql

我有一个Python脚本连接到MySQL数据库并执行许多嵌套的SELECT查询。它基本上是一个巨大的循环。数据库的结构使得企业有菜单,菜单有章节,而章节有项目。该脚本查询所有企业,并为每个企业查询其所有菜单,依此类推。它构建了一个大字典,然后以JSON的形式吐出。

它看起来像这样:

#!/usr/bin/env python

from bottle import route, run
import mysql.connector
import json
import collections
import datetime

# Column names, in table order, used as JSON keys for each kind of row.
_BUSINESS_KEYS = ('id', 'business_name', 'business_address1', 'business_address2',
                  'business_city', 'business_state', 'business_zip')
_MENU_KEYS = ('id', 'menu_name', 'menu_description', 'menu_note', 'business_id')
_SECTION_KEYS = ('id', 'section_name', 'section_note', 'section_description', 'menu_id')
_ITEM_KEYS = ('id', 'item_name', 'item_description', 'item_note', 'item_price', 'section_id')


def _rowToDict(keys, row):
    """Map the leading columns of ``row`` onto ``keys``, keeping key order."""
    rowDict = collections.OrderedDict()
    for idx, key in enumerate(keys):
        rowDict[key] = row[idx]
    return rowDict


def _loadItems(cursor, sectionId):
    """Return the menu_item rows of one section as a list of ordered dicts."""
    cursor.execute("SELECT * FROM menu_item WHERE section_id = %s", (sectionId,))
    return [_rowToDict(_ITEM_KEYS, itemRow) for itemRow in cursor.fetchall()]


def _loadSections(cursor, menuId):
    """Return the menu_section rows of one menu, each with its items attached."""
    cursor.execute("SELECT * FROM menu_section WHERE menu_id = %s", (menuId,))
    # fetchall() drains the result set before the cursor is reused for items.
    sectionObjects = [_rowToDict(_SECTION_KEYS, sectionRow) for sectionRow in cursor.fetchall()]
    for sectionDict in sectionObjects:
        itemObjects = _loadItems(cursor, sectionDict['id'])
        # The 'items' key is only present when the section has at least one item.
        if itemObjects:
            sectionDict['items'] = itemObjects
    return sectionObjects


def _loadMenus(cursor, businessId):
    """Return the menu rows of one business, each with its sections attached."""
    cursor.execute("SELECT * FROM menu WHERE business_id = %s", (businessId,))
    menuObjects = [_rowToDict(_MENU_KEYS, menuRow) for menuRow in cursor.fetchall()]
    for menuDict in menuObjects:
        sectionObjects = _loadSections(cursor, menuDict['id'])
        # The 'sections' key is only present when the menu has at least one section.
        if sectionObjects:
            menuDict['sections'] = sectionObjects
    return menuObjects


def getBusinesses():
    """Load every business with its menus, sections and items.

    Returns:
        A dict ``{'businesses': [...]}``. Each business is an OrderedDict of
        its columns plus a ``'menus'`` list (always present, possibly empty).
        Menus carry a ``'sections'`` list and sections an ``'items'`` list,
        but only when those lists are non-empty.

    One query is issued per business, per menu and per section, so the number
    of round trips to the database grows with the number of rows.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration outside the source file.
    conn = mysql.connector.connect(user="APIUser", password="abc123", host="12.34.56.78", port="54321", database="businesses")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT * FROM business")
            businessRows = cursor.fetchall()

            businessObjects = []
            for businessRow in businessRows:
                # Progress output; the parenthesised form works on Python 2 and 3.
                print(businessRow[0])
                businessDict = _rowToDict(_BUSINESS_KEYS, businessRow)
                businessDict['menus'] = _loadMenus(cursor, businessDict['id'])
                businessObjects.append(businessDict)
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()

    return {'businesses': businessObjects}

@route('/test')
def index():
    """Handle /test by returning the business tree serialized as JSON text."""
    payload = getBusinesses()
    return json.dumps(payload)

# Start the Bottle server on 192.168.1.70:7070 so the /test route is served.
run(host='192.168.1.70', port=7070)

我想知道这是否是一种有效的做事方式。当我远程部署我的数据库(WebFaction)并在本地运行Bottle服务器时,几乎需要40秒才能返回几百行。所以似乎有些不对劲。我有一种直觉,可能有更好的方法来做到这一点。只是不确定那是什么方式!

1 个答案:

答案 0 :(得分:0)

如果我不得不猜测:注意代码的粗略结构是:

# Condensed outline of the nested-query structure; row-to-dict building and
# some execute() calls are left out, so this is not meant to be run as-is.
def getBusinesses():
    # Level 1: a single query for all businesses.
    businessesQuery = ("SELECT * FROM business")
    businessRows = cursor.fetchall()

    businessObjects = []
    for businessRow in businessRows:
        # Level 2: one menu query per business row.
        menuQuery = ("SELECT * FROM menu WHERE business_id = %s" % businessRow[0])
        menuRows = cursor.fetchall()


        for menuIdx, menuRow in enumerate(menuRows):
            # Level 3: one section query per menu row.
            sectionQuery = ("SELECT * FROM menu_section WHERE menu_id = %s" % menuRow[0])
            cursor.execute(sectionQuery)
            sectionRows = cursor.fetchall()

            sectionObjects = []
            for sectionIdx, sectionRow in enumerate(sectionRows):
                # Level 4: one item query per section row (the innermost loop).
                itemQuery = ("SELECT * FROM menu_item WHERE section_id = %s" % sectionRow[0])
                itemRows = cursor.fetchall()

也就是说,您在 menu、menu_section,尤其是 menu_item 的循环中反复执行几乎相同的查询。此外,您使用 fetchall() 一次性取回结果集的全部内容,但每个元素只在循环中被访问一次,而且在循环里又据此构建了另一个对象列表。

您可能想要的更像是:

# Sketch: one query per table instead of one query per parent row.
# Assumes `cursor` returns each row as a dict keyed by column name
# (e.g. created with mysql.connector's conn.cursor(dictionary=True)).

# business id -> business dict; children are nested dicts keyed by their id.
businesses = {}
cursor.execute("select * from business")
row = cursor.fetchone()
while row is not None:
    businesses[row['id']] = dict(row, menus={})
    row = cursor.fetchone()

cursor.execute("select * from menu")
row = cursor.fetchone()
while row is not None:
    businesses[row['business_id']]['menus'][row['id']] = dict(row, sections={})
    row = cursor.fetchone()

# The join brings along business_id so the section can be placed directly.
cursor.execute("select menu.business_id, menu_section.*"
               " from menu_section"
               " join menu on menu.id = menu_section.menu_id")
row = cursor.fetchone()
while row is not None:
    menu = businesses[row['business_id']]['menus'][row['menu_id']]
    menu['sections'][row['id']] = dict(row, items=[])
    row = cursor.fetchone()

# Both joins bring along the ids needed to walk down to the owning section.
cursor.execute("select menu.business_id, menu_section.menu_id, menu_item.*"
               " from menu_item"
               " join menu_section on menu_section.id = menu_item.section_id"
               " join menu on menu.id = menu_section.menu_id")
row = cursor.fetchone()
while row is not None:
    menu = businesses[row['business_id']]['menus'][row['menu_id']]
    menu['sections'][row['section_id']]['items'].append(dict(row))
    row = cursor.fetchone()

这样您发出的查询数量会少得多(每张表只查询一次),并且每次只需加载当前正在处理的那一行数据,而不是一次性取回整个结果集。