Python试图重构(干掉)一个长控制流

时间:2011-09-29 15:04:00

标签: python refactoring dry python-2.5 control-flow

我从SQL查询中获取了大量数据,需要很长时间才能运行。由于SQL查询需要很长时间才能运行,因此我以最精细的形式从数据库中获取数据。然后我循环浏览一下这些数据并将其汇总到对我有用的表格中。

我的问题是我一遍又一遍地重复自己。但是,我不确定重构此控制流的最佳方法。提前谢谢!

def processClickOutData(cls, raw_data):
    singles = {}
    total={}
    absolute_total = 0
    channels = {}

    singles_true = {}
    total_true={}
    channels_true = {}
    absolute_total_true = 0

    list_channels = set([])
    list_tids = set([])


    total_position = {}
    total_position_true = {}
    tid_position = {}
    channel_position = {}
    channel_position_true = {}
    tid_position_true = {}

    for row in raw_data:
        gap=row[0]
        count=row[1]
        tid=row[2]
        prefered=row[3]
        channel=row[4]
        position=row[5]

        list_channels.add(channel)
        list_tids.add(tid)


        absolute_total += int(count)

        if total.has_key(gap):
            total[gap] += count
        else:
            total[gap] = count

        if singles.has_key(gap) and singles[gap].has_key(tid):
            singles[gap][tid] += count
        elif singles.has_key(gap):
            singles[gap][tid] = count
        else:
            singles[gap] = {}
            singles[gap][tid] = count

        if channels.has_key(gap) and channels[gap].has_key(channel):
            channels[gap][channel] += count
        elif channels.has_key(gap):
            channels[gap][channel] = count
        else:
            channels[gap] = {}
            channels[gap][channel] = count
        if total_position.has_key(position):
            total_position[position] += count
        else:
            total_position[position] = count
        if tid_position.has_key(position) and tid_position[position].has_key(tid):
            tid_position[position][tid] += count     
        elif tid_position.has_key(position):
            tid_position[position][tid] = count
        else:
            tid_position[position] = {}
            tid_position[position][tid] = count

        if channel_position.has_key(position) and channel_position[position].has_key(channel):
            channel_position[position][channel] += count     
        elif channel_position.has_key(position):
            channel_position[position][channel] = count
        else:
            channel_position[position] = {}
            channel_position[position][channel] = count

        if prefered == 0:
            absolute_total_true += count
            if total_true.has_key(gap):
                total_true[gap] += count
            else:
                total_true[gap] = count

            if singles_true.has_key(gap) and singles_true[gap].has_key(tid):
                singles_true[gap][tid] += count
            elif singles_true.has_key(gap):
                singles_true[gap][tid] = count
            else:
                singles_true[gap] = {}
                singles_true[gap][tid] = count

            if channels_true.has_key(gap) and channels_true[gap].has_key(channel):
               channels_true[gap][channel] += count
            elif channels_true.has_key(gap):
               channels_true[gap][channel] = count
            else:
               channels_true[gap] = {}
               channels_true[gap][channel] = count

            if total_position_true.has_key(position):
               total_position_true[position] += count
            else:
               total_position_true[position] = count 

            if tid_position_true.has_key(position) and tid_position_true[position].has_key(tid):
               tid_position_true[position][tid] += count     
            elif tid_position_true.has_key(position):
               tid_position_true[position][tid] = count
            else:
               tid_position_true[position] = {}
               tid_position_true[position][tid] = count

            if channel_position_true.has_key(position) and channel_position_true[position].has_key(channel):
               channel_position_true[position][channel] += count     
            elif channel_position_true.has_key(position):
               channel_position_true[position][channel] = count
            else:
               channel_position_true[position] = {}
               channel_position_true[position][channel] = count




    final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
                     "total_position" :  total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
    return final_values

1 个答案:

答案 0 :(得分:1)

您用于存储数据的整个结构可能是错误的,但由于我不知道您是如何使用它的,所以我无法帮助您。

您可以使用collections.defaultdict删除这些has_key()来电的所有。注意{@ 1}}已被弃用,您应该只使用thedict.has_key(key)

看看我如何更改key in thedict循环 - 您可以在for语句中分配名称,无需单独执行。

for

如果密钥不存在,这样做会自动填写正确的默认值。你在这里有两种。在您添加from collections import defaultdict def processClickOutData(cls, raw_data): absolute_total = 0 absolute_total_true = 0 list_channels = set() list_tids = set() total = defaultdict(int) total_true = defaultdict(int) total_position = defaultdict(int) total_position_true = defaultdict(int) def defaultdict_int(): return defaultdict(int) singles = defaultdict(defaultdict_int) singles_true = defaultdict(defaultdict_int) channels = defaultdict(defaultdict_int) channels_true = defaultdict(defaultdict_int) tid_position = defaultdict(defaultdict_int) tid_position_true = defaultdict(defaultdict_int) channel_position = defaultdict(defaultdict_int) channel_position_true = defaultdict(defaultdict_int) for gap, count, prefered, channel, position in raw_data: list_channels.add(channel) list_tids.add(tid) absolute_total += count total[gap] += count singles[gap][tid] += count channels[gap][channel] += count total_position[position] += count tid_position[position][tid] += count channel_position[position][channel] += count if prefered == 0: absolute_total_true += count total_true[gap] += count singles_true[gap][tid] += count channels_true[gap][channel] += count total_position_true[position] += count tid_position_true[position][tid] += count channel_position_true[position][channel] += count final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true, "total_position" : total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true } return final_values 的位置,如果int不存在,您希望从0开始 - 这是int返回的内容,因此defaultdict(int)。如果要添加添加int的字典,则需要使用返回defaultdict(int) defaultdict_int的函数。

修改:建议的备用词典结构:

position = defaultdict(lambda: defaultdict(defaultdict_int))
gap = defaultdict(lambda: defaultdict(defaultdict_int))
absolute_total = 0

for gap, count, prefered, channel, position in raw_data:
    absolute_total += count

    posd = position[position]
    posd.setdefault('total', 0)
    posd['total'] += count
    posd['tid'][tid] += count
    posd['channel'][channel] += count

    gapd = gap[gap]
    gapd.setdefault('total', 0)
    gapd['total'] += count
    gapd['tid'][tid] += count
    gapd['channel'][channel] += count

同样对_true版本执行相同操作,您已从12 dict更新为4。