如何在不使用递归的情况下遍历和构建多维字典

时间:2014-10-24 21:29:15

标签: python algorithm python-2.7 recursion

我有一条json消息,在最高级别,我有一个未知深度和结构的字典,我希望遍历它以格式化它,最后得到一个新的,格式化的字典。在使用timeit之后,我发现它非常慢并且发现python中的递归根本不是很快。所有这些都被理解,我不知道如何实际转换我的递归函数" Foo.format_it"如果可能的话,进入基于循环的循环。

import time
import json
class Foo(object):
    def __init__(self):
        self.msg_out = {}
        self.msg_in  = None
        self.sample_data = """
        {
            "data": {
                        "a": "",
                        "b": "",
                        "c": "127.0.0.1",
                        "d": 80,
                        "e": {"f": false,"g": false,"h": false,"i": false,"j": false,"k": false},
                        "l": [ {"ii": 2, "hh": 10, "gg": 200, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": 3, "ee": 0},
                               {"ii": 5, "hh": 20, "gg": 300, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1},
                               {"ii": 5, "hh": 30, "gg": -400, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1}],
                        "m": true,
                        "n": true,
                        "o": 1000,
                        "p": 2000,
                        "q": "",
                        "r": 5,
                        "s": 0,
                        "t": true,
                        "u": true,
                        "v": {"jj": 5, "kk": 0, "ll": 10, "mm": 9, "nn":  [ { "aa": 20, "bb": 30 }, { "aa": 20, "bb": 30 } ] }
                    }
        }
        """
    def format(self, msg_in):
        print msg_in
        self.msg_in = json.loads( msg_in )
        self.msg_out = {}
        self.format_it(self.msg_in, self.msg_out)
        import pprint
        print pprint.pformat(self.msg_out)
        return json.dumps( self.msg_out )

    def ff(self, val, out_struct):
        if int(val) < 0:
            out_struct[u'ff'] = ""
        else:
            out_struct[u'ff'] = str(val)

    def format_it(self, item, out_struct):
        if isinstance(item, dict):
            for dict_key, dict_val in item.iteritems():
                if dict_key in dir(self):
                    dict_key = getattr(self, dict_key)(dict_val, out_struct)
                if dict_key:
                    if isinstance(dict_val, dict):
                        out_struct[dict_key] = {}
                        self.format_it(dict_val, out_struct[dict_key])
                    elif isinstance(dict_val, list):
                        out_struct[dict_key] = []
                        self.format_it(dict_val, out_struct[dict_key])
                    else:
                        out_struct[dict_key] = dict_val
        elif isinstance(item, list):
            for list_val in item:
                if isinstance(list_val, dict):
                    out_struct.append({})
                    self.format_it(list_val, out_struct[-1])
                elif isinstance(list_val, list):
                    out_struct.append([])
                    self.format_it(list_val, out_struct[-1])
                else:
                    out_struct.append(list_val)
        else:
            pass

if __name__ == "__main__":
    tic = time.clock()
    f = Foo()
    f.format(f.sample_data)
    print (time.clock()-tic)

以下是每个请求的数据和输出数据,在最简单的情况下,只有关键字&f 39&#39;需要格式化,所以-1变为空字符串:

[IN]
        {
            "data": {
                        "a": "",
                        "b": "",
                        "c": "127.0.0.1",
                        "d": 80,
                        "e": {"f": false,"g": false,"h": false,"i": false,"j": false,"k": false},
                        "l": [ {"ii": 2, "hh": 10, "gg": 200, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": 3, "ee": 0},
                               {"ii": 5, "hh": 20, "gg": 300, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1},
                               {"ii": 5, "hh": 30, "gg": -400, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1}],
                        "m": true,
                        "n": true,
                        "o": 1000,
                        "p": 2000,
                        "q": "",
                        "r": 5,
                        "s": 0,
                        "t": true,
                        "u": true,
                        "v": {"jj": 5, "kk": 0, "ll": 10, "mm": 9, "nn":  [ { "aa": 20, "bb": 30 }, { "aa": 20, "bb": 30 } ] }
                    }
        }
[OUT]
{u'data': {u'a': u'',
           u'b': u'',
           u'c': u'127.0.0.1',
           u'd': 80,
           u'e': {u'f': False,
                  u'g': False,
                  u'h': False,
                  u'i': False,
                  u'j': False,
                  u'k': False},
           u'l': [{u'aa': -1,
                   u'bb': -1,
                   u'cc': -1,
                   u'dd': 3,
                   u'ee': 0,
                   u'ff': '',
                   u'gg': 200,
                   u'hh': 10,
                   u'ii': 2},
                  {u'aa': -1,
                   u'bb': -1,
                   u'cc': -1,
                   u'dd': -1,
                   u'ee': -1,
                   u'ff': '',
                   u'gg': 300,
                   u'hh': 20,
                   u'ii': 5},
                  {u'aa': -1,
                   u'bb': -1,
                   u'cc': -1,
                   u'dd': -1,
                   u'ee': -1,
                   u'ff': '',
                   u'gg': -400,
                   u'hh': 30,
                   u'ii': 5}],
           u'm': True,
           u'n': True,
           u'o': 1000,
           u'p': 2000,
           u'q': u'',
           u'r': 5,
           u's': 0,
           u't': True,
           u'u': True,
           u'v': {u'jj': 5,
                  u'kk': 0,
                  u'll': 10,
                  u'mm': 9,
                  u'nn': [{u'aa': 20, u'bb': 30}, {u'aa': 20, u'bb': 30}]}}}

代码有点削减并使用tic / toc vs timeit。在使用两者时,只是递归的执行似乎是在.0012s左右(我甚至从时间计算中删除了对象创建和json加载)。

0 个答案:

没有答案