我有一条json消息,在最高级别,我有一个未知深度和结构的字典,我希望遍历它以格式化它,最后得到一个新的,格式化的字典。在使用timeit之后,我发现它非常慢并且发现python中的递归根本不是很快。所有这些都被理解,我不知道如何实际转换我的递归函数" Foo.format_it"如果可能的话,进入基于循环的循环。
import time
import json
class Foo(object):
def __init__(self):
self.msg_out = {}
self.msg_in = None
self.sample_data = """
{
"data": {
"a": "",
"b": "",
"c": "127.0.0.1",
"d": 80,
"e": {"f": false,"g": false,"h": false,"i": false,"j": false,"k": false},
"l": [ {"ii": 2, "hh": 10, "gg": 200, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": 3, "ee": 0},
{"ii": 5, "hh": 20, "gg": 300, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1},
{"ii": 5, "hh": 30, "gg": -400, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1}],
"m": true,
"n": true,
"o": 1000,
"p": 2000,
"q": "",
"r": 5,
"s": 0,
"t": true,
"u": true,
"v": {"jj": 5, "kk": 0, "ll": 10, "mm": 9, "nn": [ { "aa": 20, "bb": 30 }, { "aa": 20, "bb": 30 } ] }
}
}
"""
def format(self, msg_in):
print msg_in
self.msg_in = json.loads( msg_in )
self.msg_out = {}
self.format_it(self.msg_in, self.msg_out)
import pprint
print pprint.pformat(self.msg_out)
return json.dumps( self.msg_out )
def ff(self, val, out_struct):
if int(val) < 0:
out_struct[u'ff'] = ""
else:
out_struct[u'ff'] = str(val)
def format_it(self, item, out_struct):
if isinstance(item, dict):
for dict_key, dict_val in item.iteritems():
if dict_key in dir(self):
dict_key = getattr(self, dict_key)(dict_val, out_struct)
if dict_key:
if isinstance(dict_val, dict):
out_struct[dict_key] = {}
self.format_it(dict_val, out_struct[dict_key])
elif isinstance(dict_val, list):
out_struct[dict_key] = []
self.format_it(dict_val, out_struct[dict_key])
else:
out_struct[dict_key] = dict_val
elif isinstance(item, list):
for list_val in item:
if isinstance(list_val, dict):
out_struct.append({})
self.format_it(list_val, out_struct[-1])
elif isinstance(list_val, list):
out_struct.append([])
self.format_it(list_val, out_struct[-1])
else:
out_struct.append(list_val)
else:
pass
if __name__ == "__main__":
tic = time.clock()
f = Foo()
f.format(f.sample_data)
print (time.clock()-tic)
以下是每个请求的数据和输出数据,在最简单的情况下,只有关键字&f 39&#39;需要格式化,所以-1变为空字符串:
[IN]
{
"data": {
"a": "",
"b": "",
"c": "127.0.0.1",
"d": 80,
"e": {"f": false,"g": false,"h": false,"i": false,"j": false,"k": false},
"l": [ {"ii": 2, "hh": 10, "gg": 200, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": 3, "ee": 0},
{"ii": 5, "hh": 20, "gg": 300, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1},
{"ii": 5, "hh": 30, "gg": -400, "aa": -1, "bb": -1, "ff":-1, "cc": -1, "dd": -1, "ee": -1}],
"m": true,
"n": true,
"o": 1000,
"p": 2000,
"q": "",
"r": 5,
"s": 0,
"t": true,
"u": true,
"v": {"jj": 5, "kk": 0, "ll": 10, "mm": 9, "nn": [ { "aa": 20, "bb": 30 }, { "aa": 20, "bb": 30 } ] }
}
}
[OUT]
{u'data': {u'a': u'',
u'b': u'',
u'c': u'127.0.0.1',
u'd': 80,
u'e': {u'f': False,
u'g': False,
u'h': False,
u'i': False,
u'j': False,
u'k': False},
u'l': [{u'aa': -1,
u'bb': -1,
u'cc': -1,
u'dd': 3,
u'ee': 0,
u'ff': '',
u'gg': 200,
u'hh': 10,
u'ii': 2},
{u'aa': -1,
u'bb': -1,
u'cc': -1,
u'dd': -1,
u'ee': -1,
u'ff': '',
u'gg': 300,
u'hh': 20,
u'ii': 5},
{u'aa': -1,
u'bb': -1,
u'cc': -1,
u'dd': -1,
u'ee': -1,
u'ff': '',
u'gg': -400,
u'hh': 30,
u'ii': 5}],
u'm': True,
u'n': True,
u'o': 1000,
u'p': 2000,
u'q': u'',
u'r': 5,
u's': 0,
u't': True,
u'u': True,
u'v': {u'jj': 5,
u'kk': 0,
u'll': 10,
u'mm': 9,
u'nn': [{u'aa': 20, u'bb': 30}, {u'aa': 20, u'bb': 30}]}}}
代码有点削减并使用tic / toc vs timeit。在使用两者时,只是递归的执行似乎是在.0012s左右(我甚至从时间计算中删除了对象创建和json加载)。