复杂的Python JSON对象到自定义字典转换

时间:2013-07-11 18:32:13

标签: python json

我有以下JSON对象:

{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}

我正在寻找一种遍历JSON的方法,以获取所有的键,并将上面的JSON转换为以下预期的Python字典:

d = { 'ESX_5.x' : 
        { 
            'Bravo' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Bravo",
                    "cpuUsage" :
                        {
                            "from_01:04:00_to_01:05:00" : 26.00,
                            "from_01:03:00_to_01:04:00" : 24.00,
                            "from_01:02:00_to_01:03:00" : 36.00,
                            "from_01:01:00_to_01:02:00" : 38.00,
                            "from_01:00:00_to_01:01:00" : 37.00,
                            "interval" : 60
                        },
                    "vm" :
                        {
                            "Tango" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature" : "vm#Tango",
                                    "cpuUsage" :
                                        {
                                            "from_04:04:00_to_04:05:00" : 12.00,
                                            "interval" : 60
                                        }
                                },
                            "Charlie" :
                                {
                                    "@typeDisplayName" : "Virtual Machine",
                                    "@signature": "vm#Charlie",
                                    "cpuUsage" : 
                                        {
                                            "from_04:19:00_to_04:20:00" : "12.00",
                                            "from_04:18:00_to_04:19:00" : "12.00",
                                            "@interval": "60",
                                        }
                                }
                        },
                },
            'Alpha' :
                {
                    "@typeDisplayName" : "ESX Server",
                    "@signature" : "h#Alpha",
                    "cpuUsage" :
                        {
                            "from_06:09:00_to_06:10:00" : 9,
                            "@interval": "60"
                        },
                    "cpuUsagemhz" :
                        {
                            "from_06:09:00_to_06:10:00" : 479,
                            "@interval": "60"
                        }
                }
        }
    }

需要一个递归函数来提取 Resource、PerfList 和 Data,并据此构建自定义字典。

上面的预期字典是手工编写的,可能存在拼写错误或语法错误。

这是我目前的代码,但是对于N层嵌套的资源,它会失败。

import json

class MQLPrettyPrint(object):
    """Load a JSON performance report and regroup its nested resources.

    The expected document layout is
    ``{"Response": {"Results": {"Resource": ...}}}``.  Inside it,
    ``Resource``, ``PerfList`` and ``Data`` may each be a single object or
    a list of objects; every ``fetch_*`` method accepts both forms.
    Resources may nest to any depth through their own ``Resource`` key.
    """
    KEY_RESPONSE = 'Response'
    KEY_RESULTS = 'Results'
    KEY_RESOURCE = 'Resource'

    # Alias for the builtin type.  Some methods keep a parameter named
    # ``dict`` for backward compatibility, which hides the builtin inside
    # their bodies; ``__builtins__.dict`` is not a safe substitute because
    # ``__builtins__`` is a plain dict in imported modules.
    _DICT_TYPE = dict

    def __init__(self, file=None):
        """Read and parse ``file``; with ``file=None`` start out empty."""
        self._json_file = file
        self._json_data = self.read_json_file()
        self._json_dict = self.json_to_dict()

    def json_file(self):
        """Return the path given to the constructor (may be ``None``)."""
        return self._json_file

    def read_json_file(self):
        """Return the raw text of the JSON file, or ``""`` if no file was given.

        Errors from ``open``/``read`` propagate to the caller.
        """
        if self._json_file is None:
            return ""
        # The context manager closes the handle even when read() raises.
        with open(self._json_file, "r") as handle:
            return handle.read()

    def json_to_dict(self):
        """Parse the raw text read by ``read_json_file``.

        Returns an empty dict when no file was given; otherwise whatever
        ``json.loads`` produces (``ValueError`` on malformed input).
        """
        if self._json_file is None:
            return {}
        return json.loads(self._json_data)

    def json_data(self):
        """Return the raw JSON text."""
        return self._json_data

    def json_dict(self):
        """Return the parsed JSON document."""
        return self._json_dict

    @staticmethod
    def _as_list(value):
        """Return ``value`` unchanged if it is a list, else wrap it in one."""
        return value if isinstance(value, list) else [value]

    @staticmethod
    def _follow(mapping, key, handler):
        """Return ``handler(mapping[key])``, or ``None`` if ``key`` is absent."""
        if key in mapping:
            return handler(mapping[key])
        return None

    def json2mql(self):
        """Convert the loaded document.

        Returns the dict built by ``fetch_resource``, or ``None`` when the
        ``Response``/``Results``/``Resource`` chain is incomplete.
        """
        return self._follow(self._json_dict, self.KEY_RESPONSE,
                            self.fetch_response)

    def fetch_response(self, dict):
        """Convert the ``Results`` entry of a ``Response`` object (or ``None``)."""
        return self._follow(dict, self.KEY_RESULTS, self.fetch_results)

    def fetch_results(self, dict):
        """Convert the ``Resource`` entry of a ``Results`` object (or ``None``)."""
        return self._follow(dict, self.KEY_RESOURCE, self.fetch_resource)

    def fetch_resource(self, resources, dict=None):
        """Recursively regroup resources as ``{@type: {@name: entry}}``.

        ``resources`` is one resource object or a (possibly nested) list
        of them.  Each ``entry`` holds the four header attributes from
        ``fetch_resource_header``, a ``'PerfList'`` key with the result of
        ``fetch_perf_list``, and -- merged in under their own ``@type``
        keys -- any child resources found under ``'Resource'``.

        ``dict`` is an optional mapping to merge the result into; a fresh
        one is created per call when omitted.  The mapping is returned.
        Values that are neither lists nor dicts are printed and skipped.
        The input structure is not modified.
        """
        grouped = {} if dict is None else dict
        if isinstance(resources, list):
            for resource in resources:
                self.fetch_resource(resource, grouped)
        elif isinstance(resources, self._DICT_TYPE):
            entry = self.fetch_resource_header(resources)
            entry['PerfList'] = self.fetch_perf_list(resources)
            if self.KEY_RESOURCE in resources:
                # Children are grouped by type directly inside the parent.
                self.fetch_resource(resources[self.KEY_RESOURCE], entry)
            grouped.setdefault(entry['@type'], {})[entry['@name']] = entry
        else:
            print(resources)
        return grouped

    @staticmethod
    def fetch_resource_header(resource):
        """Return a new dict with the resource's four ``@`` attributes.

        Raises ``KeyError`` if ``@name``, ``@signature``, ``@type`` or
        ``@typeDisplayName`` is missing.
        """
        return {'@name': resource['@name'],
                '@signature': resource['@signature'],
                '@type': resource['@type'],
                '@typeDisplayName': resource['@typeDisplayName']}

    # The original (misspelled) name is kept so existing callers still work.
    fetch_resouce_header = fetch_resource_header

    def fetch_perf_list(self, resource, perfDict=None):
        """Collect the resource's metrics as ``{@attrId: metric}``.

        Each ``metric`` is the header from ``fetch_perf_header`` plus a
        ``'Data'`` dict from ``fetch_data``.  Metrics sharing an ``@attrId``
        have their data merged.  A resource without ``'PerfList'`` yields
        no metrics.  ``perfDict`` is an optional mapping to merge into; it
        is returned.  Non-dict metric entries are printed and skipped.
        """
        if perfDict is None:
            perfDict = {}
        for perf in self._as_list(resource.get('PerfList', [])):
            if not isinstance(perf, dict):
                print(perf)
                continue
            header = self.fetch_perf_header(perf)
            metric = perfDict.setdefault(header['@attrId'], header)
            metric.setdefault('Data', {}).update(self.fetch_data(perf))
        return perfDict

    def fetch_perf_header(self, perfDict):
        """Return ``{'@attrId', '@attrName'}`` copied from a metric object."""
        return {'@attrId': perfDict['@attrId'],
                '@attrName': perfDict['@attrName']}

    def fetch_data(self, perfDict, dataDict=None):
        """Index a metric's data points by ``"<@start>_<@end>"``.

        The values are the original data-point dicts (not copies).  A
        metric without ``'Data'`` yields no points.  ``dataDict`` is an
        optional mapping to merge into; it is returned.  Non-dict data
        entries are printed and skipped; a data point lacking ``@start``
        or ``@end`` raises ``KeyError``.
        """
        if dataDict is None:
            dataDict = {}
        for point in self._as_list(perfDict.get('Data', [])):
            if isinstance(point, dict):
                key = "%s_%s" % (point['@start'], point['@end'])
                dataDict[key] = point
            else:
                print(point)
        return dataDict

1 个答案:

答案 0 :(得分:1)

有时,在用递归函数处理嵌套结构时,把问题拆分成"遍历函数"和"操作函数"来思考会更容易。因此,我们要找出JSON结构中包含的所有dict,并对它们逐一执行转换操作。

在处理嵌套结构时,就地转换原有结构比重新创建一个新结构要容易得多。从JSON结构构造新的嵌套dict之所以更困难,是因为必须在处理每个JSON元素的同时,把它放到新结构中正确的深度和分支上;这需要两个并行的遍历操作。

但有一点需要注意:在遍历嵌套结构的同时修改它是有风险的,因为转换操作可能会改动遍历函数当前正在迭代的列表。在本例中,在向下进入更深的分支之前,只有子节点(而不是兄弟节点)会被修改。

from copy import deepcopy
import json
from pprint import pprint
from StringIO import StringIO

# Sample input document used by the demo code in this file.
# Note that "PerfList" and "Data" each appear both as a single object and
# as a list of objects, and that a "Resource" entry can itself contain a
# nested "Resource" list.
json_str = \
'''
{
    "Resource": [
        {
            "@name": "Bravo",
            "@signature": "h#Bravo",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": {
                "@attrId": "cpuUsage",
                "@attrName": "Usage",
                "Data": [
                    {
                        "@data": "26.00",
                        "@end": "01:05:00",
                        "@interval": "60",
                        "@start": "01:04:00"
                    },
                    {
                        "@data": "24.00",
                        "@end": "01:04:00",
                        "@interval": "60",
                        "@start": "01:03:00"
                    },
                    {
                        "@data": "36.00",
                        "@end": "01:03:00",
                        "@interval": "60",
                        "@start": "01:02:00"
                    },
                    {
                        "@data": "38.00",
                        "@end": "01:02:00",
                        "@interval": "60",
                        "@start": "01:01:00"
                    },
                    {
                        "@data": "37.00",
                        "@end": "01:01:00",
                        "@interval": "60",
                        "@start": "01:00:00"
                    }
                ]
            },
            "Resource": [
                {
                    "@name": "Tango",
                    "@signature": "vm#Tango",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": {
                            "@data": "12.00",
                            "@end": "04:05:00",
                            "@interval": "60",
                            "@start": "04:04:00"
                        }
                    }
                },
                {
                    "@name": "Charlie",
                    "@signature": "vm#Charlie",
                    "@type": "vm",
                    "@typeDisplayName": "Virtual Machine",
                    "PerfList": {
                        "@attrId": "cpuUsage",
                        "@attrName": "Usage",
                        "Data": [
                            {
                                "@data": "12.00",
                                "@end": "04:20:00",
                                "@interval": "60",
                                "@start": "04:19:00"
                            },
                            {
                                "@data": "12.00",
                                "@end": "04:19:00",
                                "@interval": "60",
                                "@start": "04:18:00"
                            }
                        ]
                    }
                }
            ]
        },
        {
            "@name": "Alpha",
            "@signature": "h#Alpha",
            "@type": "ESX_5.x",
            "@typeDisplayName": "ESX Server",
            "PerfList": [
                {
                    "@attrId": "cpuUsage",
                    "@attrName": "Usage",
                    "Data": {
                        "@data": "9",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                },
                {
                    "@attrId": "cpuUsagemhz",
                    "@attrName": "Usage MHz",
                    "Data": {
                        "@data": "479",
                        "@end": "06:10:00",
                        "@interval": "60",
                        "@start": "06:09:00"
                    }
                }
            ]
        }
    ]
}
'''

def walk_fun_lim(ilist, func=None):
    '''
    Recursively walk a nested list and dict structure, running func on all dicts

    func is always called with a real list of dicts and must return an
    iterable.  For a list node it receives the dicts that are direct
    elements of that list; for a dict node it receives [node].  The dict
    elements of a list are therefore passed to func twice: once together
    with their siblings, and once on their own when they are visited.

    Returns the truthy items func returned for the top-level node, followed
    by one nested list per child that produced any truthy result.  Returns
    [] when func is not callable.  func may modify the dicts it is given;
    children are walked after func has run on their parent.
    '''
    def walk_fun_lim_helper(node, depth=0):
        collected = []
        if isinstance(node, list):
            # Build an actual list so func may index it, take its len() or
            # iterate it more than once, regardless of how filter() behaves.
            siblings = [item for item in node if isinstance(item, dict)]
            collected.extend(res for res in func(siblings) if res)
            children = node
        elif isinstance(node, dict):
            collected.extend(res for res in func([node]) if res)
            # Snapshot the values after func has (possibly) rewritten node.
            children = list(node.values())
        else:
            children = []
        for child in children:
            collected.extend(
                res for res in walk_fun_lim_helper(child, depth + 1) if res)
        # Nested levels are wrapped so that the caller can drop them as a
        # unit when they are empty.
        return [collected] if depth else collected

    if not callable(func):
        return []
    return walk_fun_lim_helper(ilist)

def transformers_robots_in_disguise(x):
    """Rewrite each dict in ``x`` in place and return an empty list.

    For every dict:

    * ``"PerfList"`` is removed; each of its entries becomes a key named
      after the entry's ``"@attrId"``, mapping
      ``"from_<@start>_to_<@end>"`` to the point's ``"@data"`` plus an
      ``"@interval"`` key taken from the last data point.
    * ``"Resource"`` is removed; each child is re-attached under
      ``dict[child "@type"][child "@name"]`` with those two attributes
      stripped from the child.

    ``"PerfList"``, ``"Data"`` and ``"Resource"`` may each hold a single
    object or a list of objects.
    """
    def listify(value):
        # A lone object is treated as a one-element list.
        if isinstance(value, list):
            return value
        return [value]

    for node in x:
        for perf in listify(node.pop("PerfList", [])):
            attr_id = perf.pop("@attrId")
            samples = {}
            for point in listify(perf.pop("Data", [])):
                window = "from_%(@start)s_to_%(@end)s" % point
                samples[window] = point["@data"]
                samples["@interval"] = point["@interval"]
            node[attr_id] = samples

        for child in listify(node.pop("Resource", [])):
            child_type = child.pop("@type")
            child_name = child.pop("@name")
            same_type = node.setdefault(child_type, {})
            same_type[child_name] = child
    return []

# Parse the sample document directly from the string; wrapping it in a
# file-like object only to call json.load() is unnecessary.
json_data = json.loads(json_str)
# Transform a deep copy in place so json_data keeps the original layout.
data_copy = deepcopy(json_data)
walk_fun_lim(data_copy, transformers_robots_in_disguise)
pprint(data_copy)