如何将此XML解析为python数组或dict?

时间:2018-02-05 22:12:28

标签: python xml elementtree xml.etree

这是我要解析的XML的片段:

<DSMs>
<DSM class="ACE" order="320"/>
<DSM class="ACS" order="1900"/>
<DSM class="Aironet" order="1050"/>
<DSM class="Apache" order="4700"/>
<DSM class="AppSecDbProtect" order="1477"/>
<DSM class="ArborNetworksPravail" order="1554">
  <Thresholds>
    <Threshold name="MinNumEvents" value="5"/>
    <Threshold name="AbandonAfterSuccessiveFailures" value="3"/>
  </Thresholds>
  <Templates>
    <Template name="DeviceName" value="Arbor Networks Pravail @ $$SOURCE_ADDRESS$$"/>
  </Templates>
</DSM>
<DSM class="ARN" order="2000"/>
<DSM class="ArpeggioSIFTIT" order="1553"/>
<DSM class="ArubaClearPass" order="545">
  <Thresholds>
    <Threshold name="MinNumEvents" value="5"/>
    <Threshold name="AbandonAfterSuccessiveFailures" value="3"/>
  </Thresholds>
  <Templates>
    <Template name="DeviceName" value="Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$"/>
    <Template name="DeviceDescription" value="Aruba ClearPass Policy Manager Device"/>
  </Templates>
 </DSM>
</DSMs>  

到目前为止我做了什么(部分代码):

# Collect the attributes of every element found directly under a <DSMs>
# child of `root` (the parsed document root, defined elsewhere).
ta_dsms = []
for level1 in root:
    if level1.tag == 'DSMs':
        for level2 in level1:
            # Copy the attribute mapping so that keys added to the result
            # later (e.g. child data) do not modify the parsed tree.
            ta_dsm = dict(level2.attrib)
            ta_dsms.append(ta_dsm)
# Function-call form of print works on both Python 2 and Python 3.
print(ta_dsms)

ta_dsms的当前输出如下:

 [{'class': 'ACE', 'order': '320'}, 
  {'class': 'ACS', 'order': '1900'}, 
 ...]

我的问题是:有什么优雅的方法可以获取 Thresholds(阈值)和 Templates(模板)信息,并把它们添加到我的数组中?只有部分 DSM 有子元素。我已经被这个问题困了一整天。谢谢你救了我!

2 个答案:

答案 0 :(得分:1)

这会得到你想要的吗?

import xml.etree.ElementTree as ET
tree = ET.parse('data.xml')
root = tree.getroot()

# Build one dict per <DSM> element: its own attributes plus, when present,
# a "Thresholds" and/or "Templates" list holding each child's attributes.
ta_dsms = []
for level1 in root:
    # Only <DSM> children of the root are collected.
    if level1.tag != 'DSM':
        continue
    d = dict(level1.attrib)
    # Walk the children once per DSM (not once per attribute), so the
    # groups are still picked up when an element has no attributes.
    for level2 in level1:
        if level2.tag in ("Thresholds", "Templates"):
            # Iterate the element directly: Element.getchildren() was
            # removed from xml.etree.ElementTree in Python 3.9.
            d[level2.tag] = [dict(c.attrib) for c in level2]
    ta_dsms.append(d)


print(ta_dsms)

结果是:

[  
   {  
      "class":"ACE",
      "order":"320"
   },
   {  
      "class":"ACS",
      "order":"1900"
   },
   {  
      "class":"Aironet",
      "order":"1050"
   },
   {  
      "class":"Apache",
      "order":"4700"
   },
   {  
      "class":"AppSecDbProtect",
      "order":"1477"
   },
   {  
      "class":"ArborNetworksPravail",
      "Thresholds":[  
         {  
            "name":"MinNumEvents",
            "value":"5"
         },
         {  
            "name":"AbandonAfterSuccessiveFailures",
            "value":"3"
         }
      ],
      "Templates":[  
         {  
            "name":"DeviceName",
            "value":"Arbor Networks Pravail @ $$SOURCE_ADDRESS$$"
         }
      ],
      "order":"1554"
   },
   {  
      "class":"ARN",
      "order":"2000"
   },
   {  
      "class":"ArpeggioSIFTIT",
      "order":"1553"
   },
   {  
      "class":"ArubaClearPass",
      "Thresholds":[  
         {  
            "name":"MinNumEvents",
            "value":"5"
         },
         {  
            "name":"AbandonAfterSuccessiveFailures",
            "value":"3"
         }
      ],
      "Templates":[  
         {  
            "name":"DeviceName",
            "value":"Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$"
         },
         {  
            "name":"DeviceDescription",
            "value":"Aruba ClearPass Policy Manager Device"
         }
      ],
      "order":"545"
   }
]

答案 1 :(得分:1)

from lxml import etree

class XmlParser(object):
    """Turn an XML file of ``<DSM>`` elements into a list of plain dicts.

    The file is read when the parser is constructed; parsing and mapping
    happen when :meth:`get_result_dict` is called.
    """

    def __init__(self, filename, **kwargs):
        """Remember *filename* and read its contents into ``self.data``.

        Extra keyword arguments are copied onto the instance as attributes.
        """
        # Per-instance result list: a class-level list would be shared by
        # every parser and keep growing across calls.
        self.results = []
        self.__dict__.update(kwargs)
        self.filename = filename
        self._process()

    def _process(self):
        """Read the whole file into ``self.data``."""
        # The context manager closes the handle even if read() raises.
        with open(self.filename, "r") as f:
            self.data = f.read()

    def get_result_dict(self):
        """Parse ``self.data`` and return the list of mapped elements."""
        self._parse()
        return self._map_to_dict()

    def _map_to_dict(self):
        """Map every child of ``self.root`` and return the new list.

        The list is rebuilt on each call, so calling this more than once
        does not accumulate duplicate entries.
        """
        self.results = [self.map_by_keys(row) for row in self.root]
        return self.results

    def _parse(self):
        """Parse ``self.data`` into an element tree stored on ``self.root``."""
        self.root = etree.fromstring(self.data)

    def map_by_keys(self, row):
        """Map one element to plain Python data, recursing into children.

        Three kinds of element are distinguished by their attributes:

        * has ``name`` (e.g. ``<Threshold>``): returns the tuple
          ``(tag, {'name': ..., 'value': ...})``;
        * has neither ``name`` nor ``class`` (e.g. ``<Thresholds>``):
          returns ``(tag, [{child_tag: child_data}, ...])``;
        * otherwise (e.g. ``<DSM>``): returns a dict with ``class`` and
          ``order`` plus one entry per child, keyed by the child's tag.
        """
        if row.get('name') is not None:
            # Leaf entry such as <Threshold name=... value=.../>.
            return (row.tag, {'name': row.get('name'), 'value': row.get('value')})

        if row.get('class') is None:
            # Grouping element such as <Thresholds>: collect its entries.
            # Elements are iterated directly; getchildren() is deprecated.
            children = []
            for child in row:
                key, values = self.map_by_keys(child)
                children.append({key: values})
            return (row.tag, children)

        # Top-level <DSM> element: attributes first, then any child groups.
        unit = {'class': row.get('class'), 'order': row.get('order')}
        for child in row:
            key, values = self.map_by_keys(child)
            unit[key] = values
        return unit


# Parse the sample file and print the resulting list of DSM dicts.
xml_path = './x.xml'
print(XmlParser(xml_path).get_result_dict())

打印:

[{'class': 'ACE', 'order': '320'}, {'class': 'ACS', 'order': '1900'}, {'class': 'Aironet', 'order': '1050'}, {'class': 'Apache', 'order': '4700'}, {'class': 'AppSecDbProtect', 'order': '1477'}, {'class': 'ArborNetworksPravail', 'order': '1554', 'Thresholds': [{'Threshold': {'value': '5', 'name': 'MinNumEvents'}}, {'Threshold': {'value': '3', 'name': 'AbandonAfterSuccessiveFailures'}}], 'Templates': [{'Template': {'value': 'Arbor Networks Pravail @ $$SOURCE_ADDRESS$$', 'name': 'DeviceName'}}]}, {'class': 'ARN', 'order': '2000'}, {'class': 'ArpeggioSIFTIT', 'order': '1553'}, {'class': 'ArubaClearPass', 'order': '545', 'Thresholds': [{'Threshold': {'value': '5', 'name': 'MinNumEvents'}}, {'Threshold': {'value': '3', 'name': 'AbandonAfterSuccessiveFailures'}}], 'Templates': [{'Template': {'value': 'Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$', 'name': 'DeviceName'}}, {'Template': {'value': 'Aruba ClearPass Policy Manager Device', 'name': 'DeviceDescription'}}]}]

为了理解递归,你必须先了解递归