使用Python将复杂的XML解析为CSV

时间:2017-08-31 10:34:28

标签: python xml csv

我正在尝试将复杂的XML文件解析为CSV。

这就是我的XML文件的样子:

<?xml version="1.0" encoding="UTF-8"?>
<ke3600-menu-file language="en" display="English" index="1">
  <version major="0" minor="1" patch="0"/>
  <locale name="en_EN" timezone="CET-1CEST,M3.5.0,M10.5.0/3"/>
  <menu name="main_menu" display="Main Menu">
    <menu name="broadband" display="Broadband" help="100_help_broadband">
      <onenter proc="activateGfast"/>
      <menu name="load_save_profiles" display="Load and Save Profiles" help="601_help_profiles">
        <application name="load_profiles" display="Load Profile"/>
        <application name="save_profiles" display="Save Profile"/>
        <application name="remove_profiles" display="Delete Profile"/>
      </menu>
      <parameter type="list" name="wanInterface" display="Interface" help="101_help_wanInterface">
        <value id="0" name="ifDSL" display="xDSL"/>
        <value id="1" name="ifSFP" display="SFP"/>
        <value id="2" name="ifETH" display="Ethernet"/>
        <!-- <value i d="3" name="ifWLAN" display="WLAN"/> -->
        <value id="4" name="ifSHDSL" display="SHDSL"/>
      </parameter>

      <menu name="xdslinterface" display="xDSL Interface" help="200_help_xDslInterface">
        <parameter type="list" name="annex" display="xDSL Mode" help="201_help_xdslModi">
          <value id="0" name="xdsl_line_annex_a" display="Annex A/M"/>
          <value id="1" name="xdsl_line_annex_b" display="Annex B/J"/>
        </parameter>
        <application name="xdsl_macAddr" display="MAC Address" caption="MAC Address">
          <param type="string" name="xdsl_mac_addr" display="MAC Address" length="25"/>
        </application>
        <parameter type="list" name="xdsl_vectoring_mode" display="Vectoring Mode" help="202_help_Vectoring">
          <value id="0" name="xdsl_vectoring_mode_disabled" display="Disabled"/>
          <value id="1" name="xdsl_vectoring_mode_enabled" display="Enabled"/>
          <value id="2" name="xdsl_vectoring_mode_friendly" display="Friendly"/>
        </parameter>
        <parameter type="list" name="xdsl_gfast_mode" display="G.FAST">
          <value id="0" name="xdsl_gfast_mode_disabled" display="Disabled"/>
          <value id="1" name="xdsl_gfast_mode_enabled" display="Enabled"/>
        </parameter>
        <parameter type="list" name="resync" display="Auto Re-Sync">
          <value id="1" name="re_sync_on" display="On"/>
          <value id="0" name="re_sync_off" display="Off"/>
        </parameter>
        <parameter type="list" name="seamlessRateAdaption" display="Seamless Rate Adaption">
          <value id="0" name="sra_off" display="Off"/>
          <value id="1" name="sra_on" display="On"/>
        </parameter>
        <parameter type="list" name="xdsl_ginp" display="G.INP">
          <value id="0" name="xdsl_ginp_none" display="Off"/>
          <value id="1" name="xdsl_ginp_down" display="Down"/>
          <value id="2" name="xdsl_ginp_up" display="Up"/>
          <value id="3" name="xdsl_ginp_both" display="Down/Up"/>
        </parameter>
        <parameter type="list" name="power_cut_back" display="Power Cut Back">
          <value id="0" name="power_cut_back_off" display="Off"/>
          <value id="1" name="power_cut_back_on" display="On"/>
        </parameter>
        <parameter type="list" name="atmType" display="ATM Type">
          <value id="0" name="atm_type_aal5" display="AAL5"/>
          <value id="1" name="atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
        <parameter type="list" name="voip_atmType" display="Voip Interface ATM Type">
          <value id="0" name="voip_atm_type_aal5" display="AAL5"/>
          <value id="1" name="voip_atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="voip_atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
        <parameter type="list" name="iptv_atmType" display="Iptv Interface ATM Type">
          <value id="0" name="iptv_atm_type_aal5" display="AAL5"/>
          <value id="1" name="iptv_atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="iptv_atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
      </menu>

      <menu name="ethernetinterface" display="Ethernet Interface" help="300_help_ethernetInterface">
        <parameter type="list" name="ethInterfaceAuto" display="Autonegotiation">
          <value id="1" name="ethInterfaceAutoOn" display="On"/>
          <value id="0" name="ethInterfaceAutoOff" display="Off"/>
        </parameter>
        <parameter type="list" name="ethInterfaceSpeed" display="Speed">
          <value id="2" name="ethIf1000" display="1000MBit/s"/>
          <value id="1" name="ethIf100" display="100MBit/s"/>
          <value id="0" name="ethIf10" display="10MBit/s"/>
        </parameter>
        <parameter type="list" name="ethInterfaceDuplexMode" display="Duplex Mode">
          <value id="1" name="ethIfFull" display="Full"/>
          <value id="0" name="ethIfHalf" display="Half"/>
        </parameter>
      </menu>

      <menu name="shdslinterface" display="SHDSL Interface" help="400_help_shdslInterface">
        <menu name="shdslservice" display="SHDSL Service">
          <parameter type="list" name="shdslmode" display="SHDSL Mode" help="401_help_shdslModi">
            <value id="0" name="shdslAtm" display="ATM"/>
            <value id="1" name="shdslEfm" display="EFM"/>
            <value id="2" name="shdslTdm" display="TDM"/>
            <value id="3" name="shdslTu12" display="TU-12oSHDSL"/>
          </parameter>
          <parameter type="uint" name="shdsl_vpi" display="VPI" min="0" max="127" step="1" default="1" help="402_help_shdsl_vpi"/>
          <parameter type="uint" name="shdsl_vci" display="VCI" min="32" max="255" step="1" default="32"/>
          <parameter type="list" name="shdsl_encapsulation" display="Encapsulation">
            <value id="0" name="encap_vcmux" display="VCMUX"/>
            <value id="1" name="encap_llc" display="LLC"/>
          </parameter>
          <parameter type="list" name="shdsl_eth_fcs" display="Eth-FCS in PDU">
            <value id="1" name="eth_fcs_enabled" display="Enabled"/>
            <value id="0" name="eth_fcs_disabled" display="Disabled"/>
          </parameter>
          <parameter type="list" name="shdsl_efm_standard" display="Standard">
            <value id="0" name="shdsl_efm_standard_8023ah" display="IEEE 802.3ah"/>
            <value id="1" name="shdsl_efm_standard_g9912" display="G.991.2"/>
            <value id="2" name="shdsl_efm_standard_auto" display="Auto"/>
          </parameter>
          <parameter type="list" name="shdsl_tdm_framing" display="Clockmode">
            <value id="4" name="shdsl_tdm_framing_cm3a" display="CM3a: Synchron"/>
            <value id="1" name="shdsl_tdm_framing_cm1" display="CM1: Plesiochron"/>
            <value id="2" name="shdsl_tdm_framing_cm2" display="CM2: Plesiochron NTR"/>
          </parameter>
          <parameter type="list" name="shdsl_tdm_loop" display="Loopback">
            <value id="0" name="shdsl_tdm_loop_disabled" display="Disabled"/>
            <value id="1" name="shdsl_tdm_loop_lineside" display="Lineside"/>
          </parameter>
        </menu>
        <parameter type="list" name="shdsl_side" display="Side">
          <value id="0" name="shdsl_tur" display="STU-R"/>
          <value id="1" name="shdsl_tuc" display="STU-C"/>
        </parameter>
        <menu name="shdslmapping" display="Line Mapping">
          <parameter type="list" name="shdsl_mapping_line0" display="Line 1">
            <value id="0" name="shdsl_mapping_line0_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line0_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line0_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line0_3" display="1+2"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line1" display="Line 2">
            <value id="1" name="shdsl_mapping_line1_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line1_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line1_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line1_0" display="4+5"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line2" display="Line 3">
            <value id="2" name="shdsl_mapping_line2_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line2_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line2_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line2_1" display="3+6"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line3" display="Line 4">
            <value id="3" name="shdsl_mapping_line3_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line3_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line3_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line3_2" display="7+8"/>
          </parameter>
        </menu>

我想提取以下数据:

"Count" "Label" "Name" "Type"
"1" "" "locale" ""
"1" "Main Menu" "menu" ""
"2" "Broadband" "menu" ""
"3" "Load and Save Profiles" "menu" ""

第一行定义列名,其余数据只是制表符分隔值,每行记录一行。 Count表示XML树中的菜单位置。

我尝试使用以下代码:

import xml.etree.ElementTree as ET
import csv

tree = ET.parse("menu.xml")
root = tree.getroot()

# open a file for writing

menu_data = open('/tmp/menu.csv', 'w')

# create the csv writer object

csvwriter = csv.writer(menu_data)
menu_head = []

count = 0
for member in root.findall('menu'):
    menu = []
    menu_list = []
    if count == 0:
        name = member.find('name').tag
        menu_head.append(name)
        label = member.find('Label').tag
        menu_head.append(Label)
        Count = member[3].tag
        menu_head.append(Count)
        csvwriter.writerow(menu_head)
        count = count + 1

    name = member.find('name').text
    menu.append(name)
    Count = member[3][0].text
    menu_list.append(Count)
    csvwriter.writerow(menu)
menu_data.close()

但是我收到以下错误:

Detail AttributeError: 'NoneType' object has no attribute 'tag' 

我无法弄清楚出了什么问题。另外我跟随的tutorial没有太多的子分支,但我在菜单里面有菜单,你可以在我的XML文件中看到。如果您有任何建议,请告诉我。

1 个答案:

答案 0 :(得分:1)

  

问题:&#39; NoneType&#39;对象没有属性&#39;标记&#39;

你在这一行得到了错误:

name = member.find('name').tag

原因是,没有标记<name>'name'是标记attribute的{​​{1}}。因此你必须使用:

<menu name="main_menu">

我认为你想要这样的东西:

name = member.attrib['name']
  

输出

    csvwriter.writerow(["Count", "Label", "Name", "Type"])

    def iter_menu(menu_list, level=[1], deep=1):
        for n, menu in enumerate(menu_list, 1):
            level[deep-1] = n

            csvwriter.writerow(('.'.join([str(i) for i in level]), menu.attrib['display'], menu.attrib['name'], 'Type?'))
            level.append(0)
            iter_menu(menu.findall('menu'), level, deep+1)
            level = level[:deep]

    iter_menu(root.findall('menu'))

使用Python测试:3.4.2