在Python中将xml响应写入多个csv文件

时间:2015-11-10 09:12:38

标签: python xml csv

我想创建三个csv文件,并把xml响应(REPORT_ITEM)分别写入这些文件。
 我有以下xml数据:

<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
   <MessageHeader>
      <TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
      <Source>OASIS</Source>
      <Version>v20131201</Version>
   </MessageHeader>
   <MessagePayload>
      <RTO>
         <name>CAISO</name>
         <REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
            <DISCLAIMER>The contents of these pages are subject to change without notice.  Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
         </DISCLAIMER_ITEM>
      </RTO>
   </MessagePayload>
</OASISReport>

此数据有三个包含数据的"REPORT_ITEM"标签。我想把这些数据分别写入三个csv文件。到目前为止,我能够使用"etree"模块来解析数据。

My code:

import lxml.etree as et
import csv

# Child tags read from every <REPORT_HEADER> element, in output column order.
HEADER_TAGS = ('SYSTEM', 'TZ', 'REPORT', 'MKT_TYPE', 'UOM', 'INTERVAL',
               'SEC_PER_INTERVAL')
# Child tags read from every <REPORT_DATA> element, in output column order.
DATA_TAGS = ('DATA_ITEM', 'RESOURCE_NAME', 'OPR_DATE', 'INTERVAL_NUM',
             'INTERVAL_START_GMT', 'INTERVAL_END_GMT', 'VALUE')

tree = et.parse('data.xml')
root = tree.getroot()

list_of_rows = []

# All REPORT_HEADER rows are collected first, then all REPORT_DATA rows, each
# group in the order root.iter() yields the elements. A missing child tag
# raises AttributeError because find() returns None for it.
for element_name, child_tags in (('REPORT_HEADER', HEADER_TAGS),
                                 ('REPORT_DATA', DATA_TAGS)):
    for node in root.iter(element_name):
        list_of_rows.append([node.find(tag).text for tag in child_tags])

# Everything is written to the single output file "file.csv".
with open("file.csv", "w") as f:
    csv.writer(f).writerows(list_of_rows)

谢谢。我想创建三个csv文件,并把xml响应中的"REPORT_ITEM"分别写入这些文件。

1 个答案:

答案 0 :(得分:0)

改用xpath,如下所示——不过您可能需要再调整一下,以去掉输出中的空行!

import lxml.etree as et
import csv


xml="""<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
    <MessageHeader>
        <TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
        <Source>OASIS</Source>
        <Version>v20131201</Version>
    </MessageHeader>
    <MessagePayload>
        <RTO>
            <name>CAISO</name>
            <REPORT_ITEM>
                <REPORT_HEADER>
                    <SYSTEM>OASIS</SYSTEM>
                    <TZ>PPT</TZ>
                    <REPORT>PRC_LMP</REPORT>
                    <MKT_TYPE>DAM</MKT_TYPE>
                    <UOM>US$/MWh</UOM>
                    <INTERVAL>ENDING</INTERVAL>
                    <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
                </REPORT_HEADER>
                <REPORT_DATA>
                    <DATA_ITEM>LMP_PRC</DATA_ITEM>
                    <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
                    <OPR_DATE>2015-10-12</OPR_DATE>
                    <INTERVAL_NUM>2</INTERVAL_NUM>
                    <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
                    <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
                    <VALUE>29</VALUE>
                </REPORT_DATA>
                <REPORT_DATA>
                    <DATA_ITEM>LMP_PRC</DATA_ITEM>
                    <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
                    <OPR_DATE>2015-10-12</OPR_DATE>
                    <INTERVAL_NUM>12</INTERVAL_NUM>
                    <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
                    <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
                    <VALUE>35.67227</VALUE>
                </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
    <REPORT_HEADER>
        <SYSTEM>OASIS</SYSTEM>
        <TZ>PPT</TZ>
        <REPORT>PRC_LMP</REPORT>
        <MKT_TYPE>DAM</MKT_TYPE>
        <UOM>US$/MWh</UOM>
        <INTERVAL>ENDING</INTERVAL>
        <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
    </REPORT_HEADER>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>2</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>29</VALUE>
    </REPORT_DATA>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>12</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>35.67227</VALUE>
    </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
    <REPORT_HEADER>
        <SYSTEM>OASIS</SYSTEM>
        <TZ>PPT</TZ>
        <REPORT>PRC_LMP</REPORT>
        <MKT_TYPE>DAM</MKT_TYPE>
        <UOM>US$/MWh</UOM>
        <INTERVAL>ENDING</INTERVAL>
        <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
    </REPORT_HEADER>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>2</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>29</VALUE>
    </REPORT_DATA>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>12</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>35.67227</VALUE>
    </REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
    <DISCLAIMER>The contents of these pages are subject to change without notice.  Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
    </DISCLAIMER_ITEM>
    </RTO>
    </MessagePayload>
</OASISReport>"""

# lxml rejects a text (unicode) string that carries an XML encoding
# declaration, so hand it bytes. On Python 2 `xml` is already a byte string
# and is passed through unchanged.
xml_bytes = xml if isinstance(xml, bytes) else xml.encode("utf-8")
tree = et.fromstring(xml_bytes)

# Every <REPORT_ITEM> anywhere in the document; each one gets its own CSV file.
itms = tree.xpath("//REPORT_ITEM")

data = []
for report_item in itms:
    # One row per direct child of the item (REPORT_HEADER / REPORT_DATA),
    # made of the text of that child's own children.
    d = [[field.text for field in section] for section in report_item]
    print(d)
    data.append(d)

# Pair each item with its own index so that file N holds item N only.
# Looping over all indices inside the loop over items rewrote every file for
# every item, which left all files containing the last item's rows.
for index, rows in enumerate(data):
    # NOTE: on Python 3 the csv docs recommend open(..., 'w', newline='');
    # without it, blank lines can appear between rows on Windows.
    with open("D:\\_" + str(index) + ".csv", 'w') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerows(rows)