我想创建三个 csv 文件,并把 xml 响应中的每个 REPORT_ITEM 分别写入这些文件。
我有以下xml数据:
<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
<MessageHeader>
<TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
<Source>OASIS</Source>
<Version>v20131201</Version>
</MessageHeader>
<MessagePayload>
<RTO>
<name>CAISO</name>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
<DISCLAIMER>The contents of these pages are subject to change without notice. Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
</DISCLAIMER_ITEM>
</RTO>
</MessagePayload>
</OASISReport>
此数据中有三个包含数据的“REPORT_ITEM”标签。我想把这些数据分别写入三个 csv 文件。到目前为止,我已经能够使用 “etree” 模块解析这些数据。
My code:
import lxml.etree as et
import csv
# Flatten the report into CSV rows and write them all to a single file:
# one row per REPORT_HEADER first, followed by one row per REPORT_DATA.
HEADER_FIELDS = (
    'SYSTEM',
    'TZ',
    'REPORT',
    'MKT_TYPE',
    'UOM',
    'INTERVAL',
    'SEC_PER_INTERVAL',
)
DATA_FIELDS = (
    'DATA_ITEM',
    'RESOURCE_NAME',
    'OPR_DATE',
    'INTERVAL_NUM',
    'INTERVAL_START_GMT',
    'INTERVAL_END_GMT',
    'VALUE',
)


def _child_texts(node, tags):
    """Return the text of each named child of *node*, in *tags* order."""
    return [node.find(tag).text for tag in tags]


document_root = et.parse('data.xml').getroot()

# Header rows come first, for every REPORT_HEADER found anywhere in the tree.
list_of_rows = [
    _child_texts(header, HEADER_FIELDS)
    for header in document_root.iter('REPORT_HEADER')
]
# Data rows follow, for every REPORT_DATA found anywhere in the tree.
list_of_rows.extend(
    _child_texts(record, DATA_FIELDS)
    for record in document_root.iter('REPORT_DATA')
)

with open("file.csv", "w") as out:
    csv.writer(out).writerows(list_of_rows)
谢谢。我想创建三个 csv 文件,并把 xml 响应中的每个“REPORT_ITEM”分别写入这些文件。
答案 0 :(得分:0)
改用 xpath,不过您可能需要调整这段代码以去掉输出中的空行!如下所示:
import lxml.etree as et
import csv
xml="""<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
<MessageHeader>
<TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
<Source>OASIS</Source>
<Version>v20131201</Version>
</MessageHeader>
<MessagePayload>
<RTO>
<name>CAISO</name>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
<REPORT_HEADER>
<SYSTEM>OASIS</SYSTEM>
<TZ>PPT</TZ>
<REPORT>PRC_LMP</REPORT>
<MKT_TYPE>DAM</MKT_TYPE>
<UOM>US$/MWh</UOM>
<INTERVAL>ENDING</INTERVAL>
<SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
</REPORT_HEADER>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>2</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
<VALUE>29</VALUE>
</REPORT_DATA>
<REPORT_DATA>
<DATA_ITEM>LMP_PRC</DATA_ITEM>
<RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
<OPR_DATE>2015-10-12</OPR_DATE>
<INTERVAL_NUM>12</INTERVAL_NUM>
<INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
<INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
<VALUE>35.67227</VALUE>
</REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
<DISCLAIMER>The contents of these pages are subject to change without notice. Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
</DISCLAIMER_ITEM>
</RTO>
</MessagePayload>
</OASISReport>"""
# Write each REPORT_ITEM of the sample report to its own CSV file (Python 3).

# lxml rejects a text string that carries an XML encoding declaration,
# so hand the parser bytes instead.
tree = et.fromstring(xml.encode("utf-8"))
itms = tree.xpath("//REPORT_ITEM")

# One entry per REPORT_ITEM. Each entry is a list of rows: one row per child
# element of the item, holding the text of that child's sub-elements.
data = []
for report_item in itms:
    rows = [[field.text for field in section] for section in report_item]
    print(rows)
    data.append(rows)

# Pair every item with its own index so that file N receives only item N.
# Looping over all indices for every item would rewrite every file each time
# and leave all of them holding the last item's rows.
for index, rows in enumerate(data):
    # newline="" lets the csv module control line endings; without it the
    # output on Windows shows an empty line after every row.
    with open("D:\\_" + str(index) + ".csv", "w", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerows(rows)