这是我正在整理的脚本片段,目的是让我的工作更轻松。我有一堆来自不同API源的XML文件。它们包含不同的条目和不同的字段,共同点是都含有像“clientid”这样的公共字段。
我想最终得到一个CSV,它包含所有XML合并后的表头及其对应的数据。因此我需要确保“clientid”为12345的所有信息,都追加到项目数据中具有相同客户端ID的那一行的末尾。
项目数据:
<item>
<id>99899</id>
<client-id>12345</client-id>
脚本的一部分:
def parseXML():
    """Parse the XML text in ``getdata`` and write its items to xmlTest01.csv.

    The module-level name ``getdata`` must hold the XML source as a string.
    A header row is written first, followed by one row per ``<item>``
    element that is a direct child of the root, containing the text of its
    ``<id>`` and ``<client-id>`` children.

    Fixes over the earlier version:
    * the header row is actually written (previously it was collected in a
      list that was never passed to the writer);
    * the output file is managed by ``with`` so it is closed even when
      parsing or writing raises;
    * a missing ``<id>`` / ``<client-id>`` child yields an empty cell
      instead of raising ``AttributeError``.
    """
    # Tag names double as the CSV column headers.
    fields = ('id', 'client-id')

    # Get XML source
    tree = ET.fromstring(getdata)

    # Open a file for writing and create the csv writer object
    with open('xmlTest01.csv', 'w') as xmlTest01:
        csvwriter = csv.writer(xmlTest01)
        csvwriter.writerow(fields)
        for member in tree.findall('item'):
            # findtext() returns '' for a missing or empty child element.
            csvwriter.writerow([member.findtext(tag, '') for tag in fields])
下一组数据包括:
<client>
<id>12345</id>
<name>Clients name</name>
<current type="boolean">true</current>
<status>good</status>
所以我想检查上一组数据中的行,查看相同的clientid,然后将名称,当前和状态添加到该行的末尾。
有关最佳方法的任何想法吗?我有大约5-7种这类文件要合并。在将文件转换为CSV之前,我是否应该先尝试合并这些文件?如果它们的内容都相似,这样做也许可行,但实际上并非如此。
所需的输出结合了两个xml文件的值:
id,clientid,name,current,status
99899,12345,Clients name,true,good
答案 0 :(得分:0)
考虑迭代这三个文件并有条件地检查客户端ID。将xml值解析为您写入csv文件的列表:
import csv
import xml.etree.ElementTree as ET
# Column headers of the combined CSV, in output order.
_HEADER = ['Pid', 'Clientid', 'ClientName', 'ClientActive', 'ProjectName', 'ProjectActive',
           'Billable', 'BillBy', 'HourlyRate', 'Budget', 'OverbudgetNotificationPercentage',
           'CreatedAt', 'UpdatedAt', 'StartsOn', 'EndsOn', 'Estimate', 'EstimateBy',
           'Notes', 'CostBudget', 'TeammemberName1', 'CostRate1', 'TeammemberName2', 'CostRate2',
           'TeammemberName3', 'CostRate3', 'TaskId1', 'TotalHours1', 'TaskId2', 'TotalHours2']

# Child tags copied from each <client> / <project> element, in column order.
_CLIENT_FIELDS = ('id', 'name', 'active')
_PROJECT_FIELDS = ('name', 'active', 'billable', 'bill-by', 'hourly-rate', 'budget',
                   'over-budget-notification-percentage', 'created-at', 'updated-at',
                   'starts-on', 'ends-on', 'estimate', 'estimate-by', 'notes',
                   'cost-budget')

# How many team members / tasks the header has columns for.
_MAX_TEAM_MEMBERS = 3
_MAX_TASKS = 2


def _related_values(root, group_tag, clientid, fields, limit):
    """Return the ``fields`` of the first ``limit`` matching items, padded with ''.

    Items are the ``<item>`` descendants of every ``<group_tag>`` element
    under ``root`` whose ``<cid>`` text equals ``clientid``.  The result
    always has ``limit * len(fields)`` entries so later columns stay aligned
    with the fixed header.
    """
    values = []
    taken = 0
    for group in root.iter(group_tag):
        for item in group.iter('item'):
            if taken >= limit:
                break
            if item.findtext('cid') == clientid:
                values.extend(item.findtext(tag, '') for tag in fields)
                taken += 1
    values.extend([''] * (limit * len(fields) - len(values)))
    return values


def parseXML(projects_path='projects.xml', clients_path='clients.xml',
             teammembers_path='teammembers.xml', output_path='Output.csv'):
    """Join project, client and team/task XML files into a single CSV.

    Every ``<project>`` is matched to the ``<client>`` element(s) whose
    ``<id>`` equals the project's ``<client-id>``.  For each match one CSV
    row is produced: project id, client columns, project columns, up to
    three team members and up to two tasks whose ``<cid>`` equals the client
    id.  Projects without a matching client are skipped.

    Args:
        projects_path: XML file containing ``<project>`` elements.
        clients_path: XML file containing ``<client>`` elements.
        teammembers_path: XML file containing ``<team_members>`` and
            ``<tasks>`` groups of ``<item>`` elements.
        output_path: CSV file to create or overwrite.

    All arguments default to the file names that were previously hard-coded,
    so calling ``parseXML()`` with no arguments reads and writes the same
    files as before.

    Fixes over the earlier version:
    * each project becomes its own CSV row (previously every project was
      appended to one flat list and written as a single row);
    * missing team-member / task slots are padded with empty cells so the
      task columns do not shift left under the header;
    * missing child elements produce an empty cell instead of raising
      ``AttributeError``;
    * clients are indexed by id once rather than re-scanned per project.
    """
    projectroot = ET.parse(projects_path).getroot()
    clientroot = ET.parse(clients_path).getroot()
    teamtaskroot = ET.parse(teammembers_path).getroot()

    # Index clients by id; a list per id keeps every duplicate so each one
    # still yields its own row, as the nested-loop version did.
    clients_by_id = {}
    for client in clientroot.iter('client'):
        client_key = client.findtext('id', '')
        if client_key:
            clients_by_id.setdefault(client_key, []).append(client)

    rows = []
    for project in projectroot.iter('project'):
        clientid = project.findtext('client-id', '')
        for client in clients_by_id.get(clientid, []):
            row = [project.findtext('id', '')]
            row.extend(client.findtext(tag, '') for tag in _CLIENT_FIELDS)
            row.extend(project.findtext(tag, '') for tag in _PROJECT_FIELDS)
            row.extend(_related_values(teamtaskroot, 'team_members', clientid,
                                       ('full_name', 'cost_rate'), _MAX_TEAM_MEMBERS))
            row.extend(_related_values(teamtaskroot, 'tasks', clientid,
                                       ('task_id', 'total_hours'), _MAX_TASKS))
            rows.append(row)

    with open(output_path, 'w') as f:
        csvwriter = csv.writer(f, lineterminator='\n')
        csvwriter.writerow(_HEADER)
        csvwriter.writerows(rows)


if __name__ == "__main__":
    parseXML()
**输出**
Pid,Clientid,ClientName,ClientActive,ProjectName,ProjectActive,Billable,
BillBy,HourlyRate,Budget,OverbudgetNotificationPercentage,CreatedAt,
UpdatedAt,StartsOn,EndsOn,Estimate,EstimateBy,Notes,CostBudget,TeammemberName
1,CostRate1,TeammemberName2,CostRate2,TeammemberName3,CostRate3,
TaskId1,TotalHours1,TaskId2,TotalHours2
11493770,4708336,AFB,true,Services - Consulting - AH,true,true,Project,
421.28,16.0,80.0,2016-08-16T03:22:51Z,
2016-08-16T03:22:51Z,,,16.0,project,Random
notes,,BobR,76.0,BobR,76.0,BobR,76.0,6357137,0.0,6357138,0.0
答案 1 :(得分:0)
此外,考虑XSLT,这是一种特殊用途的转换语言,它可以直接将XML转换为CSV,甚至可以使用其document()
函数从其他XML文件解析。 Python的lxml模块可以处理XSLT 1.0脚本。确保所有三个xml都位于同一目录中。
XSLT 脚本(另存为 .xsl 文件——一种特殊的 .xml 文件——由下面的 Python 脚本调用)
<!--
  Converts projects.xml to CSV text, pulling the matching client from
  clients.xml and the matching team members / tasks from teammembers.xml
  through document().

  Changes from the earlier version:
    * version is 1.0, the XSLT level the accompanying text says lxml handles;
    * "<" inside test attributes is escaped as &lt; (a raw "<" in an
      attribute value is not well-formed XML, so the stylesheet would not parse);
    * tasks are read from teammembers.xml, the same file used for team members
      (the earlier 'ClientItems_teammembers.xml' name matched no other reference);
    * six team-member values (three name/rate pairs) are emitted, matching the
      TeammemberName1..3 / CostRate1..3 header columns;
    * the unused "delimiter" variable is removed.
-->
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output version="1.0" encoding="UTF-8" method="text" indent="yes" omit-xml-declaration="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Root template: write the header line, then one line per project. -->
  <xsl:template match="/projects">
    <xsl:copy>
      <xsl:text>Pid,Clientid,ClientName,ClientActive,ProjectName,ProjectActive,Billable,BillBy,HourlyRate,</xsl:text>
      <xsl:text>Budget,OverbudgetNotificationPercentage,CreatedAt,UpdatedAt,StartsOn,EndsOn,Estimate,EstimateBy,</xsl:text>
      <xsl:text>Notes,CostBudget,TeammemberName1,CostRate1,TeammemberName2,CostRate2,TeammemberName3,CostRate3,</xsl:text>
      <xsl:text>TaskId1,TotalHours1,TaskId2,TotalHours2&#10;</xsl:text>
      <xsl:apply-templates select="project"/>
    </xsl:copy>
  </xsl:template>

  <!-- One CSV line per project. -->
  <xsl:template match="project">
    <xsl:variable name="clientid" select="client-id"/>

    <!-- Project id -->
    <xsl:value-of select="concat(id, ',')"/>

    <!-- Client id, name and active flag of the client this project belongs to -->
    <xsl:for-each select="document('clients.xml')/clients/client[id=$clientid]/*
                          [local-name()='id' or local-name()='name' or local-name()='active']">
      <xsl:value-of select="." />
      <xsl:if test="position() != last()">
        <xsl:text>,</xsl:text>
      </xsl:if>
    </xsl:for-each>

    <!-- Remaining project columns -->
    <xsl:value-of select="concat(',',name,',',active,',',billable,',',bill-by,',',hourly-rate,',',budget,',',
                          over-budget-notification-percentage,',',created-at,',',updated-at,',',starts-on,',',ends-on,',',
                          estimate,',',estimate-by,',',notes,',',cost-budget,',')"/>

    <!-- First six team-member values (three full_name / cost_rate pairs) -->
    <xsl:for-each select="document('teammembers.xml')/root/team_members/item[cid=$clientid]/*
                          [local-name()='full_name' or local-name()='cost_rate']">
      <xsl:if test="position() &lt; 7">
        <xsl:value-of select="." />
        <xsl:text>,</xsl:text>
      </xsl:if>
    </xsl:for-each>

    <!-- First four task values (two task_id / total_hours pairs).
         NOTE(review): when more than four values match, the fourth is still
         followed by a comma, leaving a trailing comma on the line. -->
    <xsl:for-each select="document('teammembers.xml')/root/tasks/item[cid=$clientid]/*
                          [local-name()='task_id' or local-name()='total_hours']">
      <xsl:if test="position() &lt; 5">
        <xsl:value-of select="." />
        <xsl:if test="position() != last()">
          <xsl:text>,</xsl:text>
        </xsl:if>
      </xsl:if>
    </xsl:for-each>

    <!-- End of the CSV line -->
    <xsl:text>&#10;</xsl:text>
  </xsl:template>
</xsl:transform>
Python 脚本(转换projects.xml并在XSLT中读取其他两个)
import lxml.etree as ET
def transformXML():
    """Apply XSLTscript.xsl to projects.xml and save the result as Output.csv.

    ``ET`` is ``lxml.etree``.  Both input files are opened by relative name.
    The stylesheet itself loads further XML files through ``document()``;
    presumably those must be reachable from the stylesheet's location.

    Fix over the earlier version: the ``open(...)`` call had a stray closing
    parenthesis (``open('Output.csv'),'w')``), which is a syntax error.
    """
    dom = ET.parse('projects.xml')
    xslt = ET.parse('XSLTscript.xsl')
    transform = ET.XSLT(xslt)
    newdom = transform(dom)

    # str() of the transform result is the stylesheet's text output.
    with open('Output.csv', 'w') as f:
        f.write(str(newdom))


if __name__ == "__main__":
    transformXML()
**输出**
Pid,Clientid,ClientName,ClientActive,ProjectName,ProjectActive,Billable,
BillBy,HourlyRate,Budget,OverbudgetNotificationPercentage,CreatedAt,
UpdatedAt,StartsOn,EndsOn,Estimate,EstimateBy,Notes,CostBudget,TeammemberName
1,CostRate1,TeammemberName2,CostRate2,TeammemberName3,CostRate3,
TaskId1,TotalHours1,TaskId2,TotalHours2
11493770,4708336,AFB,true,Services - Consulting - AH,true,true,Project,
421.28,16.0,80.0,2016-08-16T03:22:51Z,
2016-08-16T03:22:51Z,,,16.0,project,Random
notes,,BobR,76.0,BobR,76.0,BobR,76.0,6357137,0.0,6357138,0.0,