我有 700 多个 zip 压缩包,其中包含的文件结构如下(文件内容与此完全一致):
<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">false</value>
<value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">false</value>
<value name="retrievalSuspended">false</value>
</record>
</Values>
我想要实现的是:无论字段 processingSuspended 和 retrievalSuspended 第一次出现时的值是 true 还是 false,都将其替换为 false,并且只替换第一次出现的位置。
编辑:
应要求,我补充了目前已有的代码。我已经可以取到想要的字段,但我相信应该有更简单的方法。
import os
import zipfile
import glob
import time
import re
# NOTE(review): this listing has lost its indentation, so the nesting of the
# loops and of the statements that follow the second `if` cannot be determined
# from the text alone -- confirm against the original script before relying on
# the comments below. The `print rList` statement form is Python 2 syntax.
def main():
# Collects the dicts of extracted field values; shown by `print rList`.
rList = []
# Visit every *.zip archive matched in the current working directory.
for z in glob.glob("*.zip"):
# The archive is opened here; no explicit close appears in this listing.
root = zipfile.ZipFile(z)
for filename in root.namelist():
# Only archive members whose name contains "node.ndf" are inspected.
if filename.find("node.ndf") >= 0:
for line in root.read(filename).split("\n"):
# A line containing "broker-trigger" starts a second scan of the same member.
if line.find("broker-trigger") >= 0:
# The member is read and split into lines again for this inner scan.
for iline in root.read(filename).split("\n"):
Values = dict()
# Match the processingSuspended line; for `<value ...>false</value>`
# group 2 of the pattern is the element text ("false").
if iline.find("processingSuspended") >= 0:
mpr = re.search(r'(.*>)(.*?)(<.*)',
iline, re.M|re.I)
# Match the retrievalSuspended line with the same pattern.
if iline.find("retrievalSuspended") >= 0:
mr = re.search(r'(.*>)(.*?)(<.*)',
iline, re.M|re.I)
# Store the captured element texts under their field names.
# NOTE(review): presumably these run only after both matches exist;
# `mpr`/`mr` are not initialised anywhere else in this listing.
Values['processingSuspended'] = mpr.group(2)
Values['retrievalSuspended'] = mr.group(2)
#print mr.group(2)
rList.append(Values)
print rList
# Run main() only when the file is executed as a script.
if __name__== "__main__":
main()
提前致谢。
答案 0 :(得分:1)
尝试使用 lxml:
>>> xml = '''\
<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">true</value>
<value name="retrievalSuspended">true</value>
</record>
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">true</value>
<value name="retrievalSuspended">true</value>
</record>
</Values>\
'''
>>> from lxml import etree
>>> tree = etree.fromstring(xml)
>>> tree.xpath('//value[@name="processingSuspended"]')[0].text = 'false'
>>> tree.xpath('//value[@name="retrievalSuspended"]')[0].text = 'false'
此 xpath 表达式 '//value[@name="processingSuspended"]' 会找到所有 name 属性等于 "processingSuspended" 的 value 标记。然后,我们用 [0] 只取第一个,并将该标记的文本更改为 'false'。
输出:
>>> print(etree.tostring(tree, pretty_print=True))
<Values version="2.0">
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">false</value>
<value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
<value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
<value name="processingSuspended">true</value>
<value name="retrievalSuspended">true</value>
</record>
</Values>
>>>
答案 1 :(得分:0)
您可以使用 Python 的内置模块读取 zip 存档,并更新其中所含文件的 XML 格式数据。xml.etree.ElementTree 的官方文档中甚至还有一个教程。
import glob
import xml.etree.ElementTree as ET
import zipfile
def _set_first_record_field(root, index, name):
    """Set one field of the first record to "false".

    Looks at child ``index`` of the first child of ``root``. If that element
    is a ``<value>`` whose ``name`` attribute equals ``name``, its text is
    replaced with ``"false"``.

    Returns True when the field was found and updated, False otherwise
    (including when the first record or the indexed child does not exist).
    """
    try:
        field = root[0][index]
    except IndexError:
        # No first record, or the record has too few children.
        return False
    if field.tag != "value" or field.attrib.get("name") != name:
        return False
    field.text = "false"
    return True


def main():
    """Print every member of each ``*.zip`` in the current directory with the
    first record's ``processingSuspended`` and ``retrievalSuspended`` values
    set to "false".

    Each member is parsed as XML and must have a ``<Values version="2.0">``
    root. The two fields are expected at child positions 1 and 2 of the first
    record. The original and the updated XML are printed; nothing is written
    back into the archive.

    When a member is unsupported or a field is missing, a message is printed
    and the remaining members of that archive are skipped.
    """
    for z in glob.glob("*.zip"):
        print('processing file: {!r}'.format(z))
        # The context manager closes the archive even if parsing raises.
        with zipfile.ZipFile(z) as zfile:
            for filename in zfile.namelist():
                print('processing archive member: {!r} in {}'.format(filename, z))
                # ZipFile.read() opens, reads and closes the member for us.
                contents = zfile.read(filename)
                print('Before changes:')
                print(contents)
                root = ET.fromstring(contents)
                # .get() makes a missing version attribute "unsupported"
                # instead of raising KeyError.
                if root.tag != "Values" or root.attrib.get("version") != "2.0":
                    print('unsupported xml file')
                    break
                if not _set_first_record_field(root, 1, "processingSuspended"):
                    print('expected "processingSuspended" value field not found')
                    break
                if not _set_first_record_field(root, 2, "retrievalSuspended"):
                    print('expected "retrievalSuspended" value field not found')
                    break
                print('After changes:')
                updated_contents = ET.tostring(root)
                print(updated_contents)


if __name__ == "__main__":
    main()