仅替换文件中第一次出现的字段/单词

时间:2013-07-10 18:10:38

标签: python zipfile

我有一批 zip 文件(700 多个),其中的文件具有如下结构(内容完全是这样的):

<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
</Values>

我想要实现的是:无论字段 processingSuspended 和 retrievalSuspended 第一次出现时的值是 true 还是 false,都将其替换为 false,但只替换第一次出现的位置,之后出现的保持不变。

编辑:

应大家的要求,我补充了目前为止写的代码。它已经可以取到我想要的字段,但我相信有更简单的方法。

import os
import zipfile
import glob
import time
import re

# Captures the text between the opening and closing tag of a one-line element.
_VALUE_RE = re.compile(r'(.*>)(.*?)(<.*)', re.M | re.I)


def _element_text(line):
    """Return the text enclosed by the tags on *line*, or None if none is found."""
    match = _VALUE_RE.search(line)
    return match.group(2) if match else None


def _read_states(data):
    """Extract the suspended-state pairs from the raw content of one member.

    Args:
        data: Content of an archive member as returned by ``ZipFile.read``.

    Returns:
        A list with one ``{'processingSuspended': ..., 'retrievalSuspended': ...}``
        dict per complete pair, in file order. The list is empty when the
        content never mentions ``broker-trigger``.
    """
    # ZipFile.read() yields bytes on Python 3; work on the native str type so
    # the substring tests and the regex behave the same on Python 2 and 3.
    if not isinstance(data, str):
        data = data.decode("utf-8")
    lines = data.split("\n")

    # One pass is enough: scanning the whole file again for every line that
    # mentions "broker-trigger" only produced duplicate entries.
    if not any("broker-trigger" in line for line in lines):
        return []

    states = []
    processing = None
    for line in lines:
        if "processingSuspended" in line:
            processing = _element_text(line)
        if "retrievalSuspended" in line:
            retrieval = _element_text(line)
            # Only record complete pairs; a retrieval line without a preceding
            # processing line is skipped instead of reusing a stale value.
            if processing is not None and retrieval is not None:
                states.append({
                    'processingSuspended': processing,
                    'retrievalSuspended': retrieval,
                })
            processing = None
    return states


def main():
    """Print the suspended-state flags found in the zip files of the cwd.

    Every ``*.zip`` in the current directory is opened, each member whose name
    contains ``node.ndf`` is scanned, and the collected list of state dicts is
    printed once at the end.
    """
    results = []
    for z in glob.glob("*.zip"):
        # The context manager closes the archive even if reading a member fails.
        with zipfile.ZipFile(z) as archive:
            for filename in archive.namelist():
                if "node.ndf" not in filename:
                    continue
                results.extend(_read_states(archive.read(filename)))
    print(results)


if __name__ == "__main__":
    main()

提前致谢。

2 个答案:

答案 0 :(得分:1)

尝试使用lxml

>>> xml = '''\
<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
</Values>\
'''

>>> from lxml import etree
>>> tree = etree.fromstring(xml)
>>> tree.xpath('//value[@name="processingSuspended"]')[0].text = 'false'
>>> tree.xpath('//value[@name="retrievalSuspended"]')[0].text = 'false'

XPath 表达式 '//value[@name="processingSuspended"]' 会找到所有 name 属性等于 "processingSuspended" 的 value 标签。然后我们用 [0] 只取其中第一个,并将该标签的文本改为 'false'。

输出:

>>> print(etree.tostring(tree, pretty_print=True))
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
</Values>

>>> 

答案 1 :(得分:0)

您可以读取 zip 存档,并使用 Python 的内置模块更新其中所含文件里的 XML 数据。xml.etree.ElementTree 的文档中甚至还有一个教程。

import glob
import xml.etree.ElementTree as ET
import zipfile

# Fields whose first occurrence is forced to "false", in processing order.
_FIELD_NAMES = ("processingSuspended", "retrievalSuspended")


def _as_text(data):
    """Return *data* as the native ``str`` type so it prints without a b'' prefix."""
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


def _first_value(root, name):
    """Return the first ``<value name="NAME">`` element below *root*, or None."""
    return root.find('.//value[@name="{}"]'.format(name))


def main():
    """Set the first occurrence of each suspended flag to ``false`` and print it.

    Every member of every ``*.zip`` in the current directory is parsed as XML.
    For ``<Values version="2.0">`` documents the first ``processingSuspended``
    and the first ``retrievalSuspended`` value are changed to ``false``; later
    occurrences are left alone. The content is printed before and after the
    change. Nothing is written back to the archive.
    """
    for z in glob.glob("*.zip"):
        print('processing file: {!r}'.format(z))
        # The context manager closes the archive even if a member fails.
        with zipfile.ZipFile(z) as zfile:
            for filename in zfile.namelist():
                print('processing archive member: {!r} in {}'.format(filename, z))
                contents = zfile.read(filename)

                print('Before changes:')
                print(_as_text(contents))

                # A member that is not XML at all is reported like any other
                # unsupported file instead of aborting the whole run.
                try:
                    root = ET.fromstring(contents)
                except ET.ParseError:
                    print('unsupported xml file')
                    continue
                if root.tag != "Values" or root.get("version") != "2.0":
                    print('unsupported xml file')
                    # Skip this member only; the archive may hold other files.
                    continue

                for name in _FIELD_NAMES:
                    # Look the field up by name rather than by fixed position,
                    # so the layout of the record does not matter.
                    field = _first_value(root, name)
                    if field is None:
                        print('expected "{}" value field not found'.format(name))
                        break
                    field.text = "false"
                else:
                    # Reached only when every field was found and updated.
                    print('After changes:')
                    print(_as_text(ET.tostring(root)))


if __name__ == "__main__":
    main()