使用Python生成的XML文件中部分代码被删除

时间:2012-02-01 05:48:28

标签: python xml

我先复制元数据XML文件,然后用Python对其进行更新——除了原始元数据文件中的以下代码被删除之外,其他一切正常:

<?xml version="1.0" encoding="utf-8"?><?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>

它需要在文件的开头。

针对PHP已经有一个解答(见“xml insertion at specific point of xml file”),但我需要一个Python解决方案。

代码和完整的说明都在我原来的帖子中,但这个问题与原来的问题不同,所以我把它单独拿出来提问。原帖:Search and replace multiple lines in xml/text files using python

谢谢,

完整代码

import os, xml, arcpy, shutil, datetime, Tkinter, tkFileDialog, tkSimpleDialog
from xml.etree import ElementTree as et 

path=os.getcwd()
RootDirectory=path
currentPath=path
arcpy.env.workspace = path
Count=0
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""
Generated_XMLs=RootDirectory+'\GeneratedXML_LOG.txt'
f = open(Generated_XMLs, 'a')
f.write("Log of Metadata Creation Process - Update: "+str(datetime.datetime.now())+"\n")
f.close()

for root, dirs, files in os.walk(RootDirectory, topdown=False):
    #print root, dirs
    for directory in dirs:
        try:
            currentPath=os.path.join(root,directory)
        except:
            pass
        os.chdir(currentPath)
        arcpy.env.workspace = currentPath
        print currentPath
#def Create_xml(currentPath):

        FileList = arcpy.ListFeatureClasses()
        zone="_Zone"

        for File in FileList:
            Count+=1
            FileDesc_obj = arcpy.Describe(File)
            FileNm=FileDesc_obj.file
            check_meta=os.listdir(currentPath)
            existingXML=FileNm[:FileNm.find('.')]
            existingExtension=FileNm[FileNm.find('.'):]
            print "XML: "+existingXML
            #print check_meta
            #if  existingXML+'.xml' in check_meta:
            #newMetaFile='new'
            for f in check_meta:
                if f.startswith(existingXML) and f.endswith('.xml'):
                    print "exists, file name:", f
                    newMetaFile=FileNm+"_2012Metadata.xml"
                    try:
                        shutil.copy2(f, newMetaFile)
                    except:
                        pass
                    break
                else:
                    #print "Does not exist"
                    newMetaFile=FileNm+"_BaseMetadata.xml"

            print "New meta file: "+newMetaFile+ " for: "+File
            if newMetaFile.endswith('_BaseMetadata.xml'):        
                print "calling tkinter"
                root = Tkinter.Tk()
                root.withdraw()
                file = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a xml base file to match with: '+File)
                if file != None:
                    metafile=os.path.abspath(file.name)
                    file.close()
                    #print metafile
                    shutil.copy2(metafile,newMetaFile)
                    print "copied"+metafile
                    root.destroy

                else:
                    shutil.copy2('L:\Data_Admin\QA\Metadata_python_toolset\Master_Metadata.xml', newMetaFile)
                    #root = Tkinter.Tk()
                    #root.withdraw()
                    #newTitle=tkSimpleDialog.askstring('title', 'prompt')
                    #root.destroy
                    #print newTitle

            print "Parsing meta file: "+newMetaFile
            tree=et.parse(newMetaFile)        
            print "Processing: "+str(File)

            for node in tree.findall('.//title'):
                node.text = str(FileNm)
            for node in tree.findall('.//procstep/srcused'):
                node.text = str(currentPath+"\\"+existingXML+".xml")
            dt=dt=str(datetime.datetime.now())
            for node in tree.findall('.//procstep/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//procstep/time'):
                node.text = str(dt[11:13]+dt[16:19])
            for node in tree.findall('.//metd/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//northbc'):
                node.text = str(FileDesc_obj.extent.YMax)
            for node in tree.findall('.//southbc'):
                node.text = str(FileDesc_obj.extent.YMin)
            for node in tree.findall('.//westbc'):
                node.text = str(FileDesc_obj.extent.XMin)
            for node in tree.findall('.//eastbc'):
                node.text = str(FileDesc_obj.extent.XMax)        
            for node in tree.findall('.//native/nondig/formname'):
                node.text = str(os.getcwd()+"\\"+File)
            for node in tree.findall('.//native/digform/formname'):
                node.text = str(FileDesc_obj.featureType)
            for node in tree.findall('.//avlform/nondig/formname'):
                node.text = str(FileDesc_obj.extension)
            for node in tree.findall('.//avlform/digform/formname'):
                node.text = str(float(os.path.getsize(File))/int(1024))+" KB"
            for node in tree.findall('.//theme'):
                node.text = str(FileDesc_obj.spatialReference.name +" ; EPSG: "+str(FileDesc_obj.spatialReference.factoryCode))
            print node.text
            projection_info=[]
            Zone=FileDesc_obj.spatialReference.name

            if "GCS" in str(FileDesc_obj.spatialReference.name):
                projection_info=[FileDesc_obj.spatialReference.GCSName, FileDesc_obj.spatialReference.angularUnitName, FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName]
                print "Geographic Coordinate system"
            else:
                projection_info=[FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName, FileDesc_obj.spatialReference.angularUnitName, Zone[Zone.rfind(zone)-3:]]
                print "Projected Coordinate system"
            x=0
            for node in tree.findall('.//spdom'):
                for node2 in node.findall('.//keyword'):
                    #print node2.text
                    node2.text = str(projection_info[x])
                    #print node2.text
                    x=x+1


            tree.write(newMetaFile)
            with open(newMetaFile, 'w') as output: # would be better to write to temp file and rename
                output.write(DECLARATION)
                tree.write(output, xml_declaration=False, encoding='utf-8') 
    # xml_declaration=False - don't write default declaration   

            f = open(Generated_XMLs, 'a')
            f.write(str(Count)+": "+File+"; "+newMetaFile+"; "+currentPath+";"+existingXML+"\n")
            f.close()



    #        Create_xml(currentPath)

Wing IDE的错误消息

  

xml.parsers.expat.ExpatError: no element found: line 3, column 0
  File "L:\Data_Admin\QA\Metadata_python_toolset\TEST2\update_Metadata1f.py", line 78, in <module>
    tree = et.parse(newMetaFile)
  File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 862, in parse
    tree.parse(source, parser)
  File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 587, in parse
    self._root = parser.close()
  File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 1254, in close
    self._parser.Parse("", 1) # end of data

2 个答案:

答案 0 :(得分:2)

我也曾为如何把处理指令(PI)添加到ElementTree文档的开头而苦恼。我想出的解决方案是使用一个假的根节点(用None作为元素标签):先在其中放入所需的处理指令,再把真正的文档根节点作为最后一个子节点放进去。

import xml.etree.ElementTree as ET

# The document proper: construct it exactly as you normally would.
root = ET.Element('root')

# The processing instruction that has to precede the document element.
# Create one of these for every instruction you need.
pi = ET.ProcessingInstruction("xml-stylesheet", "type='text/xsl' href='ANZMeta.xsl'")
pi.tail = "\n"

# A wrapper whose tag is None does not show up in the generated file; only its
# children do. It carries the instructions first and the real root last.
fake_root = ET.Element(None)
for child in (pi, root):
    fake_root.append(child)

# ElementTree.write() supplies the <?xml ...?> declaration itself.
tree = ET.ElementTree(fake_root)
tree.write("doc.xml", xml_declaration=True)

生成的doc.xml文件:

<?xml version='1.0' encoding='us-ascii'?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>
<root />

答案 1 :(得分:1)

如果您的所有xml文件都具有相同的声明,您可以自己编写:

import xml.etree.ElementTree as ET


# Declaration and stylesheet instruction to put at the top of the file; they
# are written verbatim in front of the serialized document.
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""

tree = ET.parse(filename)
# do some work on tree

# Serialize before touching the file, so a serialization error cannot leave a
# truncated, declaration-only file behind. With encoding='utf-8' no default
# declaration is produced, and no xml_declaration keyword (rejected by
# interpreters older than Python 2.7) is needed.
body = ET.tostring(tree.getroot(), encoding='utf-8')

# Binary mode: the serialized document is already encoded, and a text-mode
# file refuses encoded bytes on Python 3.
with open(filename, 'wb') as output:
    output.write(DECLARATION.encode('utf-8'))
    output.write(body)