答案 0 :(得分:2)
这是一个非常基本的示例:没有错误处理,并且假定XML文件的结构是严格固定的,但您可以把它作为起点,再借助以下链接继续完善:
代码:
import os
import os.path
from xml.dom.minidom import parse
def _first_text(node, tag_name):
    """Return the text of the first ``tag_name`` descendant of *node*, or None.

    None is returned when no such element exists or when it has no children.
    """
    matches = node.getElementsByTagName(tag_name)
    if not matches or matches[0].firstChild is None:
        return None
    return matches[0].firstChild.data


def parse_file(path):
    """Collect the ``Path`` text of every ``File`` element whose ``Type`` is 'config'.

    Args:
        path: Location of the XML document to read.

    Returns:
        A list with one entry per matching ``File`` element, in document
        order. ``File`` elements lacking ``Type`` or ``Path`` text are skipped.

    Raises:
        Whatever ``xml.dom.minidom.parse`` raises for an unreadable or
        malformed document; nothing is swallowed here.
    """
    files = []
    dom = parse(path)
    try:
        for filetag in dom.getElementsByTagName('File'):
            if _first_text(filetag, 'Type') != 'config':
                continue
            # Use a separate name so the ``path`` argument is not clobbered.
            config_path = _first_text(filetag, 'Path')
            if config_path is not None:
                files.append(config_path)
    finally:
        # Release the DOM tree even when parsing the entries fails.
        dom.unlink()
    return files
def main(root_dir='/var/packs'):
    """Walk *root_dir* and print the config paths found in every info.xml.

    Args:
        root_dir: Directory tree to search; defaults to '/var/packs'.

    Each directory containing a file named 'info.xml' has that file handed
    to ``parse_file``; the combined results are printed as a single list.
    """
    config_files = []
    # The walk's listing gets its own name: reusing the accumulator's name
    # here would replace the collected results on every iteration.
    for root, dirs, names in os.walk(root_dir):
        if 'info.xml' in names:
            config_files += parse_file(os.path.join(root, 'info.xml'))
    # A single formatted argument prints identically on Python 2 and 3.
    print('The list of desired files: %s' % (config_files,))


if __name__ == '__main__':
    main()
答案 1 :(得分:1)
使用lxml.etree和XPath:
# Gather the text of every Path whose sibling Type is "config", taken from
# each info.xml found anywhere below /var/packs.
files = []
for root, dirnames, filenames in os.walk('/var/packs'):
    if 'info.xml' not in filenames:
        continue
    info_path = os.path.join(root, 'info.xml')
    tree = lxml.etree.parse(info_path)
    config_paths = tree.getroot().xpath('//File[Type[text()="config"]]/Path/text()')
    files.extend(config_paths)
如果lxml不可用,您也可以使用标准库中的etree API:
# Gather the text of every Path belonging to a File whose Type is "config",
# taken from each info.xml found anywhere below /var/packs.
files = []
for root, dirnames, filenames in os.walk('/var/packs'):
    if 'info.xml' not in filenames:
        continue
    tree = xml.etree.ElementTree.parse(os.path.join(root, 'info.xml'))
    # iter() visits File elements at any depth; findall('File') would only
    # see direct children of the root element.
    for file_node in tree.iter('File'):
        type_node = file_node.find('Type')
        if type_node is None or type_node.text != 'config':
            continue
        path_node = file_node.find('Path')
        # An empty <Path/> has text None; skip it rather than append None.
        if path_node is not None and path_node.text:
            files.append(path_node.text)
答案 2 :(得分:0)
这是我凭记忆随手写的,仅供参考。我们将使用os.path.walk递归遍历您的目录,并使用minidom进行解析。
import os
from xml.dom import minidom
def parseInfoXML(filename):
    """Open the given info.xml file and print each File's "Path" contents.

    Args:
        filename: Location of the XML document to read.

    For every ``File`` element, the data of the first child of its first
    ``Path`` descendant is printed on its own line. ``File`` elements with
    no ``Path`` element, or with an empty one, are skipped.
    """
    doc = minidom.parse(filename)
    try:
        for fileNode in doc.getElementsByTagName("File"):
            pathNodes = fileNode.getElementsByTagName("Path")
            # Nothing to print without a Path element that has content.
            if not pathNodes or not pathNodes[0].childNodes:
                continue
            # Parenthesised single argument: same output on Python 2 and 3.
            print(pathNodes[0].childNodes[0].data)
    finally:
        # Release the DOM tree even if printing an entry fails.
        doc.unlink()
def checkDirForInfoXML(arg, dirname, names):
    """Directory visitor: parse ``dirname``'s info.xml when one is listed.

    Args:
        arg: Unused; accepted only to fit the visitor call signature.
        dirname: Directory currently being visited.
        names: Entry names found in ``dirname``.
    """
    if "info.xml" not in names:
        return
    info_path = os.path.join(dirname, "info.xml")
    parseInfoXML(info_path)
# Recursively walk the directory tree, calling our visitor function to check
# for info.xml in each directory.
# The top-level directory itself is visited too, so be sure that there is no
# info.xml directly inside it.
# NOTE: os.path.walk exists only in Python 2; Python 3 removed it in favour
# of os.walk.
os.path.walk("/var/packs" , checkDirForInfoXML, None)
我敢肯定这不是最高效的方法,但如果您不需要考虑错误处理之类的问题,这样做就足够了。