扫描目录树并生成 XML。我尝试了很多方法,但都失败了。
例如,期望得到的 XML 文件结构如下:
<dir name="dir_A">
<dir name="dir_AA">
<file name="abc.doc"/>
</dir>
<dir name="dir_BA">
<dir name="dir_BAA">
<file name="abc.doc"/>
</dir>
<file name="abc.doc"/>
</dir>
</dir>
下面是我尝试过的代码,但并不完整。开发过程中我删掉了一部分代码,现在已经找不回来了,抱歉。
import xml.etree.ElementTree as ET
import os
class XMLOperations:
    """Helpers that describe a directory tree as an XML document."""

    def list_files(self, startpath):
        """Scan ``startpath`` recursively and build a nested XML tree.

        Each directory becomes a ``<dir name="...">`` element and each file
        a ``<file name="..."/>`` element placed inside the element of the
        directory that contains it. Inside one directory, sub-directories
        come first and files second, both sorted by name so the output is
        deterministic.

        The tree is written to stdout with ``ET.dump`` and also returned,
        so callers can post-process it or write it to a file.

        Args:
            startpath: Path of the directory to scan.

        Returns:
            The root ``xml.etree.ElementTree.Element`` (the ``<dir>``
            element for ``startpath``). If ``startpath`` cannot be walked
            the root element has no children.
        """
        # Normalise once so a trailing separator or a relative path does not
        # produce an empty or misleading root name.
        startpath = os.path.abspath(startpath)
        xml_root = ET.Element("dir", name=os.path.basename(startpath))

        # Maps a directory path to the element that represents it. os.walk
        # is top-down, so a directory's element always exists before the
        # directory itself is visited.
        nodes = {startpath: xml_root}

        for root, dirs, files in os.walk(startpath):
            parent = nodes[root]
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()
            for dir_name in dirs:
                nodes[os.path.join(root, dir_name)] = ET.SubElement(
                    parent, "dir", name=dir_name
                )
            for file_name in sorted(files):
                ET.SubElement(parent, "file", name=file_name)

        ET.dump(xml_root)
        return xml_root
谢谢。
答案 0 :(得分:1)
1. 使用 os.walk 遍历目录。
2. os.walk 会返回 root(当前目录)、当前目录下的子目录列表,以及当前目录下的文件列表。
3. 通过 XPath 从 XML 根元素中找到父元素,例如 xpath = "/dir[@name='dir_9']"
或 xpath = "/dir[@name='dir_9']/dir[@name='dir_apache']"。
4. 把子目录和文件分别作为 dir 元素和 file 元素追加到父元素下。
**输入**:
>>> p = '/home/vivek/Desktop/9'
>>> import os
>>> for root, dires,files in os.walk(p):
... print root
... print dires
... print files
... print "="*10
...
/home/vivek/Desktop/9
['apache', 'i18n', 'templates', 'common']
['manage.py', 'urls.py', 'settings.py', '__init__.py']
==========
/home/vivek/Desktop/9/apache
[]
['readeradmin.wsgi']
==========
/home/vivek/Desktop/9/i18n
[]
['__init__.py', 'models.py']
==========
/home/vivek/Desktop/9/templates
['admin', 'registration']
[]
==========
/home/vivek/Desktop/9/templates/admin
[]
['base_site.html']
==========
/home/vivek/Desktop/9/templates/registration
[]
['logged_out.html']
==========
/home/vivek/Desktop/9/common
[]
['views.py', '__init__.py', 'idmaptoalpha.py', 'tests.py', 'models.py']
==========
**代码:**
import os
import lxml.etree as PARSER

# `p` is the directory to scan; it is defined in the input session shown
# earlier. Normalise it so a trailing separator does not yield an empty
# basename for the root element.
top = os.path.normpath(p)
xml_root = PARSER.Element("dir", {"name": "dir_" + os.path.basename(top)})

for root, dires, files in os.walk(top):
    # Get the parent element by XPath, descending from the root element one
    # path component at a time. For the sample tree this reaches the same
    # node as "/dir[@name='dir_9']/dir[@name='dir_apache']", but the name is
    # passed as an XPath variable so quotes in a directory name cannot break
    # the expression, and the path is split with os.sep rather than "/".
    parent = xml_root
    relative = os.path.relpath(root, top)
    if relative != os.curdir:
        for part in relative.split(os.sep):
            parent = parent.xpath("dir[@name=$name]", name="dir_" + part)[0]

    #- Append directory to parent element.
    for i in dires:
        parent.append(PARSER.Element("dir", {"name": "dir_" + i}))

    #- Append files to parent element.
    for i in files:
        parent.append(PARSER.Element("file", {"name": i}))

# encoding="unicode" returns text rather than bytes, and the call form of
# print works on both Python 2 and Python 3.
print(PARSER.tostring(xml_root, method="xml", encoding="unicode", pretty_print=True))
**输出**:
<dir name="dir_9">
<dir name="dir_apache">
<file name="readeradmin.wsgi"/>
</dir>
<dir name="dir_i18n">
<file name="__init__.py"/>
<file name="models.py"/>
</dir>
<dir name="dir_templates">
<dir name="dir_admin">
<file name="base_site.html"/>
</dir>
<dir name="dir_registration">
<file name="logged_out.html"/>
</dir>
</dir>
<dir name="dir_common">
<file name="views.py"/>
<file name="__init__.py"/>
<file name="idmaptoalpha.py"/>
<file name="tests.py"/>
<file name="models.py"/>
</dir>
<file name="manage.py"/>
<file name="urls.py"/>
<file name="settings.py"/>
<file name="__init__.py"/>
</dir>