在python中是否有任何扫描目录树并生成xml的代码

时间:2015-12-30 05:31:15

标签: python xml

我想扫描目录树并生成xml。我尝试了很多方法,但都失败了。

例如,期望得到的XML文件结构如下:

<dir name="dir_A">
    <dir name="dir_AA"> 
       <file name="abc.doc"/>
    </dir>
    <dir name="dir_BA">
       <dir name="dir_BAA"> 
           <file name="abc.doc"/>
       </dir>
       <file name="abc.doc"/>
    </dir>
</dir>

下面是我尝试过的代码,但并不完整。开发过程中我删掉了一部分代码,现在手头已经没有了,抱歉。

import xml.etree.ElementTree as ET
import os

class XMLOperations:
    """Helpers for describing a directory tree as XML."""

    def list_files(self, startpath):
        """Scan ``startpath`` recursively and build a nested XML tree.

        Every directory becomes a ``<dir name="...">`` element that holds
        its sub-directories and files; every file becomes an empty
        ``<file name="..."/>`` element.  Within a directory, sub-directories
        come first and both groups are sorted by name, so the output is
        deterministic.  The finished tree is written to standard output
        with ``ET.dump`` and is also returned.

        Args:
            startpath: Path of the directory to scan.

        Returns:
            The ``xml.etree.ElementTree.Element`` representing ``startpath``.
        """
        # abspath() drops a trailing separator and resolves ".", so the
        # basename used for the top element is not empty for such inputs.
        top_name = os.path.basename(os.path.abspath(startpath))
        xml_root = ET.Element("dir", name=top_name)

        # Elements of directories that os.walk has announced but not yet
        # visited, keyed by the exact path os.walk will yield for them.
        pending = {}

        for root, dirs, files in os.walk(startpath):
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()

            # The first path yielded is the start directory itself, which
            # was never registered in `pending`; it maps to the top element.
            parent = pending.pop(root, xml_root)

            for dir_name in dirs:
                child = ET.SubElement(parent, "dir", name=dir_name)
                pending[os.path.join(root, dir_name)] = child

            for file_name in sorted(files):
                ET.SubElement(parent, "file", name=file_name)

        ET.dump(xml_root)
        return xml_root

谢谢。

1 个答案:

答案 0 :(得分:1)

  1. 使用os.walk获取root(当前目录)、当前目录下的子目录列表以及当前目录下的文件列表。
  2. 通过lxml Parser创建xml根元素。
  3. os.walk
  4. 迭代目录结构
  5. 这很重要:通过xpath获取当前目录对应的父元素,因此需要根据当前路径构造xpath。例如 xpath = "/dir[@name='dir_9']",或 xpath = "/dir[@name='dir_9']/dir[@name='dir_apache']"
  6. 按目录列表附加dir元素。
  7. 按文件列表追加'file'元素。
  8. **输入**

    >>> p = '/home/vivek/Desktop/9' 
    >>> import os
    >>> for root, dires,files in os.walk(p):
    ...   print root
    ...   print dires
    ...   print files
    ...   print "="*10
    ... 
    /home/vivek/Desktop/9
    ['apache', 'i18n', 'templates', 'common']
    ['manage.py', 'urls.py', 'settings.py', '__init__.py']
    ==========
    /home/vivek/Desktop/9/apache
    []
    ['readeradmin.wsgi']
    ==========
    /home/vivek/Desktop/9/i18n
    []
    ['__init__.py', 'models.py']
    ==========
    /home/vivek/Desktop/9/templates
    ['admin', 'registration']
    []
    ==========
    /home/vivek/Desktop/9/templates/admin
    []
    ['base_site.html']
    ==========
    /home/vivek/Desktop/9/templates/registration
    []
    ['logged_out.html']
    ==========
    /home/vivek/Desktop/9/common
    []
    ['views.py', '__init__.py', 'idmaptoalpha.py', 'tests.py', 'models.py']
    ==========
    

    **代码:**

    import os
    import lxml.etree as PARSER

    # `p` is the directory to scan. Normalise it so that a trailing
    # separator cannot produce an empty directory name.
    top = os.path.normpath(p)
    top_name = os.path.basename(top)
    xml_root = PARSER.Element("dir", {"name": "dir_" + top_name})

    for root, dires, files in os.walk(top):
        # Names of every directory from the scanned one down to `root`.
        names = [top_name]
        rel = os.path.relpath(root, top)
        if rel != os.curdir:
            names.extend(rel.split(os.sep))

        # Get the parent element by XPath, one `/dir[@name=...]` step per
        # directory level. The names are bound as XPath variables instead
        # of being pasted into the expression, so a quote character in a
        # directory name cannot break the expression.
        steps = []
        variables = {}
        for depth, name in enumerate(names):
            steps.append("/dir[@name=$n%d]" % depth)
            variables["n%d" % depth] = "dir_" + name
        parent = xml_root.xpath("".join(steps), **variables)[0]

        # Append sub-directories to the parent element.
        for i in dires:
            parent.append(PARSER.Element("dir", {"name": "dir_" + i}))
        # Append files to the parent element.
        for i in files:
            parent.append(PARSER.Element("file", {"name": i}))

    # encoding="unicode" makes tostring() return text rather than bytes, and
    # the parenthesised print is valid on both Python 2 and Python 3.
    print(PARSER.tostring(xml_root, method="xml", pretty_print=True, encoding="unicode"))
    

    **输出**

    <dir name="dir_9">
      <dir name="dir_apache">
        <file name="readeradmin.wsgi"/>
      </dir>
      <dir name="dir_i18n">
        <file name="__init__.py"/>
        <file name="models.py"/>
      </dir>
      <dir name="dir_templates">
        <dir name="dir_admin">
          <file name="base_site.html"/>
        </dir>
        <dir name="dir_registration">
          <file name="logged_out.html"/>
        </dir>
      </dir>
      <dir name="dir_common">
        <file name="views.py"/>
        <file name="__init__.py"/>
        <file name="idmaptoalpha.py"/>
        <file name="tests.py"/>
        <file name="models.py"/>
      </dir>
      <file name="manage.py"/>
      <file name="urls.py"/>
      <file name="settings.py"/>
      <file name="__init__.py"/>
    </dir>