我的 Python 脚本读取一个 XML 文件,并据此生成文件夹结构。
我的XML文件:
<?xml version="1.0" encoding="utf-8"?>
<serverfiles name="Test">
<serverfiles name="Fail">
<serverfiles name="Cam1">
<serverfiles name="Mod1">
<serverfiles name="2019-01-07" />
<serverfiles name="2019-01-08" />
</serverfiles>
<serverfiles name="Mod2">
<serverfiles name="2019-02-07" />
<serverfiles name="2019-02-08" />
</serverfiles>
</serverfiles>
</serverfiles>
<serverfiles name="Pass">
<serverfiles name="Cam1">
<serverfiles name="Mod1">
<serverfiles name="2019-03-07" />
<serverfiles name="2019-03-08" />
</serverfiles>
<serverfiles name="Mod2">
<serverfiles name="2019-04-07" />
<serverfiles name="2019-04-08" />
</serverfiles>
</serverfiles>
</serverfiles>
</serverfiles>
Python脚本:
from pprint import pprint
import xml.etree.ElementTree as ET
def walk(e):
    """Convert an element and its ``serverfiles`` descendants into nested dicts.

    Args:
        e: An ElementTree element carrying a ``name`` attribute.

    Returns:
        ``{'name': ..., 'children': [...]}`` when the element has at least
        one ``serverfiles`` child, otherwise ``{'name': ..., 'path': ''}``.

    Raises:
        KeyError: If ``e`` or a visited ``serverfiles`` child lacks a
            ``name`` attribute.
    """
    name = e.attrib['name']
    # Filter on the child's tag (not the parent's) so unrelated child
    # elements are skipped instead of being walked.
    children = [walk(c) for c in e if c.tag == 'serverfiles']
    if children:
        return {'name': name, 'children': children}
    return {'name': name, 'path': ''}
# Load the XML document, convert it starting from its root element,
# and pretty-print the resulting nested dict.
root_element = ET.parse(r'folder_structure.xml').getroot()
pprint(walk(root_element))
这产生以下输出:
{'children': [{'children': [{'children': [{'children': [{'name': '2019-01-07',
'path': ''},
{'name': '2019-01-08',
'path': ''}],
'name': 'Mod1'},
{'children': [{'name': '2019-02-07',
'path': ''},
{'name': '2019-02-08',
'path': ''}],
'name': 'Mod2'}],
'name': 'Cam1'}],
'name': 'Fail'},
{'children': [{'children': [{'children': [{'name': '2019-03-07',
'path': ''},
{'name': '2019-03-08',
'path': ''}],
'name': 'Mod1'},
{'children': [{'name': '2019-04-07',
'path': ''},
{'name': '2019-04-08',
'path': ''}],
'name': 'Mod2'}],
'name': 'Cam1'}],
'name': 'Pass'}], 'name': 'Test'}
但是我的所需输出是:
{'children': [{'children': [{'children': [{'children': [{'name': '2019-01-07',
'path': '/Test/Fail/Cam1/Mod1/'},
{'name': '2019-01-08',
'path': '/Test/Fail/Cam1/Mod1/'}],
'name': 'Mod1'},
{'children': [{'name': '2019-02-07',
'path': '/Test/Fail/Cam1/Mod2/'},
{'name': '2019-02-08',
'path': '/Test/Fail/Cam1/Mod2/'}],
'name': 'Mod2'}],
'name': 'Cam1'}],
'name': 'Fail'},
{'children': [{'children': [{'children': [{'name': '2019-03-07',
'path': '/Test/Pass/Cam1/Mod1/'},
{'name': '2019-03-08',
'path': '/Test/Pass/Cam1/Mod1/'}],
'name': 'Mod1'},
{'children': [{'name': '2019-04-07',
'path': '/Test/Pass/Cam1/Mod2/'},
{'name': '2019-04-08',
'path': '/Test/Pass/Cam1/Mod2/'}],
'name': 'Mod2'}],
'name': 'Cam1'}],
'name': 'Pass'}], 'name': 'Test'}
我已经查阅了“access ElementTree node parent node”和“how to get xpath from root in python while parsing xml”这两个问题,但仍然没能找到解决方案。
如何在解析 XML 文件的同时获取每个叶节点的路径(从根节点开始)?
答案 0 :(得分:1)
您可能忘记了在递归过程中记录并传递已累积的路径(即从根节点到当前节点的各级名称),而最终路径正需要它。对于更一般的用例,我的做法未必适用,但是下面的脚本应该可以解决您遇到的这个特定问题。
from pprint import pprint
import xml.etree.ElementTree as ET
def walk(e, runningname=''):
    """Convert an element tree into nested dicts, recording each leaf's path.

    Args:
        e: An ElementTree element carrying a ``name`` attribute.
        runningname: Slash-separated names of the ancestors of ``e``
            (for example ``'/Test/Fail'``); empty for the root element.

    Returns:
        ``{'name': ..., 'children': [...]}`` when the element has at least
        one ``serverfiles`` child. Otherwise a leaf dict
        ``{'name': ..., 'path': ...}`` whose ``path`` is the ancestor path
        terminated by ``'/'`` (for example ``'/Test/Fail/Cam1/Mod1/'``).

    Raises:
        KeyError: If ``e`` or a visited ``serverfiles`` child lacks a
            ``name`` attribute.
    """
    name = e.attrib['name']
    # Filter on the child's tag (not the parent's) so unrelated child
    # elements are skipped instead of being walked.
    subfolders = [c for c in e if c.tag == 'serverfiles']
    if not subfolders:
        # Leaf: the path is the parent folder, with a trailing slash.
        return {'name': name, 'path': f'{runningname}/'}
    # Only non-leaf names become part of the path handed to descendants.
    child_prefix = f'{runningname}/{name}'
    return {
        'name': name,
        'children': [walk(c, child_prefix) for c in subfolders],
    }
# Load the XML document, convert it starting from its root element,
# and pretty-print the resulting nested dict.
root_element = ET.parse(r'r.xml').getroot()
pprint(walk(root_element))