我正在用 Python 解析来自某个 API 的一些糟糕的列表,并将其输出为具有特定格式的嵌套 JSON 结构,以供大量前端服务使用。
下面的列表是一个示例,其中每一项都是某个文件的完整路径。我无法修改此输入,因为它来自遍历数据库的外部服务。目录项不会出现在此列表中,只有文件;文件所在的目录可以从路径中直接看出。例如,在下面的示例中,MIPK/DORAS 目录下没有直接包含任何文件。示例如下:
"/generic_root/site_store/MIPK/CM.toroidal",
"/generic_root/site_store/MIPK/CM.Supervoid",
"/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest",
"/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign",
"/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes",
"/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant",
"/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral",
"/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking"
以前使用的功能非常慢,但是我目前正在使用以下代码来解析列表。输出格式不是我想要的确切格式。我被困在将节点附加到根节点上。
在下面,它采用路径,找到嵌套目录,并删除每个文件中存在的根路径,然后创建具有适当嵌套结构的节点对象。
将其添加到prev_node之后,然后使用目录名称作为键将其附加到字典中。
import logging
logger = logging.getLogger(__name__)
def main():
    '''
    Build the directory map for a sample list of file paths and log it.
    '''
    # Sample input: full file paths only, directories are implied
    base = '/generic_root'
    site = '/site_store'
    sample_paths = [
        "/generic_root/site_store/MIPK/CM.toroidal",
        "/generic_root/site_store/MIPK/CM.Supervoid",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking",
    ]

    tree_map = create_dir_tree(sample_paths, base, site)

    # Log the number of keys, the keys themselves and the whole mapping
    keys = tree_map.keys()
    logging.info(
        '\n\tView store keys: %s\n\tKeys: %s\n\tDict of store: %s',
        len(keys), keys, tree_map)
def joiner(delimiter, *args):
    '''
    Return the positional strings glued together with delimiter.
    A list or set of parts must be unpacked (*parts) by the caller.
    '''
    # str.join accepts the args tuple directly
    return delimiter.join(args)
def create_dir_tree(file_list, root_path, store):
    '''
    Build directory nodes from a flat list of file paths.

    file_list [LIST][STR] - full file paths; directories are implied by them
    root_path [STR] - leading part of every path, e.g. '/generic_root'
    store [STR] - store directory appended to root_path, e.g. '/site_store'
    Return [DICT] - flat map of directory path -> directory node. Each node
        is {'name', 'data', 'type', 'path'}; its 'data' list holds the file
        records and the child directory nodes (the same objects that are
        stored in the map), so every node is also the root of its subtree.
    '''
    node_map = {}
    full_root = root_path + store
    for sub_path in file_list:
        # Strip only the leading root. str.replace would also delete any
        # later occurrence of the same text and silently drop directories.
        if sub_path.startswith(full_root):
            relative = sub_path[len(full_root):]
        else:
            relative = sub_path
        # Drop the empty leading component and the trailing file name
        parents = relative.split('/')[1:-1]

        prev_node = None
        node = None
        node_path = full_root
        # Create (or look up) one directory node per path component
        for parent in parents:
            node_path = node_path + '/' + parent
            node = node_map.get(node_path)
            if node is None:
                node = {
                    'name': parent,
                    'data': [],
                    'type': 'dir',
                    'path': node_path,
                }
                node_map[node_path] = node
                # Link a new directory into its parent exactly once
                if prev_node is not None:
                    prev_node['data'].append(node)
            prev_node = node

        # A file with no directory below full_root has no node to hold it
        # and is skipped
        if node is not None:
            node['data'].append({
                'name': sub_path.rsplit('/', 1)[-1],
                'type': 'file',
                'path': sub_path,
            })
    return node_map
下面是上面代码的输出。可以看到,每个目录的子树都会在各级键下重复出现;由于这些列表最终会变得很大,这种重复是一个很大的问题,并且会带来相当严重的内存问题。
node_map = {
'/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE': {
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant',
'type': 'file',
'name': 'CM.tangeant'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral',
'type': 'file',
'name': 'CM.astral'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking',
'type': 'file',
'name': 'CM.forking'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE'
},
'/generic_root/site_store/MIPK/DORAS/CRUDE': {
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest',
'type': 'file',
'name': 'CM.forest'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign',
'type': 'file',
'name': 'CM.benign'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes',
'type': 'file',
'name': 'CM.dunes'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE'
},
'/generic_root/site_store/MIPK/DORAS/COMMODITIES': {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant',
'type': 'file',
'name': 'CM.tangeant'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral',
'type': 'file',
'name': 'CM.astral'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking',
'type': 'file',
'name': 'CM.forking'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE'
}],
'name': 'COMMODITIES',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES'
},
'/generic_root/site_store/MIPK': {
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/CM.toroidal',
'type': 'file',
'name': 'CM.toroidal'
}, {
'path': '/generic_root/site_store/MIPK/CM.Supervoid',
'type': 'file',
'name': 'CM.Supervoid'
}, {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest',
'type': 'file',
'name': 'CM.forest'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign',
'type': 'file',
'name': 'CM.benign'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes',
'type': 'file',
'name': 'CM.dunes'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE'
}, {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant',
'type': 'file',
'name': 'CM.tangeant'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral',
'type': 'file',
'name': 'CM.astral'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking',
'type': 'file',
'name': 'CM.forking'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE'
}],
'name': 'COMMODITIES',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES'
}],
'name': 'DORAS',
'path': '/generic_root/site_store/MIPK/DORAS'
}],
'name': 'MIPK',
'path': '/generic_root/site_store/MIPK'
},
'/generic_root/site_store': {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/CM.toroidal',
'type': 'file',
'name': 'CM.toroidal'
}, {
'path': '/generic_root/site_store/MIPK/CM.Supervoid',
'type': 'file',
'name': 'CM.Supervoid'
}, {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest',
'type': 'file',
'name': 'CM.forest'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign',
'type': 'file',
'name': 'CM.benign'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes',
'type': 'file',
'name': 'CM.dunes'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE'
}, {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant',
'type': 'file',
'name': 'CM.tangeant'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral',
'type': 'file',
'name': 'CM.astral'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking',
'type': 'file',
'name': 'CM.forking'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE'
}],
'name': 'COMMODITIES',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES'
}],
'name': 'DORAS',
'path': '/generic_root/site_store/MIPK/DORAS'
}],
'name': 'MIPK',
'path': '/generic_root/site_store/MIPK'
}],
'name': 'site_store',
'path': '/generic_root/site_store'
},
'/generic_root/site_store/MIPK/DORAS': {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest',
'type': 'file',
'name': 'CM.forest'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign',
'type': 'file',
'name': 'CM.benign'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes',
'type': 'file',
'name': 'CM.dunes'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/CRUDE'
}, {
'type': 'dir',
'data': [{
'type': 'dir',
'data': [{
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant',
'type': 'file',
'name': 'CM.tangeant'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral',
'type': 'file',
'name': 'CM.astral'
}, {
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking',
'type': 'file',
'name': 'CM.forking'
}],
'name': 'CRUDE',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE'
}],
'name': 'COMMODITIES',
'path': '/generic_root/site_store/MIPK/DORAS/COMMODITIES'
}],
'name': 'DORAS',
'path': '/generic_root/site_store/MIPK/DORAS'
}
}
2个问题:
desired output = {
"type": "dir",
"data": [{
"type": "dir",
"data": [{
"path": "/generic_root/site_store/MIPK/CM.toroidal",
"type": "file",
"name": "CM.toroidal"
}, {
"path": "/generic_root/site_store/MIPK/CM.Supervoid",
"type": "file",
"name": "CM.Supervoid"
}, {
"type": "dir",
"data": [{
"type": "dir",
"data": [{
"path": "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest",
"type": "file",
"name": "CM.forest"
}, {
"path": "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign",
"type": "file",
"name": "CM.benign"
}, {
"path": "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes",
"type": "file",
"name": "CM.dunes"
}],
"name": "CRUDE",
"path": "/generic_root/site_store/MIPK/DORAS/CRUDE"
}, {
"type": "dir",
"data": [{
"type": "dir",
"data": [{
"path": "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant",
"type": "file",
"name": "CM.tangeant"
}, {
"path": "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral",
"type": "file",
"name": "CM.astral"
}, {
"path": "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking",
"type": "file",
"name": "CM.forking"
}],
"name": "CRUDE",
"path": "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE"
}],
"name": "COMMODITIES",
"path": "/generic_root/site_store/MIPK/DORAS/COMMODITIES"
}],
"name": "DORAS",
"path": "/generic_root/site_store/MIPK/DORAS"
}],
"name": "MIPK",
"path": "/generic_root/site_store/MIPK"
}],
"name": "site_store",
"path": "/generic_root/site_store"
}
答案 0 :(得分:0)
请熟悉此解决方案,该解决方案使用导航器对象在字典上上下移动。
首先解析字符串,将其从路径拆分为子目录列表。
检查该子目录是否已存在于当前结果层级(由 lambda 在 "data" 字段中按 "name" 进行匹配);如果不存在,则先创建它,然后将导航器移动到该层级。
一旦导航器创建或导航到正确的子目录(父列表中的最后一个子目录),然后将节点对象附加到结果数组。
对于下一个路径,导航器将重置为完整结果,然后该过程再次开始。
欢迎反馈,我敢肯定它会有所改善。
def main():
    '''
    Run create_dir_tree over a sample list of file paths.
    '''
    # Sample input: full file paths only, directories are implied
    base = '/generic_root'
    site = '/site_store'
    sample_paths = [
        "/generic_root/site_store/MIPK/CM.toroidal",
        "/generic_root/site_store/MIPK/CM.Supervoid",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.forest",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.benign",
        "/generic_root/site_store/MIPK/DORAS/CRUDE/CM.dunes",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.tangeant",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.astral",
        "/generic_root/site_store/MIPK/DORAS/COMMODITIES/CRUDE/CM.forking",
    ]

    # NOTE(review): the tree is built but not used any further here
    create_dir_tree(sample_paths, base, site)
def Node(full_path, name):
    '''
    Create a directory node.

    full_path [STR] - stored under 'path'
    name [STR] - stored under 'name'
    returns [DICT] with 'type' set to 'dir' and a fresh empty 'data' list
    '''
    children = []
    return dict(path=full_path, name=name, type='dir', data=children)
def Record(full_path):
    '''
    Create a file record.

    full_path [STR] - stored under 'path'
    returns [DICT] with 'name' set to the text after the last '/' of
                   full_path (the whole string when it has no '/') and
                   'type' set to 'file'
    '''
    # rpartition leaves the whole string in the tail when no '/' is present
    _, _, file_name = full_path.rpartition('/')
    return dict(path=full_path, name=file_name, type='file')
def joiner(delimiter, *args):
    '''
    delimiter [STR] - text placed between consecutive parts
    *args [STR] - the parts; unpack sets and lists when calling
    returns [STR] the parts joined with delimiter
    '''
    parts = args
    joined = delimiter.join(parts)
    return joined
def create_dir_tree(file_list, root_path, store):
    '''
    Build one nested directory tree from a flat list of file paths.

    file_list [LIST][STR] - full file paths; directories are implied by them
    root_path [STR] - root directory
    store [STR] - store site (sub directory), e.g. '/site_store'
    returns [DICT] Node for root_path + store; its 'data' list holds file
                   Records and child directory Nodes, nested to any depth
    '''
    # Initialise
    full_root = root_path + store
    # Name the root after its last path component ('site_store' rather than
    # '/site_store'), the same way every other entry is named
    result = Node(full_root, full_root.rstrip('/').rsplit('/', 1)[-1])
    # Directory path -> node already placed in the tree. Looking nodes up
    # here avoids scanning 'data' at every level and always yields the right
    # directory, even when it is not the last entry of its parent's 'data'.
    dir_index = {full_root: result}

    for path in file_list:
        # Strip only the leading root; str.replace would also delete any
        # later occurrence of the same text inside the path
        if path.startswith(full_root):
            short_path = path[len(full_root):]
        else:
            short_path = path

        # Every path starts again from the root of the tree
        navigator = result
        node_path = full_root
        # Drop the empty leading component and the trailing file name
        parents = short_path.split('/')[1:-1]
        for parent in parents:
            node_path = joiner('/', node_path, parent)
            child = dir_index.get(node_path)
            if child is None:
                # If node does not exist, append dir object to 'data'
                child = Node(node_path, parent)
                navigator['data'].append(child)
                dir_index[node_path] = child
            # Move navigator down into the found or newly created directory
            navigator = child

        # Append record after constructing or navigating to correct path
        navigator['data'].append(Record(path))
    return result