我有一个带格式的 txt 文件,如下所示:
Hostinfo Start
  DATE 190819 1522
  HOST midas
  DOMAIN test.de
  HW_PLATFORM x86_64
  SERVER_TYPE virtual
  CPU_INFO
    CPU_TYPE Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz
    CPU_COUNT 2
    CORE_COUNT 2
  THREAD_COUNT 8
  MEMORY 32951312 kB
  OS Start
    OS Linux
    OS_VERSION 4.9.0-6-amd64
    OS_UPTIME 536 days 21:08
  OS End
  RELEASE Debian GNU/Linux 9 (stretch)
  RELEASE_VERSION 9
  RELEASE_PATCHLEVEL
Hostinfo End
需要根据每行前导空格的数量,将其转换为类似下面这样的 JSON 格式:
"Hostinfo": [
{
"DATE": "190819 1522"
"HOST": "midas"
"DOMAIN": "test.de"
"HW_PLATFORM": "x86_64"
"SERVER_TYPE": "virtual"
"CPU_INFO": {
"CPU_TYPE": "Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz"
"CPU_COUNT": "2"
"CORE_COUNT": "2"
}
"THREAD_COUNT": "8"
"MEMORY": "32951312 kB"
"OS": [
{
"OS": "Linux"
"OS_VERSION": "4.9.0-6-amd64"
"OS_UPTIME": "536 days 21:08"
}
]
"RELEASE": "Debian GNU/Linux 9 (stretch)"
"RELEASE_VERSION": "9"
"RELEASE_PATCHLEVEL" : ""
}
]
我用下面这个脚本取得了一些进展,但还没弄清楚如何把大括号之间的行设置为上一级字典(层级)中的对象:
#!/usr/bin/python
import json
import itertools
import string
import re
# Work-in-progress converter: walks an indentation-structured text file,
# prints brace markers whenever the nesting depth changes, and collects a
# flat key -> value mapping that is dumped as JSON at the end.
filename = 'commands.txt'
commands = {}
with open(filename) as fh:
    previous_line = 0  # nesting depth of the previously processed line
    mark_line = ""     # "start_line"/"end_line" right after a section marker
    for line in fh:
        # Nesting depth: two leading spaces per level.
        current_line = ((len(line) - len(line.lstrip()))/2)
        diff = current_line - previous_line
        if re.search(' Start$', line.strip()):
            line = line.strip().replace(' Start', ':{')
            print(line)
            mark_line = "start_line"
        elif re.search(' End$', line.strip()):
            # The input closes sections with "<name> End" (not "Ende").
            line = line.strip().replace(' End', '')
            print("}")
            mark_line = "end_line"
        elif diff == 0:
            print(line.strip())
        elif diff > 0:
            # Deeper than the previous line: open a brace unless a Start/End
            # marker has already accounted for the depth change.
            if mark_line == "start_line" or mark_line == "end_line":
                mark_line = "0"
            else:
                print("{")
            print(line.strip())
        elif diff < 0:
            # Shallower than the previous line: close a brace unless a
            # Start/End marker has already accounted for the depth change.
            if mark_line == "start_line" or mark_line == "end_line":
                mark_line = "0"
            else:
                print("}")
            print(line.strip())
        # Remember the depth measured on the raw line; `line` may have been
        # replaced by stripped text in the marker branches above.
        previous_line = current_line
        try:
            command, description = line.strip().split(' ', 1)
        except ValueError:
            # No space in the line: a key without a value.
            command = line.strip()
            description = ""
        commands[command] = description.strip()
print(json.dumps(commands, indent=2, sort_keys=True))
也许您可以给我一些解决这个问题的思路或建议?是否有某些模块可以简化这个脚本?
更新:我在这个凌乱的脚本里加入了一些 JSON 标记。能否告诉我,我的方向是对还是错?
答案 0 :(得分:1)
您可以结合递归使用 itertools.groupby:
import itertools as it, re
# Parse the file into rows of the form [indent?, key, *value_parts]:
#   - the leading-whitespace run (if any) is kept as the first item,
#   - a trailing " Start" marker is removed from the key,
#   - lines ending in "End" are dropped.
# The file is opened in a `with` block so the handle is closed once read.
data = []
with open('os_stuff.txt') as fh:
    for raw in fh:
        text = re.sub(r'^\s+|\sStart\n$', '', raw)
        if text.endswith('End\n'):
            continue
        # Split after an upper-case letter so "KEY value" separates at the key.
        data.append([*re.findall(r'^\s+', raw), *re.split(r'(?<=[A-Z])\s+', text)])
def to_tree(d):
    """Build a nested dict from indentation-tagged rows.

    Each row of *d* is a list. A row whose first item consists only of
    whitespace is nested: that item is its remaining indent, and one level
    (two characters) is removed from it before recursing. Any other row is
    ``[key, *value_parts]`` at the current level.

    A run of nested rows is converted recursively and stored, wrapped in a
    one-element list, under the pending parent key. A row becomes the
    pending parent when it has no value parts at all, or when its value is
    empty and it is the last row before a nested run. Every other row is
    stored as ``key -> ' '.join(value_parts)`` with newlines stripped from
    both ends.

    Returns the dict for the current level (empty for empty input).
    """
    groups = [
        (nested, list(rows))
        for nested, rows in it.groupby(
            d, key=lambda row: bool(re.findall(r'^\s+$', row[0])))
    ]
    new_dict, _last = {}, None
    for i, (nested, rows) in enumerate(groups):
        if nested:
            # Remove one indent level; drop the indent item once exhausted.
            children = [[indent[2:], *rest] if indent[2:] else rest
                        for indent, *rest in rows]
            new_dict[_last] = [to_tree(children)]
            continue
        # groupby alternates flat and nested runs, so any later group means
        # this flat run is immediately followed by nested rows.
        has_children = i + 1 < len(groups)
        for pos, (key, *values) in enumerate(rows):
            opens_block = has_children and pos == len(rows) - 1
            if not values or (not values[0] and opens_block):
                _last = key
            else:
                new_dict[key] = ' '.join(values).strip('\n')
    return new_dict
import json
# Convert the parsed rows to a nested dict and pretty-print it as JSON
# with a 4-space indent.
print(json.dumps(to_tree(data), indent=4))
输出:
{
"Hostinfo": [
{
"DATE": "190819 1522",
"HOST": "midas",
"DOMAIN": "test.de",
"HW_PLATFORM": "x86_64",
"SERVER_TYPE": "virtual",
"CPU_INFO": [
{
"CPU_TYPE": "Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz",
"CPU_COUNT": "2",
"CORE_COUNT": "2"
}
],
"THREAD_COUNT": "8",
"MEMORY": "32951312 kB ",
"OS": [
{
"OS": "Linux",
"OS_VERSION": "4.9.0-6-amd64",
"OS_UPTIME": "536 days 21:08"
}
],
"RELEASE": "Debian GNU/Linux 9 (stretch)",
"RELEASE_VERSION": "9",
"RELEASE_PATCHLEVEL": ""
}
]
}
编辑:Python2.7解决方案:
import itertools as it, re
# Pair every raw line with a cleaned copy (leading whitespace and a trailing
# " Start" marker removed). The file is opened in a `with` block so the
# handle is closed once read.
with open('os_stuff.txt') as fh:
    new_data = [[i, re.sub(r'^\s+|\sStart\n$', '', i)] for i in fh]
# Build rows of the form [indent?, key, value_parts...], dropping lines that
# end in "End". Splitting after an upper-case letter separates key and value.
data = [re.findall(r'^\s+', a) + re.split(r'(?<=[A-Z])\s+', b)
        for a, b in new_data if not b.endswith('End\n')]
def to_tree(d):
    """Build a nested dict from indentation-tagged rows (Python 2.7 syntax).

    Each row of *d* is a list. A row whose first item consists only of
    whitespace is nested: that item is its remaining indent, and one level
    (two characters) is removed from it before recursing. Any other row is
    ``[key, value_part, ...]`` at the current level.

    A run of nested rows is converted recursively and stored, wrapped in a
    one-element list, under the pending parent key. A row becomes the
    pending parent when it has no value parts at all, or when its value is
    empty and it is the last row before a nested run. Every other row is
    stored as ``key -> ' '.join(value_parts)`` with newlines stripped from
    both ends.

    Returns the dict for the current level (empty for empty input).
    """
    groups = [(nested, list(rows)) for nested, rows in
              it.groupby(d, key=lambda row: bool(re.findall(r'^\s+$', row[0])))]
    new_dict, _last = {}, None
    for i, (nested, rows) in enumerate(groups):
        if nested:
            # Remove one indent level; drop the indent item once exhausted.
            children = [[row[0][2:]] + row[1:] if row[0][2:] else row[1:]
                        for row in rows]
            new_dict[_last] = [to_tree(children)]
            continue
        # groupby alternates flat and nested runs, so any later group means
        # this flat run is immediately followed by nested rows.
        has_children = i + 1 < len(groups)
        for pos, row in enumerate(rows):
            key, values = row[0], row[1:]
            opens_block = has_children and pos == len(rows) - 1
            if not values or (not values[0] and opens_block):
                _last = key
            else:
                new_dict[key] = ' '.join(values).strip('\n')
    return new_dict
# Print the nested dict built from the parsed rows. The function defined
# above is named to_tree; no to_dict exists in this snippet.
print(to_tree(data))