如何使用前导空格将txt文件转换为json?

时间:2019-10-18 03:29:26

标签: python json

我有一个带有固定格式的txt文件,如下所示:

Hostinfo Start
  DATE 190819 1522
  HOST midas
  DOMAIN test.de
  HW_PLATFORM x86_64
  SERVER_TYPE virtual
  CPU_INFO
    CPU_TYPE  Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz
    CPU_COUNT 2
    CORE_COUNT 2
  THREAD_COUNT 8
  MEMORY       32951312 kB
  OS Start
    OS Linux
    OS_VERSION 4.9.0-6-amd64
    OS_UPTIME 536 days 21:08
    OS End
  RELEASE Debian GNU/Linux 9 (stretch)
  RELEASE_VERSION 9
  RELEASE_PATCHLEVEL
Hostinfo End

需要根据前导空格的数量(缩进层级),将其转换为类似下面这样的json格式:

"Hostinfo": [
{
  "DATE": "190819 1522"
  "HOST": "midas"
  "DOMAIN": "test.de"
  "HW_PLATFORM": "x86_64"
  "SERVER_TYPE": "virtual"
  "CPU_INFO": {
    "CPU_TYPE": "Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz"
    "CPU_COUNT": "2"
    "CORE_COUNT": "2"
    }
  "THREAD_COUNT": "8"
  "MEMORY": "32951312 kB"
  "OS": [
      {
      "OS": "Linux"
      "OS_VERSION": "4.9.0-6-amd64"
      "OS_UPTIME": "536 days 21:08"
      }
    ]
  "RELEASE": "Debian GNU/Linux 9 (stretch)"
  "RELEASE_VERSION": "9"
  "RELEASE_PATCHLEVEL" : ""
}
]

我用下面的脚本取得了一些进展,但不知道如何把大括号之间的行设置为上一级字典(层级)中的对象:

#!/usr/bin/python
import json
import itertools
import string
import re

filename = 'commands.txt'

# Flat key -> value mapping collected from every input line.
# NOTE: nesting is NOT represented here; the braces are only printed to stdout
# while the file is scanned.
commands = {}
with open(filename) as fh:
    previous_level = 0
    # Remembers whether the previous structural event was a section marker, so
    # the indentation change right after a marker does not emit a second brace.
    mark_line = ""

    for line in fh:
        stripped = line.strip()
        # Two leading spaces make one nesting level; floor division keeps the
        # level an integer on Python 3 as well as Python 2.
        current_level = (len(line) - len(line.lstrip())) // 2
        diff = current_level - previous_level

        if stripped.endswith(' Start'):
            # "<NAME> Start" opens a section.
            line = stripped.replace(' Start', ':{')
            print(line)
            mark_line = "start_line"
        elif stripped.endswith(' End'):
            # "<NAME> End" closes a section (the input uses "End", not "Ende").
            line = stripped.replace(' End', '')
            print("}")
            mark_line = "end_line"
        elif diff == 0:
            print(stripped)
        elif diff > 0:
            if mark_line == "start_line" or mark_line == "end_line":
                mark_line = "0"
            else:
                print("{")
                print(stripped)
        elif diff < 0:
            if mark_line == "start_line" or mark_line == "end_line":
                mark_line = "0"
            else:
                print("}")
                print(stripped)

        # Track the real indentation of this line; `line` may have been
        # rewritten above, which would otherwise reset the level to 0.
        previous_level = current_level

        # Split "KEY rest of line" at the first space; a line without a space
        # becomes a key with an empty value. Marker lines are stored under
        # their rewritten text.
        command, _, description = line.strip().partition(' ')
        commands[command] = description.strip()


print(json.dumps(commands, indent=2, sort_keys=True))

也许您能给我一些解决这个问题的思路或建议?是否有某些模块可以简化这个脚本?

更新:我在自己这个比较凌乱的脚本里加入了一些json标记。能否告诉我,我的思路是否正确?

1 个答案:

答案 0 :(得分:1)

您可以将itertools.groupby与递归结合使用:

import itertools as it, re
# Turn every input line into a row of [leading whitespace (if any), key, value parts...].
# The leading indentation and a trailing " Start" marker are removed from the text
# before it is split, and lines whose remaining text ends with "End" are skipped.
# The text is split on whitespace that directly follows an upper-case letter.
with open('os_stuff.txt') as fh:  # context manager so the file handle is closed
    data = [
        [*re.findall(r'^\s+', b), *re.split(r'(?<=[A-Z])\s+', i)]
        for b in fh
        if not (i := re.sub(r'^\s+|\sStart\n$', '', b)).endswith('End\n')
    ]
def to_tree(d):
    """Build a nested dict from indentation-tagged rows.

    Each row is a list. If its first element consists only of whitespace the
    row belongs to a nested section; otherwise the first element is the key
    and the remaining elements are fragments of its value.

    Consecutive rows are grouped by "indented or not". Non-indented rows
    become ``key: value`` entries. An indented group is parsed recursively
    (after removing two characters of indentation) and stored, wrapped in a
    one-element list, under the key that heads it.

    A row heads the following indented group when it has no value part at
    all, or when its value is empty and it is the last row directly before
    that group. Any other empty-valued row is kept as ``key: ''``.

    Returns:
        dict mapping keys to strings or to ``[nested_dict]``. If an indented
        group appears before any heading row, it is stored under ``None``.
    """
    groups = [
        (indented, list(rows))
        for indented, rows in it.groupby(
            d, key=lambda x: bool(re.findall(r'^\s+$', x[0]))
        )
    ]
    new_dict, _last = {}, None
    for i, (indented, rows) in enumerate(groups):
        if indented:
            # Drop one level (two characters) of indentation; rows that reach
            # the current level lose their whitespace element entirely.
            children = [[k[2:], *j] if k[2:] else j for k, *j in rows]
            new_dict[_last] = [to_tree(children)]
            continue
        # groupby alternates keys, so any following group is an indented one.
        nested_follows = i + 1 < len(groups)
        for pos, (key, *value) in enumerate(rows):
            is_last_row = pos == len(rows) - 1
            if not value or (not value[0] and is_last_row and nested_follows):
                _last = key
            else:
                new_dict[key] = ' '.join(value).strip('\n')
    return new_dict

import json
# Render the parsed tree as JSON indented by four spaces and write it to stdout.
rendered = json.dumps(to_tree(data), indent=4)
print(rendered)

输出:

{
    "Hostinfo": [
        {
            "DATE": "190819 1522",
            "HOST": "midas",
            "DOMAIN": "test.de",
            "HW_PLATFORM": "x86_64",
            "SERVER_TYPE": "virtual",
            "CPU_INFO": [
                {
                    "CPU_TYPE": "Intel(R) Xeon(R) CPU E7-8867 v4 @ 2.40GHz",
                    "CPU_COUNT": "2",
                    "CORE_COUNT": "2"
                }
            ],
            "THREAD_COUNT": "8",
            "MEMORY": "32951312 kB ",
            "OS": [
                {
                    "OS": "Linux",
                    "OS_VERSION": "4.9.0-6-amd64",
                    "OS_UPTIME": "536 days 21:08"
                }
            ],
            "RELEASE": "Debian GNU/Linux 9 (stretch)",
            "RELEASE_VERSION": "9",
            "RELEASE_PATCHLEVEL": ""
        }
    ]
}

编辑:Python2.7解决方案:

import itertools as it, re
# Pair every raw line with its text after removing the leading indentation and
# a trailing " Start" marker.
with open('os_stuff.txt') as fh:  # context manager so the file handle is closed
    new_data = [[i, re.sub(r'^\s+|\sStart\n$', '', i)] for i in fh]
# Build rows of [leading whitespace (if any), key, value parts...], skipping the
# lines whose cleaned text ends with "End". The text is split on whitespace that
# directly follows an upper-case letter.
data = [re.findall(r'^\s+', a) + re.split(r'(?<=[A-Z])\s+', b) for a, b in new_data if not b.endswith('End\n')]
def to_tree(d):
    """Build a nested dict from indentation-tagged rows (Python 2.7 compatible).

    Each row is a list. If its first element consists only of whitespace the
    row belongs to a nested section; otherwise the first element is the key
    and the remaining elements are fragments of its value.

    Consecutive rows are grouped by "indented or not". Non-indented rows
    become ``key: value`` entries. An indented group is parsed recursively
    (after removing two characters of indentation) and stored, wrapped in a
    one-element list, under the key that heads it.

    A row heads the following indented group when it has no value part at
    all, or when its value is empty and it is the last row directly before
    that group. Any other empty-valued row is kept as ``key: ''``.

    Returns:
        dict mapping keys to strings or to ``[nested_dict]``. If an indented
        group appears before any heading row, it is stored under ``None``.
    """
    groups = [
        (indented, list(rows))
        for indented, rows in it.groupby(
            d, key=lambda x: bool(re.findall(r'^\s+$', x[0]))
        )
    ]
    new_dict, _last = {}, None
    for i, (indented, rows) in enumerate(groups):
        if indented:
            # Drop one level (two characters) of indentation; rows that reach
            # the current level lose their whitespace element entirely.
            children = [
                [row[0][2:]] + row[1:] if row[0][2:] else row[1:]
                for row in rows
            ]
            new_dict[_last] = [to_tree(children)]
            continue
        # groupby alternates keys, so any following group is an indented one.
        nested_follows = i + 1 < len(groups)
        for pos, row in enumerate(rows):
            key, value = row[0], row[1:]
            is_last_row = pos == len(rows) - 1
            if not value or (not value[0] and is_last_row and nested_follows):
                _last = key
            else:
                new_dict[key] = ' '.join(value).strip('\n')
    return new_dict


# Print the parsed tree; the parser defined in this snippet is `to_tree`
# (there is no `to_dict`).
print(to_tree(data))