使用电子表格数据填充嵌套字典

时间:2017-02-22 04:50:25

标签: python excel dictionary

我知道我希望我的Python字典列表是什么样子,但是在将电子表格数据读入该数据结构时遇到了问题。问题在于:一行可能同时包含用于填充父字典的值以及1个子项的数据。对于后续行,如果父级列中的值为空,则假定该行的子级列属于上一个父项。如果遇到父级数据不为空的新行,则将其视为要添加到列表中的新父项。

这是电子表格的示例:

[
  {
    "name": "NTP_Policy1",
    "descr": "NTP Policy 1",
    "adminSt": "enabled",
    "authSt": "disabled",
    "servers": [
      {
        "hostname": "10.10.10.10",
        "descr": "NTP1 Server",
        "preferred": true,
        "server_EPG": "oob-default",
        "minPoll": 4,
        "maxPoll": 6
      },
      {
        "hostname": "20.10.10.10",
        "descr": "NTP2 Server",
        "preferred": false,
        "server_EPG": "oob-default",
        "minPoll": 4,
        "maxPoll": 6
      }
    ]
  },
  {
    "name": "NTP_Policy2",
    "descr": "NTP Policy 2",
    "adminSt": "enabled",
    "authSt": "disabled",
    "servers": [
      {
        "hostname": "30.10.10.10",
        "descr": "NTP3 Server",
        "preferred": true,
        "server_EPG": "oob-default",
        "minPoll": 4,
        "maxPoll": 6
      },
      {
        "hostname": "40.10.10.10",
        "descr": "NTP4 Server",
        "preferred": false,
        "server_EPG": "oob-default",
        "minPoll": 4,
        "maxPoll": 6
      }
    ]
  }
]

我希望数据结构如下所示:

>>> import pyexcel
>>> from pprint import pprint
>>> def excel_to_dict(sheet):
...     rows = sheet.iter_rows()
...     keys = next(rows)
...     dict_list = []
...     # For each data row, pair every header cell with the cell in the same column (zip).
...     # Each non-empty value is then stored under its header name: the policy-level columns
...     # (name, descr, adminSt, authSt) go into the row's dict, all other columns into its server entry.
...     for row in rows:
...         dict = {}
...         dict['servers'] = []
...         server_atts = {}
...         for key,cell in zip(keys, row):
...             if str(cell.value) != 'None' and str(key.value) == 'name':
...                 dict[str(key.value)] = str(cell.value)
...                 parentKey = str(key.value)
...             elif (str(cell.value) != 'None' and str(key.value) == 'descr') or (str(cell.value) != 'None' and str(key.value) == 'adminSt') or (str(cell.value) != 'None' and str(key.value) == 'authSt'):
...                 dict[str(key.value)] = str(cell.value)
...             elif str(cell.value) == 'None':
...                 continue
...             else:
...                 server_atts[str(key.value)] = str(cell.value)
...         dict['servers'].append(server_atts.copy())
...         dict_list.append(dict.copy())
...     return dict_list
>>> wb = openpyxl.load_workbook('aci_config.xlsx')
>>> ntpPolsSheet = wb.get_sheet_by_name('ntp_pol')
>>> ntpPols = excel_to_dict(ntpPolsSheet)
>>>
>>> pprint(ntpPols)
[{'adminSt': 'enabled',
  'authSt': 'disabled',
  'descr': 'Test NTP Policy',
  'name': 'test1-NTPPOL',
  'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP1 server',
               'server_hostname_ip': '10.10.10.10',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'yes'}]},
 {'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP2 server',
               'server_hostname_ip': '10.10.10.11',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'no'}]},
 {'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP3 server',
               'server_hostname_ip': '10.10.10.12',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'no'}]},
 {'adminSt': 'enabled',
  'authSt': 'disabled',
  'descr': 'Test 2 NTP policy',
  'name': 'test2-NTPPOL',
  'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP1 server',
               'server_hostname_ip': '20.10.10.10',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'yes'}]},
 {'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP2 server',
               'server_hostname_ip': '20.10.10.11',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'no'}]},
 {'servers': [{'server_EPG': 'oob-default',
               'server_descr': 'NTP3 server',
               'server_hostname_ip': '20.10.10.12',
               'server_maxPoll': '6',
               'server_minPol': '4',
               'server_preferred': 'no'}]}]

我最接近的代码看起来像这样,但是后续行将子项附加到父级别。

// Associative array keyed by price tier; $BMW, $Volvo and $Lada are not defined in this snippet.
$cars = array (
    'expensive' => $BMW,
    'medium'    => $Volvo,
    'cheap'     => $Lada
);

// array_keys() returns only the keys, so each tier name is printed followed by an HTML line break.
foreach (array_keys($cars) as $index) {
    echo "$index<br/>\n";
}

代码需要怎样修改才能正确填充dict列表?是否有更好的电子表格格式可以让数据更容易导入?我希望在一个工作表中完成所有操作,而不是使用多个工作表。

1 个答案:

答案 0 :(得分:0)

我建议将.xlsx文件另存为csv格式,因为这样处理起来会容易得多。它的文本内容如下所示:

name,descr,adminSt,authSt,server_hostname_ip,server_descr,server_preferred,server_EPG,server_minPoll,server_maxPoll
test1-NTPPOL,Test NTP Policy,enabled,disabled,10.10.10.10,NTP1 server,yes,oob-default,4,6
,,,,10.10.10.11,NTP2 server,no,oob-default,4,6
,,,,10.10.10.12,NTP3 server,no,oob-default,4,6
test2-NTPPOL,Test 2 NTP policy,enabled,disabled,20.10.10.10,NTP1 server,yes,oob-default,4,6
,,,,20.10.10.11,NTP2 server,no,oob-default,4,6
,,,,20.10.10.12,NTP3 server,no,oob-default,4,6

然后你可以使用pandas读取该csv,并将其转换为类似json的嵌套结构。pandas提供了.iloc索引器:先用它按行位置取出一行,再按列名读取该行中的值。

import pandas as pd
from beeprint import pp

def _server_info(row):
    """Build one server dict from the server_* columns of a single row."""
    return {
        'server_hostname': row['server_hostname_ip'],
        'server_descr': row['server_descr'],
        'server_preferred': row['server_preferred'],
        'server_EPG': row['server_EPG'],
        'minPoll': row['server_minPoll'],
        'maxPoll': row['server_maxPoll'],
    }


def excel_to_dict(sheet):
    """Group flat spreadsheet rows into a list of policy dicts with nested servers.

    A row whose ``name`` cell is non-null starts a new policy; its ``name``,
    ``descr``, ``adminSt`` and ``authSt`` cells become the policy's fields.
    Every row (including the one that starts a policy) contributes one entry
    to the ``servers`` list of the most recently started policy.

    Args:
        sheet: A pandas DataFrame with the columns ``name``, ``descr``,
            ``adminSt``, ``authSt``, ``server_hostname_ip``, ``server_descr``,
            ``server_preferred``, ``server_EPG``, ``server_minPoll`` and
            ``server_maxPoll``.

    Returns:
        A list of dicts, one per policy, in row order. An empty sheet yields
        an empty list.

    Raises:
        ValueError: If a row with an empty ``name`` appears before any row
            that defines a policy, so there is no parent to attach it to.
        KeyError: If one of the expected columns is missing.
    """
    dict_list = []
    current = None  # the policy that rows with an empty name belong to
    for position, (_, row) in enumerate(sheet.iterrows()):
        if pd.notnull(row['name']):
            current = {
                'name': row['name'],
                'descr': row['descr'],
                'adminSt': row['adminSt'],
                'authSt': row['authSt'],
                'servers': [],
            }
            # Append right away; the dict is still filled in through `current`,
            # so no "flush the last one" step is needed after the loop.
            dict_list.append(current)
        elif current is None:
            raise ValueError(
                "row %d has no 'name' and no preceding policy row to attach to"
                % position
            )
        current['servers'].append(_server_info(row))
    return dict_list

# Load the CSV export of the spreadsheet into a DataFrame (comma-separated).
sheet = pd.read_csv('sheet.csv', sep=',')
# Pretty-print the nested list of dicts built by excel_to_dict.
pp(excel_to_dict(sheet))