从字典列表中创建嵌套的json对象

时间:2019-04-02 18:32:32

标签: python json google-bigquery

我想将字典列表转换为嵌套的 JSON 对象。我有一个字典列表,每个字典中的一个字段指明了某个字段是否需要在 JSON 中嵌套,以及应当嵌套在哪个位置。

我可以把各字段归入对应的表中,但要让它们在字段内部进一步嵌套,就把我难住了。

我的数据采用以下格式:

# Sample input: one dict per schema field. A dotted "Field" value
# (e.g. "field5.nest4.nest1") names a field nested inside the RECORD
# fields listed before it; each parent row appears before its children.
table_list = [
    {"Table": "table1", "Field": "field1", "Description": "description1", "Type": "STR"}, 
    {"Table": "table1", "Field": "field2", "Description": "description2", "Type": "STR"}, 
    {"Table": "table1", "Field": "field3", "Description": "description3", "Type": "STR"},
    {"Table": "table1", "Field": "field4", "Description": "description4", "Type": "STR"},
    {"Table": "table1", "Field": "field5", "Description": "description5", "Type": "RECORD"},
    {"Table": "table1", "Field": "field5.nest1", "Description": "description6", "Type": "STR"},
    {"Table": "table1", "Field": "field5.nest2", "Description": "description7", "Type": "STR"},
    {"Table": "table1", "Field": "field5.nest3", "Description": "description8", "Type": "STR"},
    {"Table": "table1", "Field": "field5.nest4", "Description": "description9", "Type": "RECORD"},
    {"Table": "table1", "Field": "field5.nest4.nest1", "Description": "description10", "Type": "STR"},
    {"Table": "table1", "Field": "field5.nest4.nest2", "Description": "description11", "Type": "STR"},
    {"Table": "table2", "Field": "field1", "Description": "description1", "Type": "STR"}
]

我希望它以这种格式输出(如有笔误,敬请见谅):

{
    "table1": [
    {
        "Field": "field1",
        "Description": "description1",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field2",
        "Description": "description2",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field3",
        "Description": "description3",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field4",
        "Description": "description4",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field5",
        "Description": "description5",
        "Mode": "REPEATED",
        "Type": "RECORD",
        "Fields": [
            {
                "Field": "nest1",
                "Description": "description6",
                "Mode": "NULLABLE",
                "Type": "STR"
            },
            {
                "Field": "nest2",
                "Description": "description7",
                "Mode": "NULLABLE",
                "Type": "STR"
            },
            {
                "Field": "nest3",
                "Description": "description8",
                "Mode": "NULLABLE",
                "Type": "STR"
            },
            {
                "Field": "nest4",
                "Description": "description9",
                "Mode": "REPEATED",
                "Type": "RECORD",
                "Fields": [
                    {
                        "Field": "nest1",
                        "Description": "description10",
                        "Mode": "NULLABLE",
                        "Type": "STR"
                    },
                    {
                        "Field": "nest2",
                        "Description": "description11",
                        "Mode": "NULLABLE",
                        "Type": "STR"
                    }
                ]
            }
        ]
    }
    ],
    "table2": [
    {
        "Field": "field1",
        "Description": "description1",
        "Mode": "NULLABLE",
        "Type": "STR"
    }
    ]
}

我的难点在于:如何让 nest1 和 nest2 在现有字典中创建一个新字段,该字段的值是一个可以继续追加的列表,以便向更深的层级添加内容。在此示例中,嵌套只有 3 层,但我可能需要多达 15 层。

我有一些代码可以在第一层按 "Table" 进行分组,但要深入到某个字段内部并向它的列表中追加内容就很困难,我也没有找到完全相同的问题。

我看到很多人试图通过使嵌套结构变平来反向执行此操作,但是我正在尝试创建嵌套。

import json


def create_schema(file_to_read):
    """Group schema rows by table and serialize the result as JSON.

    Each row of ``file_to_read`` is a mapping with the keys ``Table``,
    ``Field``, ``Type`` and ``Description``. Rows are collected, in input
    order, into one flat list per table, and every entry is given the mode
    ``NULLABLE``. Field names are stored exactly as given; dotted names are
    not turned into nested structures.

    Returns the JSON text, indented by four spaces with sorted keys.
    """
    tables = {}
    for record in file_to_read:
        # Fetch (or create) the table's list first, then add the entry to it.
        bucket = tables.setdefault(record['Table'], [])
        bucket.append({
            "Mode": "NULLABLE",
            "Field": record['Field'],
            "Type": record['Type'],
            "Description": record['Description'],
        })
    return json.dumps(tables, indent=4, sort_keys=True)

此函数的实际输出是:

{
    "table1": [
    {
        "Field": "field1",
        "Description": "description1",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field2",
        "Description": "description2",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field3",
        "Description": "description3",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field4",
        "Description": "description4",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "field5",
        "Description": "description5",
        "Mode": "NULLABLE",
        "Type": "RECORD"
    },
    {
        "Field": "nest1",
        "Description": "description6",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "nest2",
        "Description": "description7",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "nest3",
        "Description": "description8",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "nest4",
        "Description": "description9",
        "Mode": "NULLABLE",
        "Type": "RECORD"
    },
    {
        "Field": "nest1",
        "Description": "description10",
        "Mode": "NULLABLE",
        "Type": "STR"
    },
    {
        "Field": "nest2",
        "Description": "description11",
        "Mode": "NULLABLE",
        "Type": "STR"
    }
    ],
    "table2": [
    {
        "Field": "field1",
        "Description": "description1",
        "Mode": "NULLABLE",
        "Type": "STR"
    }
    ]
}

(补充背景:这最终将用作 BigQuery 的 JSON 模式。)

1 个答案:

答案 0 :(得分:1)

这应该可以实现您的目标:

from collections import defaultdict

# Build the nested schema: d maps each table name to its list of
# top-level field entries; nested entries live under a parent's 'Fields'.
d = defaultdict(list)
for t in table_list:
    # Start at the table's top-level list (created on first access).
    field_list = d[t['Table']]
    # 'a.b.c' -> parent path ['a', 'b'] and leaf name 'c'.
    field = t['Field'].split('.')
    for f in field[:-1]:
        parent = next((el for el in field_list if el['Field'] == f), None)
        if parent is None:
            # A bare next() would raise an uninformative StopIteration here.
            raise ValueError(
                'parent field {!r} of {!r} must be listed before its children'
                .format(f, t['Field'])
            )
        # Entries are defaultdict(list), so 'Fields' appears on first access.
        field_list = parent['Fields']
    new_d = {
        'Field': field[-1],
        'Description': t['Description'],
        # Only RECORD entries are REPEATED; every scalar type stays NULLABLE.
        'Mode': 'REPEATED' if t['Type'] == 'RECORD' else 'NULLABLE',
        'Type': t['Type'],
    }
    field_list.append(defaultdict(list, new_d))

print(json.dumps(d, indent=4))

或者,如果您不想使用defaultdict

# Build the nested schema with plain dicts: d maps each table name to its
# list of top-level field entries; nested entries go under 'Fields'.
d = {}
for t in table_list:
    # Fetch the table's top-level list, creating it the first time.
    field_list = d.setdefault(t['Table'], [])
    # 'a.b.c' -> parent path ['a', 'b'] and leaf name 'c'.
    field = t['Field'].split('.')
    for f in field[:-1]:
        inner = next((el for el in field_list if el['Field'] == f), None)
        if inner is None:
            # A bare next() would raise an uninformative StopIteration here.
            raise ValueError(
                'parent field {!r} of {!r} must be listed before its children'
                .format(f, t['Field'])
            )
        # Descend into the parent's child list, creating it when absent.
        field_list = inner.setdefault('Fields', [])
    new_d = {
        'Field': field[-1],
        'Description': t['Description'],
        # Only RECORD entries are REPEATED; every scalar type stays NULLABLE.
        'Mode': 'REPEATED' if t['Type'] == 'RECORD' else 'NULLABLE',
        'Type': t['Type'],
    }
    field_list.append(new_d)

输出:

{
    "table1": [
        {
            "Field": "field1",
            "Description": "description1",
            "Mode": "NULLABLE",
            "Type": "STR"
        },
        {
            "Field": "field2",
            "Description": "description2",
            "Mode": "NULLABLE",
            "Type": "STR"
        },
        {
            "Field": "field3",
            "Description": "description3",
            "Mode": "NULLABLE",
            "Type": "STR"
        },
        {
            "Field": "field4",
            "Description": "description4",
            "Mode": "NULLABLE",
            "Type": "STR"
        },
        {
            "Field": "field5",
            "Description": "description5",
            "Mode": "REPEATED",
            "Type": "RECORD",
            "Fields": [
                {
                    "Field": "nest1",
                    "Description": "description6",
                    "Mode": "NULLABLE",
                    "Type": "STR"
                },
                {
                    "Field": "nest2",
                    "Description": "description7",
                    "Mode": "NULLABLE",
                    "Type": "STR"
                },
                {
                    "Field": "nest3",
                    "Description": "description8",
                    "Mode": "NULLABLE",
                    "Type": "STR"
                },
                {
                    "Field": "nest4",
                    "Description": "description9",
                    "Mode": "REPEATED",
                    "Type": "RECORD",
                    "Fields": [
                        {
                            "Field": "nest1",
                            "Description": "description10",
                            "Mode": "NULLABLE",
                            "Type": "STR"
                        },
                        {
                            "Field": "nest2",
                            "Description": "description11",
                            "Mode": "NULLABLE",
                            "Type": "STR"
                        }
                    ]
                }
            ]
        }
    ],
    "table2": [
        {
            "Field": "field1",
            "Description": "description1",
            "Mode": "NULLABLE",
            "Type": "STR"
        }
    ]
}