Python:将嵌套的JSON写为List

时间:2016-04-19 02:58:47

标签: python json dictionary recursion

经过将近三天的编程尝试后,我来这里寻求帮助。

我想做什么:导入Json字符串/文件并将其写入数据库。输入(汽车)有多种组合。最低嵌套的dict / list定义了将在DB中写入的列表元素的数量。

这是JSON / String:

input = [{"id":"BMW_1_Series",
        "years":[{"id":10052,"year":2008,
                    "styles":[{"id":560,"name":"128i 2",
                               "submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
                              {"id":561,"name":"135i ",
                               "submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
                              ]
                  },
                 {"id":427,"year":2016,
                  "styles":[{"id":433,"name":"228i ",
                             "submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
                            {"id":431,"name":"M235i",
                             "submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
                            ]
                  }
                 ]
          }
         #I deleted the other entries, but it is a list
         ]

输出应该是一个字典,其中嵌套对象(子字典)的键名应作为其各个子键的前缀:

{'id': 427, 'year': 2016, 'styles_id': 431, 'styles_name': 'M235i',   'styles_trim': 'M235i', 'submodel_body': 'Coupe', 'submodel_niceName': 'm235i'}

我先从 years 这一层开始入手:

# Flatten every entry of `years` into one flat dict and collect the results.
# Scalar fields keep their own key; fields of dicts found inside a list are
# prefixed with the list's key (e.g. "styles_id"); fields of a dict nested one
# level deeper are prefixed with that dict's key (e.g. "submodel_body").
# Relies on `years` and `output` being defined before this loop runs
# (assumed: `years` is a list of dicts and `output` is a list).
# NOTE: there is a single outputA per entry of `years`, so when a list holds
# several dicts the later ones overwrite the keys written by the earlier ones.
for s in years:#styles
    outputA = {}
    for specifierA, valueA in s.items():
        if isinstance(valueA, list):
            for A in valueA:
                if isinstance(A, dict):
                    for specifierB, valueB in A.items(): #submodel
                        if isinstance(valueB, dict):
                            for specifierBB, valueBB in valueB.items():
                                # Single "_" separator, so the key reads
                                # "submodel_body" like every other prefixed key.
                                outputA[specifierB+"_"+specifierBB] = valueBB
                        else:
                            outputA[specifierA+"_"+specifierB] = valueB
                else:
                    # A list holding plain values is stored unchanged.
                    outputA[specifierA] = valueA

        elif isinstance(valueA, dict):
            for specifierB, valueB in valueA.items(): #submodel
                outputA[specifierA+"_"+specifierB] = valueB
        else:
            outputA[specifierA] = valueA
    print(str(outputA))
    output.append(outputA)

我在这里停留,因为我想知道如何读取包含dicts,list和normal值的JSON文件。

这是我到目前为止的方法(它不起作用,我已经花了三天时间……)(为了便于阅读,我删除了所有中间的打印语句)*注意:这段代码会陷入无限循环……

def readL(input, prefix=""):
    """Flatten nested dicts/lists into one flat dict.

    Every scalar value is stored under ``prefix + key``. When a value is a
    dict, or a list that contains dicts/lists, it is flattened recursively
    and its own key (plus "_") becomes the prefix of its children, e.g.
    ``{"styles": [{"id": 1, "submodel": {"body": "x"}}]}`` becomes
    ``{"styles_id": 1, "submodel_body": "x"}``. Only the immediate parent
    key is used as prefix; it is not accumulated across levels.

    Args:
        input: A dict or list (typically decoded JSON). Any other type
            yields an empty dict. The name shadows the builtin but is kept
            so existing keyword callers keep working.
        prefix: String prepended to the keys of ``input`` itself.

    Returns:
        A dict without nested dicts. Lists made only of scalars (such as
        ``["USED"]``) are kept as plain values. When several list elements
        produce the same key, the last element wins. Scalars that sit
        directly in a list next to dicts/lists are ignored.
    """
    output = {}
    if isinstance(input, dict):
        for key, value in input.items():
            # A container needs recursion; a list of plain scalars does not.
            is_nested = isinstance(value, dict) or (
                isinstance(value, list)
                and any(isinstance(item, (dict, list)) for item in value)
            )
            if is_nested:
                output.update(readL(value, str(key) + "_"))
            else:
                output[prefix + str(key)] = value
    elif isinstance(input, list):
        for item in input:
            if isinstance(item, (dict, list)):
                # Pass the prefix through so list elements keep the key of
                # the list that holds them.
                output.update(readL(item, prefix))
    return output

readL(test)

基本上,我想要一个递归函数继续调用自己,直到输出中没有dict / list为止。

我也对如何阅读输入的其他任何更快的方法持开放态度。

我非常期待一些建议。请耐心等待,我对Python很陌生。

感谢很多!

更新

我在 @poke 对「Flattening Generic JSON List of Dicts or Lists in Python」这个问题的回答中找到了部分解决方案:
def splitObj (obj, prefix = None):
    '''
    Split the object, returning a 3-tuple with the flat object, optionally
    followed by the key for the subobjects and a list of those subobjects.

    obj is a dict and is never modified; the returned flat object is a new
    dict. If prefix is given, every key is renamed to "<prefix>_<key>".

    Only the first value (in iteration order) that is a dict, or a list made
    up entirely of dicts, is split off. A list holding anything else, such as
    'states': ['USED'], is an ordinary value and stays in the flat object.
    An empty list still counts as a (empty) list of subobjects.

    Returns (flat, key, subobjects), or (flat, None, None) when nothing in
    the object can be split off.
    '''
    # copy the object, optionally add the prefix before each key
    if prefix is None:
        new = obj.copy()
    else:
        new = { '{}_{}'.format(prefix, k): v for k, v in obj.items() }

    # try to find the key holding the subobject or a list of subobjects
    for k, v in new.items():
        if isinstance(v, dict):
            # just one subobject
            subs = [v]
        elif isinstance(v, list) and all(isinstance(item, dict) for item in v):
            # list of subobjects
            subs = v
        else:
            continue
        # deleting while iterating is safe here because we return right away
        del new[k]
        return new, k, subs
    return new, None, None

def flatten (data, prefix = None):
    '''
    Generate flat dicts from an iterable of (possibly nested) objects.

    Each item is passed to splitObj together with prefix. When splitObj
    reports no subobject key, the flat part is yielded as is. Otherwise the
    subobjects are flattened recursively, using the reported key as their
    prefix, and every resulting row is updated with the parent's flat part
    before being yielded. Only the one key reported by splitObj is expanded
    per object; whatever else is in the flat part is copied over unchanged.
    '''
    for record in data:
        scalars, child_key, children = splitObj(record, prefix)

        if child_key is not None:
            # one output row per (recursively flattened) child
            for row in flatten(children, child_key):
                row.update(scalars)
                yield row
        else:
            # nothing nested was reported: the record is already flat
            yield scalars

但是我的输入也有所改变:

input = [{'states': ['USED'], 'niceName': '1-series', 'id': 'BMW_1_Series',
            'years': [{'styles':
                       [{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 100994560},
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
                          'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 100974974},
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 100974975},
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 100994561}
                        ],
                       'states': ['USED'], 'id': 100524709, 'year': 2008},
                      {'styles':
                       [{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101082656}, 
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101082655},
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101082663},
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101082662}
                        ], 
                       'states': ['USED'], 'id': 100503222, 'year': 2009},
                      {'styles': 
                       [{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101200599},
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101200600}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101200607}, 
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101200601}
                        ], 
                       'states': ['USED'], 'id': 100529091, 'year': 2010}, 
                      {'styles':
                       [{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101288165}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101288166}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101288298}, 
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101288297}
                        ], 
                       'states': ['USED'], 'id': 100531309, 'year': 2011}, 
                      {'styles': 
                       [{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101381667}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101381668}, 
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101381665}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101381666}
                        ], 
                       'states': ['USED'], 'id': 100534729, 'year': 2012}, 
                      {'styles': 
                       [{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                        'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 200428722},
                        {'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 200428721}, 
                        {'trim': '135is', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135is 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 200421701}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 200428724}, 
                        {'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 200428723}, 
                        {'trim': '128i SULEV', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'}, 
                         'name': '128i SULEV 2dr Coupe (3.0L 6cyl 6M)', 'id': 200428726}, 
                        {'trim': '128i SULEV', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '128i SULEV 2dr Convertible (3.0L 6cyl 6M)', 'id': 200428725}, 
                        {'trim': '135is', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'}, 
                         'name': '135is 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 200428727}
                        ], 
                       'states': ['USED'], 'id': 200421700, 'year': 2013}
                      ], 
          'name': '1 Series', 'make': {'niceName': 'bmw', 'name': 'BMW', 'id': 200000081}
          }]

但我收到错误:

AttributeError: 'str' object has no attribute 'update'

我认为这是因为它没有处理 'states': ['USED'] 这种情况。

我该怎么办?

我已将此问题移至Flatten nested JSON (Dict, List) into List to prepare to write into DB 请在那里评论。谢谢!

4 个答案:

答案 0 :(得分:0)

我不认为这是最好的解决方案,但我认为这可以做你想做的事情:

def flatten(something,parent_key=None):
    """Flatten arbitrarily nested dicts/lists into a single flat dict.

    Keys of the result are the path to each leaf joined with "_": dict keys
    are used as they are (converted with ``str``) and list elements
    contribute their index, e.g. ``{"a": [{"b": 1}]}`` -> ``{"a_0_b": 1}``.

    Args:
        something: A dict, a list, or a leaf value. Subclasses of dict and
            list are walked like the builtin types.
        parent_key: Path accumulated so far; ``None`` at the top level.

    Returns:
        A flat dict. A leaf passed directly at the top level comes back as
        ``{None: leaf}``. Empty dicts and lists contribute no entries.
    """
    prefix = "" if parent_key is None else parent_key + "_"

    if isinstance(something, dict):
        children = something.items()
    elif isinstance(something, list):
        children = enumerate(something)
    else:
        return {parent_key: something}

    temp = {}
    for key, value in children:
        temp.update(flatten(value, prefix + str(key)))
    return temp

然后像这样运行它:

if __name__=='__main__':

    # Sample data from the question. It has to be indented like the rest of
    # this block; at column 0 the `if` above would have no body and the
    # script would not even compile.
    input_list = [{"id":"BMW_1_Series",
            "years":[{"id":10052,"year":2008,
                        "styles":[{"id":560,"name":"128i 2",
                                   "submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
                                  {"id":561,"name":"135i ",
                                   "submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
                                  ]
                      },
                     {"id":427,"year":2016,
                      "styles":[{"id":433,"name":"228i ",
                                 "submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
                                {"id":431,"name":"M235i",
                                 "submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
                                ]
                      }
                     ]
              }
             #I deleted the other entries, but it is a list
             ]

    a = flatten(input_list)
    print(a)

如果您可以保证列表中的每个元素都带有 id,则可以用 id 代替索引:

temp.update(flatten(something[index],prefix+str(something[index]['id'])))

答案 1 :(得分:0)

我有同样的需求,既可以对任意JSON进行扁平化和非规范化 - 包括那些没有预定义模式的JSON。我还想确保JSON树中所有元素之间的一对一和一对多关系保留在展平/非规范化版本中。

到目前为止,这已经适用于我尝试过的所有用例(每行有一个JSON记录的文件,而不是多行JSON的文件)。

https://github.com/bundgus/py_curate_json

答案 2 :(得分:0)

这是另一种方法。更贴近您的要求。

{{1}}

答案 3 :(得分:0)

如果我理解正确,你想展平你的JSON,但只想在遇到某些指定的键时才进行展平。如果是这种情况,你可以试试这个:

假设:

from pprint import pprint

input_list = [{"id":"BMW_1_Series",
        "years":[{"id":10052,"year":2008,
                    "styles":[{"id":560,"name":"128i 2",
                               "submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
                              {"id":561,"name":"135i ",
                               "submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
                              ]
                  },
                 {"id":427,"year":2016,
                  "styles":[{"id":433,"name":"228i ",
                             "submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
                            {"id":431,"name":"M235i",
                             "submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
                            ]
                  }
                 ]
          }
         #I deleted the other entries, but it is a list
         ]

使用功能:

def flatten(something,parent_key=None):
    """Flatten arbitrarily nested dicts/lists into a single flat dict.

    Keys of the result are the path to each leaf joined with "_": dict keys
    are used as they are (converted with ``str``) and list elements
    contribute their index, e.g. ``{"a": [{"b": 1}]}`` -> ``{"a_0_b": 1}``.

    Args:
        something: A dict, a list, or a leaf value. Subclasses of dict and
            list are walked like the builtin types.
        parent_key: Path accumulated so far; ``None`` at the top level.

    Returns:
        A flat dict. A leaf passed directly at the top level comes back as
        ``{None: leaf}``. Empty dicts and lists contribute no entries.

    Note:
        If every list element is a dict with an ``'id'`` entry, that id
        could be used in the key in place of the positional index.
    """
    prefix = "" if parent_key is None else parent_key + "_"

    if isinstance(something, dict):
        children = something.items()
    elif isinstance(something, list):
        children = enumerate(something)
    else:
        return {parent_key: something}

    temp = {}
    for key, value in children:
        temp.update(flatten(value, prefix + str(key)))
    return temp

def sometimes_flatten(something, flatten_keys):
    """Copy a nested structure, flattening only the values under given keys.

    Dicts and lists (including subclasses) are walked recursively. Whenever
    a dict key is contained in ``flatten_keys``, its value is handed to
    ``flatten`` with that key as the starting prefix and the resulting flat
    entries are merged into the surrounding dict. All other values keep
    their position in the structure.

    Args:
        something: A dict, a list, or a leaf value.
        flatten_keys: Container of the keys whose values get flattened.

    Returns:
        A new dict/list with the same layout as ``something`` except for the
        flattened parts; leaf values are returned unchanged.
    """
    if isinstance(something, dict):
        temp = {}
        for key, value in something.items():
            if key in flatten_keys:
                temp.update(flatten(value, key))
            else:
                temp[key] = sometimes_flatten(value, flatten_keys)
        return temp

    if isinstance(something, list):
        return [sometimes_flatten(x, flatten_keys) for x in something]

    return something

然后像这样调用它们:

if __name__ == '__main__':
    # Only the values stored under these keys are flattened; everything
    # else keeps its nesting.
    keys_to_flatten = ['years']
    pprint(sometimes_flatten(input_list, keys_to_flatten))

但您可以将任意键放入flatten_keys变量中以更改其行为方式以及将执行多少展平