Python:如何使用 zip 重组(合并)复杂的数据集

时间:2017-01-05 16:14:18

标签: python json

我是一个 Python 新手,刚刚熟悉如何用 zip 函数重构数据,但现在遇到了一个有挑战性的数据集需要重组。我有 3 个 JSON 响应,需要在我这一端把它们合并;这些数据集结构相同、长度也相同,区别只在于它们运行的环境不同。

为简洁起见,假设这 3 个文件的内容相同:qa.json | dev.json | prod.json

注意:这个外部对象是对象的数组/列表,我只是为了简洁而放置了一个对象

[
{
  "elements": [
    {
      "keyword": "Scenario", 
      "name": "valid user can login site", 
      "steps": [
        {
          "name": "a valid user name and password", 
          "result": {
            "status": "passed"
          }
        }, 
        { 
          "name": "a valid user clicking on the login button after typing in user name and password", 
          "result": { 
            "status": "passed"
          }
        }, 
        { 
          "name": "map should display", 
          "result": {
            "status": "passed"
          }
        }
      ]
    }
  ], 
  "keyword": "Feature", 
  "name": "login", 
  "status": "passed"
}
]

我想要实现的目标:

注意:我想把它们合并成一个数据集,并按不同环境(qa、prod、dev)分别反映各自的状态

[
{
  "elements": [
    {
      "keyword": "Scenario", 
      "name": "valid user can login site", 
      "steps": [
        {
          "name": "a valid user name and password", 
          "result": {
            "qa": "passed",
            "prod": "passed",
            "dev": "passed"
          }
        }, 
        { 
          "name": "a valid user clicking on the login button after typing in user name and password", 
          "result": { 
            "qa": "passed",
            "prod": "passed",
            "dev": "passed"
          }
        }, 
        { 
          "name": "map should display", 
          "result": {
            "qa": "passed",
            "prod": "passed",
            "dev": "passed"
          }
        }
      ]
    }
  ], 
  "keyword": "Feature", 
  "name": "login", 
  "qa": "passed",
  "prod": "passed",
  "dev": "passed"
}
]

到目前为止我做了什么:

我来自 JavaScript 背景,所以我还在熟悉 Python 的逻辑

import json

# Load the QA environment's report from qa.json.
with open('qa.json') as data_file:    
    qa = json.load(data_file)

# Load the dev environment's report from dev.json.
with open('dev.json') as data_file:    
    dev = json.load(data_file)

# Load the prod environment's report from prod.json.
with open('prod.json') as data_file:    
    prod = json.load(data_file)

# NOTE: {SOME STRUCT} is the asker's placeholder for the merged object to be
# built from each (qa, dev, prod) triple; this line is not valid Python as written.
json_list = [{SOME STRUCT} for q, d, p in zip(qa, dev, prod)]

3 个答案:

答案 0 :(得分:1)

我没有太多时间,所以我发布了我认为对你的问题有效的解决方案,即使它有点麻烦。如果我有时间,我会编辑它。

**代码**

import json

# Sample report text: a JSON array of two "Feature" objects, each with one
# scenario of three steps. The second step of the second feature is "failed".
# The script below parses this same text once per environment.
data = """
[
{
    "elements": [{
        "keyword": "Scenario",
        "name": "valid user can login site",
        "steps": [{
            "name": "a valid user name and password",
            "result": {
                "status": "passed"
            }
        }, {
            "name": "a valid user clicking on the login button after typing in user name and password",
            "result": {
                "status": "passed"
            }
        }, {
            "name": "map should display",
            "result": {
                "status": "passed"
            }
        }]
    }],
    "keyword": "Feature",
    "name": "login",
    "status": "passed"
},
{
    "elements": [{
        "keyword": "Scenario",
        "name": "valid user can login site",
        "steps": [{
            "name": "a valid user name and password",
            "result": {
                "status": "passed"
            }
        }, {
            "name": "a valid user clicking on the login button after typing in user name and password",
            "result": {
                "status": "failed"
            }
        }, {
            "name": "map should display",
            "result": {
                "status": "passed"
            }
        }]
    }],
    "keyword": "Feature",
    "name": "login",
    "status": "passed"
}
]
"""

def get_result(envData, objIndex, elementIndex, stepIndex):
    """Return the 'status' value of a single step in one environment's report.

    The step is addressed by the index of its feature object, the index of
    the scenario inside that feature's 'elements', and the index of the step
    inside that scenario's 'steps'.
    """
    feature = envData[objIndex]
    scenario = feature['elements'][elementIndex]
    step = scenario['steps'][stepIndex]
    return step['result']['status']

def set_combined_results(combinedData, objIndex, elementIndex, stepIndex, results):
    """Write the per-environment statuses into one step's 'result' dict, in place.

    ``results`` is indexed positionally as (qa, prod, dev). Keys already
    present in the 'result' dict are left untouched; nothing is returned.
    """
    step = combinedData[objIndex]['elements'][elementIndex]['steps'][stepIndex]
    target = step['result']
    # Read all three values first so a too-short ``results`` fails before
    # anything has been written.
    qa_status, prod_status, dev_status = results[0], results[1], results[2]
    target['qa'] = qa_status
    target['prod'] = prod_status
    target['dev'] = dev_status

if __name__ == '__main__':
    # In this demo every environment is parsed from the same sample text; a
    # fourth parse provides the structure that receives the merged statuses.
    qa_report = json.loads(data)
    prod_report = json.loads(data)
    dev_report = json.loads(data)
    merged = json.loads(data)

    # Order matters: set_combined_results reads its tuple as (qa, prod, dev).
    env_reports = (qa_report, prod_report, dev_report)

    for feature_idx, feature in enumerate(merged):
        for scenario_idx, scenario in enumerate(feature['elements']):
            for step_idx, _step in enumerate(scenario['steps']):
                step_statuses = tuple(
                    get_result(report, feature_idx, scenario_idx, step_idx)
                    for report in env_reports
                )
                set_combined_results(merged, feature_idx, scenario_idx, step_idx, step_statuses)

        # Collect the feature-level statuses before modifying the merged entry.
        qa_status, prod_status, dev_status = [
            report[feature_idx]['status'] for report in env_reports
        ]

        # Replace the single 'status' key with one key per environment.
        del feature['status']
        feature['qa'] = qa_status
        feature['prod'] = prod_status
        feature['dev'] = dev_status

    print(json.dumps(merged, indent=True))

**输出**

[
 {
  "keyword": "Feature",
  "name": "login",
  "elements": [
   {
    "keyword": "Scenario",
    "name": "valid user can login site",
    "steps": [
     {
      "result": {
       "qa": "passed",
       "status": "passed",
       "dev": "passed",
       "prod": "passed"
      },
      "name": "a valid user name and password"
     },
     {
      "result": {
       "qa": "passed",
       "status": "passed",
       "dev": "passed",
       "prod": "passed"
      },
      "name": "a valid user clicking on the login button after typing in user name and password"
     },
     {
      "result": {
       "qa": "passed",
       "status": "passed",
       "dev": "passed",
       "prod": "passed"
      },
      "name": "map should display"
     }
    ]
   }
  ],
  "dev": "passed",
  "prod": "passed",
  "qa": "passed"
 },
 {
  "keyword": "Feature",
  "name": "login",
  "elements": [
   {
    "keyword": "Scenario",
    "name": "valid user can login site",
    "steps": [
     {
      "result": {
       "qa": "passed",
       "status": "passed",
       "dev": "passed",
       "prod": "passed"
      },
      "name": "a valid user name and password"
     },
     {
      "result": {
       "qa": "failed",
       "status": "failed",
       "dev": "failed",
       "prod": "failed"
      },
      "name": "a valid user clicking on the login button after typing in user name and password"
     },
     {
      "result": {
       "qa": "passed",
       "status": "passed",
       "dev": "passed",
       "prod": "passed"
      },
      "name": "map should display"
     }
    ]
   }
  ],
  "dev": "failed",
  "prod": "failed",
  "qa": "failed"
 }
]

答案 1 :(得分:0)

由于 list 中的每个对象都是 dict,您可以使用 dict.update 方法来更新 dict,
例如:

a = [{'one': 1}, {'three': 3}]
b = [{'one': 1}, {'two': 2}]  # {'one': 1} is duplicate with same value
c = [{'a': 'aaa'}, {'two': 22}]  # {'two': 22} is duplicate with different value

# Walk the three lists in lockstep and fold the matching dicts of b, then c,
# into the dict from a. The dicts inside a are modified in place, and for a
# repeated key the value applied last (from c) wins.
for x, y, z in zip(a, b, c):
    for extra in (y, z):
        x.update(extra)

现在 a 将为 [{'a': 'aaa', 'one': 1}, {'three': 3, 'two': 22}](循环中的 x 依次指向 a 里的每个 dict,并被就地更新)

对于你提到的那些json文件,它将是;

import json
from pprint import pprint

# Sample reports, one JSON text per environment. They share the same layout;
# only the status key name ("qa" / "dev" / "prod") differs between them.
qa = '''
[
{
  "elements": [
    {
      "keyword": "Scenario", 
      "name": "valid user can login site", 
      "steps": [
        {
          "name": "a valid user name and password", 
          "result": {
            "qa": "passed"
          }
        }, 
        { 
          "name": "a valid user clicking on the login button after typing in user name and password", 
          "result": { 
            "qa": "passed"
          }
        }, 
        { 
          "name": "map should display", 
          "result": {
            "qa": "passed"
          }
        }
      ]
    }
  ], 
  "keyword": "Feature", 
  "name": "login", 
  "qa": "passed"
}
]
'''
dev = '''
[
{
  "elements": [
    {
      "keyword": "Scenario", 
      "name": "valid user can login site", 
      "steps": [
        {
          "name": "a valid user name and password", 
          "result": {
            "dev": "passed"
          }
        }, 
        { 
          "name": "a valid user clicking on the login button after typing in user name and password", 
          "result": { 
            "dev": "passed"
          }
        }, 
        { 
          "name": "map should display", 
          "result": {
            "dev": "passed"
          }
        }
      ]
    }
  ], 
  "keyword": "Feature", 
  "name": "login", 
  "dev": "passed"
}
]
'''
prod = '''
[
{
  "elements": [
    {
      "keyword": "Scenario", 
      "name": "valid user can login site", 
      "steps": [
        {
          "name": "a valid user name and password", 
          "result": {
            "prod": "passed"
          }
        }, 
        { 
          "name": "a valid user clicking on the login button after typing in user name and password", 
          "result": { 
            "prod": "passed"
          }
        }, 
        { 
          "name": "map should display", 
          "result": {
            "prod": "passed"
          }
        }
      ]
    }
  ], 
  "keyword": "Feature", 
  "name": "login", 
  "prod": "passed"
}
]
'''
qa = json.loads(qa)
dev = json.loads(dev)
prod = json.loads(prod)

# Merge the dev and prod reports into the qa report, in place. The reports are
# walked in lockstep at every level (features, scenarios, steps), so any number
# of scenarios and steps is handled rather than only the first scenario and
# exactly three steps. zip() stops at the shortest input at each level.
for q, d, p in zip(qa, dev, prod):
    for other in (d, p):
        # update all keys but 'elements' (nested data is merged step by step below)
        q.update({k: v for k, v in other.items() if k != 'elements'})

        # update the 'result' dict of every step of every scenario
        for q_element, other_element in zip(q['elements'], other['elements']):
            for q_step, other_step in zip(q_element['steps'], other_element['steps']):
                q_step['result'].update(other_step['result'])

pprint(qa)

输出;

[{'dev': 'passed',
  'elements': [{'keyword': 'Scenario',
                'name': 'valid user can login site',
                'steps': [{'name': 'a valid user name and password',
                           'result': {'dev': 'passed',
                                      'prod': 'passed',
                                      'qa': 'passed'}},
                          {'name': 'a valid user clicking on the login button '
                                   'after typing in user name and password',   
                           'result': {'dev': 'passed',
                                      'prod': 'passed',
                                      'qa': 'passed'}},
                          {'name': 'map should display',
                           'result': {'dev': 'passed',  
                                      'prod': 'passed',
                                      'qa': 'passed'}}]}],
  'keyword': 'Feature',
  'name': 'login',
  'prod': 'passed',
  'qa': 'passed'}] 

答案 2 :(得分:0)

这是一个递归合并实现。它将合并任意类型的Python对象,这样除了dicts之外的任何东西在合并时必须相等。

对于 dict,只有当同一个键在所有包含该键的 dict 中对应的值都可以合并时,才允许合并。在合并之前,各个 dict 中等于参数 `field_name` 的键都会加上索引后缀(例如 `status_0`、`status_1`):

from copy import deepcopy
from itertools import chain


def merge(field_name, *objs):
    """Recursively merge equally-shaped objects, keeping ``field_name`` per input.

    All ``objs`` must be of exactly the same type.

    * Lists and tuples are merged element-wise and always returned as a list;
      ``zip`` stops at the shortest input, so surplus trailing items are dropped.
    * Dicts are merged key by key over the union of their keys. A value is
      merged across those dicts that contain the key. The key equal to
      ``field_name`` is not merged: each input's value is stored under
      ``'<field_name>_<i>'``, where ``i`` is the position of that input.
    * Anything else must be equal across all inputs (and hashable); a deep
      copy of the first object is returned.

    Raises:
        TypeError: if the objects are not all of the same type (this includes
            calling ``merge`` without any objects).
        ValueError: if non-container objects are not all equal.
    """
    # Make sure all objs are of same type
    types = set(map(type, objs))
    if len(types) != 1:
        raise TypeError('Cannot merge objects of different types: {types}'.format(types=list(types)))

    first = objs[0]

    # for any random objects, make sure they are equal!
    if not isinstance(first, (list, tuple, dict)):
        if len(set(objs)) != 1:
            raise ValueError("Cannot merge non-equal objects that aren't dicts: {objs}".format(objs=objs))
        return deepcopy(first)

    # for lists, tuples: zip 'em and merge the zipped elements
    if isinstance(first, (list, tuple)):
        return [merge(field_name, *zipped) for zipped in zip(*objs)]

    # dicts: collect the union of keys in first-seen order (deterministic,
    # unlike going through a set), leaving out the per-input field
    keys = [k for k in dict.fromkeys(chain.from_iterable(objs)) if k != field_name]

    result_dict = {}
    for k in keys:
        # merge values from all dicts where key is present
        result_dict[k] = merge(field_name, *(d[k] for d in objs if k in d))
    for i, d in enumerate(objs):
        if field_name in d:
            result_dict['{f}_{i}'.format(f=field_name, i=i)] = d[field_name]
    return result_dict


# merge() can now be used on dev, qa, prod, which hold data of the structure
# provided by the OP:

>>> from pprint import pprint
>>> pprint(merge('status', qa, dev, prod))
[{'elements': [{'keyword': 'Scenario',
                'name': 'valid user can login site',
                'steps': [{'name': 'a valid user name and password',
                           'result': {'status_0': 'passed',
                                      'status_1': 'passed',
                                      'status_2': 'passed'}},
                          {'name': 'a valid user clicking on the login button after typing in user name and password',
                           'result': {'status_0': 'passed',
                                      'status_1': 'passed',
                                      'status_2': 'passed'}},
                          {'name': 'map should display',
                           'result': {'status_0': 'passed',
                                      'status_1': 'passed',
                                      'status_2': 'passed'}}]}],
  'keyword': 'Feature',
  'name': 'login',
  'status_0': 'passed',
  'status_1': 'passed',
  'status_2': 'passed'}]

这当然不是完全通用的,例如它只对 list 和 tuple 这两种序列做“深度”合并,但对于从 JSON 加载的数据结构来说应该足够了。希望它可以帮助您找到解决方案。