Question

我有一个函数，用于处理 JSON API 的响应并将其转换为 pandas DataFrame。

def test_lists(responses):
    df_from_list = []
    ids = []
    df_from_dicts = []
    df_from_lists = []
    responses_json = []

    # clean values
    try: 
        for response in responses:
            data = response.get('data')
            names = [x.get('name') for x in data]
            periods = [x.get('period') for x in data]
            ids = [x.get('id') for x in data]
            ids = [x.split('/')[0] for x in ids]
            values = [x.get('values') for x in data]
            merged = list(itertools.chain(*values))
            clean_values = [x.get('value') for x in merged] 
            clean_endtimes = [x.get('end_time') for x in merged]

            # check the type of the element 
            for elem in clean_values:
                if type(elem) == dict:
                    dict_of_values = dict(zip(clean_endtimes, clean_values))
                    df_from_dict.dropna(inplace=True)

                if type(elem) != dict:
                    response_json=json_normalize(response['data'], record_path=['values'], meta=['id', 'name', 'period'])
                    responses_json.append(response_json)
                    df_from_list = pd.concat(responses_json, sort=True).reset_index(drop=True)
                    df_from_list.drop_duplicates(inplace=True)

            # append every result if it is a dict, else pass
            try:
                df_from_dicts.append(df_from_dict) 
            except UnboundLocalError:
                pass

    except:
        if len(responses) < 1: # if there are no data, stop
            pass

    try:
        # if there are lists and dicts
        if len(df_from_dicts) > 0 and len(df_from_list) > 0:
            df_fb = df_from_list.append(df_from_dicts, sort=True)
        # if it is only lists
        elif not df_from_dicts:
            df_fb = df_from_list
        # if it is only dicts
        elif not df_from_list:
            df_fb = df_from_dicts
        return df_fb  

    # if somehow df_fb is empty
    except:
        if len(df_fb) < 1:
            pass

当遍历同时包含字典和列表的响应列表时，不知为何，字典的结果会被重复添加。

如果该类型的行是这样的：

dict, list, list, dict

第一个字典的结果会被添加 3 次。

我尝试把追加到列表的操作（append）放进循环内部：

# check the type of the element 
            # First attempt: the append is moved inside the per-element loop.
            for elem in clean_values:
                if type(elem) == dict:
                    # dict_of_values is built but not read again in this
                    # snippet, and df_from_dict is not assigned anywhere in it.
                    dict_of_values = dict(zip(clean_endtimes, clean_values))
                    df_from_dict.dropna(inplace=True)


                # This try sits at the loop-body level, outside the `if`, so
                # it runs once per element of clean_values regardless of the
                # element's type.
                try:
                     df_from_dicts.append(df_from_dict) 
                except UnboundLocalError:
                    pass

但这样会使所有内容重复 4 次。

如果我把它放在第二个循环之外

# Second attempt: the append is moved out of the loops entirely.
for response in responses:
           data = response.get('data')
           # names, periods and ids are computed but not used later in this
           # snippet.
           names = [x.get('name') for x in data]
           periods = [x.get('period') for x in data]
           ids = [x.get('id') for x in data]
           ids = [x.split('/')[0] for x in ids]
           values = [x.get('values') for x in data]
           # Flatten the per-item 'values' lists into one list of entries.
           merged = list(itertools.chain(*values))
           clean_values = [x.get('value') for x in merged] 
           clean_endtimes = [x.get('end_time') for x in merged]

           # check the type of the element
           for elem in clean_values:
               if type(elem) == dict:
                   # dict_of_values is built but not read again in this
                   # snippet, and df_from_dict is not assigned anywhere in it.
                   dict_of_values = dict(zip(clean_endtimes, clean_values))
                   df_from_dict.dropna(inplace=True)
# The try below is at column 0, i.e. outside the `for response` loop, so it
# runs once after all responses have been iterated and appends at most one
# frame.
try:
   df_from_dicts.append(df_from_dict) # if it is only lists
except UnboundLocalError:
    pass

则它不会解析第二个字典。

而在 dict、list、dict 这样的顺序上迭代时，又会存储重复项。

0 个答案: