有一个用于处理JSON API响应并将其转换为pandas df的功能。
def test_lists(responses):
df_from_list = []
ids = []
df_from_dicts = []
df_from_lists = []
responses_json = []
# clean values
try:
for response in responses:
data = response.get('data')
names = [x.get('name') for x in data]
periods = [x.get('period') for x in data]
ids = [x.get('id') for x in data]
ids = [x.split('/')[0] for x in ids]
values = [x.get('values') for x in data]
merged = list(itertools.chain(*values))
clean_values = [x.get('value') for x in merged]
clean_endtimes = [x.get('end_time') for x in merged]
# check the type of the element
for elem in clean_values:
if type(elem) == dict:
dict_of_values = dict(zip(clean_endtimes, clean_values))
df_from_dict.dropna(inplace=True)
if type(elem) != dict:
response_json=json_normalize(response['data'], record_path=['values'], meta=['id', 'name', 'period'])
responses_json.append(response_json)
df_from_list = pd.concat(responses_json, sort=True).reset_index(drop=True)
df_from_list.drop_duplicates(inplace=True)
# append every result if it is a dict, else pass
try:
df_from_dicts.append(df_from_dict)
except UnboundLocalError:
pass
except:
if len(responses) < 1: # if there are no data, stop
pass
try:
# if there are lists and dicts
if len(df_from_dicts) > 0 and len(df_from_list) > 0:
df_fb = df_from_list.append(df_from_dicts, sort=True)
# if it is only lists
elif not df_from_dicts:
df_fb = df_from_list
# if it is only dicts
elif not df_from_list:
df_fb = df_from_dicts
return df_fb
# if somehow df_fb is empty
except:
if len(df_fb) < 1:
pass
当遍历包含字典和列表的响应列表时,我不知道为什么,但是字典是重复的。
如果该类型的行是这样的:
dict, list, list, dict
它将第一个字典的结果相加3次。
我尝试将if添加到列表中
# check the type of the element
for elem in clean_values:
if type(elem) == dict:
dict_of_values = dict(zip(clean_endtimes, clean_values))
df_from_dict.dropna(inplace=True)
try:
df_from_dicts.append(df_from_dict)
except UnboundLocalError:
pass
但是它会使所有内容翻倍4倍。
如果我把它放在第二个循环之外
for response in responses:
data = response.get('data')
names = [x.get('name') for x in data]
periods = [x.get('period') for x in data]
ids = [x.get('id') for x in data]
ids = [x.split('/')[0] for x in ids]
values = [x.get('values') for x in data]
merged = list(itertools.chain(*values))
clean_values = [x.get('value') for x in merged]
clean_endtimes = [x.get('end_time') for x in merged]
# check the type of the element
for elem in clean_values:
if type(elem) == dict:
dict_of_values = dict(zip(clean_endtimes, clean_values))
df_from_dict.dropna(inplace=True)
try:
df_from_dicts.append(df_from_dict) # if it is only lists
except UnboundLocalError:
pass
它不解析第二个字典