我输入的json中的最大记录数为100,但是有一个分页下一个链接提供接下来的100条记录。以下是我所拥有但它返回一个只有100个条目的字典 - 我知道还有更多 - 我应该如何修改此函数以获取所有记录?
def process_comment_json(comment_json):
post_comment_dict = dict()
next_links = list()
if 'comments' in comment_json.keys():
try:
for y in comment_json['comments']['data']:
post_id = comment_json['id']
commentor_name = y['from']['name']
commentor_id = y['from']['id']
created_time = y['created_time']
message = remove_non_ascii(y['message'])
sentiment = return_sentiment_score(message)
post_comment_dict[commentor_id] = {'commentor_name':commentor_name,\
'created_time':created_time, 'message':message,\
'sentiment':sentiment}
except:
print("malformed data, skipping this comment in round1")
if 'next' in comment_json['comments']['paging']:
print('found_next appending')
next_links.append(comment_json['comments']['paging']['next'])
else:
return post_comment_dict
while next_links:
print("processing next_links")
print("current len of post_comment_dict is:", len(post_comment_dict))
for next_link in next_links:
t = requests.get(next_link)
nl_json = t.json()
next_links.pop()
if "data" in list(nl_json.keys()):
for record in nl_json['data']:
try:
for y in comment_json['comments']['data']:
post_id = comment_json['id']
commentor_name = y['from']['name']
commentor_id = y['from']['id']
created_time = y['created_time']
message = remove_non_ascii(y['message'])
sentiment = return_sentiment_score(message)
post_comment_dict[commentor_id] = {'commentor_name':commentor_name,\
'created_time':created_time, 'message':message,\
'sentiment':sentiment}
except:
print("malformed data, skipping this comment from the next_links list")
if 'next' in comment_json['comments']['paging']:
print('found_next appending')
next_links.append(comment_json['comments']['paging']['next'])
else:
return post_comment_dict