从while循环添加python dict会产生意外结果

时间:2017-01-17 01:01:37

标签: python while-loop

我输入的json中的最大记录数为100,但是有一个分页下一个链接提供接下来的100条记录。以下是我所拥有但它返回一个只有100个条目的字典 - 我知道还有更多 - 我应该如何修改此函数以获取所有记录?

def process_comment_json(comment_json):
    post_comment_dict = dict()
    next_links = list()
    if 'comments' in comment_json.keys():
        try:
            for y in comment_json['comments']['data']:
                post_id = comment_json['id']
                commentor_name = y['from']['name']
                commentor_id = y['from']['id']
                created_time = y['created_time']
                message = remove_non_ascii(y['message'])
                sentiment = return_sentiment_score(message)
                post_comment_dict[commentor_id] =  {'commentor_name':commentor_name,\
                                 'created_time':created_time, 'message':message,\
                                'sentiment':sentiment}                    

        except:
            print("malformed data, skipping this comment in round1")        


    if 'next' in comment_json['comments']['paging']:
        print('found_next appending')
        next_links.append(comment_json['comments']['paging']['next'])
    else:
        return post_comment_dict

    while next_links:
        print("processing next_links")
        print("current len of post_comment_dict is:", len(post_comment_dict))
        for  next_link in next_links:
            t = requests.get(next_link)
            nl_json = t.json()
            next_links.pop()
            if "data" in list(nl_json.keys()):
                for record in nl_json['data']:
                    try:
                        for y in comment_json['comments']['data']:
                            post_id = comment_json['id']
                            commentor_name = y['from']['name']
                            commentor_id = y['from']['id']
                            created_time = y['created_time']
                            message = remove_non_ascii(y['message'])
                            sentiment = return_sentiment_score(message)
                            post_comment_dict[commentor_id] = {'commentor_name':commentor_name,\
                                                 'created_time':created_time, 'message':message,\
                                                'sentiment':sentiment}                               

                    except:
                        print("malformed data, skipping this comment from the next_links list")

        if 'next' in comment_json['comments']['paging']:
            print('found_next appending')
            next_links.append(comment_json['comments']['paging']['next'])
        else:
            return post_comment_dict

0 个答案:

没有答案