nytimesarticle api keep getting a key error: KeyError: 'news_desk'

时间:2018-06-04 17:38:08

标签: python

Im working on a project where I am using the New York Times Search API to scrape for articles regarding protest using the keyword divest. I am having problems getting the code to work I am new to python been learning it for the last 3 weeks the example you see is from the tutorial i am using to learn how to scrape the nytimesarticle package that I had to install along with pip

from nytimesarticle import articleAPI
api = articleAPI('93a2fe2f6c3a4feca8481fcc83efa43c')
articles = api.search(q = 'Obama', fq = {'headlines':'Obama','source':['Reuters','AP','The New York Times']},begin_date = 20111231)
"""
This function takes in a response to the NYT api and parses
the articles into a list of dictionaries
"""
def parse_articles(articles):
    news = []
    for i in articles['response']['docs']:
        dic = {}
        dic['id'] = i['_id']
        if i['abstract'] is not None:
            dic['abstract'] = i['abstract'].encode("utf8")
        dic['headline'] = i['headline']['main'].encode("utf8")
        dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10] # cutting time of day.
        dic['section'] = i['section_name']
        if i['snippet'] is not None:
            dic['snippet'] = i['snippet'].encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        # locations
        locations = []
        for x in range(0,len(i['keywords'])):
            if 'glocations' in i['keywords'][x]['name']:
                locations.append(i['keywords'][x]['value'])
        dic['locations'] = locations
        # subject
        subjects = []
        for x in range(0,len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['subjects'] = subjects
        news.append(dic)
    return(news)

def get_articles(date,query):
    '''
    This function accepts a year in string format (e.g.'1980')
    and a query (e.g.'Amnesty International') and it will
    return a list of parsed articles (in dictionaries)
    for that year.
    '''
    all_articles = []
    for i in range(0,100): #NYT limits pager to first 100 pages. But rarely will you find over 100 pages of results anyway.
        articles = api.search(q = query,
               fq = {'source':['Reuters','AP', 'The New York Times']},
               begin_date = date + '0101',
               end_date = date + '1231',
               sort='oldest',
               page = str(i))
        articles = parse_articles(articles)
        all_articles = all_articles + articles
    return(all_articles)

Amnesty_all = []
for i in range(1980,2014):
    print 'Processing' + str(i) + '...'
    Amnesty_year =  get_articles(str(i),'Amnesty International')
    Amnesty_all = Amnesty_all + Amnesty_year

import csv
keys = Amnesty_all[0].keys()
with open('amnesty-mentions.csv', 'wb') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(Amnesty_all)

This is the error message I keep getting in the Terminal:

Traceback (most recent call last):
  File "nytimesscrape.py", line 62, in <module>
    Amnesty_year =  get_articles(str(i),'Amnesty International')
  File "nytimesscrape.py", line 55, in get_articles
    articles = parse_articles(articles)
  File "nytimesscrape.py", line 16, in parse_articles
    dic['desk'] = i['news_desk']
KeyError: 'news_desk'

1 个答案:

答案 0 :(得分:0)

Python raises a KeyError whenever a dict() object is requested and the key is not in the dictionary. It appears that 'news_desk' is not an element of at least one of the elements of articles['response']['docs'].