import requests
import pandas as pd

try:
    # Top-level home of json_normalize (pandas >= 1.0).
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only expose it under pandas.io.json.
    from pandas.io.json import json_normalize

# NOTE(review): everything that followed the '?' (search terms, API key, ...)
# is missing from this copy of the script, and the parenthesis that wrapped
# the multi-line string was never closed. Only the endpoint is known; append
# the real query string before running.
url = 'https://newsapi.org/v2/everything?'
response = requests.get(url)
# Surface HTTP failures here instead of as a confusing KeyError further down.
response.raise_for_status()

# json_normalize() builds a dataframe from the list-and-dictionaries structure
# typically loaded from a JSON source. Passing the article list itself with
# sep='_' also flattens the nested 'source' dictionary into 'source_*'
# columns (such as 'source_name') in the same step, so no separate
# expand-and-concat pass is needed and the result does not depend on how the
# installed pandas treats nested dicts under a record path.
df = json_normalize(response.json()['articles'], sep='_')
# make the datetime column a native type, and add a date-only column
df['publishedAt'] = pd.to_datetime(df['publishedAt'])
df['date'] = df['publishedAt'].dt.date
Y con esto obtengo:
>>> pd.pivot_table(
... df, index='date', columns='source_name', values='title',
... aggfunc=list
... )
source_name Financial Times Reuters The Times of India
2018-12-07 [Pound exodus: Brexit drives away US and Asia ... [U.S. accuses Huawei CFO of Iran sanctions cov... [Live 1st Test India vs Australia: Rain delays...
¿Cómo obtener 20 resultados de estas fuentes, indexados por día? ¿Cómo llamar a la URL para cada día y almacenar sus resultados en un DataFrame de forma recursiva?
Es decir, con el siguiente formato:
2007-01-01 What Sticks from '06. Somalia Orders Islamist...
2007-01-02 Heart Health: Vitamin Does Not Prevent Death ...
2007-01-03 Google Answer to Filling Jobs Is an Algorithm...
2007-01-04 Helping Make the Shift From Combat to Commerc...
2007-01-05 Rise in Ethanol Raises Concerns About Corn as...
# NOTE(review): this API key is committed in plain text. It is kept so the
# script talks to the same account as before, but it should be rotated and
# loaded from configuration or the environment instead.
API_KEY = 'de9e19b7547e44c4983ad761c104278f'
ENDPOINT = 'https://newsapi.org/v2/everything'
# NOTE(review): the query parameters that originally sat between the endpoint
# and the API key are missing from this copy of the script. Put the search
# terms here; they are sent with every request.
SEARCH_PARAMS = {}


def fetch_day(day):
    """Request the articles for a single day and return the decoded JSON.

    Args:
        day: Anything ``pd.Timestamp`` accepts (string, date, datetime64).

    Returns:
        The JSON payload of the response, decoded into Python objects.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    stamp = pd.Timestamp(day).strftime('%Y-%m-%d')
    params = dict(SEARCH_PARAMS)
    # The original formatted the URL with '% (date, date)', i.e. the same day
    # twice; presumably those were the 'from' and 'to' bounds -- confirm
    # against the API documentation.
    params.update({'from': stamp, 'to': stamp, 'apiKey': API_KEY})
    response = requests.get(ENDPOINT, params=params)
    response.raise_for_status()
    return response.json()


def build_title_table(payloads):
    """Combine API payloads into a date x source_name table of title lists.

    Args:
        payloads: Iterable of decoded responses, each holding an 'articles'
            list whose items carry 'title', 'publishedAt' and a nested
            'source' dictionary with a 'name' key.

    Returns:
        A DataFrame indexed by publication date with one column per source
        name; each cell is the list of titles for that day and source (NaN
        where a source has no article). An empty DataFrame if no payload
        contains any article.
    """
    frames = []
    for payload in payloads:
        records = payload['articles']
        if records:
            # sep='_' flattens the nested 'source' dictionary straight into
            # 'source_*' columns such as 'source_name'.
            frames.append(json_normalize(records, sep='_'))
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of calling DataFrame.append in the loop: append
    # returned a new frame (the original discarded it) and is gone in pandas 2.
    articles = pd.concat(frames, ignore_index=True)
    # make the datetime column a native type, and add a date-only column
    articles['publishedAt'] = pd.to_datetime(articles['publishedAt'])
    articles['date'] = articles['publishedAt'].dt.date
    # Pivot after combining so that articles for the same date coming from
    # different requests end up in a single row.
    return pd.pivot_table(
        articles, index='date', columns='source_name', values='title',
        aggfunc=list,
    )


if __name__ == '__main__':
    # 2018-12-09 .. 2019-01-08 inclusive: the same days as the original
    # half-open np.arange() that stopped before 2019-01-09.
    dates = pd.date_range('2018-12-09', '2019-01-08')
    df = build_title_table(fetch_day(date) for date in dates)