我正在尝试运行下面的脚本,从美国人口普查局(US Census)提取数据,但人口普查 API 拒绝了我的请求。我已经做了一些排查,但还是卡住了……请问应该如何解决这个问题?
import pandas as pd
import requests
from pandas.compat import StringIO
#Sourced from the following site https://github.com/mortada/fredapi
from fredapi import Fred
fred = Fred(api_key='xxxx')
import StringIO
import datetime
import sys
if sys.version_info[0] < 3:
from StringIO import StringIO as stio
else:
from io import StringIO as stio
# Periods to download: one request is issued per (year, month) pair.
year_list = '2013', '2014', '2015', '2016', '2017'
month_list = '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
#############################################
#Get the total exports from the United States
#############################################
# Loop-invariant part of the request URL; the "YYYY-MM" period is appended.
link = "https://api.census.gov/data/timeseries/intltrade/exports/hs?get=CTY_CODE,CTY_NAME,ALL_VAL_MO,ALL_VAL_YR&time="
# Output column names, in the order the fields are requested in `link`.
# NOTE(review): the original also named a sixth column 'UN'; presumably an
# artifact of reading the JSON body as CSV. It is not produced here.
column_names = ['CTY_CODE', 'CTY_NAME', 'EXPORT MTH', 'EXPORT YR', 'time']

# Collect one frame per period and concatenate once at the end, instead of
# growing a DataFrame inside the loop.
frames = []
for i in year_list:
    for s in month_list:
        total_link = link + i + '-' + s
        try:
            r = requests.get(total_link, headers={'User-agent': 'your bot 0.1'})
            r.raise_for_status()
            # The body is parsed as JSON (a list of rows), not as CSV, so no
            # bracket/quote stripping is needed afterwards.
            rows = r.json()
        except (requests.RequestException, ValueError) as err:
            # Best-effort download: report the failed period together with
            # the reason, then carry on with the next one. An empty or
            # non-JSON body surfaces here as a ValueError.
            print(i)
            print(s)
            print(err)
            continue
        # rows[0] is taken to be the header row and rows[1] the total line
        # that the original intended to drop -- TODO confirm against a live
        # response.
        df = pd.DataFrame(rows[2:], columns=column_names)
        frames.append(df)

# Per-frame indices are kept, matching the original ignore_index=False.
exports = pd.concat(frames) if frames else pd.DataFrame()
答案 0 :(得分:0)
可以这样做:
import ast
import itertools
import pandas as pd
import requests
# Loop-invariant part of the request URL; the "YYYY-MM" period is appended.
base = "https://api.census.gov/data/timeseries/intltrade/exports/hs?get=CTY_CODE,CTY_NAME,ALL_VAL_MO,ALL_VAL_YR&time="
year_list = ['2013','2014','2015','2016','2017']
month_list = ['01','02','03','04','05','06','07','08','09','10','11','12']
# One DataFrame per period that returned data.
exports = []
# (year, month) integer tuples for periods that returned nothing usable.
rejects = []
for year, month in itertools.product(year_list, month_list):
    url = '%s%s-%s' % (base, year, month)
    r = requests.get(url, headers={'User-agent': 'your bot 0.1'})
    # Parse the body as JSON rather than as a Python literal, so JSON-only
    # tokens such as null/true/false do not break parsing. Error statuses,
    # empty bodies and undecodable bodies are all treated as "no data".
    try:
        rows = r.json() if r.ok and r.text else None
    except ValueError:
        rows = None
    if not rows:
        rejects.append((int(year), int(month)))
        continue
    # rows[0] supplies the column names; rows[1] is skipped, exactly as the
    # original did with r[2:].
    df = pd.DataFrame(rows[2:], columns=rows[0])
    exports.append(df)
# ignore_index=True yields the same fresh 0..n-1 index as
# reset_index().drop('index', axis=1); an empty list is guarded because
# pd.concat raises when given no frames.
exports = pd.concat(exports, ignore_index=True) if exports else pd.DataFrame()
您的结果如下:
CTY_CODE CTY_NAME ALL_VAL_MO ALL_VAL_YR time
0 1010 GREENLAND 233446 233446 2013-01
1 1220 CANADA 23170845914 23170845914 2013-01
2 2010 MEXICO 17902453702 17902453702 2013-01
3 2050 GUATEMALA 425978783 425978783 2013-01
4 2080 BELIZE 17795867 17795867 2013-01
5 2110 EL SALVADOR 207606613 207606613 2013-01
6 2150 HONDURAS 429806151 429806151 2013-01
7 2190 NICARAGUA 75752432 75752432 2013-01
8 2230 COSTA RICA 598484187 598484187 2013-01
9 2250 PANAMA 1046236431 1046236431 2013-01
10 2320 BERMUDA 47156737 47156737 2013-01
11 2360 BAHAMAS 256292297 256292297 2013-01
... ... ... ... ...
13883 0024 LAFTA 27790655209 193139639307 2017-07
13884 0025 EURO AREA 15994685459 121039479852 2017-07
13885 0026 APEC 76654291110 550552655105 2017-07
13886 0027 ASEAN 6030380132 44558200533 2017-07
13887 0028 CACM 2133048149 13333440411 2017-07
13888 1XXX NORTH AMERICA 41622877949 299981278306 2017-07
13889 2XXX CENTRAL AMERICA 4697852283 30756310800 2017-07
13890 3XXX SOUTH AMERICA 8117215081 55039567414 2017-07
13891 4XXX EUROPE 25201247938 189925038230 2017-07
13892 5XXX ASIA 38329181070 274304503490 2017-07
13893 6XXX AUSTRALIA AND OC... 2389798925 16656777753 2017-07
13894 7XXX AFRICA 1809443365 13022520158 2017-07
要点说明:
- itertools.product:遍历所有(年,月)组合,并将每个组合拼接到 base 网址之后。
- rejects:一个元组列表,记录未返回任何数据的(年,月)组合。
- exports = []:先把各个 DataFrame 收集到列表中再一次性连接,因为连接 DataFrame 列表比逐个追加到已有的 DataFrame 更高效。