我正在尝试逐步将csv文件的第一列中的值加载到URL中,并一次请求一个URL,延迟时间为5秒。第一列中的每个值都应替换“theid”
到目前为止,这是我的代码:
# I have a defined function
def withid (theid):
""""""
global cache
dupe = False
theurl = "{0}{1}{2}".format(OMDBURL, "?i=", theid)
response = urllib2.urlopen(theurl)
movdata = json.load(response)
for mov in cache:
if movdata[MKEY[1]] == mov[MKEY[1]]:
dupe = True
if not dupe:
cache.append(movdata)
outfile2 = open('outputrows2-shortened.csv', 'rb')
for row in outfile2:
theid = outfile2(row[0])
time.sleep(5)
输出:TypeError:'file'对象不可调用
答案 0 :(得分:0)
您的withid()
函数永远不会返回任何内容。尝试在其末尾添加return movdata
。
这是一个可能有用的重写版本:
import csv
import json
import time
import urllib2
PAGE_DELAY = 5. # time between loading pages
PAGE_LOAD = 0.3 # how long it takes to load a page
make_url = 'http://www.imdb.com/title/tt{}/'.format
def get_csv_column(csv_fname, col, **kwargs):
with open(csv_fname, 'rb') as inf:
incsv = csv.reader(inf, **kwargs)
column = [row[col] for row in incsv]
return column
def get_data_by_id(id):
url = make_url(id)
response = urllib2.urlopen(url)
data = json.load(response)
return id,data
def delayed(delay, fn, *args):
time.sleep(delay)
return fn(*args)
def human_time(seconds):
if seconds >= 86400:
return '{:0.1f} days'.format(seconds / 86400.)
elif seconds >= 3600:
return '{:0.1f} hours'.format(seconds / 3600.)
elif seconds >= 60:
return '{:0.1f} minutes'.format(minutes / 60.)
else:
return '{:0.1f} seconds'.format(seconds)
def main():
ids = get_csv_column('outputrows2.csv', 0)
expected = (PAGE_DELAY + PAGE_LOAD) * len(ids)
print('This will take about {}.'.format(human_time(expected)))
results = (delayed(PAGE_DELAY, get_data_by_id, id) for id in ids)
moviedata = dict(results) # => gives dict of {id:data}
if __name__=="__main__":
main()