我有以下定义的函数,并试图弄清楚如何从csv中提取值并将它们放入URL的“theid”部分。
def get_csv_column(csv_fname, col, **kwargs):
    """Return column *col* (0-based index) of CSV file *csv_fname* as a list.

    Extra keyword arguments are forwarded unchanged to ``csv.reader``.
    """
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the original binary 'rb' mode makes csv.reader raise there.
    with open(csv_fname, 'r', newline='') as inf:
        incsv = csv.reader(inf, **kwargs)
        return [row[col] for row in incsv]
def withid(theid):
    """Fetch movie data for OMDB id *theid* and memoize it.

    Builds the lookup URL from the module-level ``OMDBURL``, decodes the
    JSON response, and appends it to the module-level ``cache`` list
    unless an entry with the same ``MKEY[1]`` field is already cached.
    Returns the decoded movie data (a dict).
    """
    global cache
    theurl = "{0}{1}{2}".format(OMDBURL, "?i=", theid)
    response = urllib2.urlopen(theurl)
    movdata = json.load(response)
    # Original scanned the whole cache with a flag and never broke early;
    # any() short-circuits on the first duplicate.
    if not any(mov[MKEY[1]] == movdata[MKEY[1]] for mov in cache):
        cache.append(movdata)
    return movdata
# Fetch each movie listed in the CSV, write its data, and pause 5 s
# between requests.  The original looped over the function object itself
# ("for column in withid"), which raises TypeError, and passed a dict to
# file.write; iterate the ids and serialize with json.dumps instead.
# NOTE(review): CSV filename taken from the answer below — confirm.
ids = get_csv_column('outputrows2.csv', 0)
with open('step3_desired_output.txt', 'w') as step3:
    for theid in ids:
        movdata = withid(theid)
        step3.write(json.dumps(movdata))
        time.sleep(5)
这似乎不起作用,我只是迷失了如何继续。
答案 0(得分:0)
接续上一问题 “Trying to run a defined function with a delay”:
import csv
import json
import time
import urllib2
# Pacing and file-location constants for the scraping run.
PAGE_DELAY = 5. # time between loading pages
PAGE_LOAD = 0.3 # how long it takes to load a page
INFILE = 'outputrows2.csv'
OUTFILE = 'step3_desired_output.txt'
# Bound str.format method: make_url(id) -> 'http://www.imdb.com/title/tt<id>/'
make_url = 'http://www.imdb.com/title/tt{}/'.format
def get_csv_column(csv_fname, col, **kwargs):
    """Return column *col* (0-based index) of CSV file *csv_fname* as a list.

    Extra keyword arguments are forwarded unchanged to ``csv.reader``.
    """
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the original binary 'rb' mode makes csv.reader raise there.
    with open(csv_fname, 'r', newline='') as inf:
        incsv = csv.reader(inf, **kwargs)
        return [row[col] for row in incsv]
def get_data_by_id(id):
    """Download and return the raw IMDB page body for title *id*."""
    # Build the title URL, then fetch and read the whole response.
    return urllib2.urlopen(make_url(id)).read()
def delayed(delay, fn, *args):
    """Sleep *delay* seconds, then call ``fn(*args)`` and return its result."""
    time.sleep(delay)
    result = fn(*args)
    return result
def human_time(seconds):
    """Format a duration in *seconds* as a human-readable string.

    Picks the largest fitting unit (days, hours, minutes, seconds) and
    renders it with one decimal place.
    """
    if seconds >= 86400:
        return '{:0.1f} days'.format(seconds / 86400.)
    elif seconds >= 3600:
        return '{:0.1f} hours'.format(seconds / 3600.)
    elif seconds >= 60:
        # Bug fix: original used undefined name 'minutes' here,
        # raising NameError for any 60 <= seconds < 3600.
        return '{:0.1f} minutes'.format(seconds / 60.)
    else:
        return '{:0.1f} seconds'.format(seconds)
def main():
    """Read ids from INFILE, fetch each page with a per-request delay,
    and append every raw page body to OUTFILE."""
    ids = get_csv_column(INFILE, 0)
    # Give the user a rough runtime estimate before starting.
    eta = (PAGE_DELAY + PAGE_LOAD) * len(ids)
    print('This will take about {}.'.format(human_time(eta)))
    with open(OUTFILE, 'w') as outf:
        for movie_id in ids:
            # delayed() sleeps, then fetches — same lazy, one-at-a-time
            # execution the original generator expression produced.
            outf.write(delayed(PAGE_DELAY, get_data_by_id, movie_id))

if __name__=="__main__":
    main()