我编写了一个脚本,用来获取公开可用的数据,并使用 peewee 将其存入 SQLite 数据库。这些数据可以通过 site.com/data/1、site.com/data/2 …… site.com/data/N 访问,所以我使用 for 循环,每次改变 N 的值。
问题是在一段时间后脚本停止工作。我的意思是,它只是停止响应(不退出)。我在循环中有print语句,它停止打印下一个数字。
循环范围较小时,它可以完美运行;循环范围较大时,它就会停止工作。我发现范围在 80 以内时一切正常。
当我强制终止脚本时,终端上会输出以下内容。它基本上与 requests 库和 HTTP 连接有关。
Traceback (most recent call last):
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 372, in _make_request
httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'
完整的回溯信息:
Traceback (most recent call last):
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 372, in _make_request
httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "script.py", line 71, in <module>
main()
File "script.py", line 56, in main
user_data = get_script_user_data(i)
File "script.py", line 42, in get_script_user_data
r = requests.get(users_data_public_api.format(user_id))
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/api.py", line 69, in get
return request('get', url, params=params, **kwargs)
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/api.py", line 50, in request
response = session.request(method=method, url=url, **kwargs)
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/sessions.py", line 465, in request
resp = self.send(prep, **send_kwargs)
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/sessions.py", line 573, in send
r = adapter.send(request, **kwargs)
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/adapters.py", line 370, in send
timeout=timeout
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
body=body, headers=headers)
File "/Users/avi/Documents/code/my-app/venv/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 374, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/Cellar/python3/3.4.3/Frameworks/Python.framework/Versions/3.4/lib/python3.4/http/client.py", line 1171, in getresponse
response.begin()
File "/usr/local/Cellar/python3/3.4.3/Frameworks/Python.framework/Versions/3.4/lib/python3.4/http/client.py", line 351, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python3/3.4.3/Frameworks/Python.framework/Versions/3.4/lib/python3.4/http/client.py", line 313, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/local/Cellar/python3/3.4.3/Frameworks/Python.framework/Versions/3.4/lib/python3.4/socket.py", line 374, in readinto
return self._sock.recv_into(b)
KeyboardInterrupt
我猜是我的请求发得太快,requests 来不及缓冲(这只是我的假设)。所以我加了一条休眠半秒的 sleep 语句,但问题仍然存在。然后我把休眠时间改为 1.5 秒,循环范围到 400 之前都能正常工作;但在下一次运行时,它在 50-60 左右就停住了。
这是完整的代码:
import sqlite3
import datetime
import time
from datetime import date
import requests
from peewee import *
# URL template for the public per-user endpoint; {0} is replaced with the user id.
users_data_public_api = "http://api.some-site.com/user/{0}"
# SQLite database handle; the OpendataUser model is bound to it via Meta.database.
db = SqliteDatabase('opendata_users.db')
class OpendataUser(Model):
    """Peewee model holding one user record, stored in the ``db`` database."""

    user_id = IntegerField()
    fullname = CharField()
    email = CharField()
    # sex and dob are declared nullable; the other columns are not.
    sex = CharField(null=True)
    dob = DateField(null=True)

    class Meta:
        database = db
def initialize_db():
    """Open the connection to the module-level ``db`` database."""
    db.connect()
    # NOTE(review): table creation is disabled here; presumably the table
    # already exists in the database file -- confirm before a first run.
    # db.create_tables([OpendataUser])
def deinit():
    """Close the connection to the module-level ``db`` database."""
    db.close()
def get_opendata_user_data(user_id, timeout=10):
    """Fetch one user's record from the public API.

    Args:
        user_id: Value substituted into ``users_data_public_api``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a connection that
            never answers, which makes the whole script appear frozen.

    Returns:
        The user dict when the response status is OK, ``email`` is set and
        does not contain ``'deleted'``, and ``fullname`` and ``dob`` are set.
        Its ``dob`` entry is replaced by a ``date`` parsed as ``DD/MM/YYYY``,
        or by ``None`` when the value does not match that format.
        Returns ``None`` in every other case.

    Raises:
        requests.exceptions.Timeout: The server did not respond within
            ``timeout`` seconds.
    """
    response = requests.get(users_data_public_api.format(user_id),
                            timeout=timeout)
    if response.status_code != requests.codes.ok:
        return None

    user_data = response.json()['users'][0]

    email = user_data['email']
    if email is None or 'deleted' in email:
        return None
    if user_data['fullname'] is None or user_data['dob'] is None:
        return None

    try:
        user_data['dob'] = datetime.datetime.strptime(
            user_data['dob'], "%d/%m/%Y").date()
    except ValueError:
        # An unparseable birth date is stored as NULL, not treated as an error.
        user_data['dob'] = None
    return user_data
def main():
    """Fetch users 450..599 one by one and save each complete record."""
    for current_id in range(450, 600):
        record = get_opendata_user_data(current_id)
        if record:
            print(current_id)
            row = OpendataUser(
                user_id=record['user_id'],
                fullname=record['fullname'],
                email=record['email'],
                sex=record['sex'],
                dob=record['dob'],
            )
            row.save()
        # Pause between consecutive requests.
        time.sleep(1.5)
if __name__ == '__main__':
    initialize_db()
    try:
        main()
    finally:
        # Close the database even when main() raises or is interrupted
        # (e.g. Ctrl-C), so the connection is never left open.
        deinit()
那么原因是什么?非常感谢任何帮助。(代码不是开源的,所以我对其中一些地方做了修改。)