我在使用以下代码时遇到了一个问题:
def getTextFromURL(url):
    """Download ``url`` with pycurl and return the text of the parsed page.

    The response body is buffered in memory, parsed with BeautifulSoup and
    reduced to plain text through ``get_text()``.

    Args:
        url: Address to fetch; passed to ``pycurl.URL`` unchanged.

    Returns:
        The extracted text, or ``''`` when the transfer or the parsing raised
        an ``Exception`` (the exception type is printed). ``KeyboardInterrupt``
        and ``SystemExit`` are deliberately not swallowed.
    """
    req = pycurl.Curl()
    try:
        print("URL: " + str(url))
        req.setopt(pycurl.URL, url)
        # A header name with nothing after the colon asks libcurl to omit
        # that header from the request.
        req.setopt(pycurl.HTTPHEADER, ["Accept:"])
        req.setopt(pycurl.CONNECTTIMEOUT, 5)
        req.setopt(pycurl.TIMEOUT, 10)
        req.setopt(pycurl.NOSIGNAL, 1)
        resultStream = StringIO.StringIO()
        req.setopt(pycurl.WRITEFUNCTION, resultStream.write)
        try:
            req.perform()
            return BeautifulSoup(resultStream.getvalue()).get_text()
        except Exception as e:
            # Best effort: report what went wrong and fall back to no text.
            print(type(e))
            return ''
    finally:
        # Always release the curl handle; without this every call leaks a
        # handle together with its connection resources.
        req.close()
def addTextToTask(newTask):
    """Populate ``newTask.fullText`` from the page at ``newTask.url``.

    The task is modified in place and the same object is returned, so the
    function can be used directly as a ``map`` callback.
    """
    pageText = getTextFromURL(newTask.url)
    newTask.fullText = pageText
    return newTask
def createTasks(sources):
    """Create a Task for every source, fetch its text, and store the tasks.

    Args:
        sources: Iterable of objects exposing ``title`` and ``url``.

    Returns:
        ``True`` once the database session has been committed.
    """
    newTasks = [
        models.Task(title=source.title, url=source.url, fullText=None)
        for source in sources
    ]
    # NOTE(review): if Pool is multiprocessing.Pool, every Task is pickled to
    # a worker and back again -- confirm models.Task survives that round trip.
    pool = Pool(4)
    try:
        fullTasks = pool.map(addTextToTask, newTasks)
    finally:
        # Shut the workers down even when map() raises, so a failed run does
        # not leave pool workers behind.
        pool.close()
        pool.join()
    for fullTask in fullTasks:
        db.session.add(fullTask)
    db.session.commit()
    return True
当我调用 createTasks() 函数并传入少于 100 个源时,它工作正常。如果传入超过 100 个源,它会开始变慢并最终卡死。我没有收到任何错误消息。