挣扎着一段似乎按原样运行的代码但在完成之前遇到了一个递归错误。
import csv
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re
import pandas
data = pandas.read_csv("list.csv", header=0)
areaList = list(data.a)
pageNum = ["1","2","3","4","5"]
S = set()
def linkscraper(indexvalue):
n = 0
while indexvalue < 221 and n < 5:
req = Request("https://www.someurl.com/search_location="+areaList[indexvalue]+"&page="+pageNum[n], headers={"User-Agent": "Mozilla/5.0"})
html = urlopen(req).read()
bsObj = BeautifulSoup(html)
for link in bsObj.find_all(href=re.compile("searchterm")):
S.update([link["href"]])
print(len(S))
n += 1
linkscraper(indexvalue+1)
linkscraper(0)
由于递归函数调用只发生了221次,我认为python的递归限制默认值至少是2000次?有什么进展的线索?如果答案非常明显,请提前道歉,还是很新的!