The script runs through the file of URLs but times out after roughly 75-100 of them have been scraped.
I've tried adding delays and got the same result, and I'm not sure what else to try. Any input would be greatly appreciated.
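
To show what I mean by a delay, it was roughly this shape (the sleep value here is only an example, not necessarily what I used):

import time

import requests

urls = ["https://example.com/a", "https://example.com/b"]  # placeholder URLs for illustration

for url in urls:
    time.sleep(2)  # pause between requests; 2 seconds is just an example value
    response = requests.get(url)
    print(url, response.status_code)

Here is the actual loop: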
import urllib.request

import requests
from bs4 import BeautifulSoup

# inf is the open file of URLs (opened earlier, not shown here)
urls = [line.strip() for line in inf]

for url in urls:
    sourceCode = requests.get(url)
    plainText = sourceCode.text
    soup = BeautifulSoup(plainText, "html.parser")

    # each card holds one lazy-loaded image; grab its source URL and title
    irock = soup.find_all('div', class_="card-img-container")
    for img in irock:
        imageElement = img.find("img")
        bingo = imageElement.get("data-src")
        imgName = imageElement.get("title")
        fullName = str(imgName) + ".jpg"
        r = urllib.request.urlretrieve(bingo, fullName)

    print(url)

print("Done")
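
In case it helps to see what I'm considering trying next, here's a sketch of the same loop with a shared Session, a per-request timeout, and a try/except so a bad URL gets skipped instead of hanging the whole run. The timeout and delay values are guesses, and scrape_images / delay / timeout are just names I made up for this sketch. Is this the right direction, or is the timeout more likely the site rate-limiting me?

import time
import urllib.request

import requests
from bs4 import BeautifulSoup

def scrape_images(urls, delay=1.0, timeout=10):
    # reuse one connection pool and cap how long each request may take
    session = requests.Session()
    for url in urls:
        try:
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as exc:
            # skip URLs that time out or return an error status instead of stopping
            print("skipping", url, "->", exc)
            continue
        soup = BeautifulSoup(response.text, "html.parser")
        for card in soup.find_all('div', class_="card-img-container"):
            imageElement = card.find("img")
            if imageElement is None:
                continue
            bingo = imageElement.get("data-src")
            imgName = imageElement.get("title")
            if bingo:
                urllib.request.urlretrieve(bingo, str(imgName) + ".jpg")
        print(url)
        time.sleep(delay)  # small pause between pages
    print("Done")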