我正在尝试抓取 Google 图片搜索的结果,以便为列表(liste_name)中的每个名称下载一些图片(例如每个名称 100 张左右)。但是我的代码每次只返回 20 张图片,我不知道为什么。
这是我的代码:
import os
import requests
from bs4 import BeautifulSoup
# Search terms; one dataset sub-folder is created per entry.
liste_name = ['blood orange', 'apple golden']

SEARCH_URL = "https://www.google.ch/search"
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the script
MAX_ATTEMPTS = 3  # download attempts per image before it is skipped


def _download(link):
    """Return the bytes served at *link*, or None if every attempt fails.

    The attempt count is bounded so that a persistent SSL (or other
    transport) error cannot spin forever.
    """
    for _ in range(MAX_ATTEMPTS):
        try:
            reply = requests.get(link, timeout=REQUEST_TIMEOUT)
            # Do not save an HTML error page under a ".png" name.
            reply.raise_for_status()
        except requests.exceptions.RequestException:
            continue
        return reply.content
    return None


for name in liste_name:
    # Folder/file stem: words joined by "_". Works for one-word names and
    # for names with more than two words as well.
    full_name = "_".join(name.split())
    path = "./Dataset/Trainset/" + full_name + "/"

    # Let requests build and percent-encode the query string; hand-joining
    # the words with a bare "%" and omitting the "&" before "biw" produced a
    # malformed URL.
    query = {
        "site": "webhp",
        "tbm": "isch",
        "source": "hp",
        "q": name,
        "oq": name,
        "biw": 1280,
        "bih": 579,
        "num": 100,
    }
    try:
        response = requests.get(SEARCH_URL, params=query, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        print("search failed for %r: %s" % (name, err))
        continue

    # NOTE(review): the author reports only ~20 <img> tags in this response,
    # so "num" does not appear to raise that limit for image search; the
    # remaining results are presumably loaded by JavaScript. Getting ~100
    # images likely needs a browser-driven scraper or an official search
    # API -- confirm before relying on this script for a full dataset.
    soup = BeautifulSoup(response.content, 'html.parser')
    list_res_image = soup.find_all("img")

    os.makedirs(path, exist_ok=True)

    for index, lien in enumerate(list_res_image):
        link = lien.get('src')
        # Skip tags without a src and anything requests cannot fetch
        # directly (relative paths, inline "data:" URIs).
        if not link or not link.startswith(("http://", "https://")):
            continue

        img = _download(link)
        if img is None:
            print("giving up on %s after %d attempts" % (link, MAX_ATTEMPTS))
            continue

        with open(path + full_name + str(index) + ".png", "wb") as f:
            f.write(img)