我正在尝试用一个爬虫(scraper)下载并保存图片,但它只下载了前 20 张,而我希望它能下载尽可能多的图片。
import requests
from bs4 import BeautifulSoup
import os
# Google search URL (q=cats, tbm=isch) whose result page is scraped below.
url = "https://www.google.com/search?q=cats&sxsrf=ALeKk01diaA8AhwZsRpiMkZxaTUY6MuN4Q:1624119375856&source=lnms&tbm=isch&sa=X&ved=2ahUKEwj62uGTjKTxAhWMIsAKHV12B74Q_AUoAXoECAEQAw&biw=1848&bih=949"
folder = "images"

# Fetch the page and collect every <img> tag found in the returned HTML.
# Only tags present in this single HTTP response are seen; requests does not
# execute any script the page may contain.
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")
images = soup.select("img")

# Create the output folder if it is missing, then make it the working
# directory so the files below can be written with bare names.
# exist_ok=True tolerates an existing folder while still surfacing real
# failures (e.g. permissions) instead of silently swallowing them.
target_dir = os.path.join(os.getcwd(), folder)
os.makedirs(target_dir, exist_ok=True)
os.chdir(target_dir)

for i, image in enumerate(images):
    # The first <img> on the page is deliberately skipped.
    if i == 0:
        continue
    link = image.get("src")
    if not link:
        # Tag has no "src" attribute: nothing to download.
        continue
    # Download first, then open the file, so a failed request does not
    # leave an empty "<i>.jpg" behind.
    im = requests.get(link)
    with open(str(i) + ".jpg", "wb") as f:
        f.write(im.content)
    print("Writing: ", i)
答案 0 :(得分:0)
使用下面的代码,我得到了 109 个 jpeg 文件:
import requests
from bs4 import BeautifulSoup
import os
# Desktop-browser User-Agent sent with every request.
# NOTE(review): presumably needed so the server returns the markup a real
# browser would get -- confirm against the actual responses.
my_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 OPR/58.0.3135.107"
headers = {"User-Agent": my_UA}

# Google search URL (q=cats, tbm=isch) whose result page is scraped below.
url = "https://www.google.com/search?q=cats&sxsrf=ALeKk01diaA8AhwZsRpiMkZxaTUY6MuN4Q:1624119375856&source=lnms&tbm=isch&sa=X&ved=2ahUKEwj62uGTjKTxAhWMIsAKHV12B74Q_AUoAXoECAEQAw&biw=1848&bih=949"
folder = "images"

# Fetch the page and collect every <img> tag found in the returned HTML.
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, "html.parser")
images = soup.select("img")

# Create the output folder if it is missing, then make it the working
# directory so the files below can be written with bare names.
# exist_ok=True tolerates an existing folder while still surfacing real
# failures (e.g. permissions) instead of silently swallowing them.
target_dir = os.path.join(os.getcwd(), folder)
os.makedirs(target_dir, exist_ok=True)
os.chdir(target_dir)

print("total images found=", len(images))
for i, image in enumerate(images):
    # The image address may be in "src" or, failing that, in "data-src".
    link = image.get("src") or image.get("data-src") or ""
    # Skip tags without an address and anything whose address contains
    # "image/gif;".
    if not link or "image/gif;" in link:
        continue
    # Download first, then open the file, so a failed request does not
    # leave an empty "<i>.jpg" behind.
    im = requests.get(link, headers=headers)
    with open(str(i) + ".jpg", "wb") as f:
        f.write(im.content)
    print("Writing: ", i)
- 图片地址可能出现在两个属性中:“src” 和 “data-src”,代码会依次检查这两个属性。
- 它会跳过 gif。
- 如果需要下载更多文件,可以使用 selenium 来实现。