我用下面的代码下载 jpg 文件没有任何问题。但正如您所见,下面的页面源代码中有大量图片的路径是 blank.gif。
<a href="/en/chowchow-puppy-sleeping-dogs-pet-448311/"><img src="/static/img/blank.gif"
我的问题:能否添加检测功能,当图片路径为 blank.gif 时,自动从“https://pixabay.com/en/chowchow-puppy-sleeping-dogs-pet-448311/”下载 640 * 426 的图像文件并保存下来?如果可以,该如何实现?
import random
import requests
from bs4 import BeautifulSoup
# got from http://stackoverflow.com/a/16696317
def download_file(url):
    """Stream the resource at ``url`` into a file in the current directory.

    The local file name is whatever follows the last ``/`` in ``url``.

    Args:
        url: Address of the file to download.

    Returns:
        The name of the local file that was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    local_filename = url.split('/')[-1]
    print("Downloading {} ---> {}".format(url, local_filename))
    # NOTE: stream=True avoids loading the whole body into memory; the
    # ``with`` block closes the response even if writing the file fails.
    with requests.get(url, stream=True) as r:
        # Check the status before creating the file so an HTTP error page
        # is never saved under an image file name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
def Download_Image_from_Web(url):
    """Download every image referenced by an ``<img>`` tag on a page.

    The page at ``url`` is fetched and parsed, and each ``src`` attribute
    is resolved against ``url`` and handed to ``download_file``.

    Args:
        url: Address of the HTML page to scan for images.
    """
    # Local import so the function works without touching the file's
    # top-level import block.
    from urllib.parse import urljoin

    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    for link in soup.find_all('img'):
        image_link = link.get('src')
        if not image_link:
            # <img> without a usable src: nothing to download.
            continue
        # urljoin resolves relative and root-relative paths correctly;
        # plain concatenation breaks when ``url`` carries a query string.
        image_link = urljoin(url, image_link)
        try:
            download_file(image_link)
        except requests.RequestException as err:
            # One unreachable image should not abort the remaining ones.
            print("Skipping {}: {}".format(image_link, err))
# Start the scrape on the Pixabay search page for "sleeping puppy".
Download_Image_from_Web(
    url="https://pixabay.com/en/photos/?q=sleeping+puppy&hp=&image_type=&cat=&min_width=&min_height="
)
答案 0 :(得分:1)
更新版本。阅读评论以获取更多信息。
import random
import requests
from bs4 import BeautifulSoup
# got from http://stackoverflow.com/a/16696317
def download_file(url):
    """Stream the resource at ``url`` into a file in the current directory.

    The local file name is whatever follows the last ``/`` in ``url``.

    Args:
        url: Address of the file to download.

    Returns:
        The name of the local file that was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    local_filename = url.split('/')[-1]
    print("Downloading {} ---> {}".format(url, local_filename))
    # NOTE: stream=True avoids loading the whole body into memory; the
    # ``with`` block closes the response even if writing the file fails.
    with requests.get(url, stream=True) as r:
        # Check the status before creating the file so an HTTP error page
        # is never saved under an image file name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
def Download_Image_from_Web(url):
    """Download every image referenced by an ``<img>`` tag on a page.

    When a tag's ``src`` is missing or ends with ``blank.gif``, the
    ``data-lazy`` attribute is used as the image address instead. Each
    address is resolved against ``url`` and handed to ``download_file``.

    Args:
        url: Address of the HTML page to scan for images.
    """
    # Local import so the function works without touching the file's
    # top-level import block.
    from urllib.parse import urljoin

    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    for link in soup.find_all('img'):
        image_link = link.get('src')
        # A blank.gif src is a placeholder; fall back to data-lazy.
        if not image_link or image_link.endswith('blank.gif'):
            image_link = link.get('data-lazy')
        if not image_link:
            # Neither attribute holds an address: nothing to download.
            continue
        # urljoin resolves relative and root-relative paths correctly;
        # plain concatenation breaks when ``url`` carries a query string.
        image_link = urljoin(url, image_link)
        try:
            download_file(image_link)
        except requests.RequestException as err:
            # One unreachable image should not abort the remaining ones.
            print("Skipping {}: {}".format(image_link, err))
# Start the scrape on the Pixabay search page for "sleeping puppy".
Download_Image_from_Web(
    url="https://pixabay.com/en/photos/?q=sleeping+puppy&hp=&image_type=&cat=&min_width=&min_height="
)