我编写了一个程序来爬网任何网站,并提供网站中存在的图像的名称。用户可以从列表中选择要保存的图像。图像已下载,但无法打开图像文件。我添加了保存图像的功能。 GUI是在Tkinter中完成的,我猜这不是必需的。
def fetch_url():
url = _url.get()
config['images'] = []
_images.set(())
try:
page = requests.get(url)
except requests.RequestException as rex:
_sb(str(rex))
else:
soup = BeautifulSoup(page.content, 'html.parser')
images = fetch_images(soup, url)
if images:
_images.set(tuple(img['name'] for img in images))
_sb('Images found: {}'.format(len(images)))
else:
_sb('No images found!.')
config['images'] = images
def fetch_images(soup, base_url):
images = []
for img in soup.findAll('img'):
src = img.get('src')
img_url = ('{base_url}/{src}'.format(base_url=base_url, src=src))
name = img_url.split('/')[-1]
if name[-3:] == "png" or name[-3:] == "jpg" or name[-4:] == "jpeg": ### <- here
images.append(dict(name=name, url=img_url))
return images
def fetch_selected_images(event):
widget = event.widget
selected_idx = widget.curselection()
selected_items = [widget.get(int(item)) for item in selected_idx]
selected_images = []
url = _url.get() + '/img'
for img in selected_items:
img_url = ('{base_url}/{src}'.format(base_url=url, src=img))
name = img_url.split('/')[-1]
if name in selected_items:
selected_images.append(dict(name=name, url=img_url))
for idx in selected_idx:
widget.itemconfig(idx, fg='red')
config['images'] = selected_images
def save():
if not config.get('images'):
_alert('No images to save!')
return
if _save_method.get() == 'img':
dirname = filedialog.os.getcwd()
_save_images(dirname)
def _save_images(dirname):
i = 1
if dirname and config.get('images'):
for img in config['images']:
img_data = requests.get(img['url']).content
filename = str(i)
i = i + 1
with open(filename + '.png', 'wb') as f:
f.write(img_data)
_alert('Done')