抓取图像并仅保存选定的图像

时间:2018-11-18 03:05:41

标签: python web-scraping beautifulsoup pycharm

我编写了一个程序来爬网任何网站,并提供网站中存在的图像的名称。用户可以从列表中选择要保存的图像。图像已下载,但无法打开图像文件。我添加了保存图像的功能。 GUI是在Tkinter中完成的,我猜这不是必需的。

      def fetch_url():
        url = _url.get()
        config['images'] = []
        _images.set(())

        try:
            page = requests.get(url)
        except requests.RequestException as rex:
            _sb(str(rex))
        else:
            soup = BeautifulSoup(page.content, 'html.parser')
            images = fetch_images(soup, url)
            if images:
                _images.set(tuple(img['name'] for img in images))
                _sb('Images found: {}'.format(len(images)))
            else:
                _sb('No images found!.')
            config['images'] = images


    def fetch_images(soup, base_url):
        images = []
        for img in soup.findAll('img'):
           src = img.get('src')
           img_url = ('{base_url}/{src}'.format(base_url=base_url, src=src))
           name = img_url.split('/')[-1]
           if name[-3:] == "png" or name[-3:] == "jpg" or name[-4:] == "jpeg": ### <- here
               images.append(dict(name=name, url=img_url))
        return images


    def fetch_selected_images(event):
        widget = event.widget
        selected_idx = widget.curselection()
        selected_items = [widget.get(int(item)) for item in selected_idx]
        selected_images = []
        url = _url.get() + '/img'

        for img in selected_items:
            img_url = ('{base_url}/{src}'.format(base_url=url, src=img))
            name = img_url.split('/')[-1]
            if name in selected_items:
                selected_images.append(dict(name=name, url=img_url))
            for idx in selected_idx:
                widget.itemconfig(idx, fg='red')

        config['images'] = selected_images

        def save():
    if not config.get('images'):
        _alert('No images to save!')
        return

    if _save_method.get() == 'img':
        dirname = filedialog.os.getcwd()
        _save_images(dirname)


def _save_images(dirname):
    i = 1
    if dirname and config.get('images'):
        for img in config['images']:
            img_data = requests.get(img['url']).content
            filename = str(i)
            i = i + 1
            with open(filename + '.png', 'wb') as f:
                f.write(img_data)
        _alert('Done')

0 个答案:

没有答案