如何从google下载图片并在google-images-download中同时使用关键字重命名图片

时间:2018-07-24 12:02:29

标签: python web-crawler google-crawlers

在使用google-images-download时,如何在从Google下载图像的同时使用关键字重命名图像?使用此程序包时,文件名是根据图像URL生成的(原帖此处附有一张截图)。

我希望能够使用商品名称(即搜索关键字)来命名图像。有人可以帮我吗?

这是代码:

from google_images_download import google_images_download

# Create the downloader object exposed by the google_images_download package.
response = google_images_download.googleimagesdownload()

# Options handed to download(); spelled out one per line for readability.
arguments = dict(
    keywords="cat, dog, pig",
    limit=1,
    print_urls=True,
    image_directory='home1',
    size="large",
)

# Run the download and keep whatever download() returns.
absolute_image_paths = response.download(arguments)

1 个答案:

答案 0 :(得分:1)

您需要重写google_images_download中的以下方法:download_image、_get_all_items 和 download。其中 _get_all_items 和 download 也需要相应修改,以便在调用 download_image 时把搜索关键字(search_term)传递进去。

# Download Images
    def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,search_term):
        """Download one image and save it under a name built from ``search_term``.

        The file is written to ``main_directory/dir_name/<prefix ><search_term>.jpeg``.
        The name does not depend on the image URL or on ``count``, so every
        image downloaded for the same search term into the same directory is
        written to the same path.

        Args:
            image_url: URL of the image to fetch.
            image_format: Accepted for signature compatibility; not used here
                (the extension is always ``jpeg``).
            main_directory: Root output directory.
            dir_name: Sub-directory of ``main_directory`` to write into.
            count: Image counter; used only in the returned message and name.
            print_urls: If truthy, print the image URL before downloading.
            socket_timeout: Download timeout in seconds; falls back to 10 when
                falsy.
            prefix: Optional prefix; when truthy it is prepended to the file
                name followed by a space.
            print_size: If truthy, print ``self.file_size(path)`` after a
                successful write.
            no_numbering: Accepted for signature compatibility; not used here.
            search_term: Keyword used as the base of the saved file name.

        Returns:
            A tuple ``(download_status, download_message, return_image_name,
            absolute_path)``. ``download_status`` is ``'success'`` or
            ``'fail'``; on failure the last two items are empty strings.
        """
        if print_urls:
            print("Image URL: " + image_url)
        try:
            req = Request(image_url, headers={
                "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
            try:
                # timeout time to download an image
                timeout = float(socket_timeout) if socket_timeout else 10

                response = urlopen(req, None, timeout)
                try:
                    data = response.read()
                finally:
                    # Close the connection even if reading fails part-way.
                    response.close()

                # Name the file after the search keyword instead of deriving
                # the name from the image URL.
                image_name = search_term + "." + "jpeg"

                # prefix name in image
                if prefix:
                    prefix = prefix + " "
                else:
                    prefix = ''

                path = main_directory + "/" + dir_name + "/" + prefix + image_name
                print(path)

                try:
                    with open(path, 'wb') as output_file:
                        output_file.write(data)
                    absolute_path = os.path.abspath(path)
                except OSError as e:
                    download_status = 'fail'
                    download_message = "OSError on an image...trying next one..." + " Error: " + str(e)
                    return_image_name = ''
                    absolute_path = ''
                else:
                    # Only report success when the file was actually written;
                    # otherwise the failure values set above must be kept.
                    # return image name back to calling method to use it for thumbnail downloads
                    # NOTE(review): this name includes the counter, while the
                    # file saved above does not -- confirm what the caller expects.
                    download_status = 'success'
                    download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name
                    return_image_name = prefix + str(count) + ". " + image_name

                    # image size parameter
                    if print_size:
                        print("Image Size: " + str(self.file_size(path)))

            except UnicodeEncodeError as e:
                download_status = 'fail'
                download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e)
                return_image_name = ''
                absolute_path = ''

            except URLError as e:
                download_status = 'fail'
                download_message = "URLError on an image...trying next one..." + " Error: " + str(e)
                return_image_name = ''
                absolute_path = ''

        except HTTPError as e:  # If there is any HTTPError
            download_status = 'fail'
            download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e)
            return_image_name = ''
            absolute_path = ''

        except URLError as e:
            download_status = 'fail'
            download_message = "URLError on an image...trying next one..." + " Error: " + str(e)
            return_image_name = ''
            absolute_path = ''

        except ssl.CertificateError as e:
            download_status = 'fail'
            download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e)
            return_image_name = ''
            absolute_path = ''

        except IOError as e:  # If there is any IOError
            download_status = 'fail'
            download_message = "IOError on an image...trying next one..." + " Error: " + str(e)
            return_image_name = ''
            absolute_path = ''

        except IncompleteRead as e:
            download_status = 'fail'
            download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e)
            return_image_name = ''
            absolute_path = ''

        return download_status,download_message,return_image_name,absolute_path