Question

以下是我用于从Google图片下载图片的代码：

def get_soup(url, header):
    return BeautifulSoup(urllib.request.urlopen(urllib.request.Request(url, headers=header)), 'html.parser')


def get_images_for_term(text, width, height, number_of_images=3):
    search_keywords = ''
    collect_valid_urls = []
    query = text
    if not (height and width) == '1':
        keyword_keys = ' high quality background imagesize:' + width + 'x' + height
    else:
        keyword_keys = ' high quality background'
    query = query + keyword_keys
    max_images = number_of_images

    try:
        if not ((height == width) and (height and width) == '1'):
            search_keywords = text + '_' + height + 'by' + width

        else:
            search_keywords = text
        os.makedirs(search_keywords)
    except OSError as e:
        if e.errno != 17:
            raise
        pass

    query = query.split()
    query = '+'.join(query)
    url = "google Url"

    header = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/43.0.2357.134 Safari/537.36"}
    soup = get_soup(url, header)
    ActualImages = []
    for a in soup.find_all("div", {"class": "rg_meta"}):
        link = json.loads(a.text)["ou"]

        ActualImages.append(link)

    Stop_Counter = 0
    for i, img in enumerate(ActualImages):

        try:
            print(img)
            req = urllib.request.Request(img, headers={'User-Agent': header})
            raw_img = urllib.request.urlopen(req, None, 15).read()

            print(raw_img[0])
            if raw_img[0] != '<':
                image = np.asarray(bytearray(raw_img), dtype="uint8")
                image = cv2.imdecode(image, cv2.IMREAD_COLOR)
                try:
                    blur_map, score, blurry = estimate_blur(image)
                except IOError and AttributeError:
                    blurry = False
                if not blurry:
                    collect_valid_urls.append(img)
                    Output_file = open(search_keywords + "/" + str(Stop_Counter + 1) + ".jpg", 'wb')

                    Output_file.write(raw_img)
                    Output_file.close()
                    Stop_Counter += 1
                    if Stop_Counter == max_images:
                        print("Done with downloading the images")
                        break
                else:
                    print("Image is Blurry.")

        except Exception as e:
            print("could not load : " + img)

            print(e)


get_images_for_term('cats', '200', '300')

我收到以下错误：

https://i.pinimg.com/736x/ec/65/86/ec658681dada104797b3f1f49026c7f1--cat-wallpaper-iphone-wallpaper.jpg
could not load : https://i.pinimg.com/736x/ec/65/86/ec658681dada104797b3f1f49026c7f1--cat-wallpaper-iphone-wallpaper.jpg
expected string or bytes-like object

请帮助我改进代码，以免再次给我带来任何错误。

Answer 1

urlopen接受字符串网址作为参数，您应该直接将链接传递给：

raw_img = urllib.request.urlopen(img, timeout=15).read()

或修复请求对象，而不在headers的新词典中声明前一个标头：

req = urllib.request.Request(img, headers=header)
raw_img = urllib.request.urlopen(req, None, 15).read()

下载图像时出现Python3代码错误

1 个答案: