从Google下载并保存图片

时间:2020-07-27 04:36:05

标签: python python-3.x web-scraping

以下代码成功下载了图片,但没有创建相应的文件夹,我不确定如何遍历所有搜索地址。现在它只获取一个地址的图像,而我有100多个地址。

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import json
import os
import urllib3
import argparse
import urllib.request

# Download the Google Images thumbnails shown for one search term and save
# them into a folder named after that term.
import urllib.parse  # needed only by this script: URL-encodes the search term

print("define program variables and open google images...")
searchterm = '1415 Bush St' # will also be the name of the folder
# Encode the term so spaces/special characters produce a valid query string.
url = "https://www.google.co.in/search?q="+urllib.parse.quote_plus(searchterm)+"&source=lnms&tbm=isch"

# Create the per-search folder before downloading; urlretrieve does not
# create missing directories, which is why no folder appeared before.
folder = os.path.join(r'C:/Users/renu.sharma/Images/', searchterm)
os.makedirs(folder, exist_ok=True)

# NEED TO DOWNLOAD CHROMEDRIVER, insert path to chromedriver inside parentheses in following line
browser = webdriver.Chrome(r'C:\Users\renu.sharma\OneDrive - CBRE, Inc-\Documents\chromedriver.exe')

counter = 0      # thumbnails seen on the page
succounter = 0   # thumbnails actually written to disk

try:
    browser.get(url)

    # NOTE: only the thumbnails present on the initially loaded page are
    # scraped. To collect more, scroll first, e.g. repeatedly call
    # browser.execute_script("window.scrollBy(0,10000)").

    print("start scraping ...")
    # NOTE(review): find_elements_by_xpath is the Selenium 3 API, kept to match
    # the positional-path webdriver.Chrome(...) call above.
    for x in browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]'):
        counter += 1
        img = x.get_attribute('src')
        print("Total Count:", counter)
        print("Succsessful Count:", succounter)
        print("URL:", img)

        if not img:
            # No src on this element (presumably a thumbnail that has not
            # loaded yet), so there is nothing to download.
            continue

        path = os.path.join(folder, searchterm+str(counter)+".jpg")
        try:
            urllib.request.urlretrieve(img, path)
            succounter += 1
        except Exception as e:
            # Best effort: report the failed image and continue with the next.
            print(e)
finally:
    # quit() ends the whole driver session even if scraping raised.
    browser.quit()

print(succounter, "pictures succesfully downloaded")

1 个答案:

答案 0 :(得分:0)

感谢大家的帮助!

我已经解决了这个问题,下面的代码可以满足所有要求:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import urllib.request

# For every search term, download the Google Images thumbnails shown for it
# into a folder (relative to the working directory) named after that term.
import urllib.parse  # needed only by this script: URL-encodes each search term

print("define program variables and open google images...")
searchterm = ['3500 Boston St','1415 Bush St','1811 POrtal St'] # will also be the name of the folder

# NEED TO DOWNLOAD CHROMEDRIVER, insert path to chromedriver inside parentheses in following line
# One browser is shared by all searches; starting a new one per term left
# every window except the last one open.
browser = webdriver.Chrome(r'C:\Users\renu.sharma\OneDrive - CBRE, Inc-\Documents\chromedriver.exe')

try:
    for i in searchterm:
        # Encode the term so spaces/special characters produce a valid query string.
        url = "https://www.google.co.in/search?q="+urllib.parse.quote_plus(i)+"&source=lnms&tbm=isch"
        browser.get(url)

        counter = 0      # thumbnails seen for this term
        succounter = 0   # thumbnails actually written to disk for this term
        os.makedirs(i, exist_ok=True)

        # NOTE: only the thumbnails present on the initially loaded page are
        # scraped. To collect more, scroll first, e.g. repeatedly call
        # browser.execute_script("window.scrollBy(0,10000)").

        print("start scraping ...")
        # NOTE(review): find_elements_by_xpath is the Selenium 3 API, kept to
        # match the positional-path webdriver.Chrome(...) call above.
        for x in browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]'):
            counter += 1
            img = x.get_attribute('src')
            print("Total Count:", counter)
            print("Succsessful Count:", succounter)
            print("URL:", img)

            if not img:
                # No src on this element (presumably a thumbnail that has not
                # loaded yet), so there is nothing to download.
                continue

            new_filename = i+" " +str(counter)+".jpg"
            # Save as <term>/<term> <n>.jpg. The old code joined the folder
            # with "<term>_<n>" and then appended the filename again.
            path = os.path.join(i, new_filename)
            try:
                urllib.request.urlretrieve(img, path)
                succounter += 1
            except Exception as e:
                # Best effort: report the failed image and continue with the next.
                print(e)

        # Report per term; a single print after the loop only covered the last one.
        print(succounter, "pictures succesfully downloaded for", i)
finally:
    # quit() ends the whole driver session even if scraping raised.
    browser.quit()