以下代码成功下载了图片,但没有创建相应的文件夹,我不确定如何遍历所有搜索地址。现在它只获取一个地址的图像,而我有100多个地址。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import json
import os
import urllib3
import argparse
import urllib.request
# Download Google Images thumbnail results for one search term into a
# dedicated folder named after the term.
print("define program variables and open google images...")

searchterm = '1415 Bush St'  # will also be the name of the folder
url = "https://www.google.co.in/search?q=" + searchterm + "&source=lnms&tbm=isch"

# NEED TO DOWNLOAD CHROMEDRIVER, insert path to chromedriver inside parentheses in following line
browser = webdriver.Chrome(r'C:\Users\renu.sharma\OneDrive - CBRE, Inc-\Documents\chromedriver.exe')
browser.get(url)

counter = 0     # total <img> elements inspected
succounter = 0  # images actually written to disk

# BUG FIX: create the destination folder before downloading --
# urllib.request.urlretrieve does not create missing directories, so every
# download failed when the folder did not already exist.  The per-term
# subfolder also fulfils the "will also be the name of the folder" intent.
download_dir = os.path.join(r'C:/Users/renu.sharma/Images', searchterm)
os.makedirs(download_dir, exist_ok=True)

print("start scraping ...")
for x in browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]'):
    counter = counter + 1
    img = x.get_attribute('src')
    print("Total Count:", counter)
    print("Succsessful Count:", succounter)
    print("URL:", img)
    # Lazily-loaded thumbnails have no src yet; urlretrieve(None, ...) would
    # just raise, so skip them explicitly instead of relying on the except.
    if img is None:
        continue
    new_filename = searchterm + str(counter) + ".jpg"
    try:
        # os.path.join instead of manual string concatenation of the path.
        urllib.request.urlretrieve(img, os.path.join(download_dir, new_filename))
        succounter += 1
    except Exception as e:
        # Best-effort download: report the failure and keep scraping.
        print(e)

print(succounter, "pictures succesfully downloaded")
browser.close()
答案 0(得分:0)
感谢大家的帮助!
我已经解决了这个问题, 下面的代码完全可以满足所有要求
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import urllib.request
# Download Google Images thumbnail results for each search term in the list,
# saving each term's images into its own folder named after the term.
print("define program variables and open google images...")

searchterm = ['3500 Boston St','1415 Bush St','1811 POrtal St'] # will also be the name of the folder

for i in searchterm:
    url = "https://www.google.co.in/search?q=" + i + "&source=lnms&tbm=isch"
    # NEED TO DOWNLOAD CHROMEDRIVER, insert path to chromedriver inside parentheses in following line
    browser = webdriver.Chrome(r'C:\Users\renu.sharma\OneDrive - CBRE, Inc-\Documents\chromedriver.exe')
    browser.get(url)

    counter = 0     # total <img> elements inspected for this term
    succounter = 0  # images actually written to disk for this term

    # Create the per-term folder up front -- urlretrieve does not create
    # missing directories.
    if not os.path.exists(i):
        os.mkdir(i)

    print("start scraping ...")
    for x in browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]'):
        counter = counter + 1
        img = x.get_attribute('src')
        print("Total Count:", counter)
        print("Succsessful Count:", succounter)
        print("URL:", img)
        # Lazily-loaded thumbnails have no src yet; skip them instead of
        # letting urlretrieve(None, ...) raise into the except clause.
        if img is None:
            continue
        new_filename = i + " " + str(counter) + ".jpg"
        try:
            # BUG FIX: the original built
            #   os.path.join(i, i + "_" + str(counter)) + new_filename
            # which yields mangled names like
            # "3500 Boston St/3500 Boston St_13500 Boston St 1.jpg".
            # Join the folder and the filename exactly once.
            path = os.path.join(i, new_filename)
            urllib.request.urlretrieve(img, path)
            succounter += 1
        except Exception as e:
            # Best-effort download: report the failure and keep scraping.
            print(e)

    print(succounter, "pictures succesfully downloaded")
    # BUG FIX: close this term's browser inside the loop -- the original
    # launched a fresh Chrome per term but only closed the last one, leaking
    # one browser process per search term.
    browser.close()