我有一个脚本可以让我将图片上传到谷歌,效果很好。
但几乎没有时间它不再起作用,而我没有改变任何东西我不明白为什么
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import json
import urllib
import sys
import time
# ajout d'un chemin vers geckodriver à la variable d'environnement OS
os.environ["PATH"] += os.pathsep + os.getcwd()
# Configuration
download_path = r"C:\\Users\\stoum\\OneDrive\\Bureau\\scrap_img\\"
# Images
words_to_search = ['elephant']
nb_to_download = [100]
first_image_position = [5]
def main():
if len(words_to_search) != len(nb_to_download) or len(nb_to_download) != len(first_image_position) :
raise ValueError('Vous avez peut-être oublié de configurer l\'une des listes (length is different)')
i= 0
# Pour chaque mot de la liste, nous téléchargeons le nombre d'images demandé
while i<len(words_to_search):
print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
if nb_to_download[i] > 0:
search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
i+=1
def search_and_save(text, number, first_position):
# Number_of_scrolls * 100 images seront ouvertes dans le navigateur
number_of_scrolls = int((number + first_position)/ 100 + 1)
print("Chercher : "+text+" ; nombre : "+str(number)+"; À partir de la: "+str(first_position)+" ; scrolls : "+str(number_of_scrolls))
# Create directories to save images
if not os.path.exists(download_path + text.replace(" ", "_")):
os.makedirs(download_path + text.replace(" ", "_"))
# Connect to Google Image
url = "https://www.google.co.in/search?q="+text+"&source=lnms&tbm=isch"
driver = webdriver.chrome.webdriver.WebDriver(executable_path='C:\\Users\\stoum\\OneDrive\\Bureau\\chromedriver.exe')
driver.get(url)
headers = {}
headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
extensions = {"jpg", "jpeg", "png", "gif"}
img_count = 0
downloaded_img_count = 0
img_skip = 0
# Prepare Google Page
for _ in range(number_of_scrolls):
for __ in range(10):
# Plusieurs scrolls nécessaires pour afficher les 100 images
driver.execute_script("window.scrollBy(0, 1000000)")
time.sleep(0.2)
# pour charger les 100 images suivantes
time.sleep(2.5)
try:
driver.find_element_by_xpath('//*[@id="islmp"]/div/div[1]/div/div/div[4]/div[2]/input').click()
time.sleep(2.5)
except Exception as e:
print("images trouvées:"+ str(e))
break
# Process (Télécharger) images
imges = driver.find_elements_by_xpath('//*[@class="isv-r PNCib MSM1fd BUooTd"]')
print("Total images:"+ str(len(imges)) + "\n")
for img in imges:
if img_skip < first_position:
# Sautez les premières images
img_skip += 1
else :
# Obtenir l'image
img_count += 1
img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
print("image télécharger "+ str(img_count) + ": "+ img_url)
try:
if img_type not in extensions:
img_type = "jpg"
# Téléchargez l'image et enregistrez-la
req = urllib.request.Request(img_url, headers=headers)
raw_img = urllib.request.urlopen(req).read()
f = open(download_path+text.replace(" ", "_")+"/"+str(img_skip+downloaded_img_count)+"."+img_type, "wb")
f.write(raw_img)
f.close
downloaded_img_count += 1
except Exception as e:
print("Échec du téléchargement:"+ str(e))
finally:
print("")
if downloaded_img_count >= number:
break
print("Total ignoré : "+str(img_skip)+"; Total télécharger : "+ str(downloaded_img_count)+ "/"+ str(img_count))
driver.quit()
if __name__ == "__main__":
main()
我收到以下错误
Mots 0 : 100"大象" Chercher : 大象;编号:100; À partir de la: 5 ;卷轴:1 图像trouvées:消息:元素不可交互 (会话信息:chrome=89.0.4389.114)
图像总数:100
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-12-6cfc7e11e2bb> in <module>
99
100 if __name__ == "__main__":
--> 101 main()
<ipython-input-12-6cfc7e11e2bb> in main()
25 print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
26 if nb_to_download[i] > 0:
---> 27 search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
28 i+=1
29
<ipython-input-12-6cfc7e11e2bb> in search_and_save(text, number, first_position)
75 # Obtenir l'image
76 img_count += 1
---> 77 img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
78 img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
79 print("image télécharger "+ str(img_count) + ": "+ img_url)
c:\users\stoum\appdata\local\programs\python\python36\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
答案 0 :(得分:0)
在第 77 行,您的问题是 img.get_attribute('innerHTML')
可以返回空字符串。
您可以像这样重新创建错误:
import json
a = ""
json.loads(a)
有很多方法可以处理错误。一种方法可能是在将 img.get_attribute('innerHTML')
放入 json.loads(....
If 语句可以这样使用:
else:
# Obtenir l'image
img_count += 1
#img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
######################## EXAMPLE IF #####################################
if img.get_attribute('innerHTML') == "":
img_url = "NONE FOUND"
else:
img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
######################### EXAMPLE IF #################################
img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
print("image télécharger "+ str(img_count) + ": "+ img_url)
try:
答案 1 :(得分:0)
我认为它来自 main() 函数
JSONDecodeError Traceback (most recent call last)
<ipython-input-3-7b92a9e742ca> in <module>
103
104 if __name__ == "__main__":
--> 105 main()
<ipython-input-3-7b92a9e742ca> in main()
25 print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
26 if nb_to_download[i] > 0:
---> 27 search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
28 i+=1
29
<ipython-input-3-7b92a9e742ca> in search_and_save(text, number, first_position)
79 img_url = "NONE FOUND"
80 else:
---> 81 img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
82 img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
83 print("image télécharger "+ str(img_count) + ": "+ img_url)
c:\users\stoum\appdata\local\programs\python\python36\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)