Question

我有一个脚本可以让我将图片上传到谷歌，效果很好。

但几乎没有时间它不再起作用，而我没有改变任何东西我不明白为什么

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import json
import urllib
import sys
import time

# ajout d'un chemin vers geckodriver à la variable d'environnement OS
os.environ["PATH"] += os.pathsep + os.getcwd()

# Configuration
download_path = r"C:\\Users\\stoum\\OneDrive\\Bureau\\scrap_img\\"
# Images
words_to_search = ['elephant']
nb_to_download = [100]
first_image_position = [5]

def main():
    if len(words_to_search) != len(nb_to_download) or len(nb_to_download) != len(first_image_position) :
        raise ValueError('Vous avez peut-être oublié de configurer l\'une des listes (length is different)')
    i= 0
    # Pour chaque mot de la liste, nous téléchargeons le nombre d'images demandé
    while i<len(words_to_search):
        print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
        if nb_to_download[i] > 0:
            search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
        i+=1


def search_and_save(text, number, first_position):
    # Number_of_scrolls * 100 images seront ouvertes dans le navigateur
    number_of_scrolls = int((number + first_position)/ 100 + 1) 
    print("Chercher : "+text+" ; nombre : "+str(number)+"; À partir de la: "+str(first_position)+" ; scrolls : "+str(number_of_scrolls))

    # Create directories to save images
    if not os.path.exists(download_path + text.replace(" ", "_")):
        os.makedirs(download_path + text.replace(" ", "_"))

    # Connect to Google Image
    url = "https://www.google.co.in/search?q="+text+"&source=lnms&tbm=isch"
    driver = webdriver.chrome.webdriver.WebDriver(executable_path='C:\\Users\\stoum\\OneDrive\\Bureau\\chromedriver.exe')
    driver.get(url)
    headers = {}
    headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
    extensions = {"jpg", "jpeg", "png", "gif"}
    
    img_count = 0
    downloaded_img_count = 0
    img_skip = 0
    
    # Prepare Google Page
    for _ in range(number_of_scrolls):
        for __ in range(10):
            # Plusieurs scrolls nécessaires pour afficher les 100 images
            driver.execute_script("window.scrollBy(0, 1000000)")
            time.sleep(0.2)
        # pour charger les 100 images suivantes
        time.sleep(2.5)
        try:
            driver.find_element_by_xpath('//*[@id="islmp"]/div/div[1]/div/div/div[4]/div[2]/input').click()
            time.sleep(2.5)
        except Exception as e:
            print("images trouvées:"+ str(e))
            break

    # Process (Télécharger) images
    imges = driver.find_elements_by_xpath('//*[@class="isv-r PNCib MSM1fd BUooTd"]')
    print("Total images:"+ str(len(imges)) + "\n")
    for img in imges:
        if img_skip < first_position:
            # Sautez les premières images 
            img_skip += 1
        else :
            # Obtenir l'image
            img_count += 1
            img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
            img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
            print("image télécharger "+ str(img_count) + ": "+ img_url)
            try:
                if img_type not in extensions:
                    img_type = "jpg"
                # Téléchargez l'image et enregistrez-la
                req = urllib.request.Request(img_url, headers=headers)
                raw_img = urllib.request.urlopen(req).read()
                f = open(download_path+text.replace(" ", "_")+"/"+str(img_skip+downloaded_img_count)+"."+img_type, "wb")
                f.write(raw_img)
                f.close
                downloaded_img_count += 1
            except Exception as e:
                print("Échec du téléchargement:"+ str(e))
            finally:
                print("")
            if downloaded_img_count >= number:
                break

    print("Total ignoré : "+str(img_skip)+"; Total télécharger : "+ str(downloaded_img_count)+ "/"+ str(img_count))
    driver.quit()

if __name__ == "__main__":
    main()

我收到以下错误

Mots 0 : 100"大象" Chercher : 大象;编号：100； À partir de la: 5 ;卷轴：1 图像trouvées：消息：元素不可交互（会话信息：chrome=89.0.4389.114）

图像总数：100

---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
<ipython-input-12-6cfc7e11e2bb> in <module>
     99 
    100 if __name__ == "__main__":
--> 101     main()

<ipython-input-12-6cfc7e11e2bb> in main()
     25         print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
     26         if nb_to_download[i] > 0:
---> 27             search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
     28         i+=1
     29 

<ipython-input-12-6cfc7e11e2bb> in search_and_save(text, number, first_position)
     75             # Obtenir l'image
     76             img_count += 1
---> 77             img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
     78             img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
     79             print("image télécharger "+ str(img_count) + ": "+ img_url)

c:\users\stoum\appdata\local\programs\python\python36\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    352             parse_int is None and parse_float is None and
    353             parse_constant is None and object_pairs_hook is None and not kw):
--> 354         return _default_decoder.decode(s)
    355     if cls is None:
    356         cls = JSONDecoder

c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in decode(self, s, _w)
    337 
    338         """
--> 339         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    340         end = _w(s, end).end()
    341         if end != len(s):

c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in raw_decode(self, s, idx)
    355             obj, end = self.scan_once(s, idx)
    356         except StopIteration as err:
--> 357             raise JSONDecodeError("Expecting value", s, err.value) from None
    358         return obj, end

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Answer 1

在第 77 行，您的问题是 img.get_attribute('innerHTML') 可以返回空字符串。

您可以像这样重新创建错误：

import json
a = ""
json.loads(a)

有很多方法可以处理错误。一种方法可能是在将 img.get_attribute('innerHTML') 放入 json.loads(....

之前测试它是否不返回空字符串

If 语句可以这样使用：

else:
    # Obtenir l'image
    img_count += 1
    #img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
    ########################   EXAMPLE IF  #####################################
    if img.get_attribute('innerHTML') == "":
        img_url = "NONE FOUND"
    else:
        img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
    #########################   EXAMPLE IF     #################################
    img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
    print("image télécharger "+ str(img_count) + ": "+ img_url)
    try:

Answer 2

我认为它来自 main() 函数

JSONDecodeError                           Traceback (most recent call last)
<ipython-input-3-7b92a9e742ca> in <module>
    103 
    104 if __name__ == "__main__":
--> 105     main()

<ipython-input-3-7b92a9e742ca> in main()
     25         print("Mots "+str(i)+" : "+str(nb_to_download[i])+"\""+words_to_search[i]+"\"")
     26         if nb_to_download[i] > 0:
---> 27             search_and_save(words_to_search[i],nb_to_download[i], first_image_position[i])
     28         i+=1
     29 

<ipython-input-3-7b92a9e742ca> in search_and_save(text, number, first_position)
     79                 img_url = "NONE FOUND"
     80             else:
---> 81                 img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
     82             img_type = json.loads(img.get_attribute('innerHTML'))["ity"]
     83             print("image télécharger "+ str(img_count) + ": "+ img_url)

c:\users\stoum\appdata\local\programs\python\python36\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    352             parse_int is None and parse_float is None and
    353             parse_constant is None and object_pairs_hook is None and not kw):
--> 354         return _default_decoder.decode(s)
    355     if cls is None:
    356         cls = JSONDecoder

c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in decode(self, s, _w)
    337 
    338         """
--> 339         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    340         end = _w(s, end).end()
    341         if end != len(s):

c:\users\stoum\appdata\local\programs\python\python36\lib\json\decoder.py in raw_decode(self, s, idx)
    355             obj, end = self.scan_once(s, idx)
    356         except StopIteration as err:
--> 357             raise JSONDecodeError("Expecting value", s, err.value) from None
    358         return obj, end

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

硒谷歌浏览器：期望值：第 1 行第 1 列（字符 0）

2 个答案: