Selenium WebDriver因[Errno 10054]错误而停止运行

时间:2014-01-30 00:09:32

标签: python firefox selenium selenium-webdriver

我正在尝试运行一个Python 2.7.0程序,该程序使用Selenium 2.37.2启动Firefox 26.0浏览器,并向Google n-gram站点提交查询(全部在我的Windows 8计算机上运行)。程序能够正常处理输入文件中的前十个条目,随后便中止并输出以下回溯信息:

Traceback (most recent call last):
  File "C:\Python27\lib\lib-tk\Tkinter.py", line 1410, in __call__
    return self.func(*args)
  File "C:\Users\Douglas\Desktop\n-grams\n_gram_api.py", line 43, in query_n_gra
ms
    driver.get("https://books.google.com/ngrams")
  File "C:\Python27\lib\site-packages\selenium-2.37.2-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 176, in get
    self.execute(Command.GET, {'url': url})
  File "C:\Python27\lib\site-packages\selenium-2.37.2-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 162, in execute
    response = self.command_executor.execute(driver_command, params)
  File "C:\Python27\lib\site-packages\selenium-2.37.2-py2.7.egg\selenium\webdriv
er\remote\remote_connection.py", line 355, in execute
    return self._request(url, method=command_info[0], data=data)
  File "C:\Python27\lib\site-packages\selenium-2.37.2-py2.7.egg\selenium\webdriv
er\remote\remote_connection.py", line 402, in _request
    response = opener.open(request)
  File "C:\Python27\lib\urllib2.py", line 391, in open
    response = self._open(req, data)
  File "C:\Python27\lib\urllib2.py", line 409, in _open
    '_open', req)
  File "C:\Python27\lib\urllib2.py", line 369, in _call_chain
    result = func(*args)
  File "C:\Python27\lib\urllib2.py", line 1173, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "C:\Python27\lib\urllib2.py", line 1148, in do_open
    raise URLError(err)
URLError: <urlopen error [Errno 10054] An existing connection was forcibly close
d by the remote host>

我找到了许多讨论这条错误信息的网站,但仍然弄不清楚为什么我的程序在for循环迭代十次之后就停止了。下面是我正在运行的代码(抱歉代码有点长,我没有删减,以防问题的根源隐藏在GUI部分):

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from Tkinter import *
import Tkinter as tk
from tkFileDialog import askopenfilename
import time

#open the results file and write its tab-separated header row
out = open("n_grams_outfile.txt", "w")
out.write("\t".join(["search string", "pub year", "frequency"]) + "\n")

#module-level holder for the path the user picks, so other callbacks can read it
user_defined_filepath = {}
def selectfile():
    """Ask the user for a .txt input file and remember the chosen path.

    The value returned by the file dialog is stored under the 'filename'
    key of the module-level user_defined_filepath dict.
    """
    chosen_path = askopenfilename(filetypes=[("Text","*.txt")])
    user_defined_filepath['filename'] = chosen_path

#helpers used by query_n_grams, the callback run when the start button is pressed
def _read_query_rows(path):
    """Return (search_terms, pub_year) string pairs from a tab-separated file.

    The left-hand column is the search string and the right-hand column is
    the publication year. Rows without at least two tab-separated columns
    (for example the empty string left after a trailing newline) are
    skipped instead of raising IndexError.
    """
    #use a context manager so the input handle is always closed
    with open(path) as infile:
        lines = infile.read().split("\n")

    rows = []
    for line in lines:
        columns = line.split("\t")
        if len(columns) < 2:
            continue
        #strip stray whitespace (e.g. a "\r" from CRLF files) from the year
        rows.append((columns[0], columns[1].strip()))
    return rows


def _mean_timeseries_percent(html):
    """Return the mean of the first "timeseries" list found in html.

    Raises IndexError when html has no '"timeseries": [' marker, which is
    what happens for a query with no entries in the database at all.
    """
    #we want the list that comes right after "timeseries" and before the "]"
    series_text = html.split('"timeseries": [')[1].split("]")[0]

    #float() also converts values written in scientific notation (with e)
    percents = [float(item) for item in series_text.split(",")]
    return sum(percents) / float(len(percents))


def query_n_grams(event = "<Button>"):
    """Query the Google n-gram viewer for every row of the selected file.

    For each (search string, publication year) row the n-gram form is
    submitted for the range year-5 .. year+5, and one line
    "<search string>\\t<pub year>\\t<mean frequency>" is appended to the
    module-level out file. A query with no "timeseries" data is recorded
    with a frequency of 0.0.

    event: unused; kept so the function can serve as a Tk callback.

    Returns None. Does nothing when no input file has been selected.
    """
    inputfile = user_defined_filepath.get('filename')
    if not inputfile:
        #no file chosen yet (or the dialog was cancelled): nothing to query
        return

    #the browser is only started once there is a first row to look up
    driver = None
    try:
        for search_terms, actual_pub_year in _read_query_rows(inputfile):
            pub_year_minus_five = int(actual_pub_year) - 5
            pub_year_plus_five = int(actual_pub_year) + 5

            #you now have terms and pub year. Fire up webdriver on first pass
            if driver is None:
                driver = webdriver.Firefox()
                #the implicit wait is a session-wide setting, so set it once
                driver.implicitly_wait(10)

            driver.get("https://books.google.com/ngrams")

            #send keys
            keyword = driver.find_element_by_class_name("query")
            keyword.clear()
            keyword.send_keys(str(search_terms))

            #find start year
            start_year = driver.find_element_by_name("year_start")
            start_year.clear()
            start_year.send_keys(str(pub_year_minus_five))

            #find end year
            end_year = driver.find_element_by_name("year_end")
            end_year.clear()
            end_year.send_keys(str(pub_year_plus_five))

            #click enter
            submit_button = driver.find_element_by_class_name("kd_submit")
            submit_button.click()

            #grab html
            html = driver.page_source

            #a search with no entries in the database has no timeseries to
            #split on, so record it as 0.0
            try:
                mean_percent = _mean_timeseries_percent(html)
            except IndexError:
                mean_percent = "0.0"

            out.write(str(search_terms) + "\t" + str(actual_pub_year) + "\t" + str(mean_percent) + "\n")
            #flush per row so finished results survive a later crash
            out.flush()

            #pause between queries
            time.sleep(6)
    finally:
        #always shut the browser down, even when a query raises
        if driver is not None:
            driver.quit()

#build the Tk root window and the canvas that hosts every widget
root = tk.Tk()
canvas = tk.Canvas(root, width=157, height=100)
canvas.pack()

#title button; it is given an empty command string
ngram_label = tk.Button(
    root,
    text="Google N-Gram API",
    command="",
    anchor='w',
    width=14,
    activebackground="#33B5E5"
)
ngram_label_canvas = canvas.create_window(
    20, 20, anchor='nw', width=119, window=ngram_label
)

#button that lets the user pick the input file for analysis
file_label = tk.Button(
    root,
    text="Input file",
    command=selectfile,
    anchor='w',
    width=7,
    activebackground="#33B5E5"
)
file_label_canvas = canvas.create_window(
    20, 60, anchor='nw', window=file_label
)

#start button that runs query_n_grams on the selected file
start_label = tk.Button(
    root,
    text="Go!",
    command=query_n_grams,
    anchor='w',
    width=3,
    activebackground="#33B5E5"
)
start_label_canvas = canvas.create_window(
    107, 60, anchor='nw', window=start_label
)

#hand control to Tk's event loop
root.mainloop()

有谁知道为什么这个脚本会生成我上面发布的错误消息?我非常感谢其他人可以就此问题提出的任何建议。

2 个答案:

答案 0 :(得分:2)

我遇到了同样的问题......问题出在新的Firefox更新(从46到47)这是一个很大的错误:)

无论如何我确实以这种方式修复了这个问题..

下载并安装Firefox 46:从47.0降级到46.0

你可以从这个链接做到: 如果您有32位:Click here |或者如果你有64位:Click here

  

注意:在安装旧版本之前,您需要先卸载当前的Firefox。:)这一点非常重要。

你现在已经准备就绪了。:)

如果上述下载链接有任何问题,您也可以通过此链接自行找到所需的版本。

玩得开心。

答案 1 :(得分:0)

我调用了Firefox 23而不是26,这解决了这个问题。

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from Tkinter import *
import Tkinter as tk
from tkFileDialog import askopenfilename
import time
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

#open the results file and write its tab-separated header row
out = open("n_grams_outfile.txt", "w")
out.write("\t".join(["search string", "pub year", "frequency"]) + "\n")

#module-level holder for the path the user picks, so other callbacks can read it
user_defined_filepath = {}
def selectfile():
    """Ask the user for a .txt input file and remember the chosen path.

    The value returned by the file dialog is stored under the 'filename'
    key of the module-level user_defined_filepath dict.
    """
    chosen_path = askopenfilename(filetypes=[("Text","*.txt")])
    user_defined_filepath['filename'] = chosen_path

#helpers used by query_n_grams, the callback run when the start button is pressed
def _read_query_rows(path):
    """Return (search_terms, pub_year) string pairs from a tab-separated file.

    The left-hand column is the search string and the right-hand column is
    the publication year. Rows without at least two tab-separated columns
    (for example the empty string left after a trailing newline) are
    skipped, so a blank line can no longer cause the previous row's values
    to be queried a second time.
    """
    #use a context manager so the input handle is always closed
    with open(path) as infile:
        lines = infile.read().split("\n")

    rows = []
    for line in lines:
        columns = line.split("\t")
        if len(columns) < 2:
            continue
        #strip stray whitespace (e.g. a "\r" from CRLF files) from the year
        rows.append((columns[0], columns[1].strip()))
    return rows


def _mean_timeseries_percent(html):
    """Return the mean of the first "timeseries" list found in html.

    Raises IndexError when html has no '"timeseries": [' marker, which is
    what happens for a query with no entries in the database at all.
    """
    #we want the list that comes right after "timeseries" and before the "]"
    series_text = html.split('"timeseries": [')[1].split("]")[0]

    #float() also converts values written in scientific notation (with e)
    percents = [float(item) for item in series_text.split(",")]
    return sum(percents) / float(len(percents))


def query_n_grams(event = "<Button>"):
    """Query the Google n-gram viewer for every row of the selected file.

    For each (search string, publication year) row the n-gram form is
    submitted for the range year-5 .. year+5, and one line
    "<search string>\\t<pub year>\\t<mean frequency>" is appended to the
    module-level out file. A query with no "timeseries" data is recorded
    with a frequency of 0.0. The browser is a portable Firefox 23 binary
    at a fixed local path.

    event: unused; kept so the function can serve as a Tk callback.

    Returns None. Does nothing when no input file has been selected.
    """
    inputfile = user_defined_filepath.get('filename')
    if not inputfile:
        #no file chosen yet (or the dialog was cancelled): nothing to query
        return

    #raw string so the backslashes of the Windows path are never read as escapes
    firefox_path = r'C:\Text\Professional\Digital Humanities\Programming Languages\Python\Query Literature Online\LION 3.0\Firefox Versions\Firefox23\FirefoxPortable.exe'

    #the browser is only started once there is a first row to look up
    driver = None
    try:
        for search_terms, actual_pub_year in _read_query_rows(inputfile):
            pub_year_minus_five = int(actual_pub_year) - 5
            pub_year_plus_five = int(actual_pub_year) + 5

            #you now have terms and pub year. Fire up webdriver on first pass
            if driver is None:
                binary = FirefoxBinary(firefox_path)
                driver = webdriver.Firefox(firefox_binary=binary)
                #the implicit wait is a session-wide setting, so set it once
                driver.implicitly_wait(10)

            driver.get("https://books.google.com/ngrams")
            driver.refresh()

            #send keys
            keyword = driver.find_element_by_class_name("query")
            keyword.clear()
            keyword.send_keys(str(search_terms))

            #find start year
            start_year = driver.find_element_by_name("year_start")
            start_year.clear()
            start_year.send_keys(str(pub_year_minus_five))

            #find end year
            end_year = driver.find_element_by_name("year_end")
            end_year.clear()
            end_year.send_keys(str(pub_year_plus_five))

            #click enter
            submit_button = driver.find_element_by_class_name("kd_submit")
            submit_button.click()

            #grab html
            html = driver.page_source

            #a search with no entries in the database has no timeseries to
            #split on, so record it as 0.0
            try:
                mean_percent = _mean_timeseries_percent(html)
            except IndexError:
                mean_percent = "0.0"

            out.write(str(search_terms) + "\t" + str(actual_pub_year) + "\t" + str(mean_percent) + "\n")
            #flush per row so finished results survive a later crash
            out.flush()
    finally:
        #always shut the browser down, even when a query raises
        if driver is not None:
            driver.quit()

#build the Tk root window and the canvas that hosts every widget
root = tk.Tk()
canvas = tk.Canvas(root, width=157, height=100)
canvas.pack()

#title button; it is given an empty command string
ngram_label = tk.Button(
    root,
    text="Google N-Gram API",
    command="",
    anchor='w',
    width=14,
    activebackground="#33B5E5"
)
ngram_label_canvas = canvas.create_window(
    20, 20, anchor='nw', width=119, window=ngram_label
)

#button that lets the user pick the input file for analysis
file_label = tk.Button(
    root,
    text="Input file",
    command=selectfile,
    anchor='w',
    width=7,
    activebackground="#33B5E5"
)
file_label_canvas = canvas.create_window(
    20, 60, anchor='nw', window=file_label
)

#start button that runs query_n_grams on the selected file
start_label = tk.Button(
    root,
    text="Go!",
    command=query_n_grams,
    anchor='w',
    width=3,
    activebackground="#33B5E5"
)
start_label_canvas = canvas.create_window(
    107, 60, anchor='nw', window=start_label
)

#hand control to Tk's event loop
root.mainloop()