Question

我已使用Python在Selenium中创建了一个Web抓取工具。我正在抓取的网站请删除＃

（要访问网站，请选择启动子，在部门中选择konkan，在地区中选择孟买市，然后选择项目，您将看到项目列表）

这是针对1个区和1个分区的。

我的抓取工具所做的事情是遍历每个地区和每个部门，选择项目并获取这些项目的详细信息。

但是如今，它仅废弃1个项目名称并停止它不会遍历项目列表，这是我收到的错误：

Error:
Traceback (most recent call last):
 File "C:\Users\prince.bhatia\Desktop\Version\Maha Rera.py", line 64, in <module>
   while len(selectProject.options) == 1:
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\support\select.py", line 47, in options
   return self._el.find_elements(By.TAG_NAME, 'option')
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 527, in find_elements
   {"using": by, "value": value})['value']
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 493, in _execute
   return self._parent.execute(command, params)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 254, in execute
   response = self.command_executor.execute(driver_command, params)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 464, in execute
   return self._request(command_info[0], url, body=data)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 487, in _request
   self._conn.request(method, parsed_url.path, body, headers)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1239, in request
   self._send_request(method, url, body, headers, encode_chunked)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1285, in _send_request
   self.endheaders(body, encode_chunked=encode_chunked)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1234, in endheaders
   self._send_output(message_body, encode_chunked=encode_chunked)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1026, in _send_output
   self.send(msg)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 964, in send
   self.connect()
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 936, in connect
   (self.host,self.port), self.timeout, self.source_address)
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\socket.py", line 722, in create_connection
   raise err
 File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\socket.py", line 713, in create_connection
   sock.connect(sa)
OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException


import os
import time
import csv

driver = webdriver.Chrome("./chromedriver")

driver.get('https://maharerait.mahaonline.gov.in/searchlist/searchlist')

# try:
#     element = WebDriverWait(driver, 100).until(
#         EC.presence_of_element_located((By.ID, "Promoter"))
#     )
# finally:
#     print('0000000000000000000000')
#     driver.quit()

time.sleep(1)

driver.find_element_by_id('Promoter').click()

divisionLength = len(Select(driver.find_element_by_id('Division')).options)
print('*********{}'.format(divisionLength))

firstRow = 0
titleRow = []
contentRows = []

for divisionElement in range(1,divisionLength):
    selectDivision = Select(driver.find_element_by_id('Division'))
    selectDivision.options
    selectDivision.select_by_index(divisionElement)
    time.sleep(1)
    districtLength = len(Select(driver.find_element_by_id('District')).options)
    while districtLength == 1:
        pass
    print(districtLength)
    for districtElement in range(1,districtLength):
        selectDistrict = Select(driver.find_element_by_id('District'))
        selectDistrict.options

        selectDistrict.select_by_index(districtElement)

        time.sleep(2)
        projectLength = len(Select(driver.find_element_by_id('Project')).options)

        print('/------------------------------/')
        print('/-----project number: {}-------/'.format(projectLength))
        print('/------------------------------/')
        if projectLength == 1:
            continue

        for projectElement in range(1,projectLength):
            selectProject = Select(driver.find_element_by_id('Project'))
            selectProject.options

            while len(selectProject.options) == 1:
                pass
            # c = len(select.options)
            # print('---------------{}'.format(c))

            # titleRow = []
            # contentRows = []
            # firstRow = 0

            # for i in range(1,c):
            #     select = Select(driver.find_element_by_id('Project'))
            #     while len(select.options) == 1:
            #         pass
            time.sleep(1)
            selectProject.select_by_index(projectElement)

            driver.find_element_by_id('btnSearch').click()
            tableRows = driver.find_element_by_class_name('table').find_elements_by_tag_name('tr')

            if firstRow == 0:
                headRow = tableRows[0].find_elements_by_tag_name('th')
                for headRowData in range(0,len(headRow)):
                    text = headRow[headRowData].find_element_by_tag_name('span').text
                    titleRow.append(text)
                firstRow = firstRow + 1

            for dataRowsNumbers in range(1,len(tableRows)):
                dataRow = tableRows[dataRowsNumbers].find_elements_by_tag_name('td')
                tempList = []
                for dataRowContents in range(0,len(dataRow)):
                    try:
                        a_link = dataRow[dataRowContents].find_element_by_tag_name('a').get_attribute('href')
                        tempList.append(str(a_link))
                    except NoSuchElementException:

                        tempList.append(str(dataRow[dataRowContents].text))
                    # if dataRow[dataRowContents].text == 'View':
                    #     a_link = dataRow[dataRowContents].find_element_by_tag_name('a').get_attribute('href')
                    #     tempList.append(str(a_link))
                    # else:
                    #     tempList.append(str(dataRow[dataRowContents].text))
                    print(dataRow[dataRowContents].text)
                print(tempList)
                contentRows.append(tempList)
# print('Automated check is over')
# print('Stored data in programs is as below:')
# print(contentRows)
with open("./data.csv",'w') as csvfile:
    csvfile = csv.writer(csvfile, delimiter=',')
    csvfile.writerow(titleRow)
    csvfile.writerow("")
    for i in range(0,len(contentRows)):
        csvfile.writerow(contentRows[i])

driver.close()

有人可以告诉我我应该怎么做才能使其运行吗？

我正在使用Python 3.6。它应该遍历每个地区和每个部门，并刮擦每个项目名称的详细信息。

硒铬驱动程序不返回解决方案

0 个答案: