Question

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Start Chrome through a local chromedriver binary and open the subcontractor directory page.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
browser.get('https://www.gradebeam.com/constructionresources/constructioncommunity.aspx?Category=Subs')
# Collect every element that carries both the rpLink and rpExpandable classes.
tablebodies = browser.find_elements_by_css_selector('.rpLink.rpExpandable')

print(len(tablebodies))
for row in tablebodies:
    CompanyName = row.find_element_by_xpath(".//*[@class='rpOut']/span[2]").text
    # Looked up from `browser`, not from `row`: the first .rpSlide on the page,
    # then its .accountInfo descendants. The result is the same for every row.
    rpslide = browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')
    # NOTE(review): the loop variable `list` shadows the builtin and is never used;
    # the lookup below is relative to `row`, so each iteration rebinds the same text.
    for list in rpslide:
        links = row.find_element_by_xpath(".//span[2]").text

    # `links` is bound only inside the loop above. If rpslide is empty on the first
    # row, the name does not exist yet and this line raises NameError; on later rows
    # the value left over from an earlier row is printed instead.
    print(CompanyName , links)

上面的代码会抛出 NameError（名称未定义错误），尽管 links 变量已经在代码中定义了。

如果有人可以提供帮助，我们将不胜感激

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Start Chrome through a local chromedriver binary.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
# Read the input file; each stripped line is later passed to browser.get().
# NOTE(review): '\d' is not a recognised escape, so the backslash is kept literally,
# but a raw string would make that explicit.
with open('E:\demo.txt') as f:
    content = f.readlines()
content = [x.strip() for x in content]
# The CSV is opened in append mode, so the header line is written on every run.
with open('E:\Contractors1.csv', 'a') as f:
    headers = ("Company Name,Contact Person,Address,Event_Link,PhoneNumber,Faxnumber, Website")
    f.write(headers)
    f.write("\n")
    for link in content:
        browser.get(link)

        tablebodies = browser.find_elements_by_css_selector('.contentpaneopen.record-container')

        for row in tablebodies:
            # Each field below is bound only when its lookup succeeds. When the lookup
            # raises, the name keeps the previous row's value, or stays unbound if no
            # earlier lookup ever succeeded.
            try:
                CompanyName= row.find_element_by_xpath(".//div[@class='title-container article-title pos-pagetitle item-pagetitle']/h2[1]/span[1]/a[1]/span[1]").text
            except NoSuchElementException:
                pass
            try:
                ContactPerson = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[1]/div[1]").text
            except NoSuchElementException:
                pass
            try:
                Address1 = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[2]/div[1]").text
                # Collapse a multi-line address onto one line.
                Address = Address1.replace("\n","")
            except NoSuchElementException:
                pass
            try:
                PhoneNumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[4]/div[1]").text
            except NoSuchElementException:
                pass
            try:
                Faxnumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[5]/div[1]").text
            except NoSuchElementException:
                pass
            try:
                # NOTE(review): this XPath ends with a trailing '/', which looks malformed.
                # If the lookup raises on every row, `url` is never bound and the print
                # below fails with NameError -- verify the expression.
                url = row.find_element_by_xpath(".//*[@class='element element-sfobject element-last row2 elRow2']/div[1]/a[2]/").get_attribute('href')
            except NoSuchElementException:
                pass
            # Commas inside the text fields are replaced with '|' so they do not split columns.
            # NOTE(review): the row is printed, not written to `f`, and it has six fields
            # while the header names seven (there is no Event_Link value).
            print(CompanyName.replace(",","|") + "," + ContactPerson.replace(",","|") + "," + Address.replace(',',"|") + "," + PhoneNumber + "," + Faxnumber +  "," + url + "\n")

这是另一段类似的代码：其中其他字段都可以正常打印，但 url 却抛出了同样的 NameError。

Answer 1

在您的代码中-

for row in tablebodies:
    CompanyName = row.find_element_by_xpath(".//*[@class='rpOut']/span[2]").text
    rpslide = browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')
    # `links` is assigned only inside this loop body, so it is bound only
    # when rpslide contains at least one element.
    for list in rpslide:
        links = row.find_element_by_xpath(".//span[2]").text
    # Raises NameError when `links` has never been bound by the loop above.
    print(CompanyName , links)

您是在 for list in rpslide 循环内部定义 links 的。但是，如果 rpslide 为空，循环体一次也不会执行，links 就根本不会被定义。

在 for 循环之前先给它一个初始值，例如 links = ''，这个错误就会消失。

for row in tablebodies:
    ...
    # Bind a default before the loop so the name exists even when rpslide is
    # empty, and so a value from an earlier row cannot leak into this one.
    links = ''
    # The loop variable is never used; `_` avoids shadowing the builtin `list`.
    for _ in rpslide:
        links = row.find_element_by_xpath(".//span[2]").text
    print(CompanyName , links)

此外，如果links打印为空，则需要弄清楚为什么rpslide为空，可能是-

browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')

找不到任何东西

Answer 2

第二段代码之所以报错，是因为 url 取到的值为空，而这种情况并不会触发异常；下面的代码应该可以解决这个问题：

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Visit every URL listed in E:\demo.txt and print one comma-separated line
# per record container found on the page.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
# Raw strings: '\d' and '\C' are not valid escape sequences, so the plain
# literals only worked by accident and trigger a warning on newer Pythons.
with open(r'E:\demo.txt') as f:
    content = f.readlines()
content = [x.strip() for x in content]
with open(r'E:\Contractors1.csv', 'a') as f:
    headers = ("Company Name,Contact Person,Address,Event_Link,PhoneNumber,Faxnumber, Website")
    f.write(headers)
    f.write("\n")
    for link in content:
        browser.get(link)

        tablebodies = browser.find_elements_by_css_selector('.contentpaneopen.record-container')

        for row in tablebodies:
            # Reset every field for each row. Without this, a failed lookup leaves
            # the name unbound (NameError at the print below) or silently reuses
            # the value scraped from the previous row.
            CompanyName = ContactPerson = Address = PhoneNumber = Faxnumber = ""
            url = "url returned empty"
            try:
                CompanyName= row.find_element_by_xpath(".//div[@class='title-container article-title pos-pagetitle item-pagetitle']/h2[1]/span[1]/a[1]/span[1]").text
            except NoSuchElementException:
                pass  # keep the empty default
            try:
                ContactPerson = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[1]/div[1]").text
            except NoSuchElementException:
                pass  # keep the empty default
            try:
                Address1 = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[2]/div[1]").text
                # Collapse a multi-line address onto one line.
                Address = Address1.replace("\n","")
            except NoSuchElementException:
                pass  # keep the empty default
            try:
                PhoneNumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[4]/div[1]").text
            except NoSuchElementException:
                pass  # keep the empty default
            try:
                Faxnumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[5]/div[1]").text
            except NoSuchElementException:
                pass  # keep the empty default
            try:
                # The XPath must not end with '/': a trailing slash makes the
                # expression invalid, so the lookup could never succeed.
                url = row.find_element_by_xpath(".//*[@class='element element-sfobject element-last row2 elRow2']/div[1]/a[2]").get_attribute('href')
                if url:
                    pass  # a usable href was found; keep it
                else:
                    # get_attribute returns None when the attribute is missing;
                    # fall back to text so the concatenation below cannot fail.
                    url="url returned empty"
            except NoSuchElementException:
                pass  # keep the placeholder assigned at the top of the loop
            # Commas inside the text fields are replaced with '|' so they do not split columns.
            # NOTE(review): the row is printed rather than written to `f`, and the
            # header names an Event_Link column that has no value here -- confirm
            # the intended output before relying on the CSV.
            print(CompanyName.replace(",","|") + "," + ContactPerson.replace(",","|") + "," + Address.replace(',',"|") + "," + PhoneNumber + "," + Faxnumber +  "," + url + "\n")

我所做的更改：

在最后一次尝试中，有一个if条件检查url是否为空，所以：

if url:

如果 url 的值不为空，条件为真；否则跳转到 else 分支，并把 url 设为一个提示字符串。

Python 对已经定义的变量报 NameError（名称错误）

2 个答案: