from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Question script 1: list each company on the GradeBeam subcontractor page
# together with the text found for its account entries.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
browser.get('https://www.gradebeam.com/constructionresources/constructioncommunity.aspx?Category=Subs')

# Every element carrying both CSS classes is treated as one company row.
tablebodies = browser.find_elements_by_css_selector('.rpLink.rpExpandable')
print(len(tablebodies))
for row in tablebodies:
    CompanyName = row.find_element_by_xpath(".//*[@class='rpOut']/span[2]").text
    # Searched from the page root (`browser`), not from `row`, so the first
    # 'rpSlide' element of the page is inspected on every iteration.
    rpslide = browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')
    for list in rpslide:
        # NOTE(review): the lookup uses `row` rather than the loop variable
        # `list` (which also shadows the builtin) -- confirm this is intended.
        links = row.find_element_by_xpath(".//span[2]").text
    # `links` is only bound inside the loop above. If `rpslide` is empty on
    # the first row, the name was never assigned and this line raises the
    # NameError the question is about.
    print(CompanyName , links)
上面的代码抛出了 NameError(名称未定义),尽管 links 已经被定义了。
如果有人能提供帮助,我将不胜感激。
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Question script 2: visit every URL listed in demo.txt and print one
# comma-separated record per contractor container found on the page.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)

# One URL per line; surrounding whitespace and newlines are stripped.
with open('E:\demo.txt') as f:
    content = f.readlines()
content = [x.strip() for x in content]

# Only the header line is written to the CSV; the records are printed below.
with open('E:\Contractors1.csv', 'a') as f:
    headers = ("Company Name,Contact Person,Address,Event_Link,PhoneNumber,Faxnumber, Website")
    f.write(headers)
    f.write("\n")

for link in content:
    browser.get(link)
    tablebodies = browser.find_elements_by_css_selector('.contentpaneopen.record-container')
    for row in tablebodies:
        # Each field is looked up on its own so that one missing element does
        # not abort the row. A failed lookup leaves its variable untouched:
        # unbound on the first row, or holding the previous row's value later.
        try:
            CompanyName= row.find_element_by_xpath(".//div[@class='title-container article-title pos-pagetitle item-pagetitle']/h2[1]/span[1]/a[1]/span[1]").text
        except NoSuchElementException:
            pass
        try:
            ContactPerson = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[1]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            Address1 = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[2]/div[1]").text
            # Collapse the multi-line address onto a single CSV line.
            Address = Address1.replace("\n","")
        except NoSuchElementException:
            pass
        try:
            PhoneNumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[4]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            Faxnumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[5]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            # NOTE(review): this XPath ends with a trailing '/', which is not
            # a complete expression. Presumably Selenium 3 rejects it with
            # InvalidSelectorException (a NoSuchElementException subclass
            # there), so the error is swallowed and `url` is never bound --
            # verify against the installed Selenium version.
            url = row.find_element_by_xpath(".//*[@class='element element-sfobject element-last row2 elRow2']/div[1]/a[2]/").get_attribute('href')
        except NoSuchElementException:
            pass
        # Commas inside the text fields are replaced with '|' so they do not
        # split CSV columns. This is the line that raises NameError when
        # `url` was never assigned above.
        print(CompanyName.replace(",","|") + "," + ContactPerson.replace(",","|") + "," + Address.replace(',',"|") + "," + PhoneNumber + "," + Faxnumber + "," + url + "\n")
这是另一段类似的代码:其中其他字段都能正常打印,但 url 却抛出了同样的 NameError。
答案 0 :(得分:2)
在您的代码中-
for row in tablebodies:
    CompanyName = row.find_element_by_xpath(".//*[@class='rpOut']/span[2]").text
    rpslide = browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')
    for list in rpslide:
        # The only place `links` is assigned; skipped when `rpslide` is empty.
        links = row.find_element_by_xpath(".//span[2]").text
    # Runs once per row, after the inner loop, whether or not it executed.
    print(CompanyName , links)
您是在 for list in rpslide 循环的作用域内定义 links 的。
但是,如果 rpslide 为空,循环体一次都不会执行,links 也就根本不会被定义。
请在 for 循环上方先声明它,例如 links = '',这样您的错误应该就会消失。
for row in tablebodies:
    ...
    # Bind a default before the loop so the name exists even when `rpslide`
    # is empty and the loop body never runs.
    links = ''
    for list in rpslide:
        links = row.find_element_by_xpath(".//span[2]").text
    print(CompanyName , links)
此外,如果 links 打印出来是空的,您还需要弄清楚为什么 rpslide 为空,原因可能是
browser.find_element_by_class_name('rpSlide').find_elements_by_css_selector('.accountInfo')
没有找到任何元素。
答案 1 :(得分:1)
第二段代码之所以报错,是因为 url 取到的是空值,而查找过程本身并没有抛出异常。下面的改法应该可以解决:
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Visit every URL listed in demo.txt and print one comma-separated record per
# contractor container found on the page.
path_to_chromedriver = 'C:/Users/Admin/AppData/Local/Programs/Python/Python37-32/chromedriver.exe'
browser = webdriver.Chrome(executable_path=path_to_chromedriver)

# Raw strings keep the Windows backslashes literal instead of relying on
# '\d' and '\C' happening to be unrecognised escape sequences.
with open(r'E:\demo.txt') as f:
    # One URL per line; blank lines are skipped so browser.get() is never
    # called with an empty string.
    content = [x.strip() for x in f if x.strip()]

# Only the header line is written to the CSV; the records are printed below.
# NOTE(review): the header names seven columns (including Event_Link) while
# each printed record has six fields -- confirm which layout is wanted.
with open(r'E:\Contractors1.csv', 'a') as f:
    headers = ("Company Name,Contact Person,Address,Event_Link,PhoneNumber,Faxnumber, Website")
    f.write(headers)
    f.write("\n")

for link in content:
    browser.get(link)
    tablebodies = browser.find_elements_by_css_selector('.contentpaneopen.record-container')
    for row in tablebodies:
        # Reset every field for each row. Without these defaults a failed
        # lookup leaves the name unbound (NameError at the print below) or
        # silently reuses the previous row's value.
        CompanyName = ''
        ContactPerson = ''
        Address = ''
        PhoneNumber = ''
        Faxnumber = ''
        url = ''
        try:
            CompanyName = row.find_element_by_xpath(".//div[@class='title-container article-title pos-pagetitle item-pagetitle']/h2[1]/span[1]/a[1]/span[1]").text
        except NoSuchElementException:
            pass  # field absent on this record; keep the empty default
        try:
            ContactPerson = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[1]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            Address1 = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[2]/div[1]").text
            # Collapse the multi-line address onto a single CSV line.
            Address = Address1.replace("\n", "")
        except NoSuchElementException:
            pass
        try:
            PhoneNumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[4]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            Faxnumber = row.find_element_by_xpath(".//*[@class='introduction-container article-intro pos-intro item-intro']/div[5]/div[1]").text
        except NoSuchElementException:
            pass
        try:
            # The original expression ended with a trailing '/', which is not
            # a valid XPath; it is removed so the second link can be located.
            url = row.find_element_by_xpath(".//*[@class='element element-sfobject element-last row2 elRow2']/div[1]/a[2]").get_attribute('href')
            if url:
                pass  # href present; use it as-is
            else:
                # get_attribute() yielded nothing usable (None or ''), which
                # would break the string concatenation below.
                url = "url returned empty"
        except NoSuchElementException:
            pass
        # Commas inside the text fields are replaced with '|' so they do not
        # split CSV columns.
        print(CompanyName.replace(",", "|") + "," + ContactPerson.replace(",", "|") + "," + Address.replace(',', "|") + "," + PhoneNumber + "," + Faxnumber + "," + url + "\n")
我所做的更改:
在最后一个 try 块中,增加了一个 if 条件来检查 url 是否为空,即:
if url:
如果 url 的值不为空,条件为真;否则跳转到 else 分支,并把 url 设为一个说明性的字符串。