import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
# Start Firefox through the local geckodriver binary and load the site.
url = "https://www.bungol.ca/"
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver')
driver.get(url)

# Elements clicked during start-up, in order. Each entry is
# (xpath, seconds to pause after the click); 0 means "no pause".
# The trailing notes repeat the original author's comments; where the
# original placement was ambiguous they are marked for confirmation.
_STARTUP_CLICKS = (
    # Select toronto by default
    ('/html/body/section/div[2]/div/div[1]/form/div/select/optgroup[1]/option[1]', 1),
    # submit button of the same landing-page form
    ('/html/body/section/div[2]/div/div[1]/form/div/button', 0),
    # presumably the "last 2 years" choice in the navbar select — TODO confirm
    ('/html/body/nav/div[1]/ul[1]/li[3]/select/option[8]', 0),
    # presumably switches to sold listings in that area — TODO confirm
    ('//*[@id="activeListings"]', 0),
    # closes property type slide
    ('/html/body/div[5]/i', 0),
    ('//*[@id="navbarDropdown"]', 0),
    ('//*[@id="listViewToggle"]', 0),
)

for _xpath, _pause_seconds in _STARTUP_CLICKS:
    driver.find_element_by_xpath(_xpath).click()
    if _pause_seconds:
        time.sleep(_pause_seconds)
def data_collector():
    """Open list-view rows page by page and print each listing's status text.

    Uses the module-level ``driver``. The function types the postal code
    ``M3B2B6`` into the navbar address box and then, for as long as the
    ``nextPaginate`` element is displayed:

    1. waits for the table to refresh,
    2. clicks the first cell of each of the first ``row_count`` rows, prints
       the text of ``#listingStatus`` from the pop-up and closes the pop-up,
    3. clicks the ``listViewNextPaginate`` control.

    Returns:
        None. The collected text is written to stdout.

    Raises:
        Whatever the WebDriver raises when an element cannot be located or
        clicked (nothing is caught here).
    """
    hidden_next = driver.find_element_by_class_name("nextPaginate")

    # inputs in textbox
    search_box = driver.find_element_by_id('navbarSearchAddressInput')
    search_box.send_keys('M3B2B6')
    time.sleep(1)
    # NOTE: no Enter key is sent; the original had a disabled
    # `send_keys(Keys.ENTER)` call at this point.

    # Fixed number of rows visited per page.
    # TODO: derive it from the table instead, e.g. by counting the
    # `#listViewTableBody tr.mb-2` rows (the original kept such a count
    # commented out).
    row_count = 3

    while hidden_next.is_displayed():  # while there is a next page button to be pressed
        time.sleep(3)  # delay for table refresh

        # XPath positions are 1-based, hence the range starting at 1.
        for row_number in range(1, row_count + 1):
            # alternate row by changing the tr index
            row_cell_xpath = "/html/body/div[8]/table/tbody/tr[{}]/td[1]".format(row_number)
            driver.find_element_by_xpath(row_cell_xpath).click()
            time.sleep(2)

            # sold price (per the original author's comment)
            print(driver.find_element_by_css_selector("#listingStatus").text)

            # closes the pop up after getting the data
            driver.find_element_by_css_selector(
                '.modal-xl > div:nth-child(1) > div:nth-child(1) > button:nth-child(1)'
            ).click()
            time.sleep(1)

        # clicks next page button for the table
        driver.find_element_by_xpath('//*[@id="listViewNextPaginate"]').click()
if __name__ == "__main__":
    # Collect data only when this file is run as a script, not on import.
    data_collector()
代码会遍历第一页表格中的所有行(目前为了测试,行数固定为 3):依次单击每一行,等待弹出窗口出现,读取信息,然后关闭弹出窗口。但是翻到下一页之后,它不会单击第二页中的任何一行。此时并不会报“找不到行的 XPath”之类的错误,而是在查找弹出窗口的关闭按钮时报错——因为行没有被点击,弹出窗口根本没有打开。
当表格翻到下一页之后,如何让脚本继续单击其中的行?
表格所在页面(供参考):
https://www.bungol.ca/map/location/toronto/?
关闭左侧的“物业类型”滑出面板
点击“工具”->“打开列表”
答案 0 :(得分:0)
我在浏览器中手动单击第二页的行时,同样无法打开弹出窗口。因此,我认为这可能是网站本身的问题。
如果要检查元素是否存在,可以使用以下代码(需要先从 selenium.common.exceptions 导入 NoSuchElementException):
def check_exists_by_xpath(xpath, driver):
    """Report whether *driver* can locate an element matching *xpath*.

    Args:
        xpath: XPath expression passed to ``driver.find_element_by_xpath``.
        driver: Object exposing ``find_element_by_xpath`` (a Selenium
            WebDriver in this file).

    Returns:
        True if the lookup succeeds, False if it raises
        ``NoSuchElementException``. Any other exception propagates.
    """
    try:
        driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        return False
    return True
答案 1 :(得分:0)
试试下面的代码。按我的理解,您的脚本是逐条遍历房源:打开一条房源,读取它的状态,关闭弹出窗口,然后对其余房源重复同样的操作。
如果我的理解正确,以下代码可能对您有所帮助。最好把隐式等待和 time.sleep() 都换成显式等待,并把这个函数整理一下。
话虽如此,我没有完整测试过这段代码,不过它确实能够翻过不止一页房源并收集数据。
-- NOTE(review): the preceding text announces Python code, but this is a SQL
-- query that references nothing in the Selenium script; it looks like a
-- scrape artifact. Confirm it belongs here.
-- Syntax (DISTINCT ON, ::int) is PostgreSQL.
--
-- Returns one (id, cust_id) row per cust_id from texts: DISTINCT ON keeps the
-- first row of each cust_id group in the ORDER BY order below.
SELECT DISTINCT ON (cust_id) id, cust_id
FROM texts
-- Keep rows with over18 = FALSE that are either inside their
-- ad_start_date..ad_end_date window right now, or have texts.default = TRUE.
WHERE over18 = FALSE AND
(now() BETWEEN ad_start_date AND ad_end_date OR
texts.default = TRUE
)
-- Within each cust_id: rows whose window contains now() (cast to 1) sort ahead
-- of those outside it (0); ties are broken by newest created_at.
-- NOTE(review): if ad_start_date / ad_end_date can be NULL the BETWEEN yields
-- NULL, and PostgreSQL sorts NULLs first under DESC, so such rows would sort
-- ahead of active ads. Confirm whether NULLS LAST is wanted.
ORDER BY cust_id,
( now() BETWEEN ad_start_date AND ad_end_date )::int desc,
created_at DESC;