使用 webdriver 抓取评论日期

时间:2016-11-17 09:50:11

标签: python-2.7 selenium-webdriver web-scraping

from selenium import webdriver
# The review page address is kept in two parts: a fixed prefix and the
# hotel-specific page name.
base_url = 'https://www.tripadvisor.in/Hotel_Review-g60763-d208452-Reviews-'
location_url = 'Conrad_New_York-New_York_City_New_York.html'

# Start a Firefox session and load the review page.
driver = webdriver.Firefox()
driver.get(base_url + location_url)
def close_all_popups(driver):
    """Close every window except the first, then click the "more" link.

    The window handles are read once; each handle after the first is
    switched to and closed, focus is returned to the first window, and the
    element matching ``.taLnk.moreLink.ulBlueLinks`` is clicked.

    Args:
        driver: The Selenium WebDriver instance to operate on.

    NOTE(review): nothing in the visible script calls this function.
    NOTE: the ``find_element_by_*`` helpers were removed in Selenium 4.3;
    on newer versions use ``driver.find_element(By.CSS_SELECTOR, ...)``.
    """
    # Fetch the handle list once instead of on every access; each read of
    # ``window_handles`` is a separate request to the browser.
    handles = driver.window_handles
    for popup in handles[1:]:
        # ``switch_to.window`` replaces the deprecated ``switch_to_window``.
        driver.switch_to.window(popup)
        driver.close()
    # Focus must go back to the first window before touching the page.
    driver.switch_to.window(handles[0])
    driver.find_element_by_css_selector(".taLnk.moreLink.ulBlueLinks").click()
# Print the ``title`` attribute of the span found inside each element of
# class ``entry``. The browser is shut down even if a lookup fails.
try:
    for link in driver.find_elements_by_class_name('entry'):
        # The leading "." keeps the XPath relative to this entry; a bare
        # "//" would search the whole document and return the same node
        # on every iteration.
        # ``get_attribute`` already returns a plain string, so no further
        # extraction call is needed.
        test = link.find_element_by_xpath('.//div/span').get_attribute('title')
        print(test)
finally:
    # Always release the browser, otherwise an exception above would leave
    # the Firefox process running.
    driver.quit()

1 个答案:

答案 0 :(得分:0)

如果您只想要日期,请尝试此代码:

from selenium import webdriver

# The review page address is kept in two parts: a fixed prefix and the
# hotel-specific page name.
base_url = 'https://www.tripadvisor.in/Hotel_Review-g60763-d208452-Reviews-'
location_url = 'Conrad_New_York-New_York_City_New_York.html'

# Start a Chrome session, maximize its window, and load the review page.
driver = webdriver.Chrome()
driver.maximize_window()
driver.get(base_url + location_url)
def close_all_popups(driver):
    """Close every window except the first, then click the "more" link.

    The window handles are read once; each handle after the first is
    switched to and closed, focus is returned to the first window, and the
    element matching ``.taLnk.moreLink.ulBlueLinks`` is clicked.

    Args:
        driver: The Selenium WebDriver instance to operate on.

    NOTE(review): nothing in the visible script calls this function.
    NOTE: the ``find_element_by_*`` helpers were removed in Selenium 4.3;
    on newer versions use ``driver.find_element(By.CSS_SELECTOR, ...)``.
    """
    # Fetch the handle list once instead of on every access; each read of
    # ``window_handles`` is a separate request to the browser.
    handles = driver.window_handles
    for popup in handles[1:]:
        # ``switch_to.window`` replaces the deprecated ``switch_to_window``.
        driver.switch_to.window(popup)
        driver.close()
    # Focus must go back to the first window before touching the page.
    driver.switch_to.window(handles[0])
    driver.find_element_by_css_selector(".taLnk.moreLink.ulBlueLinks").click()

# Print the ``title`` attribute of every ``span.ratingDate.relativeDate``
# element on the page. The browser is shut down even if a lookup fails.
try:
    for link in driver.find_elements_by_css_selector('span.ratingDate.relativeDate'):
        test = link.get_attribute('title')
        print(test)
finally:
    # Always release the browser, otherwise an exception above would leave
    # the Chrome process running.
    driver.quit()

它会将日期打印为:

16 November 2016
16 November 2016
16 November 2016
16 November 2016
15 November 2016
15 November 2016
15 November 2016
14 November 2016
14 November 2016
14 November 2016

希望这就是你要找的东西。