如果我已经定位到了一个元素,有什么办法可以在同一个class(即同一个父级div)中找到另一个元素?
例如。
<div class="day">
<span class="day_number">4</span>
<span class="day_item_time" data-day-total-time="day-total-time">10m</span></div>
所以我必须先按日期搜索(查找文本为4的元素),但之后如何由这个元素进一步找到10m?
打印该元素时得到的输出是:<selenium.webdriver.firefox.webelement.FirefoxWebElement (session="4ff20f1a-75d1-42c1-a7d0-de0c532651a6", element="1147f2d5-8832-44d4-a906-929ebaaa49e2")>
答案 0 :(得分:2)
尝试以下代码。它应返回您的预期输出。
# The parenthesised XPath collects every <span> that is a direct child of a
# <div class="day">, in document order; [1] picks the first of them (the day
# number in the sample markup).
driver.find_element_by_xpath("(//div[@class='day']/span)[1]").text
# [2] picks the second span of that same collection (the total time in the
# sample markup). NOTE(review): the index counts across all div.day elements
# on the page, so this does not look the day up by its number.
driver.find_element_by_xpath("(//div[@class='day']/span)[2]").text
输出:
4
10m
答案 1 :(得分:0)
类似这样的东西:
import selenium
import json
import time
import re
import string
import requests
import bs4
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
# Listing page handed to prepare_driver() by the __main__ block. Despite the
# name, this is a full URL including query-string filters, not a bare domain.
domain = 'https://stockx.com/puma?prices=300-400,200-300&size_types=men&years=2017'
def prepare_driver(url):
    '''Start a Chrome driver, open ``url`` and wait for the page content.

    Args:
        url: Address to load in the freshly started browser.

    Returns:
        The Chrome WebDriver once an element with class ``title-container``
        is present in the DOM.

    Raises:
        Any exception raised while loading or waiting is re-raised after the
        browser has been shut down, so a failed start does not leave a
        browser process behind.
    '''
    # NOTE(review): the chromedriver location is machine-specific.
    driver = webdriver.Chrome(
        executable_path='/Users/Documents/python/Selenium/bin/chromedriver')
    try:
        driver.get(url)
        # Fixed grace period kept from the original flow before the explicit
        # wait below starts polling.
        time.sleep(2)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located(
            (By.CLASS_NAME, 'title-container')))
    except Exception:
        # The caller never receives the driver on failure, so it must be
        # closed here or it would leak.
        driver.quit()
        raise
    return driver
def fill_form(driver, search_argument):
    '''Wait until elements with class ``title-container`` are present.

    The search-box interaction is currently disabled (see the commented-out
    lines below), so ``search_argument`` is accepted but not used.

    Args:
        driver: WebDriver whose current page is inspected.
        search_argument: Search text for the disabled form submission.

    Returns:
        None.
    '''
    # Disabled form submission, kept for reference:
    #search_field = driver.find_element_by_id('q')
    #search_field.send_keys(search_argument)
    # We look for the search button and click it
    #driver.find_element_by_class_name('search__submit')\
    #.click()
    WebDriverWait(driver, timeout=10).until(
        EC.presence_of_all_elements_located(
            (By.CLASS_NAME, 'title-container')))
def scrape_results(driver, n_results):
    '''Return the scraped data of up to ``n_results`` listed products.

    Collects the ``href`` of the first matching link inside every product
    tile on the current page, prints all collected URLs (one per line) and
    then scrapes the first ``n_results`` of them.

    Args:
        driver: WebDriver currently showing the listing page.
        n_results: Maximum number of products to scrape. Zero or a negative
            value scrapes nothing.

    Returns:
        A list with one dict per scraped product, in listing order. It is
        shorter than ``n_results`` when the page lists fewer products.
    '''
    tiles = driver.find_elements_by_css_selector(
        "div[class*='tile browse-tile']")
    product_urls = [
        tile.find_element_by_css_selector("a[href*='/']").get_attribute('href')
        for tile in tiles
    ]
    print(*product_urls, sep="\n")
    # Slicing caps the work at the number of URLs actually found, so asking
    # for more results than the page offers no longer raises IndexError.
    # max() keeps a negative request empty instead of slicing from the end.
    selected_urls = product_urls[:max(n_results, 0)]
    return [scrape_product_data(driver, url) for url in selected_urls]
def scrape_product_data(driver, product_url, load_wait=12):
    '''Visit a product page and extract its name and image URL.

    Args:
        driver: WebDriver to reuse, or ``None`` to start a temporary one via
            ``prepare_driver``. A temporary driver is quit before returning.
        product_url: Address of the product page.
        load_wait: Seconds to sleep after navigation before reading the
            page. Defaults to the previously hard-coded 12 seconds.

    Returns:
        A dict with the keys ``product_name`` and ``imgurl``.
    '''
    owns_driver = driver is None
    if owns_driver:
        # prepare_driver() already navigates to the URL, so no second get().
        driver = prepare_driver(product_url)
    else:
        driver.get(product_url)
    try:
        time.sleep(load_wait)
        product_fields = dict()
        # Get the product name
        product_fields['product_name'] = driver.find_element_by_xpath(
            '//div[@class="col-md-12"]/h1').text
        # Get the image url
        product_fields['imgurl'] = driver.find_element_by_xpath(
            '//img[@class="product-image"]').get_attribute('src')
        return product_fields
    finally:
        # Only close a browser this call started; a caller-supplied driver
        # stays open for further use.
        if owns_driver:
            driver.quit()
if __name__ == '__main__':
    # Start the browser before entering try/finally: if start-up fails there
    # is no driver to quit, and the finally clause would otherwise raise
    # NameError and hide the real error.
    driver = prepare_driver(domain)
    try:
        #fill_form(driver, 'juniole tf')
        product_data = scrape_results(driver, 4)
        # Serialise before opening the file so a serialisation error does not
        # truncate an existing output file.
        # NOTE(review): pass ensure_ascii=False if non-ASCII text (e.g.
        # Japanese) should be written verbatim instead of as \u escapes.
        serialized = json.dumps(product_data, indent=4)
        with open('booking_data_stockx.json', 'w') as f:
            f.write(serialized)
    finally:
        driver.quit()
答案 2 :(得分:0)
您可以使用下面的xpath获得day-total-time。
先获取类为day_number、文本为4的span,再通过ancestor取得第一个类为day的父级div,然后获取带有data-day-total-time属性的span:
//span[@class='day_number' and .='4']/ancestor::div[@class='day'][1]/span[@data-day-total-time]
或者,先获取类为day的div(它包含类为day_number、文本为4的子span),然后获取带有data-day-total-time属性的span:
//div[@class='day'][span[@class='day_number' and .='4']]/span[@data-day-total-time]