我正在尝试抓取页面,但是我需要的链接在 'a' 标签的 href 属性中。如何用 Selenium(或 BS4)获取它?
我的代码。
import pandas as pd
import pyautogui as ptg
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Log in to the site through a Selenium-driven Chrome window, then open the
# target post so that its links can be read from the live page.

# Raw string: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"

# Separate copy of the page fetched with requests; this request does not
# share the browser session opened below.
SRC = requests.get("Link (hidden for security)").text
SOUP = BeautifulSoup(SRC, 'lxml')

driver = webdriver.Chrome(PATH)
driver.get("Link (hidden for security)")

email = "(Hidden for security)"
password = "(Hidden for security)"

# Open the log-in form. click() returns None, so the result is not stored.
driver.find_element(
    By.XPATH, "/html/body/div/div/div[2]/div/div[1]/div[2]/a[2]"
).click()

# The credential fields are filled through pyautogui at fixed screen
# coordinates (e-mail at x=325, y=234; password at x=588, y=303), so this
# step depends on the browser window position and the screen resolution.
# NOTE: the e-mail input was previously located with the XPath
# /html/body/div/div/div[2]/div/table/tbody/tr/td/div[3]/div[2]/form/ul/li[1]/input
ptg.click(325, 234)
ptg.typewrite(email, interval=0.05)
ptg.click(588, 303)
ptg.typewrite(password, interval=0.05)

# Submit the log-in form.
driver.find_element(
    By.XPATH,
    "/html/body/div/div/div[2]/div/table/tbody/tr/td/div[3]/div[2]/form/ul/li[3]/input",
).click()

post_link = "(hidden for security)"
# Navigate with the driver itself rather than clicking and typing into the
# address bar at hard-coded screen coordinates.
driver.get(post_link)
出于安全考虑,链接被隐藏
请帮助。
答案 0 :(得分:0)
来自https://code.luasoftware.com/tutorials/selenium/get-href-of-element-with-selenium-python/:
# Read the target URL of a link from the live page.
# "a.link" is the example CSS selector; replace it with one that matches
# the wanted <a> element. find_element raises NoSuchElementException when
# nothing matches.
el = driver.find_element(By.CSS_SELECTOR, "a.link")
if el:
    # The href attribute of the <a> element holds the link target.
    url = el.get_attribute("href")