使用下面的代码,我可以获取 Project 部分的第一个标题,现在还想打印第二个标题(FSI Details)。但我无法用 BeautifulSoup 获取这个第二个标题;我已经尝试过使用 nth-child 选择器,仍然没有成功。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import os
# Drive the MahaRERA site with Selenium to reach one registered project's
# detail page, then parse that page with BeautifulSoup and print two section
# headings: the first heading inside "DivProject" and the "FSI Details" one.
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)

# The XPath has to be one string: a quoted literal cannot continue onto the
# next line, so the class name is written out whole and the two halves are
# joined by implicit string concatenation.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable(
        (By.XPATH,
         "//div[@class='search-pro-details']"
         "//a[contains(.,'Search Project Details')]")
    )
).click()

# Clicks go through JavaScript rather than WebElement.click().
Registered_Project_radio = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "Promoter"))
)
driver.execute_script("arguments[0].click();", Registered_Project_radio)

# find_element(By.ID, ...) is available in both Selenium 3 and 4, unlike the
# find_element_by_* helpers, which Selenium 4 removed.
Application = driver.find_element(By.ID, "CertiNo")
Application.send_keys("P50500000005")

Search = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "btnSearch"))
)
driver.execute_script("arguments[0].click();", Search)

# Collect every link target on the results page and follow the first one.
View = [
    item.get_attribute('href')
    for item in driver.find_elements(By.TAG_NAME, "a")
    if item.get_attribute('href') is not None
]
View = View[0]
driver.get(View)

# The same URL is fetched again without the browser and parsed statically.
request = urllib.request.Request(View)
html = urllib.request.urlopen(request).read()
soup = BeautifulSoup(html, 'html.parser')

divPInfo2 = soup.find("div", {"id": "DivProject"})
Project_title = (
    divPInfo2.find("div", {'class': 'x_panel'}, recursive=False)
    .find("div", {'class': 'x_title'})
    .find("h2")
    .text.strip()
)
print(Project_title)

# The original looked for a second <h2> inside the *first* "x_title" div,
# which is where the lookup failed. Presumably each section carries its own
# "x_title" div (TODO confirm against the live page), so scan all of them
# for the one whose text mentions "FSI Details".
fsi_title_div = next(
    (
        div
        for div in soup.find_all("div", {'class': 'x_title'})
        if "FSI Details" in div.get_text()
    ),
    None,
)
if fsi_title_div is None:
    raise SystemExit("No 'FSI Details' heading found on the project page")

# Prefer the <h2> inside the title div; fall back to the div's own text.
fsi_heading = fsi_title_div.find("h2") or fsi_title_div
Project_title1 = fsi_heading.text.strip()
print(Project_title1)  # (FSI Detail) heading should be printed here
答案 0 :(得分:0)
您可以尝试使用 CSS 选择器 :contains("FSI Details"),它会选中包含字符串 "FSI Details" 的元素。下面的代码会打印 "FSI Details" 部分中的标签及其对应的值:
import requests
from bs4 import BeautifulSoup
# Fetch the project's print-preview page and print every label/value pair
# found in its "FSI Details" section as a two-column table.
url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# Select the ".x_content" element that immediately follows the ".x_title"
# element containing the text "FSI Details".
# NOTE(review): newer Soup Sieve releases deprecate ":contains" in favour of
# ":-soup-contains" - switch once the installed version supports it.
fsi_content = soup.select_one('.x_title:contains("FSI Details") + .x_content')
if fsi_content is None:
    # select_one() returns None when nothing matches; fail with a clear
    # message instead of an AttributeError on the .select() calls below.
    raise SystemExit('No "FSI Details" section found in the page')

print('{: <160}{: <8}'.format('Label', 'Value'))
print('-' * 168)

# Pair each <label> with the <div> that directly follows the label's parent
# <div>; zip() stops at the shorter of the two lists.
for label, text in zip(fsi_content.select('label'), fsi_content.select('div:has(> label) + div')):
    print('{: <160}{: <8}'.format(label.get_text(strip=True), text.get_text(strip=True)))
打印:
Label Value
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI) 0
Built-up-Area as per Approved FSI (In sqmts) 11566.50
TotalFSI 11566.50
进一步阅读: