如何在 Python 中使用 BeautifulSoup 查找第 n 个子元素（nth-child）的标题并打印其文本

时间:2019-07-16 07:26:59

标签: python selenium-webdriver beautifulsoup

根据我的代码，我能够获取 Project 的第一个标题，并希望打印副标题（FSI Details）。但我无法使用 BeautifulSoup 获取第二个标题。我曾尝试参考 nth-child（第 n 个子元素）的用法。

 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.keys import Keys
 import urllib.request
 from bs4 import BeautifulSoup
 from selenium import webdriver
 from selenium.webdriver.support.select import Select
 from selenium.webdriver.common.keys import Keys
 import time
 import pandas as pd
 import os
 # Site to scrape and the location of the local chromedriver binary.
 url = 'https://maharerait.mahaonline.gov.in'
 chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

 driver = webdriver.Chrome(executable_path=chrome_path)
 driver.get(url)

 # Open "Search Project Details" once its link becomes clickable. The XPath
 # is assembled from adjacent string literals so that no line break or stray
 # whitespace ends up inside the class name 'search-pro-details'.
 search_link_xpath = ("//div[@class='search-pro-details']"
                      "//a[contains(.,'Search Project Details')]")
 WebDriverWait(driver, 20).until(
     EC.element_to_be_clickable((By.XPATH, search_link_xpath))).click()

 # Choose the option with id "Promoter"; the click is issued through
 # JavaScript rather than through WebElement.click().
 Registered_Project_radio = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "Promoter")))
 driver.execute_script("arguments[0].click();", Registered_Project_radio)

 # Type the certificate number into the "CertiNo" field and run the search.
 Application = driver.find_element_by_id("CertiNo")
 Application.send_keys("P50500000005")
 Search = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "btnSearch")))
 driver.execute_script("arguments[0].click();", Search)

 # Collect the href of every anchor that has one and follow the first.
 View = [item.get_attribute('href') for item in
         driver.find_elements_by_tag_name("a") if
         item.get_attribute('href') is not None]
 View = View[0]
 driver.get(View)

 # Download the same URL again outside the browser and parse it; the
 # context manager closes the HTTP response once it has been read.
 request = urllib.request.Request(View)
 with urllib.request.urlopen(request) as response:
     html = response.read()
 soup = BeautifulSoup(html, 'html.parser')

 # Title area of the first 'x_panel' that is a direct child of #DivProject.
 divPInfo2 = soup.find("div", {"id": "DivProject"})
 title_div = divPInfo2.find(
     "div", {'class': 'x_panel'}, recursive=False).find(
     "div", {'class': 'x_title'})

 Project_title = title_div.find("h2").text.strip()
 print(Project_title)

 # This is the lookup the question asks about: it expects a second <h2>
 # inside the same 'x_title' div and is reported not to return the
 # "FSI Details" heading.
 Project_title1 = title_div.find_all("h2")[1].text.strip()
 print(Project_title1)  # (FSI Detail) heading should be printed here

1 个答案:

答案 0 :(得分:0)

您可以尝试使用 CSS 选择器 `:contains("FSI Details")`，它会选中文本中包含字符串 “FSI Details” 的元素。下面的代码会打印 “FSI Details” 部分的标签和值：

import requests
from bs4 import BeautifulSoup

# Print-preview page that contains the "FSI Details" section.
url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'

# Download the page and parse it with the lxml parser.
soup = BeautifulSoup(requests.get(url).text, 'lxml')

# Take the '.x_content' element that immediately follows the '.x_title'
# element whose text contains "FSI Details".
# NOTE(review): recent soupsieve releases spell this pseudo-class
# ':-soup-contains' - confirm against the installed version.
fsi_content = soup.select_one('.x_title:contains("FSI Details") + .x_content')

# One template for the header and every data row: a 160-character label
# column followed by an 8-character value column, both left-aligned.
row_template = '{: <160}{: <8}'

print(row_template.format('Label', 'Value'))
print('-' * 168)

# Labels, and the <div> that directly follows each <div> wrapping a label.
labels = fsi_content.select('label')
values = fsi_content.select('div:has(> label) + div')
for label, text in zip(labels, values):
    print(row_template.format(label.get_text(strip=True), text.get_text(strip=True)))

打印:

Label                                                                                                                                                           Value   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI)               0       
Built-up-Area as per Approved FSI (In sqmts)                                                                                                                    11566.50
TotalFSI                                                                                                                                                        11566.50

进一步阅读:

CSS Selectors Reference