我尝试从打印预览页面抓取每一行数据，并将其导出到 CSV 文件。我在搜索框中输入了项目编号 (P50500000005)，对于这个编号，代码可以正常工作。但是，当我把编号换成另一个 (P49500000001) 时，抓取到的数据与打印预览页面上显示的内容不一致。我希望这段代码能够把所有编号对应的打印预览页面都抓取并写入 CSV 文件。
代码如下：
import csv
import os
import time
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
# Portal entry page and the local chromedriver binary used to drive Chrome.
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

# Certificate numbers to export; add more entries to scrape several projects.
# The original hard-coded layout was reported to work for "P50500000005".
certificate_numbers = ["P49500000001"]


def pair_up_cells(texts, start=0, stop=None):
    """Map alternating label/value strings in ``texts[start:stop]`` to a dict.

    Every string is stripped. Even positions of the slice become keys and the
    following odd positions become their values, in order. A trailing label
    with no value is dropped, and a slice that lies beyond the end of
    ``texts`` yields an empty dict instead of raising ``IndexError``.
    A repeated label keeps its first position and its last value.
    """
    window = [text.strip() for text in texts[start:stop]]
    # zip() stops at the shorter sequence, which discards an unpaired label.
    return dict(zip(window[0::2], window[1::2]))


def pick_table(tables, index):
    """Return ``tables[index]``, or ``None`` when there is no such table."""
    if 0 <= index < len(tables):
        return tables[index]
    return None


def find_panel(soup, container_id):
    """Return the direct ``x_panel`` child of ``div#container_id``, or None."""
    container = soup.find("div", {"id": container_id})
    if container is None:
        return None
    return container.find("div", {'class': 'x_panel'}, recursive=False)


def panel_title(panel):
    """Return the stripped ``<h2>`` text inside the panel's ``x_title`` div.

    Returns ``None`` when the panel has no such heading.
    """
    title_div = panel.find("div", {'class': 'x_title'})
    if title_div is None:
        return None
    heading = title_div.find("h2")
    if heading is None:
        return None
    return heading.text.strip()


def write_heading(csv_file, text):
    """Echo a section heading and write it to the CSV on a line of its own."""
    print(text)
    csv_file.write(text + "\n")


def write_pairs(writer, pairs):
    """Echo a label/value dict and write one ``label,value`` row per entry."""
    print(pairs)
    for key, value in pairs.items():
        writer.writerow([key, value])


def write_pair_section(soup, csv_file, writer, container_id, main_stop,
                       subsections=()):
    """Write one label/value section of the page, plus its sub-sections.

    ``container_id`` is the id of the div holding the section's ``x_panel``.
    The panel's ``col-md-3`` cells are read as alternating label/value cells:
    cells ``[0:main_stop]`` go under the panel title, and each
    ``(heading_text, start, stop)`` entry of ``subsections`` writes cells
    ``[start:stop]`` under the ``x_title`` element containing that text.

    A missing container or heading is reported and skipped rather than
    aborting the export.
    """
    panel = find_panel(soup, container_id)
    if panel is None:
        print("Skipping section '%s': not present on this page" % container_id)
        return

    title = panel_title(panel)
    if title is None:
        print("Section '%s' has no title" % container_id)
    else:
        write_heading(csv_file, title)

    texts = [cell.text for cell in panel.find_all("div", {'class': 'col-md-3'})]
    write_pairs(writer, pair_up_cells(texts, 0, main_stop))

    # NOTE(review): the slice boundaries below were derived from one
    # certificate's page; if another page has a different number of fields,
    # pairs may land under the wrong sub-heading -- confirm against the site.
    for heading_text, start, stop in subsections:
        heading = soup.select_one('.x_title:contains("%s")' % heading_text)
        if heading is None:
            print("Heading '%s' not found on this page" % heading_text)
        else:
            # The heading text is written unstripped.
            write_heading(csv_file, heading.text)
        write_pairs(writer, pair_up_cells(texts, start, stop))


def write_table(csv_file, tables, index, drop_duplicates=False):
    """Write the ``index``-th parsed HTML table to the CSV, if it exists."""
    table = pick_table(tables, index)
    if table is None:
        print("Skipping table #%d: page has only %d table(s)"
              % (index, len(tables)))
        return
    if drop_duplicates:
        table = table.drop_duplicates()
    print(table)
    table.to_csv(csv_file, sep=',', index=False)


def export_report(soup, tables, csv_file):
    """Write every section of one print-preview page to ``csv_file``.

    ``soup`` is the parsed page, ``tables`` the list of DataFrames that
    ``pandas.read_html`` produced for the same page, and ``csv_file`` an open
    text file. Sections are written in a fixed order.
    """
    writer = csv.writer(csv_file)

    write_pair_section(soup, csv_file, writer, "DivPInfo", 2)

    # code for Organization, its Address and its Contact details:
    write_pair_section(soup, csv_file, writer, "fldFirm", 12, [
        ("Address Details", 12, 34),
        ("Organization Contact Details", 34, 38),
    ])

    # Member Information
    member_panel = find_panel(soup, "fldindtxt78")
    member_title = panel_title(member_panel) if member_panel is not None else None
    if member_title is not None:
        write_heading(csv_file, member_title)
    write_table(csv_file, tables, 0)

    # code for the Project, FSI Details and Bank Details:
    write_pair_section(soup, csv_file, writer, "DivProject", 52, [
        ("FSI Details", 52, 58),
        ("Bank Details", 58, 62),
    ])

    # code for the Project Details:
    amenities_panel = find_panel(soup, "DivAmenities")
    amenities_title = (panel_title(amenities_panel)
                       if amenities_panel is not None else None)
    if amenities_title is not None:
        write_heading(csv_file, amenities_title)
    write_table(csv_file, tables, 1)

    # Code for the Development Work:
    development_work = soup.select_one('.x_title:contains("Development Work")')
    if development_work is not None:
        write_heading(csv_file, development_work.text)
    write_table(csv_file, tables, 2)

    # Code for the Building Details:
    building_panel = find_panel(soup, "DivBuilding")
    building_title = (panel_title(building_panel)
                      if building_panel is not None else None)
    if building_title is not None:
        write_heading(csv_file, building_title)
    write_table(csv_file, tables, 3, drop_duplicates=True)

    # Code for the Project Professional Information:
    professional_info = soup.select_one(
        '.x_title:contains("Project Professional Information")')
    if professional_info is not None:
        write_heading(csv_file, professional_info.text)
    # NOTE(review): table index 10 is hard-coded; confirm it holds for pages
    # with a different number of tables.
    write_table(csv_file, tables, 10)


def find_print_preview_url(driver, certificate_no):
    """Search the portal for ``certificate_no`` and return the first link URL.

    Raises ``RuntimeError`` when the result page exposes no link at all.
    """
    driver.get(url)
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((
        By.XPATH,
        "//div[@class='search-pro-details']"
        "//a[contains(.,'Search Project Details')]"))).click()

    # Both the radio button and the search button are clicked through
    # JavaScript rather than with a native click.
    promoter_radio = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "Promoter")))
    driver.execute_script("arguments[0].click();", promoter_radio)

    driver.find_element(By.ID, "CertiNo").send_keys(certificate_no)

    search_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "btnSearch")))
    driver.execute_script("arguments[0].click();", search_button)

    # NOTE(review): the links are collected immediately after the click and
    # the first href on the page is used -- confirm that it is the
    # print-preview link and that the results have loaded by then.
    hrefs = [anchor.get_attribute('href')
             for anchor in driver.find_elements(By.TAG_NAME, "a")]
    hrefs = [href for href in hrefs if href is not None]
    if not hrefs:
        raise RuntimeError(
            "No link found on the result page for %s" % certificate_no)
    return hrefs[0]


def main():
    """Export the print-preview page of every configured certificate number.

    Output is appended to ``file_demo.csv``. The browser is always closed,
    even when scraping fails.
    """
    # NOTE(review): ``executable_path`` is the Selenium 3 constructor argument.
    driver = webdriver.Chrome(executable_path=chrome_path)
    try:
        for certificate_no in certificate_numbers:
            view_url = find_print_preview_url(driver, certificate_no)

            # The label/value sections are parsed from a plain HTTP fetch.
            request = urllib.request.Request(view_url)
            html = urllib.request.urlopen(request).read()
            soup = BeautifulSoup(html, 'html.parser')

            # The tables are parsed once from the browser-rendered page.
            driver.get(view_url)
            try:
                tables = pd.read_html(driver.page_source)
            except ValueError:
                # pandas raises ValueError when the page contains no tables.
                tables = []

            # newline="" stops the csv module's own line endings from being
            # translated a second time, which adds blank rows on Windows.
            with open("file_demo.csv", "a", newline="") as csv_file:
                export_report(soup, tables, csv_file)
    finally:
        driver.quit()


if __name__ == "__main__":
    main()