数据抓取后无法从网站提取表

时间:2020-05-31 11:49:04

标签: python selenium beautifulsoup datatable extract

我写了一个代码从网站上提取表,但是我不能这样做。

from selenium import webdriver 
import time, re
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import time

chrome_path = r"C:\Users\user\Desktop\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)

driver.get("http://www.nhb.gov.in/OnlineClient/MonthlyPriceAndArrivalReport.aspx")

html_source = driver.page_source
results=[]

#cauliflower
element_month = driver.find_element_by_id ("ctl00_ContentPlaceHolder1_ddlmonth")
drp_month = Select(element_month)
drp_month.select_by_visible_text("January")

element_category_name = driver.find_element_by_id ("ctl00_ContentPlaceHolder1_drpCategoryName")
drp_category_name = Select(element_category_name)
drp_category_name.select_by_visible_text("VEGETABLES")

time.sleep(2)
element_crop_name = driver.find_element_by_id ("ctl00_ContentPlaceHolder1_drpCropName")
drp_crop_name = Select(element_crop_name)
drp_crop_name.select_by_value("117")
time.sleep(2)
element_variety_name = driver.find_element_by_id ("ctl00_ContentPlaceHolder1_ddlvariety")
drp_variety_name = Select(element_variety_name)
drp_variety_name.select_by_value("18")

element_state = driver.find_element_by_id ("ctl00_ContentPlaceHolder1_LsboxCenterList")
drp_state = Select(element_state)
drp_state.select_by_visible_text("AHMEDABAD")

driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_btnSearch"]""").click()

soup = BeautifulSoup(driver.page_source, 'html.parser')
table = pd.read_html(driver.page_source)[3]
 #number three is arbitrary. I tried all numbers from 1 to 6 and python did 
 #not recognize the table at 
the bottom of the scree. 
print(len(table))
print(table)
with pd.ExcelWriter(r'C:\Users\user\Desktop\python.xlsx') as writer:
 table.to_excel(writer, sheet_name = "cauliflower", index=False) # cauliflower results on sheet named 
 cauliflower
 writer.save()  

能否请您帮我弄清楚如何提取网站底部的表格。您的帮助将不胜感激。先感谢您。

0 个答案:

没有答案