我写了一段代码来抓取网站https://www1.nseindia.com/live_market/dynaContent/live_watch/fxTracker/optChainDataByExpDates.jsp?symbol=USDINR&instrument=OPTCUR&expiryDt=17JUL2020
我有一个访问列而不是数据的代码。
有人可以告诉我我在做什么错吗?
我使用beautifulsoup并查看了页面的HTML结构。
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt,time
import os
from pathlib import Path
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/80.0.3987.132 Safari/537.36','Accept-Language': 'en-US,en;q=0.9','Accept-Encoding': 'gzip, deflate'}
url = "https://www1.nseindia.com/live_market/dynaContent/live_watch/fxTracker/optChainDataByExpDates.jsp"
symbol= 'USDINR'
exp= '29JUL2020'
page = requests.get(url, params = {"symbol": symbol,"instrument": "OPTCUR","date": exp}, headers = headers)
page.status_code
page.content
soup= BeautifulSoup(page.content, 'html.parser')
#print(soup.prettify())
table_it=soup.find_all(class_="opttbldata")
table_cls_1=soup.find_all(id="octable")
#module 1 : Getting table columns
col_list=[]
for mytable in table_cls_1:
table_head= mytable.find('thead')
try:
rows=table_head.find_all('tr')
for tr in rows:
cols = tr.find_all('th')
for th in cols:
er=th.text
#ee=er.encode('utf8')
col_list.append(er)
except:
print("no thead")
col_list_fnl= [e for e in col_list if e not in ('CALLS','PUTS','Chart','\xc2\xa0','\xa0')]
print( col_list_fnl)
#module 2: Getting Data
table_cls_2= soup.find( id="octable")
all_trs = table_cls_2.find_all('tr')
req_row = table_cls_2.find_all('tr')
new_table= pd.DataFrame( index= range(0, len(req_row)-3), columns = col_list_fnl)
row_marker=0
for row_number, tr_nos in enumerate(req_row):
if row_number<=1 or row_number == len(req_row)-1:
continue
td_columns = tr_nos.find_all('td')
select_cols = td_columns[1:22]
cols_horizontal = range(0, len( select_cols))
for nu, column in enumerate(select_cols):
utf_string = column.get_text()
utf_string = utf_string.strip('\n\r\t":')
# tr=utf_string.decode('utf8')
# tr=tr.replace(',' , '')
new_table.iloc[row_marker,[nu]] = utf_string
row_marker+= 1
print(new_table)
new_table['Expiry'] = exp
new_table['Date'] = date
new_table= new_table.replace('-',"0")
答案 0 :(得分:0)
似乎您的参数没有正确应用。我建议使用URL本身来传递您的参数,例如:
symbol= 'USDINR'
exp= '17JUL2020'
url = f"https://www1.nseindia.com/live_market/dynaContent/live_watch/fxTracker/optChainDataByExpDates.jsp?symbol={symbol}&instrument=OPTCUR&expiryDt={exp}"
page = requests.get(url, headers = headers)
这将为您选择的日期提供正确的输出。