我使用以下Python脚本填写以下页面中的表单:
http://www.bseindia.com/indices/IndexArchiveData.aspx?expandable=3
在浏览器中手动操作时,该网站可以正常显示数据表;但是当我使用下面的 Python mechanize 模块模拟浏览器操作时,响应变量中得到的却是一张空表。为什么会这样?我该如何解决这个问题?
import httplib
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
import mechanize
from datetime import datetime, timedelta
from time import gmtime,strftime
import csv
import sys
import cookielib
# Fetch archived index data from the BSE India archive page by filling in
# and submitting its form with mechanize, once per index code.

# Index codes submitted as the value of the ddlIndex form control.
# Only BSEFMCG is enabled; the other known codes are kept for reference:
# ["BSE30 ","BSE500 ","AUTO ","BANKEX ","BSECG ","BSECD ","BSEFMCG", "BSEHC ","MIDCAP ","SMLCAP ","TECK ","METAL ","OILGAS "]
index_names = ["BSEFMCG"]

url = 'http://www.bseindia.com/indices/IndexArchiveData.aspx?expandable=3'

# Time: validate the command line before touching the output file so a bad
# invocation does not truncate an existing report.
if len(sys.argv) < 3:
    sys.exit("usage: %s DATE_THEN DATE_TODAY" % sys.argv[0])
date_then = sys.argv[1]
date_today = sys.argv[2]
print("%s %s" % (date_today, date_then))

# Browser: build a single instance. Creating a second Browser after this
# point would silently drop the cookie jar and the handler options below.
br = mechanize.Browser()

# Cookie Jar
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'),('Accept', '*/*')]

# The context manager closes the report even if a request below raises.
with open('CII_Report.csv', 'w') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for item in index_names:
        br.open(url)
        br.select_form(nr=0)
        # Lift read-only flags so every control on the form can be assigned.
        br.set_all_readonly(False)
        # NOTE(review): the from-date year 3012 is later than the to-date
        # (2013) and looks like a typo for 2012 or 2013 -- confirm the
        # intended range. date_then / date_today from the command line are
        # read above but not used here.
        br.form['ctl00$ContentPlaceHolder1$txtFromDate'] = '14/11/3012'
        br.form['ctl00$ContentPlaceHolder1$txtToDate'] = '29/11/2013'
        br.form.set_value([item], name='ctl00$ContentPlaceHolder1$ddlIndex')
        # NOTE(review): the page reportedly changes the hidden field
        # ctl00$ContentPlaceHolder1$hidInd from an onClick handler; that
        # handler presumably does not run under mechanize, so the field may
        # need to be set explicitly before submitting -- verify.
        response = br.submit().read()
        result = response.strip().split('\r\n')
        print(result)
        # Disabled post-processing (previously parked in a string literal):
        # compares column 4 of the first data row and the last row and
        # writes the percentage change to the CSV report.
        # close_last=result[1].split(',')[4]
        # close_current=result[len(result)-1].split(',')[4]
        # diff=float(close_current)-float(close_last)
        # diff_file="%.1f" %((diff*100.0)/float(close_last))
        # wr.writerow([item])
        # wr.writerow([result[len(result)-1].split(',')[0],close_current])
        # wr.writerow([result[1].split(',')[0],close_last])
        # wr.writerow(["CHANGE(%)",diff_file])
        # wr.writerow([" "])
答案 0 :(得分:0)
我自己解决了。
页面上会调用一个 onClick() 函数,它会修改某个元素的值。
必须包括:
# Set the element whose value the page's onClick function changes, since
# the script has to supply that value itself before submitting the form.
br['ctl00$ContentPlaceHolder1$hidInd']="BSE30 "
加上这一行之后,脚本就完全正常工作了!