我在网页抓取方面并不是新手。但是当我开始抓取这个网址(BSE insider trading)时,我遇到了一个非常奇怪的错误。
关于此页面的细节:此页面属于印度市场的一个主要交易所。默认情况下,页面加载时会显示最近的内幕交易信息。当我们选择日期并提交时,会发出一个 POST 请求并显示相应的数据。当然,数据会分布在多个页面上。
当我发送带有自定义日期的相应 POST 数据时,对于 Page$2(也就是请求第 2 页的信息)我会收到 500 状态码,而其余所有页面都返回正常。我尝试过对这个 POST 变量进行 urlencode,也尝试了各种随意的办法,但都没有成功。请问有人能帮忙确认一下这个情况吗?
我正在使用 Python、requests 和 lxml 进行抓取,这是我的代码:
import requests
from lxml import html
import urllib
url = "http://www.bseindia.com/corporates/Insider_Trading.aspx?expandable=0"
data = {'ctl00$ContentPlaceHolder1$GetQuote1$smartSearch':'Enter Scrip Name / Code / ID','ctl00$ContentPlaceHolder1$GetQuote1$hdnCode':'','WINDOW_NAMER':'1','__EVENTTARGET':'ctl00$ContentPlaceHolder1$gvData','__EVENTARGUMENT':'Page$2', 'ctl00$ContentPlaceHolder1$fmdate':'20140923', 'ctl00$ContentPlaceHolder1$eddate':'20140929', 'ctl00$ContentPlaceHolder1$txtDate':'01/01/2014', 'ctl00$ContentPlaceHolder1$txtTodate':'29/09/2014', 'ctl00$ContentPlaceHolder1$GetQuote1$hdnCode':'', 'ctl00$ContentPlaceHolder1$ddlregulation':'ALL', '__VIEWSTATE':'/wEPDwUJMTY2MjA4NTkxD2QWAmYPZBYCAgMPZBYCAgMPZBYKAg8PEA8WBB4NRGF0YVRleHRGaWVsZAUORkxEX1RZUEVGSUxURVIeC18hRGF0YUJvdW5kZ2QQFQQDQUxMBTEzKDQpBjEzKDRBKQUxMyg2KRUEA0FMTAUxMyg0KQYxMyg0QSkFMTMoNikUKwMEZ2dnZ2RkAhEPD2QWAh4Hb25jbGljawUScmV0dXJuIFZhbGlkYXRlKCk7ZAIVDzwrAA0BAA8WBB8BZx4LXyFJdGVtQ291bnQC8boBZBYCZg9kFjYCAQ9kFhBmDw8WAh4HVmlzaWJsZWhkZAIBDw8WAh8EaGRkAgIPDxYCHwRoZGQCAw8PFgIfBGhkZAIEDw8WAh8EaGRkAgUPDxYCHwRoZGQCCg8PFgIfBGhkZAILDw8WAh8EaGRkAgIPZBYYZg8PFgIeBFRleHQFBjUyNjg2MWRkAgEPDxYCHwUFD1Jpc2hpIExhc2VyIEx0ZGRkAgIPDxYCHwUFDUhhcnNoYWQgUGF0ZWxkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFBk1hcmtldGRkAgYPDxYCHwUFAzUwMGRkAgcPDxYCHwUFBDAuMDFkZAIIDw8WAh8FBQY1NTA4OTlkZAIJDw8WAh8FBQE2ZGQCCg8PFgIfBQUFMTMoNilkZAILD2QWAgIBDw8WAh8FBQwwMiBKYW4gMjAxNCBkZAIDD2QWGGYPDxYCHwUFBjUzMDAwMWRkAgEPDxYCHwUFIkd1amFyYXQgQWxrYWxpZXMgYW5kIENoZW1pY2FscyBMdGRkZAICDw8WAh8FBRZHdWphcmF0IE1hcml0aW1lIEJvYXJkZGQCAw8PFgIfBQUKMDEvMDEvMjAxNGRkAgQPDxYCHwUFAUJkZAIFDw8WAh8FBQZNYXJrZXRkZAIGDw8WAh8FBQUyMjczMWRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBzIxMTY5MDJkZAIJDw8WAh8FBQQyLjg4ZGQCCg8PFgIfBQUGMTMoNEEpZGQCCw9kFgICAQ8PFgIfBQUMMDMgSmFuIDIwMTQgZGQCBA9kFhhmDw8WAh8FBQY1MzAwMDFkZAIBDw8WAh8FBSJHdWphcmF0IEFsa2FsaWVzIGFuZCBDaGVtaWNhbHMgTHRkZGQCAg8PFgIfBQUWR3VqYXJhdCBNYXJpdGltZSBCb2FyZGRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFCZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUFMjI3MzFkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQcyMTE2OTAyZGQCCQ8PFgIfBQUEMi44OGRkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFg
ICAQ8PFgIfBQUMMDMgSmFuIDIwMTQgZGQCBQ9kFhhmDw8WAh8FBQY1MzEyNDFkZAIBDw8WAh8FBRtMaW5jIFBlbiAmYW1wOyBQbGFzdGljcyBMdGRkZAICDw8WAh8FBRlMaW5jIFdyaXRpbmcgQWlkcyBQdnQgTHRkZGQCAw8PFgIfBQUKMDEvMDEvMjAxNGRkAgQPDxYCHwUFAUJkZAIFDw8WAh8FBQZNYXJrZXRkZAIGDw8WAh8FBQUxNzY4NWRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBzE2MzY0ODFkZAIJDw8WAh8FBQUxMS4wN2RkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFgICAQ8PFgIfBQUMMDIgSmFuIDIwMTQgZGQCBg9kFhhmDw8WAh8FBQY1MzEyNzJkZAIBDw8WAh8FBRhOaWtraSBHbG9iYWwgRmluYW5jZSBMdGRkZAICDw8WAh8FBRVBbmFuZCBLdW1hciBDaGF1cmFzaWFkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBU2RkAgUPDxYCHwUFBk1hcmtldGRkAgYPDxYCHwUFBDUzOTBkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQYxNjE1MzJkZAIJDw8WAh8FBQQ0LjcyZGQCCg8PFgIfBQUFMTMoNClkZAILD2QWAgIBDw8WAh8FBQwwMyBKYW4gMjAxNCBkZAIHD2QWGGYPDxYCHwUFBjUzMTI3MmRkAgEPDxYCHwUFGE5pa2tpIEdsb2JhbCBGaW5hbmNlIEx0ZGRkAgIPDxYCHwUFFUFuYW5kIEt1bWFyIENoYXVyYXNpYWRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUENTM5MGRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBjE2MTUzMmRkAgkPDxYCHwUFBDQuNzJkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDA0IEphbiAyMDE0IGRkAggPZBYYZg8PFgIfBQUGNTMxMjczZGQCAQ8PFgIfBQUaUmFkaGUgRGV2ZWxvcGVycyBJbmRpYSBMdGRkZAICDw8WAh8FBQ9KYWhuYXZpIEEgUGF0ZWxkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFBk1hcmtldGRkAgYPDxYCHwUFBDM4NjlkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQc1NDU4Mjk4ZGQCCQ8PFgIfBQUFMjEuNjhkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDA2IEphbiAyMDE0IGRkAgkPZBYYZg8PFgIfBQUGNTMxNjg3ZGQCAQ8PFgIfBQUTS2FydXR1cmkgR2xvYmFsIEx0ZGRkAgIPDxYCHwUFFVJoZWEgSG9sZGluZ3MgUHZ0IEx0ZGRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUGOTI2MzczZGQCBw8PFgIfBQUEMC4xMWRkAggPDxYCHwUFCDE3MDY4MzEzZGQCCQ8PFgIfBQUDMi4xZGQCCg8PFgIfBQUGMTMoNEEpZGQCCw9kFgICAQ8PFgIfBQUMMDMgSmFuIDIwMTQgZGQCCg9kFhhmDw8WAh8FBQY1MzE2ODdkZAIBDw8WAh8FBRNLYXJ1dHVyaSBHbG9iYWwgTHRkZGQCAg8PFgIfBQUVUmhlYSBIb2xkaW5ncyBQdnQgTHRkZGQCAw8PFgIfBQUKMDEvMDEvMjAxNGRkAgQPDxYCHwUFAVNkZAIFDw8WAh8FBQZNYXJrZXRkZAIGDw8WAh8FBQY5MjYzNzNkZAIHDw8WAh8FBQQwLj
ExZGQCCA8PFgIfBQUIMTcwNjgzMTNkZAIJDw8WAh8FBQMyLjFkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDA0IEphbiAyMDE0IGRkAgsPZBYYZg8PFgIfBQUGNTMxOTc4ZGQCAQ8PFgIfBQUXQW1iaWthIENvdHRvbiBNaWxscyBMdGRkZAICDw8WAh8FBQxQIFYgQ2hhbmRyYW5kZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFBk1hcmtldGRkAgYPDxYCHwUFAzEyM2RkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBjUyMDUwNGRkAgkPDxYCHwUFBDguODVkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDAxIEphbiAyMDE0IGRkAgwPZBYYZg8PFgIfBQUGNTMyMjE1ZGQCAQ8PFgIfBQUNQVhJUyBCYW5rIEx0ZGRkAgIPDxYCHwUFJkdlbmVyYWwgSW5zdXJhbmNlIENvcnBvcmF0aW9uIG9mIEluZGlhZGQCAw8PFgIfBQUXMDEvMDEvMjAxNCAtIDEyLzAzLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUGMjQ5MTY5ZGQCBw8PFgIfBQUGJm5ic3A7ZGQCCA8PFgIfBQUHNzQ1MzkzMGRkAgkPDxYCHwUFBDEuNTlkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDEzIE1hciAyMDE0IGRkAg0PZBYYZg8PFgIfBQUGNTMyMzQ5ZGQCAQ8PFgIfBQUiVHJhbnNwb3J0IENvcnBvcmF0aW9uIG9mIEluZGlhIEx0ZGRkAgIPDxYCHwUFG0thbWxlc2ggRGV2aSBXL28gQWppdCBTaW5naGRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUEMTQwMGRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBDQxNDBkZAIJDw8WAh8FBQQwLjAxZGQCCg8PFgIfBQUFMTMoNilkZAILD2QWAgIBDw8WAh8FBQwwNiBKYW4gMjAxNCBkZAIOD2QWGGYPDxYCHwUFBjUzMjQ4OGRkAgEPDxYCHwUFFkRpdmlzIExhYm9yYXRvcmllcyBMdGRkZAICDw8WAh8FBRRNYWRodXN1ZGFuYSBSYW8gRGl2aWRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUENzAwMGRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBjMwOTI5NmRkAgkPDxYCHwUFBDAuMjNkZAIKDw8WAh8FBQYxMyg0QSlkZAILD2QWAgIBDw8WAh8FBQwwMyBKYW4gMjAxNCBkZAIPD2QWGGYPDxYCHwUFBjUzMjQ4OGRkAgEPDxYCHwUFFkRpdmlzIExhYm9yYXRvcmllcyBMdGRkZAICDw8WAh8FBRRNYWRodXN1ZGFuYSBSYW8gRGl2aWRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGTWFya2V0ZGQCBg8PFgIfBQUENzAwMGRkAgcPDxYCHwUFBiZuYnNwO2RkAggPDxYCHwUFBjMwOTI5NmRkAgkPDxYCHwUFBDAuMjNkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDA0IEphbiAyMDE0IGRkAhAPZBYYZg8PFgIfBQUGNTMyNjMwZGQCAQ8PFgIfBQUUR29rYWxkYXMgRXhwb3J0cyBMdGRkZAICDw8WAh8FBRBEaW5lc2ggSiBIaW5kdWphZGQCAw8PFgIfBQUKMD
EvMDEvMjAxNGRkAgQPDxYCHwUFAVNkZAIFDw8WAh8FBQYmbmJzcDtkZAIGDw8WAh8FBQM1MDBkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQcyMjE1ODk5ZGQCCQ8PFgIfBQUENi40NGRkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFgICAQ8PFgIfBQUMMDIgSmFuIDIwMTQgZGQCEQ9kFhhmDw8WAh8FBQY1MzI2MzBkZAIBDw8WAh8FBRRHb2thbGRhcyBFeHBvcnRzIEx0ZGRkAgIPDxYCHwUFEk1hZGFubGFsIEogSGluZHVqYWRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFTZGQCBQ8PFgIfBQUGJm5ic3A7ZGQCBg8PFgIfBQUDMTAwZGQCBw8PFgIfBQUGJm5ic3A7ZGQCCA8PFgIfBQUHMTg5ODc5OWRkAgkPDxYCHwUFBDUuNTJkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDAyIEphbiAyMDE0IGRkAhIPZBYYZg8PFgIfBQUGNTMyNjQxZGQCAQ8PFgIfBQUQTmFuZGFuIERlbmltIEx0ZGRkAgIPDxYCHwUFEUNoaXJpcGFsIEV4aW0gTExQZGQCAw8PFgIfBQUKMDEvMDEvMjAxNGRkAgQPDxYCHwUFAUJkZAIFDw8WAh8FBQZNYXJrZXRkZAIGDw8WAh8FBQQxMzgwZGQCBw8PFgIfBQUGJm5ic3A7ZGQCCA8PFgIfBQUHNzAxMDM1OWRkAgkPDxYCHwUFBiZuYnNwO2RkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFgICAQ8PFgIfBQUMMjAgSmFuIDIwMTQgZGQCEw9kFhhmDw8WAh8FBQY1MzI2NDFkZAIBDw8WAh8FBRBOYW5kYW4gRGVuaW0gTHRkZGQCAg8PFgIfBQURQ2hpcmlwYWwgRXhpbSBMTFBkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFBk1hcmtldGRkAgYPDxYCHwUFBDQwNjhkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQc3MDE0NDI3ZGQCCQ8PFgIfBQUEMTUuNGRkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFgICAQ8PFgIfBQUMMjAgSmFuIDIwMTQgZGQCFA9kFhhmDw8WAh8FBQY1MzI3MTBkZAIBDw8WAh8FBRdTYWRiaGF2IEVuZ2luZWVyaW5nIEx0ZGRkAgIPDxYCHwUFEkFtYXJzaW5oIEogVmFnaGVsYWRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFCZGQCBQ8PFgIfBQUERVNPU2RkAgYPDxYCHwUFBDYyNTBkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQUxMTQwMmRkAgkPDxYCHwUFBDAuMDFkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDAyIEphbiAyMDE0IGRkAhUPZBYYZg8PFgIfBQUGNTMyNzEwZGQCAQ8PFgIfBQUXU2FkYmhhdiBFbmdpbmVlcmluZyBMdGRkZAICDw8WAh8FBQ5BdHVsIE4gUnVwYXJlbGRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFCZGQCBQ8PFgIfBQUERVNPU2RkAgYPDxYCHwUFBDUwMDBkZAIHDw8WAh8FBQYmbmJzcDtkZAIIDw8WAh8FBQUxMDAwMGRkAgkPDxYCHwUFBDAuMDFkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDAyIEphbiAyMDE0IGRkAhYPZBYYZg8PFgIfBQUGNTMyNzEwZGQCAQ8PFgIfBQUXU2FkYmhhdiBFbmdpbmVlcmluZyBMdGRkZAICDw8WAh8FBQ9WaWpheSBKIEthbHlhbmlkZAIDDw
8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFBEVTT1NkZAIGDw8WAh8FBQQyNTAwZGQCBw8PFgIfBQUGJm5ic3A7ZGQCCA8PFgIfBQUEMzIzOGRkAgkPDxYCHwUFBiZuYnNwO2RkAgoPDxYCHwUFBTEzKDYpZGQCCw9kFgICAQ8PFgIfBQUMMDIgSmFuIDIwMTQgZGQCFw9kFhhmDw8WAh8FBQY1MzI3MzVkZAIBDw8WAh8FBRtSIFN5c3RlbXMgSW50ZXJuYXRpb25hbCBMdGRkZAICDw8WAh8FBRRHTSBTb2x1dGlvbnMgUHZ0IEx0ZGRkAgMPDxYCHwUFCjAxLzAxLzIwMTRkZAIEDw8WAh8FBQFCZGQCBQ8PFgIfBQUKT2ZmIE1hcmtldGRkAgYPDxYCHwUFBTEzMjAwZGQCBw8PFgIfBQUDMC4xZGQCCA8PFgIfBQUGODUyOTYzZGQCCQ8PFgIfBQUENi43M2RkAgoPDxYCHwUFBjEzKDRBKWRkAgsPZBYCAgEPDxYCHwUFDDAyIEphbiAyMDE0IGRkAhgPZBYYZg8PFgIfBQUGNTMyNzM1ZGQCAQ8PFgIfBQUbUiBTeXN0ZW1zIEludGVybmF0aW9uYWwgTHRkZGQCAg8PFgIfBQUUR00gU29sdXRpb25zIFB2dCBMdGRkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFCk9mZiBNYXJrZXRkZAIGDw8WAh8FBQUxMzIwMGRkAgcPDxYCHwUFAzAuMWRkAggPDxYCHwUFBjg1Mjk2M2RkAgkPDxYCHwUFBDYuNzNkZAIKDw8WAh8FBQUxMyg2KWRkAgsPZBYCAgEPDxYCHwUFDDAyIEphbiAyMDE0IGRkAhkPZBYYZg8PFgIfBQUGNTMyNzM1ZGQCAQ8PFgIfBQUbUiBTeXN0ZW1zIEludGVybmF0aW9uYWwgTHRkZGQCAg8PFgIfBQUUR01VIEluZm9zb2Z0IFB2dCBMdGRkZAIDDw8WAh8FBQowMS8wMS8yMDE0ZGQCBA8PFgIfBQUBQmRkAgUPDxYCHwUFCk9mZiBNYXJrZXRkZAIGDw8WAh8FBQUxMzIwMGRkAgcPDxYCHwUFAzAuMWRkAggPDxYCHwUFBjc5OTQ2MGRkAgkPDxYCHwUFBDYuMzFkZAIKDw8WAh8FBQYxMyg0QSlkZAILD2QWAgIBDw8WAh8FBQwwMiBKYW4gMjAxNCBkZAIaD2QWGGYPDxYCHwUFBjUzMjczNWRkAgEPDxYCHwUFG1IgU3lzdGVtcyBJbnRlcm5hdGlvbmFsIEx0ZGRkAgIPDxYCHwUFFEdNVSBJbmZvc29mdCBQdnQgTHRkZGQCAw8PFgIfBQUKMDEvMDEvMjAxNGRkAgQPDxYCHwUFAUJkZAIFDw8WAh8FBQpPZmYgTWFya2V0ZGQCBg8PFgIfBQUFMTMyMDBkZAIHDw8WAh8FBQMwLjFkZAIIDw8WAh8FBQY3OTk0NjBkZAIJDw8WAh8FBQQ2LjMxZGQCCg8PFgIfBQUFMTMoNilkZAILD2QWAgIBDw8WAh8FBQwwMiBKYW4gMjAxNCBkZAIbDw8WAh8EaGRkAhcPDxYCHwVlZGQCGQ8WAh4JaW5uZXJodG1sBc8CKiBOb3RlOiAgQWxzbyByZWZlciBDb3Jwb3JhdGUgYW5ub3VuY2VtZW50cyBzZWN0aW9uIGZvciBsYXRlc3QgSW5zaWRlciBUcmFkaW5nIGluZm9ybWF0aW9uLjxhIGNsYXNzPSd0YWJsZWJsdWVsaW5rJyBocmVmPSdodHRwOi8vd3d3LmJzZWluZGlhLmNvbS9jb3Jwb3JhdGVzL2Fubi5hc3B4P2N1cnBnPTEmYW5uZmxhZz0xJmR0PTIwMTQwOTIxJmR1cj1QJmR0dG89MjAxNDA5MjcmY2F0PUluc2lkZXIlMjBUcmFkaW5nJT
IwLyUyMFNBU1Qmc2NyaXA9JmFubnR5cGU9QScgdGFyZ2V0PSdfYmxhbmsnIHN0eWxlPSdjdXJzb3I6cG9pbnRlcjtjb2xvcjojMDE2MDkxOyc+Q2xpY2sgaGVyZTwvYT5kGAIFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYCBSNjdGwwMCRDb250ZW50UGxhY2VIb2xkZXIxJGJ0blN1Ym1pdAUmY3RsMDAkQ29udGVudFBsYWNlSG9sZGVyMSRJbWFnZUJ1dHRvbjEFIGN0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkZ3ZEYXRhDzwrAAoBCAK9B2Q=', '__EVENTVALIDATION':'/wEWGQLoxuedDgKJsYefCgK6rpDlDwL8np6XAwLl44LQAgK0w8TlCwKHlNXODQKW0sv1BgLei4rQAwKExL/8BQLJsJHgCQLhsb3hCQLJsLmaCAL40JWiCgK9vIn8DAKN+qTQCAKN+qzQCAKN+rjQCAKN+rTQCAKN+qDQCAKN+rzQCAKN+ojQCAKN+oTQCALj8ODgCQKI2cKdDA=='}
# Cookie values sent with every page request (hard-coded by the author).
cookies = {
    '_gat': '1',
    'expandable': '0c',
    '_asc': '099bb6b2148be3ebcdb9a1f31af',
    '__auc': '70dfa206148b7750a132dbe342b',
    '_ga': 'GA1.2.35458869.1411827174',
}

# Browser-like request headers; the key must be spelled 'Origin' to be a
# recognised HTTP header name.
headers = {
    'Referer': 'http://www.bseindia.com/corporates/Insider_Trading.aspx?expandable=0',
    'Origin': 'http://www.bseindia.com',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
}

# Replay the same form post for pages 1..9, changing only __EVENTARGUMENT.
for i in range(1, 10):
    data['__EVENTARGUMENT'] = 'Page$' + str(i)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(data['__EVENTARGUMENT'])
    # Pass the headers explicitly; defining them alone does not send them.
    r = requests.post(url, data=data, cookies=cookies, headers=headers)
    if r.status_code != 200:
        # NOTE(review): the pasted code had lost its indentation; printing
        # the page number is assumed to belong to this failure branch.
        print(r.reason)
        print(i)
    root = html.fromstring(r.text)
    a = root.xpath('//*[@id="ctl00_ContentPlaceHolder1_gvData"]/tr')
    # Placeholder walk over the grid rows, skipping the first two rows and
    # the last one; per-row extraction is not implemented here.
    for b in a[2:-1]:
        # e.g. b.findall('td')[1].text_content()
        pass
答案 0 :(得分:0)
不要硬编码请求参数,而是从页面中解析出这些参数,然后沿着分页链接逐页请求:
import re
from lxml import html
import requests
def get_data(tree):
    """Print the text of the first cell of every ``TTRow`` table row.

    Args:
        tree: Parsed document exposing an lxml-style ``xpath()`` method.

    Returns:
        None. Each matching cell's ``text`` is written to stdout on its
        own line, in document order.
    """
    for cell in tree.xpath('//tr[@class="TTRow"]/td[1]'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(cell.text)
# Single source of truth for the page address: it is the GET target, the
# POST target and the Referer value.
url = "http://www.bseindia.com/corporates/Insider_Trading.aspx?expandable=0"

# Browser-like headers sent with every request.
headers = {
    'Referer': url,
    'Origin': 'http://www.bseindia.com',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
}

# One session is shared by the initial GET and the later page POSTs.
session = requests.Session()

# Fetch the landing page, which already contains the first page of results.
response = session.get(url, headers=headers)
root = html.fromstring(response.content)

# print() with a single argument behaves the same on Python 2 and 3.
print("PAGE 1")
get_data(root)
print("------")

# Form payload for the paging POSTs. The two hidden ASP.NET fields are read
# from the page that was just fetched instead of being hard-coded.
data = {
    'ctl00$ContentPlaceHolder1$GetQuote1$smartSearch': 'Enter Scrip Name / Code / ID',
    'ctl00$ContentPlaceHolder1$fmdate': '20140923',
    'ctl00$ContentPlaceHolder1$eddate': '20140929',
    'ctl00$ContentPlaceHolder1$txtDate': '',
    'ctl00$ContentPlaceHolder1$txtTodate': '',
    'ctl00$ContentPlaceHolder1$GetQuote1$hdnCode': '',
    'ctl00$ContentPlaceHolder1$ddlregulation': 'ALL',
    'myDestination': '#',
    'WINDOW_NAMER': '1',
    '__VIEWSTATE': root.find('.//input[@id="__VIEWSTATE"]').attrib['value'],
    '__EVENTVALIDATION': root.find('.//input[@id="__EVENTVALIDATION"]').attrib['value'],
}
# Pager links carry their postback as
# javascript:__doPostBack('<target>','<argument>'); capture both parts.
# A raw string keeps the \( and \) escapes for the regex engine instead of
# relying on Python passing unknown string escapes through.
pattern = re.compile(r"javascript:__doPostBack\('(.*?)','(.*?)'\)")

# The pager links are collected once from the first page before the loop
# starts, so rebinding ``root`` inside the body does not affect iteration.
# Numbering starts at 2 because page 1 was already handled by the GET.
for index, row in enumerate(root.xpath('//tr[@class="pgr"]//td/a'), start=2):
    # print() with a single argument behaves the same on Python 2 and 3.
    print("PAGE %d" % index)

    target, argument = pattern.search(row.attrib['href']).groups()
    data['__EVENTTARGET'] = target
    data['__EVENTARGUMENT'] = argument

    response = session.post(url, data=data, headers=headers)
    root = html.fromstring(response.content)

    get_data(root)
    print("------")
输出(每页第一列的内容):
PAGE 1
531807
532706
532959
531807
532840
533400
536507
533400
533304
506395
506395
507717
507717
506395
506395
506395
506395
531807
512393
532268
532466
532689
532689
532706
532706
------
PAGE 2
500228
500246
508969
508969
531802
532532
532532
532832
532832
500228
508969
508969
531802
500124
500124
511243
512393
512393
512393
512393
512393
512393
512393
512393
512393
------
PAGE 3
512393
512393
532466
532508
532799
532799
532799
532799
532799
532799
532799
532799
502355
502355
502355
501700
501700
500247
500247
500875
506615
500247
500875
500875
500875
------
PAGE 4
523838
523838
500875
511503
511503
511503
506615
506615
506615
506615
506615
506615
506615
506615
506615
506615
511503
511503
511503
506395
506395
524348
506685
506685
506685
------
PAGE 5
506685
------