我使用的是 Python 2.7 和 Mechanize 0.2.5。当我尝试访问要抓取的页面上的表单时出现了错误。我想填写该表单并下载数据。我的代码如下：
# Reproduction script (Python 2 syntax; `cookielib` and `mechanize` are
# assumed to be imported earlier -- the imports are not shown here).

# Cookie jar that is attached to the browser below via set_cookiejar().
cj = cookielib.LWPCookieJar()
# Browser built with RobustFactory; per the note after this snippet, a plain
# mechanize.Browser() fails in the same way.
br = mechanize.Browser(factory=mechanize.RobustFactory())
br.set_cookiejar(cj)
# Toggle the browser's optional handlers (http-equiv, gzip, redirects,
# Referer header); robots handling is switched off with False.
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
# NOTE(review): max_time=1 presumably limits how long a refresh delay is
# honoured -- confirm against the mechanize documentation.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# Send a desktop Firefox User-Agent string with every request.
br.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Load the landing page, then follow the link whose text is
# 'List of Elected Members' by opening the request click_link() returns.
br.open('http://areaprofiler.gov.in')
req = br.click_link(text='List of Elected Members')
br.open(req)
# This is the call that fails: the traceback below shows br.forms() raising
# mechanize._form.ParseError ("OPTION outside of SELECT").
print br.forms()
# Select the first form on the page (index 0).
br.select_form(nr=0)
无论我使用 br = mechanize.Browser(factory=mechanize.RobustFactory())
还是 br = mechanize.Browser()，
结果都一样，都会出现下面的错误：
>>> print br.forms()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Library/Python/2.7/site-packages/mechanize/_mechanize.py", line 420, in forms
return self._factory.forms()
File "/Library/Python/2.7/site-packages/mechanize/_html.py", line 557, in forms
self._forms_factory.forms())
File "/Library/Python/2.7/site-packages/mechanize/_html.py", line 237, in forms
_urlunparse=_rfc3986.urlunsplit,
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 844, in ParseResponseEx
_urlunparse=_urlunparse,
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 981, in _ParseFileEx
fp.feed(data)
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 781, in feed
self.bs_base_class.feed(self, data)
File "/Library/Python/2.7/site-packages/mechanize/_beautifulsoup.py", line 690, in feed
SGMLParser.feed(self, text)
File "/Library/Python/2.7/site-packages/mechanize/_sgmllib_copy.py", line 110, in feed
self.goahead(0)
File "/Library/Python/2.7/site-packages/mechanize/_sgmllib_copy.py", line 144, in goahead
k = self.parse_starttag(i)
File "/Library/Python/2.7/site-packages/mechanize/_sgmllib_copy.py", line 302, in parse_starttag
self.finish_starttag(tag, attrs)
File "/Library/Python/2.7/site-packages/mechanize/_sgmllib_copy.py", line 347, in finish_starttag
self.handle_starttag(tag, method, attrs)
File "/Library/Python/2.7/site-packages/mechanize/_sgmllib_copy.py", line 387, in handle_starttag
method(attrs)
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 735, in do_option
_AbstractFormParser._start_option(self, attrs)
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 480, in _start_option
raise ParseError("OPTION outside of SELECT")
mechanize._form.ParseError: OPTION outside of SELECT
感谢。