import mechanize
from bs4 import BeautifulSoup
import urllib2
import cookielib
# Log in through the first form on the target page and print the body of
# the response that comes back after submitting it.
br = mechanize.Browser()
cj = cookielib.CookieJar()
br.set_cookiejar(cj)  # cookies received by the browser are stored in cj
br.set_handle_robots(False)  # turn off the browser's robots.txt handling
br.open("*******")
br.select_form(nr=0)  # nr=0 picks the first form on the page
for field, value in (
    ('ctl00$BodyContent$Username', '****'),
    ('ctl00$BodyContent$Password', '****'),
):
    br.form[field] = value
br.submit()
page_source = br.response().read()
# A parenthesised single-argument print behaves the same on Python 2.
print(page_source)
目前,这段代码会抓取网页并返回一些资源,但不会返回页面的实际 HTML(正文内容等)。应该如何修改,才能获取到页面的 HTML?
答案 0 :(得分:0)
你已经很接近了。你应该使用 BeautifulSoup 把返回的标签解析成格式良好的 XML 结构。
import mechanize
from bs4 import BeautifulSoup
import urllib2
import cookielib
# Log in through the first form on the target page, then parse the response
# body with BeautifulSoup so the markup can be printed or traversed.
cj = cookielib.CookieJar()
br = mechanize.Browser()
br.set_handle_robots(False)  # turn off the browser's robots.txt handling
br.set_cookiejar(cj)  # cookies received by the browser are stored in cj
br.open("*******")
br.select_form(nr=0)  # nr=0 picks the first form on the page
br.form['ctl00$BodyContent$Username'] = '****'
br.form['ctl00$BodyContent$Password'] = '****'
br.submit()
# Name the parser explicitly: without it bs4 falls back to whichever parser
# happens to be installed (and emits a warning), so the parsed tree can
# differ from one machine to another.
soup = BeautifulSoup(br.response().read(), "html.parser")
# A parenthesised single-argument print behaves the same on Python 2.
print(soup)
或者:
# Alternative to printing the whole document at once: print each top-level
# node of the parsed tree separately. The loop body must be indented,
# otherwise Python rejects the statement with an IndentationError.
for row in soup:
    print(row)