我想做的是登录 Wallbase.cc,然后获取 NSFW 壁纸的标签(查看这类壁纸需要先登录)。看起来登录是成功的,但是当我尝试访问壁纸页面时,服务器返回 403 错误。这是我正在使用的代码:
import urllib2
import urllib
import cookielib
import re
# Log in to wallbase.cc through a cookie-aware opener, then download a
# wallpaper page using the same session cookies.
username = 'xxxx'
password = 'xxxx'

# The CookieJar stores cookies set by responses so that later requests made
# through the same opener send them back.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Installing the opener makes plain urllib2.urlopen() calls go through it too.
urllib2.install_opener(opener)

payload = {
    # NOTE(review): this CSRF token is hard-coded; presumably the site issues
    # a fresh one per session -- confirm whether it has to be read from the
    # login page before posting.
    'csrf': '371b3b4bd0d1990048354e2056cd36f20b1d7088',
    'ref': 'aHR0cDovL3dhbGxiYXNlLmNjLw==',
    'username': username,
    'password': password,
}
login_data = urllib.urlencode(payload)
req = urllib2.Request('http://wallbase.cc/user/login', login_data)

# Actually submit the login form. Constructing the Request object sends
# nothing by itself; without this call the cookie jar stays empty and the
# wallpaper request below is made without a logged-in session.
login_response = opener.open(req)
try:
    login_response.read()
finally:
    login_response.close()

url = "http://wallbase.cc/wallpaper/2098029"
# Opens url of each pic (sent with the cookies collected during login).
usock = urllib2.urlopen(url)
try:
    data = usock.read()
finally:
    # Close the response even if reading the body fails.
    usock.close()
有什么想法吗? 顺便说一下,使用的壁纸实际上并不是NSFW,它被错误地标记了。
答案 0 :(得分:0)
您可以尝试使用 mechanize 这个库:http://wwwsearch.sourceforge.net/mechanize/
这是一个例子:
import re
import mechanize
br = mechanize.Browser()
br.open("http://www.example.com/")
# follow second link with element text matching regular expression
response1 = br.follow_link(text_regex=r"cheese\s*shop", nr=1)
assert br.viewing_html()
print br.title()
print response1.geturl()
print response1.info() # headers
print response1.read() # body
br.select_form(name="order")
# Browser passes through unknown attributes (including methods)
# to the selected HTMLForm.
br["cheeses"] = ["mozzarella", "caerphilly"] # (the method here is __setitem__)
# Submit current form. Browser calls .close() on the current response on
# navigation, so this closes response1
response2 = br.submit()
# print currently selected form (don't call .submit() on this, use br.submit())
print br.form
response3 = br.back() # back to cheese shop (same data as response1)
# the history mechanism returns cached response objects
# we can still use the response, even though it was .close()d
response3.get_data() # like .seek(0) followed by .read()
response4 = br.reload() # fetches from server
for form in br.forms():
print form
# .links() optionally accepts the keyword args of .follow_/.find_link()
for link in br.links(url_regex="python.org"):
print link
br.follow_link(link) # takes EITHER Link instance OR keyword args
br.back()