我想登录 Reddit.com 网站,导航到页面的特定区域,然后提交评论。我看不出这段代码有什么问题,但它不起作用,Reddit 网站上没有反映出任何变化。
import mechanize
import cookielib
def main():
    """Log in to reddit via its login form, open a comment page and submit a comment form.

    The browser is configured with a cookie jar so the login session is
    carried over to the comment page. The function takes no arguments and
    returns nothing; all work is done through network requests.
    """
    # Browser
    br = mechanize.Browser()
    # Cookie Jar
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    # Browser options
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    # Follows refresh 0 but not hangs on refresh > 0
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    # Opens the site to be navigated
    r = br.open('http://www.reddit.com')
    html = r.read()
    # Select the second (index one) form
    br.select_form(nr=1)
    # User credentials
    br.form['user'] = 'DUMMYUSERNAME'
    br.form['passwd'] = 'DUMMYPASSWORD'
    # Login
    br.submit()
    # Open up comment page
    r = br.open('http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/')
    html = r.read()
    # Text box is the 8th form on the page (which, I believe, is the text area)
    br.select_form(nr=7)
    # Change 'text' value to a testing string
    br.form['text'] = "this is an automated test"
    # Submit the information
    # NOTE(review): a plain form submit reportedly does not register the
    # comment, presumably because the page's JavaScript onsubmit handler adds
    # the 'uh' and 'id' values that this request lacks -- confirm against the
    # live page.
    br.submit()
这有什么问题?
答案 0 :(得分:19)
我强烈建议尽可能使用 API,但下面的代码对我有效(对您的示例帖子无效,因为它已被删除,但对任何有效的帖子都有效):
#!/usr/bin/env python
import mechanize
import cookielib
import urllib
import logging
import sys
def main(user='user', passwd='passwd',
         posting='http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/',
         rval='PoopSandwiches', text="Your text here!"):
    """Log in to reddit and post a comment by calling its comment endpoint directly.

    Args:
        user: Value for the login form's 'user' field.
        passwd: Value for the login form's 'passwd' field.
        posting: URL of the comment page to reply on; also sent as Referer.
        rval: Value sent as the 'r' field (the subreddit name in the
            default example).
        text: Body of the comment to post.

    Returns:
        The response object returned by ``mechanize.urlopen`` for the
        POST to ``/api/comment``.
    """
    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # Log in through the second (index one) form on the front page.
    br.open('http://www.reddit.com')
    br.select_form(nr=1)
    br.form['user'] = user
    br.form['passwd'] = passwd
    br.submit()

    # Open the comment page and collect the values the POST needs.
    br.open(posting)
    # The 'uh' value is read from the first form on the page.
    br.select_form(nr=0)
    uh = br.form['uh']
    br.select_form(nr=7)
    thing_id = br.form['thing_id']
    # The id that gets posted is the form id with a '#' prepended.
    form_id = '#' + br.form.attrs['id']

    # A list of pairs keeps the field order fixed; urlencode escapes every
    # value (spaces become '+') instead of a hand-rolled quote/replace.
    fields = [
        ('thing_id', thing_id),
        ('text', text),
        ('id', form_id),
        ('r', rval),
        ('uh', uh),
        ('renderstyle', 'html'),
    ]
    new_data = urllib.urlencode(fields)

    # Not sure which of these headers are really needed, but it works with
    # all of them, so they are all included.
    req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
    req.add_header('Referer', posting)
    req.add_header('Accept', ' application/json, text/javascript, */*')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    # Attach the session cookies from the login to this standalone request.
    cj.add_cookie_header(req)
    return mechanize.urlopen(req)


if __name__ == '__main__':
    main()
关闭 JavaScript 后看看 reddit 如何处理评论会很有趣。目前有不少"魔法"发生在提交评论时调用的 `onsubmit` 函数中,`uh` 和 `id` 的值就是在那里添加的。