我正在自学一些简单的 Python + Facebook Graph API 用法,遇到了一个奇怪的问题:
import time
import sys
import urllib2
import urllib
from json import loads
# Graph API search endpoint; the URL-encoded query term is appended to it.
base_url = "https://graph.facebook.com/search?q="

# Placeholders for the fields of a post; all start out unset.
post_id = post_type = user_id = message = created_time = None
def doit(hour):
    """Search the Graph API for "Plastic Planet" posts and print every page.

    Builds the search URL from the module-level ``base_url``, fetches it,
    prints each entry of the response's ``data`` list, and then follows
    ``paging.next`` until the response no longer offers a next page.

    Args:
        hour: Unused; kept so existing callers keep working.

    An ``urllib2.HTTPError`` is printed and ends the loop, because there is
    no response body to parse in that case.
    """
    search_term = "\"Plastic Planet\""
    encoded_search_term = urllib.quote(search_term)
    print(encoded_search_term)
    type_param = "&type=post"
    url = "%s%s%s" % (base_url, encoded_search_term, type_param)
    print(url)

    while True:
        try:
            response = urllib2.urlopen(url)
        except urllib2.HTTPError as e:
            # Continuing would read an unbound (or stale) response object.
            print(e)
            break

        try:
            content = loads(response.read())
        finally:
            response.close()

        print("==================================")
        for c in content.get("data", []):
            print(c)
        print("****************************************")

        # The last page may carry no "paging" object, or one without "next".
        next_url = content.get("paging", {}).get("next")
        if not next_url:
            # Nothing left to fetch; looping again would only repeat this URL.
            print(url)
            break

        print("current URL")
        print(url)
        print("next page!------------")
        url = next_url
        print(url)
我在这里想做的是借助 content["paging"]["next"] 自动翻页,遍历全部搜索结果。
但奇怪的是没有任何数据返回;我收到的是以下内容:
{"data":[]}
即使是在第一个循环中。
但是当我将URL复制到浏览器中时,返回了很多结果。
我还尝试了一个带有访问令牌的版本,同样的事情发生了。
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
好的,多亏了TryPyPy,这是我上一个问题的简化和编辑版本:
为什么:
import urllib2
# Minimal reproduction: fetch one page of Graph API search results and print
# the raw response body. The query string has to be introduced by "?"
# ("/search?q=..."); without it the whole string is treated as the URL path.
url = "https://graph.facebook.com/search?q=%22Plastic+Planet%22&type=post&limit=25&until=2010-12-29T19%3A54%3A56%2B0000"
response = urllib2.urlopen(url)
try:
    print(response.read())
finally:
    # Release the underlying connection even if reading fails.
    response.close()
结果却是 {"data":[]},
而同一个网址在浏览器中却能返回大量数据?
答案 0 :(得分:1)
通过在 Chrome(能得到大量数据)和 Firefox(得到空响应)之间反复试验,我最终把问题锁定在 'Accept-Language' 请求头上。其他修改应该只是表面上的,不过我不确定 CookieJar 是否必要。
import time
import sys
import urllib2
import urllib
from json import loads
import cookielib
# Graph API search endpoint; the URL-encoded query term is appended to it.
base_url = "https://graph.facebook.com/search?q="

# Placeholders for the fields of a post; all start out unset.
post_id = post_type = user_id = message = created_time = None

# Shared opener: stores cookies in `jar` and sends an explicit
# Accept-Language header with every request made through it.
jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar=jar))
opener.addheaders = [("Accept-Language", "en-US,en;q=0.8")]
def doit(hour):
    """Search the Graph API for "Plastic Planet" posts and print every page.

    Requests go through the module-level ``opener`` so its default headers
    are sent. From the second request on, the previously fetched URL is sent
    as the ``Referer`` of that single request.

    Args:
        hour: Unused; kept so existing callers keep working.
    """
    search_term = "\"Plastic Planet\""
    encoded_search_term = urllib.quote(search_term)
    print(encoded_search_term)
    type_param = "&type=post"
    url = "%s%s%s" % (base_url, encoded_search_term, type_param)
    print(url)

    referer = None
    while True:
        # Set the Referer on the request itself rather than appending to
        # opener.addheaders: the opener is shared, and appending on every
        # iteration would pile up one extra Referer header per page.
        request = urllib2.Request(url)
        if referer is not None:
            request.add_header('Referer', referer)

        response = opener.open(request)
        try:
            content = loads(response.read())
        finally:
            response.close()
        referer = url

        print("==================================")
        for c in content["data"]:
            print(c.keys())
        print("****************************************")

        # The last page may carry no "paging" object, or one without "next".
        next_url = content.get("paging", {}).get("next")
        if not next_url:
            print(content)
            print(url)
            break

        print("current URL")
        print(url)
        print("next page!------------")
        url = next_url
        print(url)
# Run the paginated search, passing 1 as the hour argument.
doit(hour=1)
这是一个清理过的最小工作版本:
import urllib2
import urllib
from json import loads
import cookielib
def doit(search_term, base_url = "https://graph.facebook.com/search?q="):
    """Print the keys of every post the Graph API search returns for a term.

    Fetches ``base_url`` + URL-encoded ``search_term`` + ``&type=post`` with
    an explicit ``Accept-Language`` header, prints the keys of each entry in
    the response's ``data`` list, and follows ``paging.next`` page by page.

    Args:
        search_term: Raw (not yet URL-encoded) search string.
        base_url: Search endpoint prefix that the encoded term is appended to.

    The loop ends when a response has no ``paging`` object (reported as
    "Empty response") or when ``paging`` offers no ``next`` link.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('Accept-Language', 'en-US,en;q=0.8')]

    encoded_search_term = urllib.quote(search_term)
    type_param = "&type=post"
    url = "%s%s%s" % (base_url, encoded_search_term, type_param)
    print(encoded_search_term)
    print(url)

    while True:
        response = opener.open(url)
        try:
            content = loads(response.read())
        finally:
            response.close()

        print("==================================")
        for c in content.get("data", []):
            print(c.keys())
        print("****************************************")

        paging = content.get("paging")
        if not paging:
            print("Empty response")
            print(content)
            break

        # A final page can still carry "paging" with only a "previous" link.
        next_url = paging.get("next")
        if not next_url:
            break
        url = next_url
# Run the search for the exact (double-quoted) phrase.
doit(search_term='"Plastic Planet"')