我已经尝试了其他问题中列出的所有方法来调试此代码,但无法解决此问题。由于我是python的初学者,我无法使用请求更改urllib2方法。根据请求,我遇到了keyerror:' id'不存在。请尽早帮忙。
Python代码:
import os
os.environ['http_proxy']=''
import urllib
import urllib2
import json
import mysql.connector
import datetime
from config import config
import requests
def connect_db():
connection = mysql.connector.connect(**config)
return connection
def create_post_url(graph_url, APP_ID, APP_SECRET):
#create authenticated post URL
post_args = "/posts/?key=value&access_token=" + APP_ID + "|" + APP_SECRET
post_url = graph_url + post_args
return post_url
def render_to_json(graph_url):
#render graph url call to JSON
web_response = urllib2.Request(graph_url)
response= urllib2.urlopen(web_response)
readable_data= response.read()
json_data = json.loads(readable_data)
return json_data
def scrape_posts_by_date(graph_url, date, post_data, APP_ID, APP_SECRET):
#render URL to JSON
page_posts = render_to_json(graph_url)
#extract next page
next_page = page_posts["paging"]["next"]
#grab all posts
page_posts = page_posts["data"]
#boolean to tell us when to stop collecting
collecting = True
#for each post capture data
#for post in page_posts:
#for each post capture data
for post in page_posts:
try:
likes_count = get_likes_count(post["id"], APP_ID, APP_SECRET)
current_post = [post["id"], post["message"],
post["created_time"],
post["shares"]["count"]]
except Exception:
current_post = [ "error", "error", "error", "error"]
if current_post[2] != "error":
print date
print current_post[3]
#compare dates
if date <= current_post[3]:
post_data.append(current_post)
elif date > current_post[2]:
print "Done collecting"
collecting = False
break
#If we still don't meet date requirements, run on next page
if collecting == True:
scrape_posts_by_date(next_page, date, post_data, APP_ID, APP_SECRET)
return post_data
def get_likes_count(post_id, APP_ID, APP_SECRET):
#create Graph API Call
graph_url = "https://graph.facebook.com/"
likes_args = post_id + "/likes?summary=true&key=value&access_token" + APP_ID + "|" + APP_SECRET
likes_url = graph_url + likes_args
likes_json = render_to_json(likes_url)
#pick out the likes count
count_likes = likes_json["summary"]["total_count"]
return count_likes
def create_comments_url(graph_url, post_id, APP_ID, APP_SECRET):
#create Graph API Call
comments_args = post_id + "/comments/?key=value&access_token=" + APP_ID + "|" + APP_SECRET
comments_url = graph_url + comments_args
return comments_url
def get_comments_data(comments_url, comment_data, post_id):
#render URL to JSON
comments = render_to_json(comments_url)["data"]
#for each comment capture data
for comment in comments:
try:
current_comments = [comment["id"], comment["message"], comment["like_count"],
comment["created_time"], post_id]
print current_comments
comment_data.append(current_comments)
except Exception:
current_comments = ["error", "error", "error", "error", "error"]
#check if there is another page
try:
#extract next page
next_page = comments["paging"]["next"]
except Exception:
next_page = None
#if we have another page, recurse
if next_page is not None:
get_comments_data(next_page, comment_data, post_id)
else:
return comment_data
def main():
#simple data pull App Secret and App ID
APP_SECRET = "app_secret"
APP_ID = "app_id"
#to find go to page's FB page, at the end of URL find username
#e.g. http://facebook.com/walmart, walmart is the username
list_companies = ["walmart", "cisco", "pepsi", "facebook"]
graph_url = "https://graph.facebook.com/"
#the time of last weeks crawl
last_crawl = datetime.datetime.now() - datetime.timedelta(weeks=1)
last_crawl = last_crawl.isoformat()
#create db connection
connection = connect_db()
cursor = connection.cursor()
#SQL statement for adding Facebook page data to database
insert_info = ("INSERT INTO page_info "
"(fb_id, likes, talking_about, username)"
"VALUES (%s, %s, %s, %s)")
#SQL statement for adding post data
insert_posts = ("INSERT INTO post_info "
"(fb_post_id, message, likes_count, time_created, shares, page_id)"
"VALUES (%s, %s, %s, %s, %s, %s)")
#SQL statement for adding comment data
insert_comments = ("INSERT INTO comment_info "
"(comment_id, message, likes_count, time_created, post_id)"
"VALUES (%s, %s, %s, %s, %s)")
for company in list_companies:
#make graph api url with company username
current_page = graph_url + company
#open public page in facebook graph api
json_fbpage = render_to_json(current_page)
#gather our page level JSON Data
page_data = [json_fbpage["id"], json_fbpage["likes"],
json_fbpage["talking_about_count"],
json_fbpage["username"]]
print page_data
#extract post data
post_url = create_post_url(current_page, APP_ID, APP_SECRET)
post_data = []
post_data = scrape_posts_by_date(post_url, last_crawl, post_data)
print post_data
#insert the data we pulled into db
cursor.execute(insert_info, page_data)
#grab primary key
last_key = cursor.lastrowid
comment_data = []
#loop through and insert data
for post in post_data:
post.append(last_key)
cursor.execute(insert_posts, post)
#capture post id of data just inserted
post_key = cursor.lastrowid
print post_key
comment_url = create_comments_url(graph_url, post[0], APP_ID, APP_SECRET)
comments = get_comments_data(comment_url, comment_data, post_key)
#insert comments
for comment in comments:
cursor.execute(insert_comments, comment)
#commit the data to the db
connection.commit()
connection.close()
if __name__ == "__main__":
main()
这是我得到的错误:
Traceback (most recent call last):
File "script.py", line 210, in <module>
main()
File "script.py", line 164, in main
json_fbpage = render_to_json(current_page)
File "script.py", line 26, in render_to_json
response= urllib2.urlopen(web_response)
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 448, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request
答案 0 :(得分:0)
错误是由于页面信息URL请求您的访问令牌。如果在使用Graph API for Page Info时访问令牌不存在,那么它将抛出相同的错误。
您需要更改current_page变量中的详细信息以添加访问令牌。