遇到urllib2.HTTPError:HTTP错误400:错误请求

时间:2016-04-09 20:53:02

标签: python json urllib2 urllib

我已经尝试了其他问题中列出的所有方法来调试此代码,但无法解决此问题。由于我是python的初学者,我无法使用请求更改urllib2方法。根据请求,我遇到了keyerror:' id'不存在。请尽早帮忙。

Python代码:

import os
os.environ['http_proxy']=''
import urllib
import urllib2
import json
import mysql.connector
import datetime
from config import config
import requests

def connect_db():
    connection = mysql.connector.connect(**config)
    return connection


def create_post_url(graph_url, APP_ID, APP_SECRET): 
    #create authenticated post URL
    post_args = "/posts/?key=value&access_token=" + APP_ID + "|" + APP_SECRET
    post_url = graph_url + post_args

    return post_url

def render_to_json(graph_url):
    #render graph url call to JSON
    web_response = urllib2.Request(graph_url)
    response= urllib2.urlopen(web_response)
    readable_data= response.read()
    json_data = json.loads(readable_data)

    return json_data

def scrape_posts_by_date(graph_url, date, post_data, APP_ID, APP_SECRET):
    #render URL to JSON
    page_posts = render_to_json(graph_url)

    #extract next page
    next_page = page_posts["paging"]["next"]

    #grab all posts
    page_posts = page_posts["data"]

    #boolean to tell us when to stop collecting
    collecting = True

    #for each post capture data
    #for post in page_posts:
            #for each post capture data
    for post in page_posts:
        try:
            likes_count = get_likes_count(post["id"], APP_ID, APP_SECRET)
            current_post = [post["id"], post["message"],
                    post["created_time"], 
                                        post["shares"]["count"]]        

        except Exception:
            current_post = [ "error", "error", "error", "error"]

        if current_post[2] != "error":
            print date
            print current_post[3]
            #compare dates
            if date <= current_post[3]:
                post_data.append(current_post)

            elif date > current_post[2]:
                print "Done collecting"
                collecting = False
                break


    #If we still don't meet date requirements, run on next page         
    if collecting == True:
        scrape_posts_by_date(next_page, date, post_data, APP_ID, APP_SECRET)

    return post_data

def get_likes_count(post_id, APP_ID, APP_SECRET):
    #create Graph API Call
    graph_url = "https://graph.facebook.com/" 
    likes_args = post_id + "/likes?summary=true&key=value&access_token" + APP_ID + "|" + APP_SECRET
    likes_url = graph_url + likes_args
    likes_json = render_to_json(likes_url)

    #pick out the likes count
    count_likes = likes_json["summary"]["total_count"]

    return count_likes

def create_comments_url(graph_url, post_id, APP_ID, APP_SECRET):
    #create Graph API Call
    comments_args = post_id + "/comments/?key=value&access_token=" + APP_ID + "|" + APP_SECRET
    comments_url = graph_url + comments_args

    return comments_url

def get_comments_data(comments_url, comment_data, post_id):
    #render URL to JSON
    comments = render_to_json(comments_url)["data"]

    #for each comment capture data
    for comment in comments:
        try:
            current_comments = [comment["id"], comment["message"], comment["like_count"],
                        comment["created_time"], post_id]
            print current_comments
            comment_data.append(current_comments)

        except Exception:
            current_comments = ["error", "error", "error", "error", "error"]


    #check if there is another page
    try:
        #extract next page
        next_page = comments["paging"]["next"]
    except Exception:
        next_page = None


    #if we have another page, recurse
    if next_page is not None:
        get_comments_data(next_page, comment_data, post_id)
    else:
        return comment_data

def main():
    #simple data pull App Secret and App ID
    APP_SECRET = "app_secret"
    APP_ID = "app_id"

    #to find go to page's FB page, at the end of URL find username
    #e.g. http://facebook.com/walmart, walmart is the username
    list_companies = ["walmart", "cisco", "pepsi", "facebook"]
    graph_url = "https://graph.facebook.com/"

    #the time of last weeks crawl
    last_crawl = datetime.datetime.now() - datetime.timedelta(weeks=1)
    last_crawl = last_crawl.isoformat()

    #create db connection
    connection = connect_db()
    cursor = connection.cursor()

    #SQL statement for adding Facebook page data to database
    insert_info = ("INSERT INTO page_info "
                    "(fb_id, likes, talking_about, username)"
                    "VALUES (%s, %s, %s, %s)")

    #SQL statement for adding post data             
    insert_posts = ("INSERT INTO post_info "
                    "(fb_post_id, message, likes_count, time_created, shares, page_id)"
                    "VALUES (%s, %s, %s, %s, %s, %s)")

    #SQL statement for adding comment data
    insert_comments = ("INSERT INTO comment_info "
                       "(comment_id, message, likes_count, time_created, post_id)"
                       "VALUES (%s, %s, %s, %s, %s)")

    for company in list_companies:
        #make graph api url with company username
        current_page = graph_url + company

        #open public page in facebook graph api
        json_fbpage = render_to_json(current_page)


        #gather our page level JSON Data
        page_data = [json_fbpage["id"], json_fbpage["likes"],
                     json_fbpage["talking_about_count"],
                     json_fbpage["username"]]
        print page_data

        #extract post data
        post_url = create_post_url(current_page, APP_ID, APP_SECRET)
        post_data = []
        post_data = scrape_posts_by_date(post_url, last_crawl, post_data)

        print post_data

        #insert the data we pulled into db
        cursor.execute(insert_info, page_data)

        #grab primary key
        last_key = cursor.lastrowid

        comment_data = []

        #loop through and insert data
        for post in post_data:
            post.append(last_key)
            cursor.execute(insert_posts, post)

            #capture post id of data just inserted
            post_key = cursor.lastrowid
            print post_key
            comment_url = create_comments_url(graph_url, post[0], APP_ID, APP_SECRET)
            comments = get_comments_data(comment_url, comment_data, post_key)

            #insert comments
            for comment in comments:
                cursor.execute(insert_comments, comment)

        #commit the data to the db
        connection.commit()

    connection.close()


if __name__ == "__main__":
    main()    

这是我得到的错误:

Traceback (most recent call last):
  File "script.py", line 210, in <module>
    main()    
  File "script.py", line 164, in main
    json_fbpage = render_to_json(current_page)
  File "script.py", line 26, in render_to_json
    response= urllib2.urlopen(web_response)
  File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
    return _opener.open(url, data, timeout)
  File "/usr/lib/python2.7/urllib2.py", line 410, in open
    response = meth(req, response)
  File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/lib/python2.7/urllib2.py", line 448, in error
    return self._call_chain(*args)
  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request

1 个答案:

答案 0 :(得分:0)

错误是由于页面信息URL请求您的访问令牌。如果在使用Graph API for Page Info时访问令牌不存在,那么它将抛出相同的错误。

您需要更改current_page变量中的详细信息以添加访问令牌。