Twitter刮刀打印而不是插入MySQL表

时间:2017-08-24 19:55:48

标签: python mysql python-2.7 twitter tweepy

我遇到以下脚本的问题。脚本运行时没有错误,但在括号中重复打印变量verified,而不是打印收集推文的时间等。

它也没有将数据插入到我为创建数据而创建的MySQL表中。

任何帮助将不胜感激!

import tweepy
import json
import MySQLdb
from dateutil import parser

WORDS = ['X', 'X', 'X', 'X']

#These are twitter API keys
CONSUMER_KEY = "X"
CONSUMER_SECRET = "X"
ACCESS_TOKEN = "X"
ACCESS_TOKEN_SECRET = "X"

#MySQL db information
HOST = "localhost"
USER = "root"
PASSWD = "X"
DATABASE = "X"

# This function takes the desired scraping values and stores it
def store_data(tweet_id, screen_name, created_at, verified, followers_count, favourites_count, friends_count, statuses_count, text):
    db=MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO twitter (tweet_id, screen_name, created_at, verified, followers_count, favourites_count, friends_count, statuses_count, text) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(insert_query, (tweet_id, screen_name, created_at, verified, followers_count, favourites_count, friends_count, statuses_count, text))
    db.commit()
    cursor.close()
    db.close()
    return

class StreamListener(tweepy.StreamListener):
    #This is a class provided by tweepy to access the Twitter Streaming API.
    #It collects tweets in realtime

    def on_connect(self):
        # Called initially to connect to the Streaming API
        print("You are now connected to the streaming API.")

    def on_error(self, status_code):
        # On error - if an error occurs, display the error / status code 
        print('An Error has occured: ' + repr(status_code))
        return False

    def on_data(self, data):
        #This is the meat of the script...it connects to your mongoDB and stores the tweet
        try:
           # Decode the JSON from Twitter
            datajson = json.loads(data)

            #grab the wanted data from the Tweet
            tweet_id = datajson['id']
            screen_name = datajson['user']['screen_name']
            verified = datajson['verified']
            followers_count = datajson['followers_count']
            favourites_count = datajson['favourites_count']
            friends_count = datajson['friends_count']
            statuses_count = datajson['statuses_count']
            text = datajson['text']
            created_at = parser.parse(datajson['created_at'])

            #print out a message to the screen that we have collected a tweet
            print("Tweet collected at " + str(created_at))

            #insert the data into the MySQL database
            store_data(tweet_id, screen_name, created_at, verified, followers_count, favourites_count, friends_count, statuses_count, text)

        except Exception as e:
           print(e)

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
#Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS)

0 个答案:

没有答案