尝试在某些推文上运行情绪分析时出现 “KeyError” 错误

时间:2019-12-19 00:28:38

标签: python pandas dataframe pycharm sentiment-analysis

我得到的错误是:

Traceback (most recent call last):
  File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'tweets'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/Ben/Desktop/Python Projects/roboticsassignment/tweets.py", line 133, in <module>
    df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['tweets']])
  File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\frame.py", line 2995, in __getitem__
    indexer = self.columns.get_loc(key)
  File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'tweets'

Process finished with exit code 1

仅当我尝试运行此行代码时,才会出现此问题

df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['tweets']])

该文件的整个代码(请注意,由于我当前未使用某些功能,因此我已将其注释掉了):

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
from tweepy import Cursor

from textblob import TextBlob

import twitterApp
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt

# Widen the console output so pandas DataFrame rows print on a single line
# instead of wrapping (the default width truncates wide frames).
desired_width=320
pd.set_option('display.width', desired_width)
# Keep numpy array printing consistent with the pandas display width.
np.set_printoptions(linewidth=desired_width)
# Show up to 10 columns before pandas elides the rest with "...".
pd.set_option('display.max_columns',10)

#Twitter Client
class TwitterClient():
    """Thin wrapper around the tweepy API client for one (optional) user.

    If twitter_user is None, tweepy operates on the authenticated account.
    """

    def __init__(self, twitter_user=None):
        # Authenticate once and reuse the same API handle for all requests.
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

        self.twitter_user = twitter_user

    def get__twitter_client_api(self):
        """Return the underlying tweepy API client."""
        return self.twitter_client

    def get_tweets(self, num_tweets):
        """Return up to num_tweets statuses from the user's timeline.

        BUG FIX: the original returned from inside the loop, so at most
        one tweet was ever collected regardless of num_tweets.
        """
        user_tweets = []
        for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(num_tweets):
            user_tweets.append(tweet)
        return user_tweets

    # def get_friend_list(self, num_friends):
    #     friend_list = []
    #     for friend in Cursor(self.twitter_client.friends).items(num_friends):
    #         friend_list.apend(friend)
    #         return friend_list


# Authenticator
class TwitterAuthenticator():
    """Builds a tweepy OAuth handler from the credentials in twitterApp."""

    def authenticate_twitter_app(self):
        """Return an OAuthHandler loaded with the app and access credentials.

        Consumer key/secret and access token/secret are read from the
        twitterApp module so no credentials live in this file.
        """
        handler = OAuthHandler(twitterApp.CONSUMER_KEY, twitterApp.CONSUMER_SECRET)
        handler.set_access_token(twitterApp.ACCESS_TOKEN, twitterApp.ACCESS_TOKEN_SECRET)
        return handler

"""
A class for updating a live stream of tweets and processing them
"""
class TweetStream():
    """Opens a live, keyword-filtered tweet stream and records it to a file."""

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, keywords):
        """Stream tweets matching *keywords*, handing each to a TweetListener.

        The listener reports status / errors and appends raw tweet data to
        fetched_tweets_filename.
        """
        stream_listener = TweetListener(fetched_tweets_filename)
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        live_stream = Stream(credentials, stream_listener)
        live_stream.filter(track=keywords)


"""
A listener class that prints received tweets to stdout
"""
class TweetListener(StreamListener):
    """Stream listener that prints received tweets and appends them to a file."""

    def __init__(self, fetched_tweets_filename):
        # BUG FIX: initialise the tweepy base class; the original skipped
        # super().__init__(), leaving StreamListener's internal state
        # (e.g. its default API object) unset.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print the raw tweet JSON and append it to the output file.

        Returns True in all cases so the stream keeps running even after
        a write error (best-effort logging preserved from the original).
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
            return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            return True

    def on_error(self, status):
        """Disconnect on rate limiting; otherwise just log the status code."""
        if status == 420:
            # Returning False tells tweepy to close the stream when the
            # rate limit (HTTP 420) is hit, avoiding escalating back-off.
            return False
        print(status)

# Class for analysing and categorising twitter content
class TweetAnalysis():
    """Cleans tweet text, scores sentiment, and tabulates tweets as a DataFrame."""

    def clean_tweet(self, tweet):
        """Strip @mentions, links and special characters; collapse whitespace."""
        stripped = re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet)
        return ' '.join(stripped.split())

    def analyse_sentiment(self, tweet):
        """Return 1 for positive, 0 for neutral, -1 for negative polarity."""
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity
        if polarity > 0:
            return 1
        if polarity < 0:
            return -1
        return 0

    def tweets_to_data_frame(self, tweets):
        """Build a DataFrame from tweepy statuses.

        The text column is named 'Tweets' (capital T); the metadata columns
        are id, len, date, source, likes and retweets.
        """
        frame = pd.DataFrame(data=[status.text for status in tweets], columns=['Tweets'])
        frame['id'] = np.array([status.id for status in tweets])
        frame['len'] = np.array([len(status.text) for status in tweets])
        frame['date'] = np.array([status.created_at for status in tweets])
        frame['source'] = np.array([status.source for status in tweets])
        frame['likes'] = np.array([status.favorite_count for status in tweets])
        frame['retweets'] = np.array([status.retweet_count for status in tweets])
        return frame

if __name__=="__main__":
    twitter_client = TwitterClient()
    tweet_analyser = TweetAnalysis()
    api = twitter_client.get__twitter_client_api()

    tweets = api.user_timeline(screen_name="realDonaldTrump", count=200)
    # Creating a dataframe for the content gathered from the API
    df = tweet_analyser.tweets_to_data_frame(tweets)
    # BUG FIX: tweets_to_data_frame names the text column 'Tweets'
    # (capital T); indexing df['tweets'] raised KeyError: 'tweets'.
    df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['Tweets']])
    print(df.head(10))

    #shows what we can ask for from the tweets
    ##print(dir(tweets[0]))
    ##print(tweets[0].retweet_count)

    # figure out average length of tweets out of those collected
    # print(np.mean(df['len']))

    # get number of likes for most liked tweet
    # print(np.max(df['likes']))

    # get number of retweets
    # print(np.max(df['retweets']))

    # Time series
    # time_likes = pd.Series(data=df['likes'].values, index=df['date'])
    # time_likes.plot(figsize=(16, 4), color='r')
    # plt.show()

    # time_retweets = pd.Series(data=df['retweets'].values, index=df['date'])
    # time_retweets.plot(figsize=(16, 4), color='r')
    # plt.show()

    # time_likes = pd.Series(data=df['likes'].values, index=df['date'])
    # time_likes.plot(figsize=(16, 4), label="likes", legend=True)
    #
    # time_retweets = pd.Series(data=df['retweets'].values, index=df['date'])
    # time_retweets.plot(figsize=(16, 4), label="retweets", legend=True)
    # plt.show()

如果您能看出任何可能导致此错误的明显原因,请帮帮我这个初学者——这是一个大学项目,我现在有点卡住了。提前致谢。

0 个答案:

没有答案