我得到的错误是:
Traceback (most recent call last):
File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'tweets'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Ben/Desktop/Python Projects/roboticsassignment/tweets.py", line 133, in <module>
df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['tweets']])
File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\frame.py", line 2995, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Ben\Desktop\Python Projects\roboticsassignment\venv\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'tweets'
Process finished with exit code 1
仅当我尝试运行此行代码时,才会出现此问题
df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['tweets']])
该文件的整个代码(请注意,由于我当前未使用某些功能,因此我已将其注释掉了):
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
from tweepy import Cursor
from textblob import TextBlob
import twitterApp
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
desired_width=320
pd.set_option('display.width', desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option('display.max_columns',10)
#Twitter Client
class TwitterClient():
def __init__(self, twitter_user=None):
self.auth = TwitterAuthenticator().authenticate_twitter_app()
self.twitter_client = API(self.auth)
self.twitter_user = twitter_user
def get__twitter_client_api(self):
return self.twitter_client
def get_tweets(self, num_tweets):
UserTweets = []
for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(num_tweets):
UserTweets.append(tweet)
return UserTweets
# def get_friend_list(self, num_friends):
# friend_list = []
# for friend in Cursor(self.twitter_client.friends).items(num_friends):
# friend_list.apend(friend)
# return friend_list
# Authenticator
class TwitterAuthenticator():
def authenticate_twitter_app(self):
# Takes consumer key and consumer key secret from twitterApp file as arguments for authentication
auth = OAuthHandler(twitterApp.CONSUMER_KEY, twitterApp.CONSUMER_SECRET)
# Takes access token and access token secret from twitterApp file as arguments for authentication
auth.set_access_token(twitterApp.ACCESS_TOKEN, twitterApp.ACCESS_TOKEN_SECRET)
return auth
"""
A class for updating a live stream of tweets and processing them
"""
class TweetStream():
def __init__(self):
self.twitter_authenticator = TwitterAuthenticator()
def stream_tweets(self, fetched_tweets_filename, keywords):
# Listener processes the status of the programme / if there was an error, or if data has been correctly printed
listener = TweetListener(fetched_tweets_filename)
auth = self.twitter_authenticator.authenticate_twitter_app()
tweets = Stream(auth, listener)
tweets.filter(track=keywords)
"""
A listener class that prints received tweets to stdout
"""
class TweetListener(StreamListener):
def __init__(self, fetched_tweets_filename):
self.fetched_tweets_filename = fetched_tweets_filename
# Takes in data from StreamListener and prints it
def on_data(self, data):
try:
print(data)
with open(self.fetched_tweets_filename, 'a') as tf:
tf.write(data)
return True
except BaseException as e:
print("Error on_data: %s" % str(e))
return True
# Prints error(status) if one should occur
def on_error(self, status):
# Returning false in cases where rate limit occurs
if status == 420:
return False
print(status)
# Class for analysing and categorising twitter content
class TweetAnalysis():
def clean_tweet(self, tweet):
#Removes special characters from tweets, including hyperlinks, then returns the tweet
return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
def analyse_sentiment (self, tweet):
analysis = TextBlob(self.clean_tweet(tweet))
if analysis.sentiment.polarity > 0:
return 1
elif analysis.sentiment.polarity == 0:
return 0
else:
return -1
def tweets_to_data_frame(self, tweets):
#looping through all tweets gathered and extracting the "text" field from each
df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweets'])
df['id'] = np.array([tweet.id for tweet in tweets])
df['len'] = np.array([len(tweet.text) for tweet in tweets])
df['date'] = np.array([tweet.created_at for tweet in tweets])
df['source'] = np.array([tweet.source for tweet in tweets])
df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
return df
if __name__=="__main__":
twitter_client = TwitterClient()
tweet_analyser = TweetAnalysis()
api = twitter_client.get__twitter_client_api()
tweets = api.user_timeline(screen_name="realDonaldTrump", count=200)
# Creating a dataframe for the content gathered from the API
df = tweet_analyser.tweets_to_data_frame(tweets)
df['sentiment'] = np.array([tweet_analyser.analyse_sentiment(tweet) for tweet in df['tweets']])
print(df.head(10))
#shows what we can ask for from the tweets
##print(dir(tweets[0]))
##print(tweets[0].retweet_count)
# figure out average length of tweets out of those collected
# print(np.mean(df['len']))
# get number of likes for most liked tweet
# print(np.max(df['likes']))
# get number of retweets
# print(np.max(df['retweets']))
# Time series
# time_likes = pd.Series(data=df['likes'].values, index=df['date'])
# time_likes.plot(figsize=(16, 4), color='r')
# plt.show()
# time_retweets = pd.Series(data=df['retweets'].values, index=df['date'])
# time_retweets.plot(figsize=(16, 4), color='r')
# plt.show()
# time_likes = pd.Series(data=df['likes'].values, index=df['date'])
# time_likes.plot(figsize=(16, 4), label="likes", legend=True)
#
# time_retweets = pd.Series(data=df['retweets'].values, index=df['date'])
# time_retweets.plot(figsize=(16, 4), label="retweets", legend=True)
# plt.show()
如果您能注意到导致此错误的原因的任何明显信息,请帮助初学者,这是针对Uni项目的,我有点停滞不前。提前加油