在我的Python脚本中,我试图显示一组流式推文的geo_enabled值。如果geo_enabled为false,我希望将其显示为false,否则显示为true。另外,如果发推的人没有填写相应的值,我还希望将place和country字段显示为null。问题是我目前陷入了困境,因为我的脚本不断抛出KeyError。
我认为KeyError是由geo_enabled引起的,因为它的值为false。脚本如下:
import time
import json
import pandas as pd
import re
#tweepy based modules
import tweepy
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
#initializing authentication credentials
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    """Stream listener that appends every raw message to requests.json.

    Messages are written one per line until ``time_limit`` seconds have
    passed since the listener was created.
    """

    def __init__(self, time_limit):
        """Start the clock and open the output file in append mode.

        time_limit: length of the collection window, in seconds (it is
        compared against differences of time.time() values).
        """
        self.start_time = time.time()
        self.limit = time_limit
        self.saveFile = open('requests.json', 'a')
        super(StdOutListener, self).__init__()

    def on_data(self, data):
        """Store one raw message; report whether collection should go on.

        Returns True while the time window is still open. Once it has
        elapsed, the output file is closed and False is returned.
        NOTE(review): tweepy is expected to disconnect the stream when
        on_data returns False -- confirm for the installed version.
        """
        if (time.time() - self.start_time) < self.limit:
            self.saveFile.write(data)
            self.saveFile.write('\n')
            return True
        else:
            self.saveFile.close()
            return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
def getwords(string):
    """Split ``string`` into word tokens and punctuation marks.

    A word token is a run of word characters, apostrophes and ``#`` (so
    hashtags and contractions stay in one piece). Each of ``. , ! ? ;`` is
    returned as a separate token. Whitespace and any other character are
    dropped.

    Returns a list of tokens in order of appearance; an empty string gives
    an empty list.
    """
    return re.findall(r"[\w'#]+|[.,!?;]", string)
# Script entry point: stream tweets matching user-supplied keywords into
# requests.json for a user-supplied duration, then load the file back and
# build pandas data frames from the decoded messages.
if __name__ == '__main__' :
#This handles Twitter authentication and the connection to Twitter Streaming API
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# The prompt asks for minutes; multiplying by 60 turns the value into the
# number of seconds handed to the listener.
# NOTE(review): this relies on input() returning a number (Python 2
# behaviour, consistent with the raw_input call below) -- confirm.
time_limit = input("Enter the time limit in minutes : ")
time_limit *= 60
stream = Stream(auth,listener = StdOutListener(time_limit))
string = raw_input("Enter the list of keywords/hashtags to be compared : ")
keyword_list = getwords(string)
#This line filters the Twitter stream to capture data matching the entered keywords
stream.filter(track = keyword_list)
# Read the collected messages back: one JSON document per non-blank line.
tweets_data_path = 'requests.json'
tweets_data = []
tweet_list = []
tweets_file = open(tweets_data_path, "r")
for i, line in enumerate(tweets_file) :
if line.rstrip() :
tweet = json.loads(line)
tweet_list.append(tweet)
num_tweets_collected = len(tweet_list)
count = 0
#Creates a data frame structure
text_dump = open('text_dump.txt', 'w')
#Populating the location field of the data frame
#tweet_dataframe['location'] = map(lambda tweet : tweet['location'], tweet_list)
#print(tweet_dataframe['location'])
#index column for data frame
index_dataframe = []
for i in range(0, num_tweets_collected) :
index_dataframe.append(i)
# UTF-8 encoded text of every decoded message, used as the 'text' column.
tweet_text = [tweet['text'].encode('utf-8') for tweet in tweet_list]
tweet_text_dataframe = pd.DataFrame(tweet_text, index = index_dataframe, columns = ['text'])
tweet_geolocation_dataframe = pd.DataFrame()
# This lookup reads 'geo_enabled' from the top level of each decoded message
# and raises KeyError for any message that has no such key.
# NOTE(review): Twitter documents geo_enabled on the nested user object
# (tweet['user']['geo_enabled']), and json.loads yields Python booleans rather
# than the string "false" -- confirm against a sample line of requests.json.
tweet_geolocation_dataframe['geo_enabled'] = map(lambda tweet: tweet['geo_enabled'] if tweet['geo_enabled'] != "false" else "false", tweet_list)
tweet_text_ = tweet_text_dataframe['text']
print(tweet_geolocation_dataframe['geo_enabled'])
也就是说,geo_enabled以false作为值。有什么办法可以解决这个问题吗?
运行脚本时的输出:
abhijeet-mohanty-2:Desktop SubrataMohanty$ python twitter_stream_dump.py
Enter the time limit in minutes : 1
Enter the list of keywords/hashtags to be compared : python ruby scala
Traceback (most recent call last):
File "twitter_stream_dump.py", line 94, in <module>
tweet_geolocation_dataframe['geo_enabled'] = map(lambda tweet: tweet['geo_enabled'] if tweet['geo_enabled'] != "false" else "false", tweet_list)
File "twitter_stream_dump.py", line 94, in <lambda>
tweet_geolocation_dataframe['geo_enabled'] = map(lambda tweet: tweet['geo_enabled'] if tweet['geo_enabled'] != "false" else "false", tweet_list)
KeyError: 'geo_enabled'
出错的代码行:
tweet_geolocation_dataframe['geo_enabled'] = map(lambda tweet: tweet['geo_enabled'] if tweet['geo_enabled'] != "false" else "false", tweet_list)
编辑:
所以我对Python脚本进行了以下更改,但返回的是空数据框。
我把上面这一行替换成了以下几行 -
# Attempted workaround: test each decoded message for a top-level
# 'geo_enabled' key before filling the column.
# Every iteration re-assigns the whole column, so only the last message
# decides the final result.
for tweet in tweet_list :
if 'geo_enabled' in tweet :
# The map still indexes tweet['geo_enabled'] for every element of tweet_list,
# not just the message that passed the membership test above.
tweet_geolocation_dataframe['geo_enabled'] = map(lambda tweet: tweet['geo_enabled'] if tweet['geo_enabled'] != "false" else "false", tweet_list)
else:
# Assigns the scalar False as the column value.
# NOTE(review): on a data frame that has no rows yet this presumably yields
# an empty bool column rather than one value per tweet -- confirm.
tweet_geolocation_dataframe['geo_enabled'] = False
修改后运行的输出:
abhijeet-mohanty-2:Desktop SubrataMohanty$ python twitter_stream_dump.py
Enter the time limit in minutes : 1
Enter the list of keywords/hashtags to be compared : python ruby scala
Series([], Name: geo_enabled, dtype: bool)
有什么方法可以解决geo_enabled字段的空数据框问题吗?
答案 0 :(得分:1)
试试这个 -
# geo_enabled describes the tweet's author, so it is read from the nested
# 'user' object instead of the top level of the decoded message. json.loads
# already returns Python booleans, so no comparison with "false" is needed.
# Messages without a 'user' entry (or without the flag) fall back to False,
# which keeps one value per collected message and avoids the KeyError.
tweet_geolocation_dataframe['geo_enabled'] = [
    (tweet.get('user') or {}).get('geo_enabled', False)
    for tweet in tweet_list
]