我第一次使用Tweepy。目前收到此错误
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-11-cdd7ebe0c00f> in <module>()
----> 1 data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
2 data_python = json.loads(data_json)
3
4 csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
IOError: [Errno 2] No such file or directory: 'raw_tweets.json'
我感觉我所获得的代码并不起作用。例如,打印(状态)不打印任何内容。此外,我在目录中看不到保存的CSV或JSON文件。
我是新手,所以你能提供的任何帮助/文件都会很棒!
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
import csv
import io
from pymongo import MongoClient
ckey = 'blah'
consumer_secret = 'blah'
access_token_key = 'blah'
access_token_secret = 'blah'
#start_time = time.time() #grabs the system time
keyword_list = ['keyword'] #track list
#Listener Class Override
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
self.tweet_data = []
def on_data(self, data):
saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
while (time.time() - self.time) < self.limit:
try:
self.tweet_data.append(data)
return True
except BaseException, e:
print 'failed ondata,', str(e)
time.sleep(5)
pass
saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()
exit()
def on_error(self, status):
print status
class listener(StreamListener):
def __init__(self, start_time, time_limit=10):
self.time = start_time
self.limit = time_limit
def on_data(self, data):
while (time.time() - self.time) < self.limit:
print(data)
try:
client = MongoClient('blah', 27017)
db = client['blah']
collection = db['blah']
tweet = json.loads(data)
collection.insert(tweet)
return True
except BaseException as e:
print('failed ondata,')
print(str(e))
time.sleep(5)
pass
exit()
def on_error(self, status):
print(status)
data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
data_python = json.loads(data_json)
csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
更新:创建文件但文件为空
import tweepy
import datetime
auth = tweepy.OAuthHandler('xxx', 'xxx')
auth.set_access_token('xxx', 'xxx')
class listener(tweepy.StreamListener):
def __init__(self, timeout, file_name, *args, **kwargs):
super(listener, self).__init__(*args, **kwargs)
self.start_time = None
self.timeout = timeout
self.file_name = file_name
self.tweet_data = []
def on_data(self, data):
if self.start_time is None:
self.start_time = datetime.datetime.now()
while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
with open(self.file_name, 'a') as data_file:
data_file.write('\n')
data_file.write(data)
def on_error(self, status):
print status
l = listener(60, 'stack_raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)
答案 0 :(得分:1)
您不是为侦听器创建Stream。下面代码的最后一行就是这样做的。接下来你必须启动Stream,这是最后一行。我必须警告你,将它存储在mongodb中是正确的,因为我存储它的文件似乎很容易增长到几GB。该文件也不完全是json。文件中的每一行都是一个json。你必须根据自己的需要调整它。
import tweepy
import datetime
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
class listener(tweepy.StreamListener):
def __init__(self, timeout, file_name, *args, **kwargs):
super(listener, self).__init__(*args, **kwargs)
self.start_time = None
self.timeout = timeout
self.file_name = file_name
self.tweet_data = []
def on_data(self, data):
if self.start_time is None:
self.start_time = datetime.datetime.now()
while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
with open(self.file_name, 'a') as data_file:
data_file.write('\n')
data_file.write(data)
def on_error(self, status):
print status
l = listener(60, 'raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)