IOError:[Errno 2]没有这样的文件或目录:意味着文件还没有被写入?

时间:2016-08-13 13:14:50

标签: json python-2.7 csv tweepy

我第一次使用Tweepy,目前遇到以下错误:

---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-11-cdd7ebe0c00f> in <module>()
----> 1 data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
      2 data_python = json.loads(data_json)
      3 
      4 csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file

IOError: [Errno 2] No such file or directory: 'raw_tweets.json'

我感觉我拿到的代码并没有起作用。例如,print(status) 不打印任何内容。此外,我在目录中也看不到保存的CSV或JSON文件。

我是新手,所以你能提供的任何帮助/文件都会很棒!

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
import csv
import io
from pymongo import MongoClient

# Placeholder Twitter API credentials; replace 'blah' with real values.
ckey = 'blah'
consumer_secret = 'blah'
access_token_key = 'blah'
access_token_secret = 'blah'


#start_time = time.time() #grabs the system time
# Terms to track on the stream.
# NOTE(review): no call that consumes this list is visible in this snippet.
keyword_list = ['keyword'] #track list

#Listener Class Override
class listener(StreamListener):
    """Buffer raw stream messages for a fixed time window, then dump them.

    Messages received while fewer than ``time_limit`` seconds have passed
    since ``start_time`` are kept in memory. The first message that arrives
    after the window has closed triggers a single write of everything
    buffered to 'raw_tweets.json' as one JSON array, and the stream is then
    stopped. The message that triggers the write is not stored.

    Args:
        start_time: reference timestamp as returned by ``time.time()``.
        time_limit: length of the collection window in seconds.
    """

    def __init__(self, start_time, time_limit=60):
        # Call the base initializer by class instead of super(listener, self):
        # the module-level name ``listener`` may be rebound by a later class
        # definition, which would make the super() lookup fail.
        StreamListener.__init__(self)
        self.time = start_time
        self.limit = time_limit
        self.tweet_data = []

    def on_data(self, data):
        """Store one raw message, or flush the buffer once the window is over.

        Returns:
            True while collecting; False after the file has been written,
            which tells tweepy to disconnect the stream.
        """
        if (time.time() - self.time) < self.limit:
            # The output file is opened in text mode with an explicit
            # encoding, so everything written to it must be unicode text.
            if isinstance(data, bytes):
                data = data.decode('utf-8')
            self.tweet_data.append(data)
            return True

        # Window elapsed: write all buffered messages as a single JSON array.
        # The context manager guarantees the handle is closed even on error.
        with io.open('raw_tweets.json', 'w', encoding='utf-8') as save_file:
            save_file.write(u'[\n')
            save_file.write(u','.join(self.tweet_data))
            save_file.write(u'\n]')

        # Stop the stream cleanly instead of calling exit() from inside the
        # callback, so the code after Stream.filter() can go on to read the file.
        return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)

class listener(StreamListener):
    """Store every stream message in a MongoDB collection for a fixed window.

    Args:
        start_time: reference timestamp as returned by ``time.time()``.
        time_limit: length of the collection window in seconds.
    """

    def __init__(self, start_time, time_limit=10):
        # Call the base initializer by class instead of super(listener, self):
        # the module-level name ``listener`` may be rebound by a later class
        # definition, which would make the super() lookup fail.
        StreamListener.__init__(self)
        self.time = start_time
        self.limit = time_limit
        # Created lazily on first use and then reused for every message.
        self._collection = None

    def _get_collection(self):
        """Return the target collection, creating the client only once."""
        if self._collection is None:
            client = MongoClient('blah', 27017)
            self._collection = client['blah']['blah']
        return self._collection

    def on_data(self, data):
        """Print and persist one raw message.

        Returns:
            True to keep the stream running; False once the time window has
            elapsed, which tells tweepy to disconnect the stream.
        """
        if (time.time() - self.time) >= self.limit:
            # Stop the stream instead of calling exit() inside the callback.
            return False

        print(data)
        try:
            tweet = json.loads(data)
            # NOTE(review): Collection.insert() is deprecated in PyMongo 3 in
            # favour of insert_one(); kept as-is because the installed PyMongo
            # version is not visible here - confirm before switching.
            self._get_collection().insert(tweet)
        except Exception as e:
            # Best effort: report the failure, back off briefly and move on to
            # the next message rather than retrying the same one in a loop.
            print('failed ondata,')
            print(str(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Load the dumped tweets. The context manager closes the input handle as soon
# as the text has been read instead of leaving it open until garbage collection.
# This raises IOError (errno 2) if 'raw_tweets.json' has not been written yet.
with io.open('raw_tweets.json', mode='r', encoding='utf-8') as json_in:
    data_json = json_in.read() #reads in the JSON file
data_python = json.loads(data_json)

# Left open on purpose: no close() is visible in this snippet, so the handle is
# presumably used (and should be closed) by code that follows.
csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file

更新:创建文件但文件为空

import tweepy
import datetime
# Build the OAuth handler, then attach the access token to it.
# The 'xxx' strings are placeholders for real credentials.
auth = tweepy.OAuthHandler('xxx', 'xxx')
auth.set_access_token('xxx', 'xxx')


class listener(tweepy.StreamListener):
    """Append each raw stream message to a file until a timeout elapses.

    The clock starts when the first message arrives, not when the listener
    is constructed. The output file holds one message per line, so it is a
    sequence of JSON documents rather than a single JSON document.

    Args:
        timeout: collection window in seconds, measured from the first message.
        file_name: path of the file that messages are appended to.
        *args, **kwargs: forwarded unchanged to ``tweepy.StreamListener``.
    """

    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        """Write one message to the output file.

        Returns:
            True to keep the stream running; False once the timeout has
            elapsed, which tells tweepy to disconnect the stream.
        """
        now = datetime.datetime.now()
        if self.start_time is None:
            self.start_time = now

        # total_seconds() rather than .seconds: the latter drops whole days and
        # would wrap around for windows longer than 24 hours.
        if (now - self.start_time).total_seconds() >= self.timeout:
            return False

        # Write each message exactly once. A ``while`` loop here would keep
        # re-appending the same message until the timeout expired.
        with open(self.file_name, 'a') as data_file:
            data_file.write('\n')
            data_file.write(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)


# Listener with a timeout of 60 (seconds) writing to 'stack_raw_tweets.json'.
l = listener(60, 'stack_raw_tweets.json')
# Bind the listener to a stream that uses the credentials in ``auth``.
mstream = tweepy.Stream(auth=auth, listener=l)
# Start receiving statuses that match the track term 'python'.
# NOTE(review): ``async`` is a reserved word from Python 3.7 on, and newer
# tweepy releases spell this keyword ``is_async`` - confirm against the
# interpreter and tweepy version in use.
mstream.filter(track=['python'], async=True)

1 个答案:

答案 0 :(得分:1)

您没有为侦听器创建Stream。下面代码的倒数第二行就是做这件事的。接下来你必须启动Stream,也就是最后一行。我必须提醒你,把数据存储在mongodb中才是正确的做法,因为我用来存储的文件似乎很容易增长到几GB。另外,该文件并不是严格意义上的json:文件中的每一行各是一个json对象。你必须根据自己的需要进行调整。

import tweepy
import datetime
# Build the OAuth handler, then attach the access token to it.
# NOTE(review): consumer_key, consumer_secret, access_token and access_secret
# are not defined in this snippet; they must be assigned before this runs.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)


class listener(tweepy.StreamListener):
    """Append each raw stream message to a file until a timeout elapses.

    The clock starts when the first message arrives, not when the listener
    is constructed. The output file holds one message per line, so it is a
    sequence of JSON documents rather than a single JSON document.

    Args:
        timeout: collection window in seconds, measured from the first message.
        file_name: path of the file that messages are appended to.
        *args, **kwargs: forwarded unchanged to ``tweepy.StreamListener``.
    """

    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        """Write one message to the output file.

        Returns:
            True to keep the stream running; False once the timeout has
            elapsed, which tells tweepy to disconnect the stream.
        """
        now = datetime.datetime.now()
        if self.start_time is None:
            self.start_time = now

        # total_seconds() rather than .seconds: the latter drops whole days and
        # would wrap around for windows longer than 24 hours.
        if (now - self.start_time).total_seconds() >= self.timeout:
            return False

        # Write each message exactly once. A ``while`` loop here would keep
        # re-appending the same message until the timeout expired.
        with open(self.file_name, 'a') as data_file:
            data_file.write('\n')
            data_file.write(data)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)


# Listener with a timeout of 60 (seconds) writing to 'raw_tweets.json'.
l = listener(60, 'raw_tweets.json')
# Bind the listener to a stream that uses the credentials in ``auth``.
mstream = tweepy.Stream(auth=auth, listener=l)
# Start receiving statuses that match the track term 'python'.
# NOTE(review): ``async`` is a reserved word from Python 3.7 on, and newer
# tweepy releases spell this keyword ``is_async`` - confirm against the
# interpreter and tweepy version in use.
mstream.filter(track=['python'], async=True)