创建数据集

时间:2017-06-27 16:44:04

标签: dataset tweepy

我正在尝试提取带有特定主题标签的推文,并将它们保存到 csv 文件中。下面的代码运行良好,但我想对数据进行拆分。我该如何拆分呢?

任何建议都将受到高度赞赏, Niddal

# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function

from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import json
import codecs
import sys


# Twitter API credentials, handed to OAuthHandler / set_access_token in the
# __main__ section. Left blank here; fill them in before running.
ckey = ""      # consumer key
csecret = ""   # consumer secret
atoken = ""    # access token
asecret = ""   # access token secret

# Translation table for unicode.translate()/str.translate(): every code point
# above the Basic Multilingual Plane is mapped to U+FFFD (the replacement
# character). It is applied to tweet text before printing.
non_bmp_map = {
    code_point: 0xfffd
    for code_point in range(0x10000, sys.maxunicode + 1)
}

class StdOutListener(StreamListener):
    """Stream listener that prints each tweet's text and appends it to a file.

    Every payload that carries a 'text' field is echoed to stdout and written
    to ``output_path`` as one ``<unix timestamp>::<text>`` line (UTF-8).
    """

    # Destination file for captured tweets. Kept as a class attribute so a
    # subclass or instance can redirect output without editing on_data.
    output_path = 'd:\\StremHash.csv'

    def on_data(self, data):
        """Handle one raw JSON payload received from the stream.

        Args:
            data: JSON-encoded string delivered by the stream.

        Returns:
            True in every case, so that a single bad payload never stops the
            stream.
        """
        try:
            tweet = json.loads(data).get('text')
            if tweet is None:
                # Payload has no 'text' field, so there is nothing to store.
                # Skipping it here avoids the KeyError (and the 5 second
                # stall in the handler below) that such payloads used to
                # trigger.
                return True

            # Code points outside the BMP are replaced before printing.
            print(tweet.translate(non_bmp_map))

            record = str(time.time()) + '::' + tweet
            # The context manager closes the file even if a write fails.
            with codecs.open(self.output_path, 'a', "utf-8") as save_file:
                save_file.write(record)
                save_file.write('\n')
        except Exception as e:
            # Exception rather than BaseException: Ctrl-C (KeyboardInterrupt)
            # and SystemExit must still be able to terminate the script.
            print('failed on data,', str(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Report a stream error status.

        Args:
            status: status code passed in by the stream.

        Returns:
            False for status 420 so the stream disconnects instead of
            retrying (per the tweepy streaming guide, 420 signals rate
            limiting and repeated retries lengthen the wait); None otherwise,
            which leaves the stream's default handling in place.
        """
        print(status)
        if status == 420:
            return False

if __name__ == '__main__':
    # Script entry point: authenticate with the module-level credentials and
    # stream tweets matching the hashtag into StdOutListener.
    listener = StdOutListener()
    auth = OAuthHandler(ckey, csecret)
    auth.set_access_token(atoken, asecret)

    twitterStream = Stream(auth, listener)
    # A u"" literal yields the same text as unicode("...", "utf-8") on
    # Python 2 (the source is declared UTF-8) and also works on Python 3.3+,
    # where the unicode() builtin no longer exists.
    twitterStream.filter(track=[u"#عيدكم_مبارك"])

0 个答案:

没有答案