使用IPython和Mongo DB过滤Twitter Streaming API的GEO位置

时间:2016-02-08 12:29:38

标签: python mongodb twitter geocode twitter-streaming-api

我是编程新手,并试图在Jupyter笔记本中查看代码,将特定位置的推文流式传输到Mongo DB数据库。我这样做有困难。有人可以告诉我,如果我使用正确的Geocode调用来过滤Twitter流吗?

谢谢

我正在使用的完整代码如下:

import numpy as np
import pandas as pd
import tweepy
import time
import math
import os
import sys
from geopy import geocoders

from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener


import matplotlib.pyplot as plt

import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime

%matplotlib inline

api_key = "*****" # <---- Add your API Key
api_secret = "****" # <---- Add your API Secret
access_token = "****" # <---- Add your access token
access_token_secret = "****" # <---- Add your access token secret

auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)

class listener(StreamListener):

def __init__(self, start_time, time_limit=60):

    self.time = start_time
     self.limit = time_limit
     self.tweet_data = []

def on_data(self, data):

saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')

     while (time.time() - self.time) < self.limit:

         try:
             self.tweet_data.append(data)

            return True

        except BaseException as e:
            print ('failed ondata,', str(e))
            time.sleep(5)
            pass

    saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
    saveFile.write(u'[\n')
    saveFile.write(','.join(self.tweet_data))
    saveFile.write(u'\n]')
    saveFile.close()
    exit()

def on_error(self, status):

    print (statuses)


API = tweepy.API(auth)
API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)


import pymongo
from pymongo import MongoClient
import json


start_time = time.time() #grabs the system time

twitterStream = Stream(auth, StreamListener)

myStreamListener = StreamListener#(max_tweets=1000)
myStream = tweepy.Stream(auth = API.auth, listener=myStreamListener)

myStream.filter(track=['API.reverse_geocode'], async=True)

class listener(StreamListener):

counter = 0

def __init__(self, max_tweets=1000, *args, **kwargs):
    self.max_tweets = max_tweets
    self.counter = 0
    super().__init__(*args, **kwargs)

def on_connect(self):
    self.counter = 0
    self.start_time = datetime.now()

def on_status(self, status):
    # Increment counter
    self.counter += 1
    collection.insert_many


    if self.counter % 1 == 0:
        value = int(100.00 * self.counter / self.max_tweets)
        mining_time = datetime.now() - self.start_time
        progress_bar.value = value
        html_value = """<span class="label label-primary">Tweets/Sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds]))
        html_value += """ <span class="label label-success">Progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0)
        html_value += """ <span class="label label-info">ETA: %.1f Sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds])))
        wgt_status.value = html_value

        if self.counter >= self.max_tweets:
            myStream.disconnect()
            print("Finished")
            print("Total Mining Time: %s" % (mining_time))
            print("Tweets/Sec: %.1f" % (self.max_tweets / mining_time.seconds))
            progress_bar.value = 0

            try:
                client = pymongo.MongoClient('localhost', 27017)
                db = client['happycitydb']
                collection = db['happycitytweets_collection']
                tweet = json.loads(data)
                collection.insert(tweet)

                return True
            except BaseException as e:
                print ('failed ondata,', str(e))
                time.sleep(5)
                pass
            exit()


keywords = ["happy"]

progress_bar = wgt.IntProgress(value=0)
display(progress_bar)
wgt_status = wgt.HTML(value="""<span class="label label primary">Tweets/Sec: 0.0</span>""")
display(wgt_status)

for error_counter in range(5):
try:
    myStream.filter(track=keywords)
    print("Tweets collected: %s" % myStream.listener.counter)
    print("Total tweets in collection: %s" % col.count())
    break
except:
    print("ERROR# %s" % (error_counter + 1))

0 个答案:

没有答案