我是编程新手,并试图在Jupyter笔记本中查看代码,将特定位置的推文流式传输到Mongo DB数据库。我这样做有困难。有人可以告诉我,如果我使用正确的Geocode调用来过滤Twitter流吗?
谢谢
我正在使用的完整代码如下:
import numpy as np
import pandas as pd
import tweepy
import time
import math
import os
import sys
from geopy import geocoders
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import matplotlib.pyplot as plt
import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime
%matplotlib inline
api_key = "*****" # <---- Add your API Key
api_secret = "****" # <---- Add your API Secret
access_token = "****" # <---- Add your access token
access_token_secret = "****" # <---- Add your access token secret
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
self.tweet_data = []
def on_data(self, data):
saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
while (time.time() - self.time) < self.limit:
try:
self.tweet_data.append(data)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()
exit()
def on_error(self, status):
print (statuses)
API = tweepy.API(auth)
API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)
import pymongo
from pymongo import MongoClient
import json
start_time = time.time() #grabs the system time
twitterStream = Stream(auth, StreamListener)
myStreamListener = StreamListener#(max_tweets=1000)
myStream = tweepy.Stream(auth = API.auth, listener=myStreamListener)
myStream.filter(track=['API.reverse_geocode'], async=True)
class listener(StreamListener):
counter = 0
def __init__(self, max_tweets=1000, *args, **kwargs):
self.max_tweets = max_tweets
self.counter = 0
super().__init__(*args, **kwargs)
def on_connect(self):
self.counter = 0
self.start_time = datetime.now()
def on_status(self, status):
# Increment counter
self.counter += 1
collection.insert_many
if self.counter % 1 == 0:
value = int(100.00 * self.counter / self.max_tweets)
mining_time = datetime.now() - self.start_time
progress_bar.value = value
html_value = """<span class="label label-primary">Tweets/Sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds]))
html_value += """ <span class="label label-success">Progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0)
html_value += """ <span class="label label-info">ETA: %.1f Sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds])))
wgt_status.value = html_value
if self.counter >= self.max_tweets:
myStream.disconnect()
print("Finished")
print("Total Mining Time: %s" % (mining_time))
print("Tweets/Sec: %.1f" % (self.max_tweets / mining_time.seconds))
progress_bar.value = 0
try:
client = pymongo.MongoClient('localhost', 27017)
db = client['happycitydb']
collection = db['happycitytweets_collection']
tweet = json.loads(data)
collection.insert(tweet)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
exit()
keywords = ["happy"]
progress_bar = wgt.IntProgress(value=0)
display(progress_bar)
wgt_status = wgt.HTML(value="""<span class="label label primary">Tweets/Sec: 0.0</span>""")
display(wgt_status)
for error_counter in range(5):
try:
myStream.filter(track=keywords)
print("Tweets collected: %s" % myStream.listener.counter)
print("Total tweets in collection: %s" % col.count())
break
except:
print("ERROR# %s" % (error_counter + 1))