我需要过滤阿拉伯语或波斯语的 Twitter 推文。我将推文存储在 MongoDB 中,但无法根据特定关键字过滤推文,也不确定问题出在哪里。我在 UNIX Shell 中运行代码,下面是我的代码:
# -*- coding: utf-8 -*-
import pymongo
#from pymongo import Connection
from pymongo import MongoClient
import json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import datetime
import time
import sys
# Older PyMongo releases expose ``pymongo.connection.Connection``; when that
# module is missing, ``MongoClient`` is bound to the same name instead.
try:
    from pymongo.connection import Connection
except ImportError:
    from pymongo import MongoClient as Connection

# MongoDB handle: database "lang", collection "tweets" on the local server.
connection = Connection('localhost', 27017)
db = connection.lang
collection = db.tweets

# Unique index on the tweet "id" so the same tweet cannot be stored twice.
# ``create_index`` replaces ``ensure_index``, which is deprecated since
# PyMongo 3.0 and removed in PyMongo 4.0.
# NOTE(review): ``dropDups`` is only honoured by MongoDB servers older than
# 3.0 -- confirm the target server version and drop the option if unneeded.
collection.create_index("id", unique=True, dropDups=True)

# Twitter API credentials (placeholders -- keep real values out of source
# control).
consumer_key = "XXXXXXXXXXXXXXXXXX"
consumer_secret = "XXXXXXXXXXXXXXXXXXXXXXXX"
access_token = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
access_token_secret = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Translation table mapping every code point above the Basic Multilingual
# Plane (U+10000 .. sys.maxunicode) to U+FFFD, the replacement character.
# It is not referenced by the code visible in this file.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

print ("\n=== Starting Tweet Collection :) ===\n")
# The below code will get Tweets from the stream and store all fields to your database
class StdOutListener(StreamListener):
    """Stream listener that stores each received message in MongoDB.

    Every message handed to ``on_data`` is parsed from JSON and inserted,
    with all of its fields, into the module-level ``collection``.
    """

    def on_data(self, data):
        """Parse one raw stream message and insert it into ``collection``.

        Args:
            data: JSON text of a single message delivered by the stream.

        Returns:
            Always True. (tweepy stops the stream when a listener callback
            returns False.)
        """
        # Load the message, with all of its fields, into a dict.
        tweet = json.loads(data)
        try:
            # NOTE(review): ``insert`` is deprecated since PyMongo 3.0 in
            # favour of ``insert_one``; it is kept here because the file
            # also supports the older ``Connection`` API.
            collection.insert(tweet)
        except pymongo.errors.DuplicateKeyError:
            # The unique index on "id" rejected a document that is already
            # stored. Skip it instead of letting the exception escape and
            # tear down the whole stream.
            pass
        return True

    def on_error(self, status):
        """Print the error status reported by the stream to the console.

        Args:
            status: The status value passed in by the stream.
        """
        # Call form of print: valid on both Python 2 and Python 3.
        print(status)
if __name__ == '__main__':
    # Authenticate against the Twitter API and attach the listener.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener, timeout=30.0)

    # The keyword must be a unicode string. On Python 2 a plain 'الله'
    # literal is a UTF-8 byte string; when tweepy encodes the track terms,
    # Python 2 implicitly ASCII-decodes those bytes and raises
    # UnicodeDecodeError. The u'' prefix avoids that and is also valid on
    # Python 3.3+.
    keywords = [u'الله']

    # Seconds to wait before reconnecting after a failure.
    retry_delay = 59

    while True:
        try:
            # Blocks while the stream is running; a normal return ends the
            # retry loop.
            stream.filter(track=keywords)
            break
        except Exception as e:
            # Report the failure instead of swallowing it silently; a
            # hidden exception here makes the script look like it simply
            # receives no tweets.
            print("Stream error: %r -- retrying in %d seconds" % (e, retry_delay))
            time.sleep(retry_delay)
这段代码始终没有任何输出。我可以正常地按英语关键字过滤,但无法按我的母语关键字过滤。请问我哪里做错了?有什么建议吗?