作为解析的结果,只能正确存储英文句子。
当你解析(刮)时,你会遇到其他语言。
但是,我的代码只能正确存储英文句子。
表情符号也没有保存。 (例如★♬◆♥)
我该如何解决? 感谢您的意见。
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
#Twitter Authentication Information
consumerKey = "x"
consumerSecret = "x"
accessToken = "x"
accessSecret = "x"
class listener(StreamListener):
def on_data(self, tweetRaw):
try:
# Extract relevant information from the raw tweet data
tweetAuthor = tweetRaw.split('"user":{"id":')[1].split(',"id_str":')[0]
tweetText = tweetRaw.split(',"text":"')[1].split('","source":')[0]
tweetText = tweetText.replace(',', '')
tweetDate = tweetRaw.split('"created_at":"')[1].split('","id":')[0]
tweet = tweetDate + ',' + tweetAuthor + ',' + tweetText
# Check tweet for location data
try:
tweetLocation = tweetRaw.split(',"geo":{"type":"Point","coordinates":[')[1] \
.split(']},"coordinates":{"type":')[0]
tweet = tweet + ',' + tweetLocation
except:
pass
# Write tweet to file
try:
tweetToFile = open('rr.csv', 'a')
tweetToFile.write(tweet)
tweetToFile.write('\n')
tweetToFile.close()
except:
print('Error! Unable to write to file.')
return True
except:
time.sleep(5)
def on_error(self, status):
print(status.tweetText)
auth = OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessSecret)
twitterStream = Stream(auth, listener())
# Filter(s) for stream of tweets
## Geobox made using: http://boundingbox.klokantech.com/
indiaGeoBox = [[[139.5799775191,35.5341801276],[139.9369292191,35.5341801276],[139.9369292191,35.8307871111],[139.5799775191,35.8307871111],[139.5799775191,35.5341801276]]]
## Geobox contains tweets from parts of neighbouring countries
## Topic Filter
twitterStream.filter(track=['sing'or"day"])
twitterStream.filter(locations=indiaGeoBox)
indiaGeoBox =如果不是英语国家(例如korea.japan.china)
twitterStream.filter(track = ['No English keyword'])
推文:\ ud83d \ udcaf \ ud83d \ ude4f \ ud83d \ udcaf \ ud83d \ ude4f \ ud83d \ udcaf \ ud83d \ ude4f
或
indiaGeoBox =如果不是英语国家(例如china.korea.japan)
twitterStream.filter(track = ['English keyword'])
推文:https://t.co/ppKCeiEU5n\núub178\ud1b5\uc758\ uc808 \ uce5c \ ubb38 \ uc7ac \ uc778