嗨,我是编程新手,所以请多包涵。我正在尝试对网上的推文做一些情感分析。我想抓取泰语推文,并在导出时将它们翻译成英文另存一份副本。
但是在我运行它时,输出中不断出现 \xe0\xb8\xa1\xe0 之类的行。如何将这些 Unicode 字符解码为英语?请帮帮我这个菜鸟。
这是我的代码:
# Download up to maxTweets Thai-language tweets matching the query `q` and
# save them to the file named by fName, one tweet per line.
# NOTE(review): api, q, sinceId, tweetsPerQry, fName and tweepy are not
# defined in this snippet; they are expected to be set up earlier in the file.
max_id = -1
maxTweets = 100
tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))

# Open the file in text mode with an explicit UTF-8 encoding so the tweet text
# is stored as readable characters regardless of the platform's default
# encoding.
with open(fName, 'w', encoding='utf-8') as f:
    while tweetCount < maxTweets:
        # Build the query once; the optional paging parameters are added only
        # when they apply, replacing four near-identical api.search calls.
        search_kwargs = {
            'q': q,
            'lang': "th",
            'count': tweetsPerQry,
            'tweet_mode': 'extended',
        }
        if max_id > 0:
            # Page backwards: only ask for tweets older than the last one seen.
            search_kwargs['max_id'] = str(max_id - 1)
        if sinceId:
            search_kwargs['since_id'] = sinceId

        try:
            new_tweets = api.search(**search_kwargs)
        except tweepy.TweepError as e:
            # Just exit if any error
            print("some error : " + str(e))
            break

        if not new_tweets:
            print("No more tweets found")
            break

        for tweet in new_tweets:
            # Write the text itself. The previous str(text.encode("utf-8"))
            # wrote the repr of a bytes object (b'\xe0\xb8...') on Python 3
            # instead of the actual characters.
            f.write(tweet.full_text.replace('\n', '') + "\n")

        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        # Remember the last tweet id of this page so the next request
        # continues from there.
        max_id = new_tweets[-1].id

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))