我对使用 Twitter 进行数据挖掘非常陌生。我编写了以下代码来提取带有提到的关键字的推文。我还指定了位置的纬度、经度和弧度。
import sqlite3
import tweepy
import sys
import jsonpickle
import os
import time
from random import *
auth = tweepy.AppAuthHandler('', '')
api = tweepy.API(auth, wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
lowlat=28.44030935677637
uplat=28.877125125273412
lowlong=76.82610321601817
uplong=77.33527346497551
lowrad=5
uprad=50
if (not api):
print ("Can't Authenticate")
sys.exit(-1)
searchQuery = 'Covid-19 OR coronavirus OR oxygen cylinder OR ventilator OR beds OR ambulance OR remdesevir OR blood OR #SOS OR ICU'
maxTweets = 10000
tweetsPerQry = 100
f=open('C:/Users/Pratiksha Pradhan/Documents/project/Twitter historical data.csv','w')
f.write('filename,lat,long,radius'+'\n')
while 1:
lat=lowlat+random()*(uplat-lowlat)
long=lowlong+random()*(uplong-lowlong)
radius=randint(lowrad,uprad)
areas=str(lat)+','+str(long)+','+str(radius)+'mi'
sinceId = None
conn=''
max_id = -1
n=1
tweetCount = 0
print("Downloading the {0} file of {1} tweets".format(n,maxTweets))
_run=True
while _run:
fname1 = 'C://Users/Pratiksha Pradhan/Documents/project/historial tweets'
if bool(conn):
conn.close()
try:
fName =fname1+time.strftime('%Y%m%d-%H%M%S')+'.db'
conn=sqlite3.connect(fName)
except:
time.sleep(1)
fName = fname1+time.strftime('%Y%m%d-%H%M%S')+'.db'
conn=sqlite3.connect(fName)
c=conn.cursor()
#c.execute('''CREATE TABLE tweets (tweet) ''')
f.write(fName+','+areas+'\n')
while tweetCount < maxTweets:
try:
if (max_id <= 0):
if (not sinceId):
new_tweets = api.search(q=searchQuery,count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas)
else:
new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,since_id=sinceId)
else:
if (not sinceId):
new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,max_id=str(max_id - 1))
else:
new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,max_id=str(max_id - 1),since_id=sinceId)
if not new_tweets:
print("No more tweets found")
_run=False
break
for tweet in new_tweets:
data=jsonpickle.encode(tweet._json,unpicklable=False)
c.execute("INSERT INTO tweets VALUES (?)",(data,))
conn.commit()
tweetCount += len(new_tweets)
print("Downloaded {0} tweets in file {1}".format(tweetCount,n))
max_id = new_tweets[-1].id
except tweepy.TweepError as e:
print("some error : " + str(e))
break
time.sleep(0.5)
tweetCount=0
n=n+1
当我运行此代码时,数千个数据库文件保存在上述位置:C://Users/Pratiksha Pradhan/Documents/project。但它们都是0Kb。这是什么意思?我不确定我在这里错过了什么,但我肯定错过了一些东西。
预先感谢您的帮助。