推特数据挖掘

时间:2021-06-07 09:00:53

标签: python twitter data-mining

我对使用 Twitter 进行数据挖掘非常陌生。我编写了以下代码来提取带有提到的关键字的推文。我还指定了位置的纬度、经度和半径。

import sqlite3
import tweepy
import sys
import jsonpickle
import os
import time
from random import *

# --- Twitter API setup --------------------------------------------------
# Application-only auth; fill in the consumer key/secret before running.
auth = tweepy.AppAuthHandler('', '')
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Bounding box for the random search centres (lat/long extents), plus the
# allowed search-circle radius range in miles.
lowlat, uplat = 28.44030935677637, 28.877125125273412
lowlong, uplong = 76.82610321601817, 77.33527346497551
lowrad, uprad = 5, 50

if not api:
    print("Can't Authenticate")
    sys.exit(-1)

# Covid-19-relief keywords; tweets matching any of these are collected.
searchQuery = 'Covid-19 OR coronavirus OR oxygen cylinder OR ventilator OR beds OR ambulance OR remdesevir OR blood OR #SOS OR ICU'
maxTweets = 10000    # target number of tweets per output file
tweetsPerQry = 100   # maximum the search endpoint returns per call

# Index CSV mapping each output .db file to the search area it covers.
f = open('C:/Users/Pratiksha Pradhan/Documents/project/Twitter historical data.csv', 'w')
f.write('filename,lat,long,radius' + '\n')

while 1:
    # Pick a random circular search area inside the bounding box.
    lat = lowlat + random() * (uplat - lowlat)
    long = lowlong + random() * (uplong - lowlong)
    radius = randint(lowrad, uprad)
    areas = str(lat) + ',' + str(long) + ',' + str(radius) + 'mi'  # tweepy geocode string
    sinceId = None
    conn = None
    max_id = -1       # id of the oldest tweet seen so far; used to page backwards
    n = 1             # file counter for this search area
    tweetCount = 0
    print("Downloading the {0} file of {1} tweets".format(n, maxTweets))

    _run = True
    while _run:

        fname1 = 'C://Users/Pratiksha Pradhan/Documents/project/historial tweets'
        # Close the database from the previous iteration before opening a new one.
        if conn is not None:
            conn.close()
        try:
            fName = fname1 + time.strftime('%Y%m%d-%H%M%S') + '.db'
            conn = sqlite3.connect(fName)
        except sqlite3.Error:
            # Transient failure (e.g. filesystem hiccup): wait a second and
            # retry once with a fresh timestamped filename.
            time.sleep(1)
            fName = fname1 + time.strftime('%Y%m%d-%H%M%S') + '.db'
            conn = sqlite3.connect(fName)
        c = conn.cursor()
        # BUG FIX: this statement was commented out, so the `tweets` table
        # never existed and every INSERT below failed — which is why all the
        # .db files came out empty (the reported "0 Kb" symptom).
        c.execute('''CREATE TABLE IF NOT EXISTS tweets (tweet)''')

        f.write(fName + ',' + areas + '\n')
        while tweetCount < maxTweets:
            try:
                # NOTE(review): `until='2021-05-15'` — the standard search API
                # only returns tweets from roughly the last 7 days; a cutoff
                # older than that yields no results at all. Confirm the date
                # is within the window, or remove the parameter.
                if (max_id <= 0):
                    if (not sinceId):
                        new_tweets = api.search(q=searchQuery,count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas)
                    else:
                        new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,since_id=sinceId)
                else:
                    if (not sinceId):
                        new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,max_id=str(max_id - 1))
                    else:
                        new_tweets = api.search(q=searchQuery, count=tweetsPerQry,until='2021-05-15',result_type='recent',geocode=areas,max_id=str(max_id - 1),since_id=sinceId)
                if not new_tweets:
                    print("No more tweets found")
                    _run = False
                    break
                # Store each tweet's full JSON payload as one row.
                for tweet in new_tweets:
                    data = jsonpickle.encode(tweet._json, unpicklable=False)
                    c.execute("INSERT INTO tweets VALUES (?)", (data,))
                conn.commit()  # commit once per batch instead of once per row
                tweetCount += len(new_tweets)
                print("Downloaded {0} tweets in file {1}".format(tweetCount, n))
                max_id = new_tweets[-1].id
            except tweepy.TweepError as e:
                print("some error : " + str(e))
                break
            time.sleep(0.5)  # be gentle on the rate limit between pages

        tweetCount = 0
        n = n + 1

    # BUG FIX: close the last database of this area before the outer loop
    # rebinds `conn` — previously the connection object was leaked.
    if conn is not None:
        conn.close()
        conn = None

当我运行此代码时,数千个数据库文件保存在上述位置:C://Users/Pratiksha Pradhan/Documents/project。但它们都是0Kb。这是什么意思?我不确定我在这里错过了什么,但我肯定错过了一些东西。

预先感谢您的帮助。

0 个答案:

没有答案