虽然urls
已正确定义,但我会继续获取"global name 'urls' is not defined"
,并且网址数据不会inserted
进入MYSQL
。有关何处的建议?我在这里搞错了?
# ! /usr/bin/python
# Description : This script can collect the URLs from Tweets and Records them into research MYSQL DB.
from __future__ import print_function
import tweepy
import json
import MySQLdb
from dateutil import parser
WORDS = ['security']
# CREDENTAILS
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""
HOST = "192.168.150.94"
USER = "root"
PASSWD = "blah"
DATABASE = "tweets"
def store_data(tweet_url):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.execute(insert_query, (urls))
db.commit()
cursor.close()
db.close()
return
class StreamListener(tweepy.StreamListener):
def on_connect(self):
print("We are now connected to the streaming API.")
def on_error(self, status_code):
print('An Error has occured: ' + repr(status_code))
return False
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
for i in web_url:
web_urls = i['expanded_url']
urls = web_urls
print(urls)
store_data(urls)
except Exception as e:
print(e)
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS)
答案 0 :(得分:1)
您只需将函数urls
中的参数store_data
重命名为tweet_url
def store_data(tweet_url):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.execute(insert_query, (tweet_url))
您想要存储数据的方式仍不清楚。如果你在循环后调用store_data
,它只存储最后一个值,你最好将每个值存储在一个列表中:
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
urls = []
for i in web_url:
urls.append((i['expanded_url'],))
# stores a tuple to make it easy in the database insertion
print(urls)
store_data(urls)
except:
[...]
这种方式需要store_data
内的另一个小修补:
def store_data(urls):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.executemany(insert_query, urls)
db.commit()
cursor.close()
db.close()
return
答案 1 :(得分:1)
在您的函数store_data()
中,您使用的是未定义的urls
,因为您传递给函数的内容是tweet_url
。
您需要将函数参数更改为urls
而不是tweet_url
,如下所示:
def store_data(urls):
# ...
或者在功能正文中将urls
更改为tweet_url
:
# ...
cursor.execute(insert_query, (tweet_url))
# ...
并确保修复on_data()
方法中的缩进,如下所示:
class StreamListener(tweepy.StreamListener):
# ...
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
for i in web_url:
web_urls = i['expanded_url']
urls = web_urls
print(urls)
store_data(urls)
except Exception as e:
print(e)