编写僵尸程序以抓取论坛并通过电子邮件向用户发送基于关键字的报告。遇到麻烦。
还有点担心我的进口是不正确的,因为我试图搞砸了这个。
运行时设置此错误(显然电子邮件信息是****):
E:\>python dgcrbot.py
Traceback (most recent call last):
File "dgcrbot.py", line 95, in <module>
main()
File "dgcrbot.py", line 91, in main
Email('*****')
File "dgcrbot.py", line 67, in __init__
self.run()
File "dgcrbot.py", line 87, in run
self.send_message()
File "dgcrbot.py", line 70, in send_message
matches = Site('http://www.dgcoursereview.com/forums/forumdisplay.php?f=2')
File "dgcrbot.py", line 23, in __init__
self.check_posts()
File "dgcrbot.py", line 55, in check_posts
if any(pattern.lower() in title.lower() for pattern in patterns) and self.check_database(self, posts[title]) is False:
TypeError: check_database() takes 2 positional arguments but 3 were given
完全转储
import re
import sqlite3
import urllib.request
import html.parser
import smtplib
from bs4 import BeautifulSoup
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
patterns = [****]
data = sqlite3.connect('discgolf.db')
cur = data.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS checked(id)')
data.commit()
server = smtplib.SMTP()
class Site(object):
def __init__(self,forum):
self.forum = forum
self.check_posts()
def get_url(self):
posts = {}
html = BeautifulSoup(urllib.request.urlopen(self.forum).read().decode('utf-8','ignore'),'html.parser')
titles = html.find_all('td',class_='alt1')
for title in titles:
try:
url = str(title)[re.search('<a href="',str(title)).span()[1]:]
url = url[:re.search('">',url).span()[0]]
url = url[re.search('amp;t=',url).span()[1]:]
title = str(title)[re.search('title=',str(title)).span()[1]:re.search('">',str(title)).span()[0]]
posts[title] = url
except:
pass
return posts
def check_database(self, identity):
cur.execute('SELECT * FROM checked WHERE id=?',[identity])
if cur.fetchone():
return True
else:
return False
def submit_to_database(self, identity):
cur.execute('INSERT INTO checked VALUES(?)',[identity])
data.commit()
def check_posts(self):
posts = self.get_url()
matches = {}
for title in posts:
if any(pattern.lower() in title.lower() for pattern in patterns) and self.check_database(self, posts[title]) is False:
permalink = 'http://www.dgcoursereview.com/forums/showthread.php?t={}'.format(post[title])
matches[title] = permalink
self.submit_to_database(posts[title])
return matches
class Email(object):
def __init__(self, to_address, from_address='*****'):
self.to_address = to_address
self.from_address = from_address
self.run()
def send_message(self,subject='Found Match', body='N/A'):
matches = Site('http://www.dgcoursereview.com/forums/forumdisplay.php?f=2')
msg = MIMEMultipart()
msg['From'] = self.from_address
msg['To'] = self.to_address
msg['Subject'] = DGCR - AutoBot
body = ''
for title in matches:
body += '{} -- {}\n\n'.format(title,matches[title])
msg.attach(MIMEText(body,'plain'))
server = smtplib.SMTP('*****')
server.starttls()
server.login(self.from_address,'*****')
text = msg.as_string()
server.send_email(self.from_address, self.to_address, text)
server.quit()
def run(self):
self.send_message()
def main():
while True:
Email('*****')
time.sleep(10*60)
if __name__ == '__main__':
main()