我和朋友用 BeautifulSoup 编写了下面的脚本:先抓取招聘页面的 HTML,把其中的职位追加到数组,再写入文件,最后以便于阅读的格式把这些职位通过电子邮件发给我们自己。该脚本在 Ubuntu 上运行正常,但在运行 Raspbian 的 Raspberry Pi 上却不起作用。
从终端运行时,我只看到两条消息:'end of file' 和 'Start write...',它们都是代码中 print 语句的输出。在 Pi 上运行时没有任何错误消息,但数组中没有追加任何内容,也没有发送电子邮件。
有人可以看看吗?感谢。
import urllib2, email, smtplib, os.path
import cPickle as pickle
from bs4 import BeautifulSoup
class Job:
    """One job listing: a title, a date and a link to the advert."""

    def __init__(self, title, date, url):
        """Store the listing fields.

        ``url`` is appended verbatim to the site root, so it is presumably a
        site-relative path without a leading slash (verify against caller).
        """
        site_root = "http://www.forensicfocus.com/"
        self.title = title
        self.date = date
        self.url = site_root + url

    def describJob(self):
        """Return the title, date and URL joined by single spaces."""
        return " ".join((self.title, self.date, self.url))
def createJobsArray(parser=None):
    """Fetch the Forensic Focus jobs page and turn each listing row into a Job.

    Args:
        parser: Optional BeautifulSoup parser name (for example
            "html.parser" or "lxml"). With the default of None,
            BeautifulSoup selects a parser itself, exactly as before.
            NOTE(review): that choice depends on which parser libraries are
            installed, so the same page can parse differently on two
            machines; pin a parser here if results differ between hosts.

    Returns:
        A list of Job objects, one per ``<tr class="topic">`` row, in page
        order. The list is empty when no such rows are found.

    Raises:
        AttributeError, IndexError: if a row lacks the title link, the
            fourth cell or the fourth link that the fixed positions below
            rely on.
    """
    response = urllib2.urlopen('http://www.forensicfocus.com/jobs')
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    soup = BeautifulSoup(html, parser)

    jobsArray = []
    for section in soup.find_all("tr", class_="topic"):
        title = section.find("a", class_="topictitle").get_text()
        # Titles are kept as ASCII; characters outside ASCII are dropped.
        titleEncoded = title.encode('ascii', 'ignore')
        # Fixed positions: the date is read from the 4th cell and the link
        # from the 4th anchor of the row.
        date = section.find_all("td")[3].find("div").get_text()
        url = section.find_all("a")[3].get("href")
        job = Job(titleEncoded, date, url)
        print("printing job")
        # Show the title rather than the instance's default repr.
        print(titleEncoded)
        print("printing job")
        jobsArray.append(job)
    return jobsArray
def sendEmail(job):
    """Email one message per job through Gmail's SMTP submission port.

    Args:
        job: Iterable of Job objects to announce. The singular name is kept
            for backward compatibility with existing callers; the argument
            itself is now used instead of a module-level global.

    Returns:
        None. When ``job`` is empty no connection is opened at all.

    Raises:
        smtplib.SMTPException: on connection, authentication or delivery
            failure.
    """
    # Local import: only the top-level ``email`` package is imported by the
    # file, which does not guarantee ``email.mime.text`` is loaded.
    from email.mime.text import MIMEText

    jobs = list(job)
    if not jobs:
        # Nothing to send, so skip the connection and login entirely.
        return

    senderEmail = "sender@example.com"
    recipients = ["destination@example.com"]
    s = smtplib.SMTP("smtp.gmail.com", 587)
    try:
        s.ehlo()
        s.starttls()
        # The session must be re-identified after switching to TLS.
        s.ehlo()
        s.login(senderEmail, 'pass_goes_here')
        for posting in jobs:
            # Build the body explicitly. Parsing the description as a raw
            # message would treat a leading "Word:" as a header and lose it
            # from the body.
            msg = MIMEText(posting.describJob())
            msg['Subject'] = "New Job Found: " + posting.title
            s.sendmail(senderEmail, recipients, msg.as_string())
            print("Sending email...")
    finally:
        # Always end the session, including when a send fails part-way.
        s.quit()
def saveJobsToDisk(jobs, path='hadooken'):
    """Pickle every job in ``jobs`` to ``path``, replacing its old contents.

    Each job is written as its own pickle, one after another, so the file
    must be read back with repeated ``pickle.load`` calls until EOFError.

    Args:
        jobs: Iterable of objects exposing a ``title`` attribute.
        path: Destination file; defaults to the script's 'hadooken' store.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'wb') as output:
        print("Start write...")
        for job in jobs:
            print(job.title)
            pickle.dump(job, output)
def getJobsFromDisk(path='hadooken'):
    """Load every job that was pickled to ``path``.

    The file holds independent pickles written one after another, so
    objects are read until ``pickle.load`` signals the end with EOFError.

    Args:
        path: File to read; defaults to the script's 'hadooken' store.

    Returns:
        A list of the stored objects in file order; empty for an empty file.

    Raises:
        IOError: if ``path`` cannot be opened.
    """
    oldJobsArray = []
    # NOTE: unpickling can execute arbitrary code, so only point this at a
    # file written by this script.
    with open(path, 'rb') as stored:
        while True:
            # Only the load can hit end-of-file, so it alone is guarded.
            try:
                job = pickle.load(stored)
            except EOFError:
                print("end of file")
                break
            print("%s was successfully read from file" % (job.title,))
            oldJobsArray.append(job)
    return oldJobsArray
# SCRIPT STARTS HERE
# Make sure the job store exists before it is read: append mode creates the
# file when missing and leaves existing contents untouched.
with open('hadooken', 'ab'):
    pass

locationsArray = ["London"]
jobsArray = createJobsArray()
oldJobsArray = getJobsFromDisk()
# Titles stored on earlier runs, used to avoid sending the same job twice.
oldTitles = set(oldJob.title for oldJob in oldJobsArray)

jobsFilteredByLocation = []
for job in jobsArray:
    # ``in`` also matches a location at the very start of the title, which
    # the earlier ``find(...) > 0`` test skipped. any() adds a job at most
    # once even if several locations match.
    if not any(location in job.title for location in locationsArray):
        continue
    if job.title in oldTitles:
        print("Job previously found and sent...")
    elif oldJobsArray:
        print("adding  %s to array because it isnt in the old array" % (job.title,))
        jobsFilteredByLocation.append(job)
    else:
        print("adding  %s to array" % (job.title,))
        jobsFilteredByLocation.append(job)

sendEmail(jobsFilteredByLocation)

# Persist old and new jobs together so the next run sees everything sent.
mergedArray = oldJobsArray + jobsFilteredByLocation
for job in mergedArray:
    print("Job title:  %s" % (job.title,))
saveJobsToDisk(mergedArray)