I have the following code, and I need the exported items emailed to me so I can see the news. I know about the Scrapy - 1.4 - Email docs, but I can't seem to find enough examples to do the same thing with my code.
What would be a good way to start this off? Failing that, could someone point me to some examples?
import scrapy
import collections
from collections import OrderedDict
from scrapy.spiders import XMLFeedSpider

from tickers.items import tickersItem

class Spider(XMLFeedSpider):
    name = "EmperyScraper"
    allowed_domains = ["yahoo.com"]
    start_urls = ('https://feeds.finance.yahoo.com/rss/2.0/headline?s=UNXL,UQM,URRE,UUUU,VBLT,VGZ,VKTX,VTGN,WINT,XGTI,XTNT,XXII,ZSAN',)
    itertag = 'item'

    def parse_node(self, response, node):
        item = collections.OrderedDict()
        item['Title'] = node.xpath('title/text()').extract_first()
        item['PublishDate'] = node.xpath('pubDate/text()').extract_first()
        item['Description'] = node.xpath('description/text()').extract_first()
        item['Link'] = node.xpath('link/text()').extract_first()
        yield item
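For reference, the Scrapy 1.4 email docs mentioned above expose scrapy.mail.MailSender, which reads its MAIL_HOST / MAIL_FROM / MAIL_USER / MAIL_PASS configuration from settings.py. Below is a minimal, untested sketch of a pipeline that collects the items yielded by parse_node and mails them once the spider closes; the class name, recipient address and subject line are made up for illustration, and the pipeline would still need to be enabled in ITEM_PIPELINES.

from scrapy.mail import MailSender

class EmailItemsPipeline(object):
    """Collect scraped items and email them when the spider finishes."""

    def __init__(self, mailer):
        self.mailer = mailer
        self.items = []

    @classmethod
    def from_crawler(cls, crawler):
        # MailSender picks up MAIL_HOST, MAIL_PORT, MAIL_FROM, MAIL_USER,
        # MAIL_PASS, MAIL_TLS and MAIL_SSL from the project settings.
        return cls(MailSender.from_settings(crawler.settings))

    def process_item(self, item, spider):
        self.items.append(item)
        return item

    def close_spider(self, spider):
        body = "\n\n".join(
            "\n".join("%s: %s" % (key, value) for key, value in item.items())
            for item in self.items)
        # The recipient address is a placeholder; send() returns a Twisted
        # deferred, and returning it lets Scrapy wait for delivery.
        return self.mailer.send(to=["me@example.com"],
                                subject="New Yahoo Finance headlines",
                                body=body)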
Update: I am also looking into ways to automate this!
Edit:
Below is the code in my pipelines.py file. When I run this, all I get as output is >>>y and that's it. Really confused:
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

import smtplib
from smtplib import SMTP
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

class TickersPipeline(object):
    def send_mail(self, message, title):
        gmailUser = 'example@gmail.com'
        gmailPassword = 'example'
        recipient = 'example@gmail.com'

        msg = MIMEMultipart()
        msg['From'] = gmailUser
        msg['To'] = recipient
        msg['Subject'] = title
        msg.attach(MIMEText(message))

        mailServer = smtplib.SMTP('smtp.gmail.com', 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(gmailUser, gmailPassword)
        mailServer.sendmail(gmailUser, recipient, msg.as_string())
        mailServer.close()
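A guess at what may be going wrong: Scrapy never calls send_mail on its own. A pipeline only has its hook methods (open_spider, process_item, close_spider) invoked, and only when it is listed under ITEM_PIPELINES in settings.py. A sketch of hypothetical methods that could sit next to send_mail above so the class actually handles the scraped items (the body formatting and subject line are assumptions):

class TickersPipeline(object):
    def open_spider(self, spider):
        self.collected = []

    def process_item(self, item, spider):
        # Called once for every item yielded by parse_node().
        self.collected.append(item)
        return item

    def close_spider(self, spider):
        # Build one message from all collected items and mail it at the end.
        body = "\n\n".join(
            "\n".join("%s: %s" % (key, value) for key, value in item.items())
            for item in self.collected)
        self.send_mail(body, "EmperyScraper results")

    # ... send_mail(self, message, title) as defined above ...

# settings.py -- without this the pipeline is silently ignored:
# ITEM_PIPELINES = {'tickers.pipelines.TickersPipeline': 300}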
Answer 0 (score: 0)
Here is a guide, pieced together from one of the basic tutorials provided for Scrapy.
import scrapy
from scrapy.crawler import CrawlerProcess
import smtplib

class QuotesSpider(scrapy.Spider):
    name = "quotes"

    def start_requests(self):
        urls = [
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        page = response.url.split("/")[-2]
        filename = 'quotes-%s.html' % page
        # my_server, my_user, my_pswd and my_email are placeholders for your
        # own SMTP host, credentials and address.
        server = smtplib.SMTP(my_server, port=587)
        server.starttls()
        server.login(my_user, my_pswd)
        server.sendmail(my_email, [my_email], filename)
        server.quit()

process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
})

process.crawl(QuotesSpider)
process.start()
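Note that, as written, sendmail() is handed only the filename string (e.g. 'quotes-1.html') as the message rather than the page contents, and a fresh SMTP connection is opened for every parsed page. For the ticker feed in the question it would likely make more sense to collect the parsed items and send a single, properly formatted message at the end of the crawl, for example with a MIMEText body as in the pipeline shown earlier, or to let Scrapy's built-in MailSender manage the connection.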