使用Scrapy发送电子邮件的基础知识

时间:2017-06-20 16:14:11

标签: python email scrapy extract

我有以下代码,我需要将导出的项目(item)通过电子邮件发送给自己,以便查看新闻。我知道 Scrapy 1.4 的电子邮件文档(Email Docs),但似乎找不到足够的示例来在我的代码中实现同样的功能。

着手实现这一功能的好方法是什么?如果没有,能否给我指出一些示例?

import scrapy
import collections

from collections import OrderedDict
from scrapy.spiders import XMLFeedSpider
from tickers.items import tickersItem
class Spider(XMLFeedSpider):
    """XML feed spider that turns each ``item`` node of the feed into an ordered record.

    Every record carries the node's title, publication date, description
    and link, in that order.
    """

    name = "EmperyScraper"
    allowed_domains = ["yahoo.com"]
    start_urls = ('https://feeds.finance.yahoo.com/rss/2.0/headline?s=UNXL,UQM,URRE,UUUU,VBLT,VGZ,VKTX,VTGN,WINT,XGTI,XTNT,XXII,ZSAN',)
    itertag = 'item'

    def parse_node(self, response, node):
        """Yield one ordered mapping of output field -> first matching text.

        ``node`` is queried with a relative XPath per field; each value is
        whatever ``extract_first()`` returns for that query.
        """
        # (output key, XPath relative to the node), in the order the keys
        # must appear in the resulting record.
        field_paths = (
            ('Title', 'title/text()'),
            ('PublishDate', 'pubDate/text()'),
            ('Description', 'description/text()'),
            ('Link', 'link/text()'),
        )
        record = collections.OrderedDict()
        for field, path in field_paths:
            record[field] = node.xpath(path).extract_first()
        yield record

更新:我也正在研究使其自动化的方法!

编辑: 下面是我在pipelines.py文件中的代码。当我运行这个脚本时,只得到 >>>y 这样的输出,仅此而已。真的很困惑:

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import smtplib
from smtplib import SMTP
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

class TickersPipeline(object):
    """Helper that e-mails a plain-text message through Gmail's SMTP server.

    NOTE(review): this class only defines ``send_mail``; nothing in the
    class calls it. Scrapy invokes ``process_item`` on pipeline components,
    so presumably a ``process_item`` that calls ``send_mail`` is still
    needed for any mail to be sent -- confirm against the Scrapy docs.
    """

    def send_mail(self, message, title):
        """Send ``message`` as the body of an e-mail whose subject is ``title``.

        The sender, password and recipient are the hard-coded placeholder
        values below.

        Args:
            message: Text placed in the single MIME text part of the mail.
            title: Value of the ``Subject`` header.

        Raises:
            smtplib.SMTPException: If the server rejects any SMTP step.
            OSError: If the connection to the server cannot be made.
        """
        # MIMEMultipart / MIMEText come from the module-level
        # ``email.mime.*`` imports. The former function-local imports used
        # the Python-2-only paths ``email.MIMEMultipart`` / ``email.MIMEText``,
        # which raise ImportError on Python 3.
        gmail_user = 'example@gmail.com'
        gmail_password = 'example'
        recipient = 'example@gmail.com'

        msg = MIMEMultipart()
        msg['From'] = gmail_user
        msg['To'] = recipient
        msg['Subject'] = title
        msg.attach(MIMEText(message))

        mail_server = smtplib.SMTP('smtp.gmail.com', 587)
        try:
            mail_server.ehlo()
            mail_server.starttls()
            # Re-identify after STARTTLS, as the original code did.
            mail_server.ehlo()
            mail_server.login(gmail_user, gmail_password)
            mail_server.sendmail(gmail_user, recipient, msg.as_string())
            # End the SMTP session with QUIT once the mail was handed over.
            mail_server.quit()
        finally:
            # Always release the socket, including when a step above failed.
            mail_server.close()

1 个答案:

答案 0 :(得分:0)

这是一个指南,从为scrapy提供的基本教程之一拼凑而成。

import scrapy
from scrapy.crawler import CrawlerProcess
import smtplib


class QuotesSpider(scrapy.Spider):
    """Spider that requests two quotes.toscrape.com pages and e-mails a
    file name derived from each response URL."""

    name = "quotes"

    def start_requests(self):
        """Yield one request per hard-coded page URL, handled by ``parse``."""
        page_urls = (
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        )
        for page_url in page_urls:
            yield scrapy.Request(url=page_url, callback=self.parse)

    def parse(self, response):
        """Build a ``quotes-<page>.html`` name from the URL and mail it.

        Only the file-name string is passed as the message to ``sendmail``.
        """
        # The second-to-last "/"-separated piece of the URL is the page id.
        url_parts = response.url.split("/")
        filename = 'quotes-%s.html' % url_parts[-2]

        # NOTE(review): my_server, my_user, my_pswd and my_email are not
        # defined in the visible code -- presumably placeholders to be
        # supplied by the reader; confirm before running.
        smtp = smtplib.SMTP(my_server, port=587)
        smtp.starttls()
        smtp.login(my_user, my_pswd)
        smtp.sendmail(my_email, [my_email], filename)
        smtp.quit()

# Create the crawler process, passing a settings dict that sets only the
# USER_AGENT string.
process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
})

# Register the spider class with the process, then start it.
process.crawl(QuotesSpider)
process.start()