我正在使用 Scrapy 收集一些数据,需要在提取电子邮件时延迟代码的执行。也就是说,不是延迟整个爬虫,而是只在执行到电子邮件提取部分时才延迟。非常感谢任何帮助。
import re
import scrapy
from scrapy.http import Request
# item class included here
class DmozItem(scrapy.Item):
    """Container for the values scraped from a single page.

    Declares four Scrapy fields: ``link``, ``attr``, ``title`` and ``tag``.
    """

    link = scrapy.Field()
    # Filled in by DmozSpider.parse_contact with the joined text nodes
    # found under the page's "anonemail" div.
    attr = scrapy.Field()
    title = scrapy.Field()
    tag = scrapy.Field()
class DmozSpider(scrapy.Spider):
    """Spider registered as ``dmoz`` that crawls within craigslist.org.

    Crawling starts from a single Asheville Craigslist posting.
    """

    name = "dmoz"
    allowed_domains = ["craigslist.org"]
    start_urls = ["http://asheville.craigslist.org/bab/5078377070.html"]
    BASE_URL = 'http://asheville.craigslist.org/'

    def parse_contact(self, response):
        """Store the contact text of ``response`` on the in-flight item.

        The item is read from ``response.meta['item']``, so the request that
        produced this response must have carried it in its meta dict.

        Args:
            response: Response object exposing ``meta`` and ``xpath``.

        Returns:
            The same item, with ``attr`` set to the concatenation of every
            text node under any ``div`` whose class is ``anonemail`` (an
            empty string when nothing matches).
        """
        pending_item = response.meta['item']
        # presumably the anonymized reply e-mail address; verify against page
        text_nodes = response.xpath("//div[@class='anonemail']//text()").extract()
        pending_item["attr"] = "".join(text_nodes)
        return pending_item