我正在使用下面这段基于 Scrapy 的代码,从 Craigslist 抓取标题、地址等信息。
import scrapy
from scrapy import Request
class JobsSpider(scrapy.Spider):
    """Crawl a Craigslist search listing and emit one item per result row.

    Every yielded item is a dict with the keys ``URL``, ``Title`` and
    ``Address``. After a page has been processed, the spider follows the
    "next" pagination link, if the page has one.
    """

    name = "jobsall"
    # The start URL is hosted on craigslist.co.in, so that domain has to be
    # allowed as well; otherwise the pagination requests are treated as
    # off-site and never fetched.
    allowed_domains = ["craigslist.org", "craigslist.co.in"]
    start_urls = ["https://chennai.craigslist.co.in/d/parking-storage/search/prk"]

    def parse(self, response):
        """Extract the listings of one result page and queue the next page.

        Args:
            response: The downloaded search-result page.

        Yields:
            One dict per listing (``URL``, ``Title``, ``Address``), followed
            by a ``Request`` for the next result page when one is linked.
        """
        for job in response.xpath('//p[@class="result-info"]'):
            relative_url = job.xpath('a/@href').extract_first()
            title = job.xpath('a/text()').extract_first()
            # The default "" keeps the slice safe when the row has no hood
            # span. The slice drops the first two and the last character --
            # presumably the " (" / ")" wrapper around the neighbourhood
            # name; confirm against the live markup.
            address = job.xpath(
                'span[@class="result-meta"]/span[@class="result-hood"]/text()'
            ).extract_first("")[2:-1]
            yield {
                'URL': response.urljoin(relative_url),
                'Title': title,
                'Address': address,
            }

        relative_next_url = response.xpath(
            '//a[@class="button next"]/@href'
        ).extract_first()
        # No "next" link (or an empty href) means there is nothing further
        # to request; joining it would only point back at the current page.
        if relative_next_url:
            yield Request(response.urljoin(relative_next_url), callback=self.parse)
如何抓取发帖人的电子邮件地址?