我刚刚开始使用 Scrapy，正在尝试用 item['title'] 的值来命名下载的图片文件。
这是我的爬虫（Spider）：
import scrapy
from botName.items import botName
class botName(scrapy.Spider):
    """Spider that yields one item per page with a title and image URLs.

    Each item carries:
      * ``title``      -- list of text nodes found in ``<h5>`` elements.
      * ``image_urls`` -- list of ``data-zoom-image`` attribute values
        found on ``<img>`` elements.

    NOTE(review): ``myBotItem`` is not imported in the visible part of this
    file; the import above brings in ``botName`` from ``botName.items``, and
    this class then rebinds that same name. Confirm the item class name and
    import it explicitly.
    """

    name = "bot"

    # Scrapy documents ``allowed_domains`` as a list of domain strings, not a
    # single string.
    allowed_domains = ["example.com"]

    # Pages 1 and 2 (the upper bound is exclusive). ``range`` is used instead
    # of ``xrange`` so the module loads on both Python 2 and Python 3.
    # NOTE(review): the URL has no "?" before "pageno", so the query part
    # ends up inside the host name -- confirm the real URL pattern.
    start_urls = [
        "http://example.com&pageno=%s" % page
        for page in range(1, 3)
    ]

    def parse(self, response):
        """Build one item per ``<html>`` root found in the response.

        :param response: the downloaded page.
        :returns: a generator of populated items.
        """
        for sel in response.xpath('//html'):
            item = myBotItem()
            # Intended to become the file name of the downloaded images.
            # ``.//`` keeps the query relative to ``sel``; a leading ``//``
            # would search the whole document regardless of ``sel``.
            item['title'] = sel.xpath('.//h5/text()').extract()
            item['image_urls'] = sel.xpath('.//img/@data-zoom-image').extract()
            yield item
我的 Pipeline 类：
class myBotPipeline(object):
    """Item pipeline that names downloaded images after the item's title.

    NOTE(review): ``get_media_requests``, ``file_path`` and
    ``item_completed`` are not called by anything visible in this file. They
    look like the override points of Scrapy's ``ImagesPipeline``; with a
    plain ``object`` base, and with ``process_item`` returning the item
    untouched, they would never run -- confirm the intended base class.
    """

    def process_item(self, item, spider):
        """Return ``item`` unchanged."""
        return item

    def file_path(self, request, response=None, info=None):
        """Return the storage path (relative to the store root) for an image.

        The name is the first entry of the ``title`` list that
        ``get_media_requests`` attached to ``request.meta``. When no title is
        available (missing key or empty list) a SHA-1 of the request URL is
        used instead, so the lookup no longer raises ``KeyError`` or
        ``IndexError``.

        NOTE(review): every image of one item receives the same title, so
        items with several images map to the same path.

        :param request: the image request; ``request.meta`` and
            ``request.url`` are read.
        :param response: unused.
        :param info: unused.
        :returns: a path of the form ``'full/<name>'``.
        """
        import hashlib

        titles = request.meta.get('title')
        if titles:
            image_guid = titles[0]
        else:
            # Deterministic fallback that is still unique per URL.
            image_guid = hashlib.sha1(request.url.encode('utf-8')).hexdigest()
        log.msg(image_guid, level=log.DEBUG)
        return 'full/%s' % (image_guid)

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['image_urls']``.

        The item's title is forwarded through ``Request.meta`` because
        ``file_path`` only receives the request, never the item; without it
        ``request.meta['title']`` cannot be resolved.

        :param item: the scraped item; ``image_urls`` is required, ``title``
            is optional.
        :param info: unused.
        """
        title = item.get('title')
        for image_url in item['image_urls']:
            yield Request(image_url, meta={'title': title})

    def item_completed(self, results, item, info):
        """Record the stored paths of the successful downloads on the item.

        :param results: iterable of ``(ok, value)`` pairs; for successful
            entries ``value['path']`` is read.
        :param item: the item the downloads belong to.
        :param info: unused.
        :returns: ``item`` with ``image_paths`` set.
        :raises DropItem: when no download succeeded.
        """
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        item['image_paths'] = image_paths
        return item