from __future__ import unicode_literals
import sys
from scrapy.pipelines.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
import os
# Reload the sys module, then set the interpreter-wide default string encoding to UTF-8.
# NOTE(review): bare `reload` and `sys.setdefaultencoding` look like the Python 2
# idiom; confirm the target interpreter before reusing this snippet.
reload(sys)
sys.setdefaultencoding('utf-8')
class TetePipeline(ImagesPipeline):
    """Image pipeline that rewrites the paths recorded on the item.

    The methods defined here only assign fields on the item; they do not
    rename anything on disk themselves.
    """

    def get_media_requests(self, item, info):
        """Yield one ``Request`` for every URL in ``item['image_urls']``."""
        for url in item['image_urls']:
            yield Request(url)

    def item_completed(self, results, item, info):
        """Replace ``item['image_paths']`` with title-prefixed names.

        ``results`` is iterated as ``(ok, data)`` pairs; ``data['path']`` is
        read only for pairs whose ``ok`` flag is truthy.

        Returns:
            The same ``item``, with ``item['image']`` and
            ``item['image_paths']`` both referring to the list of new names.

        Raises:
            DropItem: if no pair in ``results`` was successful.
        """
        # Reset the target field first, as the original flow does, so it is
        # present on the item even when the item ends up being dropped.
        item['image'] = []

        downloaded = []
        for succeeded, data in results:
            if succeeded:
                downloaded.append(data['path'])
        if len(downloaded) == 0:
            raise DropItem('Items contains no images')

        item['image_paths'] = downloaded
        # Each new name is the item's title followed by the last eight
        # characters of the recorded path.
        item['image'].extend(
            item['image_titles'] + path[-8:] for path in item['image_paths']
        )
        item['image_paths'] = item['image']
        return item
Scrapy 版本:1.0。这是我的代码,它可以下载图像,但保存的图像文件名是图像 URL 的 SHA1 哈希值。我想用自定义名称来命名图像,例如 item['image_titles'] + i[-8:]。在 scrapy shell 中输入 item['image_titles'] + i[-8:] 可以得到正常的输出,但下载后的文件名并没有改变,原因在哪里?
答案 0 :(得分:1)
class TetePipeline(ImagesPipeline):
    """Image pipeline whose ``file_path`` builds names from the item's title."""

    def get_media_requests(self, item, info):
        """Yield a ``Request`` per image URL, carrying the item in ``meta``.

        The item is attached under the ``'item'`` meta key so that
        ``file_path`` can read it back from the request.
        """
        for url in item['image_urls']:
            yield Request(url, meta={'item': item})

    def file_path(self, request, response=None, info=None):
        """Return the name built for the image requested by ``request``.

        The name is ``item['image_titles']`` followed by the last eight
        characters of the final ``/``-separated segment of the request URL.
        """
        source_item = request.meta['item']
        # Everything after the last '/' (the whole URL when it has no '/').
        last_segment = request.url.rpartition('/')[2]
        return source_item['image_titles'] + last_segment[-8:]
重写 file_path 方法并返回 image_name。原因是:get_media_requests 负责发起图片下载请求,而 item_completed 被调用时图片已经下载并保存完毕,所以在 item_completed 中修改 item 里的路径并不会改变已保存文件的名称;文件名需要在 file_path 中决定。