我有一个问题我只想下载.jpg图片。我使用了以下代码,但我发现我仍然下载.png或.gif图片。有谁可以帮助我?
#coding:utf-8
from scrapy.spiders import Spider
from scrapy.selector import Selector
from jianshu.items import JianshuItem
import scrapy
from scrapy.crawler import CrawlerProcess
class jiansider(Spider):
name = "jiantu"
allowed_domains = []
start_urls= [
"https://tieba.baidu.com/p/5227563995"
]
def parse(self, response):
sel = Selector(response)
sites = sel.xpath('//div/img/@src').extract()
item = JianshuItem()
item['image_url'] = response.xpath('//div/img/@src').extract()
for url in item['image_url']:
list_photo = url.split('.')
photo_type = list_photo[len(list_photo)-1]
print photo_type
if photo_type != 'jpg':
#print url
item['image_url'].remove(url)
#print "delete1"
yield item
total_page = response.xpath('//span[@class="red"]/text()').extract()
now_page = response.xpath('//li/span[@class="tP"]/text()').extract()
tpa=total_page[len(total_page)-1]
npa=now_page[len(now_page)-1]
tpage= int(tpa)
npage= int (npa)
print "present page -----"
print npage
print "total page ------"
print tpage
starturls = 'https://tieba.baidu.com/p/5227563995?pn='
if npage != tpage:
npage = npage+1
new_url = '%s%s'%(starturls,npage)
print "new_url is ------------"
print new_url
if new_url:
yield scrapy.Request(new_url,callback = self.parse)
我使用if!= .jpg来避免下载类型不是jpg的照片,但我失败了。有人可以告诉我为什么并帮助我处理它吗?