我正在尝试使用 Scrapy 抓取 Shoescribe,但不知为何 parse_item 始终没有被调用。我用同样的代码抓取其他网站时一切正常,完全不知道问题出在哪里。任何帮助我都将非常感激,谢谢!
import scrapy
from scrapy import log
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from lsspider.items import *
class ShoeScribeSpider(CrawlSpider):
    """Spider for shoescribe.com that emits an item holding each page's URL.

    CrawlSpider sends the responses for ``start_urls`` to
    ``parse_start_url`` and applies ``rules`` only to the links extracted
    from those responses.  The single rule below matches the start URL
    itself, so the extracted link is discarded as an already-seen request
    and the rule callback never fires.  ``parse_start_url`` is therefore
    overridden to hand the start page to ``parse_item`` directly.
    """

    name = "shoescribe"
    merchant_name = "shoescribe.com"
    allowed_domains = ["www.shoescribe.com"]
    start_urls = [
        "http://www.shoescribe.com/us/women/ankle-boots_cod44709699mx.html",
    ]
    rules = (
        Rule(
            LinkExtractor(
                # ``allow`` takes regular expressions, so the dots are
                # escaped to match literally instead of "any character".
                allow=(
                    r'http://www\.shoescribe\.com/us/women/'
                    r'ankle-boots_cod44709699mx\.html',
                ),
            ),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_start_url(self, response):
        """Parse the start page with the same callback the rule uses.

        Without this override the start page would be used only for link
        extraction and ``parse_item`` would never see it.
        """
        return self.parse_item(response)

    def parse_item(self, response):
        """Build an item containing the response URL without its query string.

        Returns the populated item.
        """
        # Single-argument print calls behave the same on Python 2 and 3.
        print('parse_item')
        # NOTE(review): ``Item`` is whatever the star import of
        # ``lsspider.items`` exposes; it must declare a ``url`` field,
        # otherwise the assignment below raises KeyError -- confirm.
        item = Item()
        # Keep only the part of the URL before the first '?'.
        item['url'] = response.url.split('?')[0]
        print(item['url'])
        return item
答案 0 :(得分:0)