为什么让indexerror列表索引超出范围scrapy

时间:2016-10-05 04:59:45

标签: python web-scraping scrapy

我不太明白,我不断得到索引错误,说明超出范围。是因为列表链接是空的吗?这里写链接的格式是什么

class POSpider(CrawlSpider):
    name = 'po'
    start_urls = ['https://www.poets.org/poetsorg/poems']
    allowed_domains = ['poets.org/poetsorg/poems']

    def parse(self, response):
        items=[]
        l=response.xpath('//*[@class="themes"]//a//@href').extract()
        theme_ids=[]
        for item in l:
            theme_ids.append(item[855:1412])
        theme_urls=[]
        for tid in theme_ids:
            theme_urls.append('https://www.poets.org/poetsorg/poems?   field_occasion_tid=All&field_poem_themes_tid='+ tid)
        for link in theme_urls:
            request=scrapy.Request(link,callback=self.parse_layer2,dont_filter=True)
            yield request



    def parse_layer2(self,response):
        items=[]
        p=response.xpath('//*[@id="block-views-poems-poems-block-all"]/div/div//tbody//td[2]//@href')[-1].extract()
        poem_urls=[]
        for item in p:
            poem_urls.append(item)
        for link in poem_urls:
            request=scrapy.Request(link,callback=self.parse_layer3,dont_filter=True)

            yield request


    def parse_layer3(self,response):
        items=[]
        poems=response.xpath('//*[@id="poem-content"]/div[2]/div/div').extract()
        for poem in poems:
            item=PoetryItem()
            s=poem.xpath('*/p/text()').extract()
            t=strip_list(s)
            t=t.encode('ascii','replace').lower()+'\r\n'
            item['poem']=t
            items.append(item)
        return items

这就是我一直得到的结果。

    Traceback (most recent call last):
  File "//anaconda/lib/python2.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
  File "//anaconda/lib/python2.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
for x in result:
  File "//anaconda/lib/python2.7/site-packages/scrapy/spidermiddlewares/referer.py", line 22, in <genexpr>
return (_set_referer(r) for r in result or ())
  File "//anaconda/lib/python2.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
  File "//anaconda/lib/python2.7/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
  File "/Users/chinouhane/Desktop/Org/Org/spiders/PO_spider.py", line 37, in parse_layer2
    p=response.xpath('//*[@id="block-views-poems-poems-block-all"]/div/div//tbody//td[2]//@href')[-1].extract()
  File "//anaconda/lib/python2.7/site-packages/parsel/selector.py", line 56, in __getitem__
o = super(SelectorList, self).__getitem__(pos)
IndexError: list index out of range

0 个答案:

没有答案