我写了一个简单的爬虫(spider)来抓取徒步路线的链接。但它似乎根本没有请求任何网址,日志中只有:
[scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
这是我的简单蜘蛛:
from scrapy.spiders import Spider
from scrapy.selector import Selector
from oregon_hikes_scrapper.items import HikeLinkItem
# Query-string fragments, each selecting one "from ... to ..." page range of
# the wiki's Special:AllPages index. The spider appends each fragment to its
# base URL to build the start requests.
ENDPOINTS = [
    'from="%27%27Peter_Iredale%27%27&to=Bonney_Meadows-Hidden_Meadows_Trail_Junction',
    'from=Bonney_Meadows-Hidden_Meadow_Trail_Junction&to=Clatsop_Loop_Hike',
]
class OrHikeSpider(Spider):
    """Collect hike titles and links from the wiki's Special:AllPages index.

    One start request is issued per entry in ``ENDPOINTS``; every anchor
    matched on a listing page is emitted as a ``HikeLinkItem``.
    """

    name = 'or_hikes'
    # Scrapy expects a list of domain names here, not a bare string.
    allowed_domains = ["oregonhikers.org"]
    # The attribute must be spelled ``start_urls``. Scrapy ignores a
    # ``start_url`` attribute, which leaves the spider with no requests
    # ("Crawled 0 pages").
    # NOTE(review): the query fragment is joined with "&" although the base
    # URL contains no "?" yet -- confirm the URL format the site expects.
    start_urls = [
        "http://www.oregonhikers.org/field_guide/Special:AllPages&" + endpoint
        for endpoint in ENDPOINTS
    ]

    def parse(self, response):
        """Yield one ``HikeLinkItem`` per hike anchor found in *response*.

        Args:
            response: The downloaded listing page passed in by Scrapy.

        Yields:
            HikeLinkItem: ``hike`` holds the extracted ``title`` attribute
            values and ``link`` the extracted ``href`` attribute values of
            one anchor (both are lists, as returned by ``extract()``).
        """
        # Query the response itself; calling ``Selector.xpath`` on the class
        # has no document to search and raises instead of selecting nodes.
        # NOTE(review): browsers insert <tbody> when rendering tables, but the
        # raw HTML may not contain it -- verify this XPath against the page
        # source (e.g. in ``scrapy shell``) if no items are produced.
        hikes = response.xpath(
            '//*[@id="mw-content-text"]/table[2]/tbody/tr[1]/td[1]/div/a'
        )
        for hike in hikes:
            item = HikeLinkItem()
            item['hike'] = hike.xpath('@title').extract()
            item['link'] = hike.xpath('@href').extract()
            yield item
答案 0 :(得分:0)
属性名拼写错误(严格来说并不是语法错误):
应该使用 start_urls,而不是 start_url。Scrapy 只会读取 start_urls,因此拼错后爬虫没有任何起始请求,也就抓取了 0 个页面。