我是 Scrapy 的新手,在使用下面这段代码时遇到了问题:爬虫没有跟进页面中的链接并对其进行解析。
import sys
# Append a hard-coded Miniconda site-packages directory to the module search
# path. The path is tied to one specific Windows user profile.
# NOTE(review): presumably needed so the scrapy imports below resolve on this
# machine -- confirm, and prefer running inside the right environment instead.
sys.path.append(r'C:\Users\User\Miniconda3\Lib\site-packages')
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class MySpider(CrawlSpider):
    """Crawl poder360.com.br, following in-domain links, and emit page titles.

    Starting from ``start_urls``, every link matched by the single crawl rule
    is requested, handed to :meth:`parse_item`, and followed further.
    """

    name = 'poder360.com'
    # Scrapy reads the attribute spelled ``allowed_domains``; a misspelt name
    # is just an unused class attribute and no domain restriction is applied.
    allowed_domains = ['poder360.com.br']
    start_urls = ['https://www.poder360.com.br']
    # ``rules`` must be an iterable of Rule objects. The trailing comma makes
    # this a one-element tuple; without it the parentheses are only grouping
    # and ``rules`` would be a bare Rule, which CrawlSpider cannot iterate.
    rules = (
        Rule(
            LinkExtractor(allow_domains=['poder360.com.br']),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Extract the ``<title>`` text of a crawled page.

        Args:
            response: The Scrapy response for a page matched by ``rules``.

        Returns:
            dict: ``{'title': [...]}`` where the value is the list of text
            nodes selected by ``//title/text()``.
        """
        self.logger.info('Hi, this is an item page! %s', response.url)
        # A bare ``scrapy.Item()`` declares no fields, so assigning
        # ``item['title']`` raises KeyError. A plain dict is a valid Scrapy
        # item and needs no field declarations.
        item = {'title': response.xpath('//title/text()').extract()}
        print(item['title'])
        return item