我几天都有同样的错误。我无法解决它!我真的不明白我的代码在哪里不正确。我之前通过更改“链接”已经解决了类似的错误消息。部分但现在,它不再工作了。有人可以帮帮我吗?
# -*- coding: utf-8 -*-
import scrapy
import re
import numbers
from amazon_test.items import AmazonTestItem
from urllib.parse import urlparse
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class AmazonSellersSpider(CrawlSpider): #scrapy.Spider
name = 'AmazonFR'
allowed_domains = ['amazon.fr']
start_urls = ['https://www.amazon.fr']
rules = (
Rule(LinkExtractor(allow=()), callback='parse'),
)
def parse(self, response):
item = AmazonTestItem()
link = (response.xpath('//div[@class="a-column a-span6"]/h3[@id="-component-heading"]/text()'))
if link:
wait = response.xpath('//div[@class="a-column a-span6"]/h3[@id="-component-heading"]/text()').extract()
if (len(wait) != 0):
name = response.xpath('//div[@class="a-row a-spacing-medium"]/div[@class="a-column a-span6"]/ul[@class="a-unordered-list a-nostyle a-vertical"]/li//span[@class="a-list-item"]/span[contains(.,"Nom")]/following-sibling::text()').extract()
phone = response.xpath('//div[@class="a-column a-span6"]/ul[@class="a-unordered-list a-nostyle a-vertical"]/li//span[@class="a-list-item"]/span[contains(.,"Téléphone")]/following-sibling::text()').extract()
registre = response.xpath('//div[@class="a-column a-span6"]/ul[@class="a-unordered-list a-nostyle a-vertical"]/li//span[@class="a-list-item"]/span[contains(.,"registre de commerce")]/following-sibling::text()').extract()
TVA = response.xpath('//div[@class="a-column a-span6"]/ul[@class="a-unordered-list a-nostyle a-vertical"]/li//span[@class="a-list-item"]/span[contains(.,"TVA")]/following-sibling::text()').extract()
address = response.xpath('//div[@class="a-column a-span6"]/ul[@class="a-unordered-list a-nostyle a-vertical"]/li//span[span[contains(.,"Adresse")]]/ul//li//text()').extract()
item['Business_name'] = ''.join(name).strip()
item['Phone_number'] = ''.join(phone).strip()
item['VAT_number'] = ''.join(TVA).strip()
item['Address'] = '\n'.join(address).strip()
item['Registre_commerce'] = ''.join(registre).strip()
yield item
else:
for sel in response.xpath('//html/body'):
item = AmazonTestItem()
list_urls = sel.xpath('//a/@href').extract()
for url in list_urls:
yield scrapy.Request(response.urljoin(url), callback=self.parse, meta={'item': item})
,错误信息为:
Traceback (most recent call last):
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
yield next(it)
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\paulpo\Documents\amazon_test\amazon_test\spiders\AmazonFR.py", line 21, in parse
link = (response.xpath('//div[@class="a-column a-span6"]/h3[@id="-component-heading"]/text()')).extract
File "C:\Users\paulpo\AppData\Local\Continuum\Anaconda3\lib\site-packages\scrapy\http\response\__init__.py", line 105, in xpath
raise NotSupported("Response content isn't text")
scrapy.exceptions.NotSupported: Response content isn't text