我正在尝试使用 Scrapy 从本地 HTTP 服务器抓取 HTML 文件，但运行时出现以下错误：
AttributeError: 'unicode' object has no attribute 'css'（“unicode”对象没有属性“css”）
这是我的代码：
import scrapy
from ..items import YpauItem
class ypau(scrapy.Spider):
    """Spider that crawls a local HTTP server and collects business names.

    Starts at ``http://localhost/``, follows every ``href`` found on the
    start page, and hands each linked page to :meth:`parsePage`.
    """

    name = 'ypau'
    #allowed_domains = ['http://localhost']
    start_urls = ['http://localhost/']

    def parse(self, response):
        """Yield one request per link found on the start page.

        Each ``href`` is resolved against the response URL and scheduled
        with :meth:`parsePage` as its callback.
        """
        selector = response.css('a::attr(href)').extract()
        for url in selector:
            urls = scrapy.Request(response.urljoin(url), callback=self.parsePage)
            yield urls
            # Debug output; the parenthesized form prints the same on
            # Python 2 and is also valid syntax on Python 3.
            print(urls)

    def parsePage(self, response):
        """Yield a ``YpauItem`` for each listing container on the page."""
        # This is the line behind the reported AttributeError: .extract()
        # turns the selection into a list of unicode strings, and a string
        # has no .css() method for the loop below to call.
        container = response.css('.listing.listing-search.listing-data').extract()
        for items in container:
            bname = items.css('listing-name::text').extract()
            item = YpauItem()
            item['Business_Name'] = bname
            yield item
答案 0 :(得分:2)
如果您打印 response.css('.listing.listing-search.listing-data').extract() 的结果，就能看出出错的原因：.extract() 实际返回的是一个由 str/unicode 字符串组成的列表，而字符串没有 css 方法。如果想对返回列表中的每一项继续使用 css/xpath，就不要调用 extract()，这样得到的列表项才是 Selector 类的对象。
请参见以下代码,
container = response.css('.listing.listing-search.listing-data')
for items in container:
bname = items.css('listing-name::text').extract()
item = YpauItem()
item['Business_Name'] = bname
yield item