运行scrapy项目时出现此错误 我的spider.py代码是
import scrapy
import re
from tutorial.items import TutorialItem
class tutorialSpider(scrapy.Spider):
name="tutorial"
allowed_domain=['examble.com']
start_urls = ["examble.com/something"]
def parse(self, response):
for sel in response.xpath('//*[@id="post-entry"]/div/article'):
item = TutorialItem()
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract[0]
item['MainPageUrl'] = sel.xpath('div[2]/h2/a/@href').extract[0]
item['Author'] = sel.xpath('div[2]/div/span/a/text()').extract[0]
request = scrapy.Request(item['MianPageUrl'], callback=self.parseContentDetails)
request.meta['item'] = item
yield request
def parseContentDetails(self,response):
item = response.meta['item']
item['Content'] = response.xpath()
item['Count'] = response.xpath()
print type(item)
return item
我的pipeline.py是
class TutorialPipeline(object):
def __init__(self):
#self.setupDBCon()
#self.createTables()
def process_item(self, item, spider):
for key, value in item.iteritems():
if(isinstance(value, list)):
if value:
templist = []
for obj in value:
temp = self.stripHTML(obj)
templist.append(temp)
item[key] = templist
else:
item[key] = ""
else:
item[key] = self.stripHTML(value)
print item.get('Title', '')
return item
和我的items.py是
from scrapy.item import Item, Field
class TutorialItem(Item):
Title=Field()
Author = Field()
MianPageUrl = Field()
Content=Field()
Count=Field()
请告诉我这个错误的解决方案。我搜索了很多网站。那些网站只告诉instancemethod对象在django中没有属性错误,但我想要scrapy的解决方案
答案 0 :(得分:0)
你没有正确地调用提取,对于每个提取你必须实际调用该方法然后索引它:
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract()[0]
^^^
如果您只想要第一个元素,可以使用 extract_first 。
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract_first()