我正在玩Scrapy,并尝试将Spiders生成的项目传递给ItemPipe。问题是,在输入管道时,从不调用实际的process_items方法。尽管已经调试了蜘蛛并且看到它正确地产生了引用项目。总而言之,当我调试quotes_spider.py时,我可以看到我返回的'item'对象是Quote类型,其中author / quote具有期望值。类似地,正确加载管道并创建json文件,我只是从不输入process_items方法或写入此类文件。有什么建议吗?
quotes_spider.py
import scrapy
from scrapy.loader import ItemLoader
from tutorial.item_loaders import QuoteLoader
from tutorial.items import Quote
class QuotesSpider(scrapy.Spider):
name = "quotes"
start_urls = [
'http://quotes.toscrape.com/page/1/',
'http://quotes.toscrape.com/page/2/',
]
def parse(self, response):
for quote in response.xpath('//div[contains(@class, "quote")]'):
l = QuoteLoader(item=Quote(), response=response)
content = quote.xpath('./span[contains(@itemprop, "text")]/text()').extract_first()
l.add_value('quote', content)
author = quote.xpath('./span/small[contains(@itemprop, "author")]/text()').extract_first()
l.add_value('author', author)
item = l.load_item()
yield item
Items.py
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class TutorialItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
pass
class Quote(scrapy.Item):
quote = scrapy.Field()
author = scrapy.Field()
item_loaders.py
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst, MapCompose, Join
class QuoteLoader(ItemLoader):
default_output_processor = TakeFirst()
pipelines.py
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json
class QuotePipeline(object):
def open_spider(self, spider):
self.file = open('itemss.json', 'w')
pass
def close_spider(self, spider):
self.file.close()
def process_items(self, item, spider):
print "HELLO"
line = json.dumps(dict(item)) + "\n"
self.file.write(line)
return "HELLO"
在settings.py中我已正确定义:
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'tutorial.pipelines.QuotePipeline': 300,
}
答案 0 :(得分:1)
func retrieveToken(for file: File) -> Promise<Any> {
return Promise<Any> { fulfill, reject in
service.determineToken(for: file) { token, error in
// if any error, reject
guard let token = token, error == nil else {
reject(error ?? FileError.someError)
return
}
// if I don't have to make recursive call, `fulfill` immediately.
// in my example, I'm going to see if there are subfiles, and if not, `fulfill` immediately.
guard let subfiles = file.subfiles else {
fulfill(token)
return
}
// if I got here, there are subfiles and I'm going to start recursive set of promises
self.retrieveTokens(for: subfiles).then { tokens in
fulfill(tokens)
}.catch { error in
reject(error)
}
}
}
}