我在 Scrapy 中有一个管道(pipeline),需要在其中获取 Scrapy 统计数据(stats)中的信息。
<%if(userInfo){ /* userInfo is truthy: render the account dropdown (insert/update/delete admin links plus logout). NOTE(review): the dropdown label "Motiur" is hard-coded; presumably it should come from userInfo, confirm. */ %>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">Motiur<b class="caret"></b></a>
<ul class="dropdown-menu">
<li>
<a href="/admin/insert">Insert</a>
</li>
<li>
<a href="/admin/update">Update</a>
</li>
<li>
<a href="/admin/delete">Delete</a>
</li>
<li>
<a href="/logout">Logout</a>
</li>
</ul>
</li>
<% }else{ /* userInfo is falsy: render only the link to the login page. */ %>
<li>
<a href="/admin/login">Login</a>
</li>
<%}%>
当我运行下面的代码时,会收到错误:
class MyPipeline(object):
    """Item pipeline that prints the running ``item_scraped_count`` stat.

    The stats collector is taken from the crawler in ``from_crawler`` and
    kept on the instance as ``self.stats``.
    """

    def __init__(self, stats):
        """Store the stats collector.

        Args:
            stats: Stats collector object; ``from_crawler`` passes
                ``crawler.stats``.
        """
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline from ``crawler``, handing it ``crawler.stats``."""
        return cls(crawler.stats)

    def process_item(self, item, spider):
        """Print the current scraped-item count and pass the item through.

        Args:
            item: The item being processed; returned unchanged.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        # Look the value up with get_value() and a default rather than
        # indexing get_stats(): the 'item_scraped_count' key may not exist
        # yet when this runs, and indexing the dict then raises KeyError.
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(self.stats.get_value('item_scraped_count', 0))
        return item
如果这不是获取统计数据值的正确方法,那么我该怎么办?
答案 0 :(得分:2)
找到答案!终于来了!
不要使用 `self.stats.get_stats()['item_scraped_count']`(该键尚未写入统计数据时会抛出 KeyError),
而应使用 `self.stats.get_value('item_scraped_count')`(键不存在时返回默认值 None)。
答案 1 :(得分:0)
修正后的版本(使用 Scrapy 1.8):
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exporters import CsvItemExporter
import datetime
class SplitFilePipeline(object):
    """Item pipeline that exports items to CSV, rotating to a new file
    each time the ``item_scraped_count`` stat passes another ``split_limit``.

    Output files are named from ``base_filename`` with a
    ``YYYYmmddHHMM`` timestamp taken when the file is opened.
    """

    def __init__(self, stats):
        """Store the stats collector and open the first CSV file.

        Args:
            stats: Stats collector object; ``from_crawler`` passes
                ``crawler.stats``.
        """
        self.stats = stats
        self.base_filename = "crawls/output_{}.csv"
        # Rotate to a new CSV file every 10000 scraped items.
        self.next_split = self.split_limit = 10000
        # Paths this instance has already opened. The timestamp only has
        # minute resolution and files are opened in 'w+b' (truncating) mode,
        # so without this a second rotation inside the same minute would
        # wipe the file written just before it.
        self._opened_paths = set()
        self.create_exporter()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline from ``crawler``, handing it ``crawler.stats``."""
        return cls(crawler.stats)

    def create_exporter(self):
        """Open a new timestamped CSV file and start a fresh exporter on it.

        Sets ``self.file`` and ``self.exporter``. If the timestamped path was
        already used by this instance, a ``_<n>`` suffix is added to the
        timestamp so the earlier file is not truncated.
        """
        datetime_stamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
        path = self.base_filename.format(datetime_stamp)
        if path in self._opened_paths:
            # The set only grows, so its size gives a suffix that no earlier
            # path from this instance can have used.
            path = self.base_filename.format(
                "{}_{}".format(datetime_stamp, len(self._opened_paths))
            )
        self._opened_paths.add(path)
        self.file = open(path, 'w+b')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def _close_exporter(self):
        """Finish the current export and close its file."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Close the handle even if finishing the export fails.
            self.file.close()

    def close_spider(self, spider):
        """Finalise and close the last CSV file when the spider closes.

        Without this hook the final file was never passed through
        ``finish_exporting()`` nor closed.
        """
        self._close_exporter()

    def process_item(self, item, spider):
        """Export ``item``, first rotating the output file if the scraped
        item count has reached the next split threshold.

        Args:
            item: The item to export; returned unchanged.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        # The stat may be missing (None) early in the crawl; treat as zero.
        scraped_count = self.stats.get_value('item_scraped_count') or 0
        if scraped_count >= self.next_split:
            self.next_split += self.split_limit
            self._close_exporter()
            self.create_exporter()
        self.exporter.export_item(item)
        return item