用 Scrapy 的 spider 抓取到一些数据后:
# Spider from the question. The snippet's indentation was lost when it was
# posted, and the body of the loop is elided.
class Test_Spider(Spider):
# Spider name as declared in the snippet.
name = "test"
def start_requests(self):
# range(900, 902, 1) produces i = 900 and i = 901.
for i in range(900,902,1):
# Code omitted in the original post; `item` is presumably built here (not shown).
........
yield item
我将数据传递给管道(pipeline)对象,并使用 SQLAlchemy 把它写入 SQLite 表:
# Item pipeline from the question. The snippet's indentation was lost when it
# was posted, and the end of the class is not shown.
class SQLlitePipeline(object):
def __init__(self):
# Engine for the database URL "sqlite:///data.db", plus one connection from it.
_engine = create_engine("sqlite:///data.db")
_connection = _engine.connect()
_metadata = MetaData()
# The table name "table1" is hard-coded here; this is what the question
# asks to turn into a variable.
# NOTE(review): the Table(...) call is never closed in the snippet as
# posted -- the remaining columns and ")" appear to have been cut.
_stack_items = Table("table1", _metadata,
Column("id", Integer, primary_key=True),
Column("detail_url", Text),
# Ask SQLAlchemy to create the tables registered on _metadata.
_metadata.create_all(_engine)
# Keep the connection and the table object on the instance.
self.connection = _connection
self.stack_items = _stack_items
def process_item(self, item, spider):
# The rest of this method is not shown in the snippet.
is_valid = True
我希望能够把表名设置为变量,而不是像现在这样硬编码("table1")。该怎么做?
答案 0 :(得分:4)
假设您通过命令行传递此参数(例如 -s table="table1"),请在管道类中定义 from_crawler 方法。
@classmethod
def from_crawler(cls, crawler):
    """Build the pipeline from the crawler's settings.

    The table name is read from the ``table`` setting, e.g. passed on the
    command line as ``-s table="table1"``.

    Args:
        crawler: The crawler whose ``settings`` are consulted.

    Returns:
        A pipeline instance created as ``cls(table)``.
    """
    # Without a default, a missing setting would yield None and the pipeline
    # would be built with no table name; fall back to the name that used to
    # be hard-coded.
    table = crawler.settings.get('table', 'table1')
    # Instantiate the pipeline with the chosen table name.
    return cls(table)
def __init__(self, table):
    """Connect to the SQLite database and define the target table.

    Args:
        table: Name of the table the items are written to.
    """
    _engine = create_engine("sqlite:///data.db")
    _connection = _engine.connect()
    _metadata = MetaData()
    # The snippet as posted never closed this call; the closing parenthesis
    # is restored so the method is valid Python.
    _stack_items = Table(
        table,
        _metadata,
        Column("id", Integer, primary_key=True),
        Column("detail_url", Text),
    )
    # Create the tables registered on the metadata object.
    _metadata.create_all(_engine)
    self.connection = _connection
    self.stack_items = _stack_items
答案 1 :(得分:4)
class SQLlitePipeline(object):
    """Item pipeline that writes to a SQLite table whose name is configurable."""

    def __init__(self, table_name):
        """Connect to the SQLite database and define the target table.

        Args:
            table_name: Name of the table the items are written to.
        """
        _engine = create_engine("sqlite:///data.db")
        _connection = _engine.connect()
        _metadata = MetaData()
        # The snippet as posted never closed this call; the closing
        # parenthesis is restored so the class is valid Python.
        _stack_items = Table(
            table_name,
            _metadata,
            Column("id", Integer, primary_key=True),
            Column("detail_url", Text),
        )
        # Create the tables registered on the metadata object.
        _metadata.create_all(_engine)
        self.connection = _connection
        self.stack_items = _stack_items

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline using the spider's ``table_name`` attribute.

        Args:
            crawler: The crawler whose ``spider`` carries ``table_name``.

        Returns:
            A pipeline instance created as ``cls(table_name)``.
        """
        # Fall back to the previously hard-coded name when the spider does
        # not define the attribute, instead of raising AttributeError.
        table_name = getattr(crawler.spider, 'table_name', 'table1')
        return cls(table_name)

    def close_spider(self, spider):
        """Close the database connection opened in ``__init__``."""
        self.connection.close()
使用 from_crawler,您可以用指定的参数来创建(实例化)管道对象;这里的参数取自 spider 的 table_name 属性。
答案 2 :(得分:0)
一种更简单的方法是在执行 crawl 命令时传递参数:
scrapy crawl test -a table=table1
然后在管道中通过 spider.table 获取该值:
class TestScrapyPipeline(object):
    """Pipeline that reads the table name from the running spider."""

    def process_item(self, item, spider):
        """Look up the table name on the spider and pass the item on.

        Args:
            item: The scraped item being processed.
            spider: The spider that produced the item; its ``table``
                attribute holds the table name (e.g. set with
                ``-a table=table1``).

        Returns:
            The item, unchanged, so that later pipeline stages receive it.
        """
        table = spider.table  # table name to use when storing the item
        # Without this return, later pipeline stages would receive None.
        return item