我的scrapy蜘蛛上有这个
def __init__(self, table=''):
    """Capture the ``-a table=...`` command-line argument.

    Scrapy passes ``-a`` arguments as keyword arguments to the spider
    constructor; the value is stored as ``self.table_name`` so other
    components (e.g. an item pipeline) can read it from the spider.
    """
    super(MgdealBotCrawlerSpider, self).__init__()
    self.table_name = table  # e.g. 'lego' from: scrapy crawl ... -a table=lego
我用它从命令行启动蜘蛛
scrapy crawl mgdeal_bot_crawler -a table=lego
我希望在处理项目时使用这个参数创建新表。我怎么能在pipeline.py中做到这一点？
def process_item(self, item, spider):
    """Schedule an upsert of *item* on the DB thread pool.

    Returns the Deferred (not the item) so the engine waits for the
    database operation to finish before moving on to the next item,
    per the CONCURRENT_ITEMS setting.
    """
    # Run the upsert off the reactor thread, in the adbapi thread pool.
    deferred = self.dbpool.runInteraction(self._do_upsert, item, spider)
    deferred.addErrback(self._handle_error, item, spider)
    # Regardless of success or failure, hand the item down the pipeline.
    deferred.addBoth(lambda _: item)
    return deferred
def _do_upsert(self, conn, item, spider):
"""Perform an insert or update."""
guid = self._get_guid(item)
now = datetime.utcnow().replace(microsecond=0).isoformat(' ')
conn.execute("""SELECT EXISTS(
SELECT 1 FROM"""+table_name+"""WHERE guid = %s
)""", (guid, ))
ret = conn.fetchone()[0]
似乎我无法访问它。我怎么能设法做到这一点?
答案 0（得分：0）
如果我理解正确，您需要通过传入的蜘蛛实例来访问该属性，即 spider.table_name：
def _do_upsert(self, conn, item, spider):
"""Perform an insert or update."""
guid = self._get_guid(item)
now = datetime.utcnow().replace(microsecond=0).isoformat(' ')
conn.execute("""SELECT EXISTS(
SELECT 1 FROM"""+spider.table_name+"""WHERE guid = %s
)""", (guid, ))
ret = conn.fetchone()[0]