I built a small project with Scrapy. The problem is that my spider crawls the pages and scrapes the data, but nothing gets saved to my database. I am using MySQL as my database.
I think something is missing from my pipelines.py file:
from scrapy import log
from twisted.enterprise import adbapi
import MySQLdb.cursors

# the required Pipeline settings.
class MySQLStorePipeline(object):

    def __init__(self, *args, **kwargs):
        # db settings
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
                                            db='project2',
                                            user='root',
                                            passwd='',
                                            host='127.0.0.1',
                                            port='3306',
                                            cursorclass=MySQLdb.cursors.DictCursor,
                                            charset='utf8',
                                            use_unicode=True
                                            )

    def process_item(self, item, spider):
        # run db query in thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        if sites.get('//div[@class="abTbl "]'):
            # runs the condition
            insert_id = tx.execute(
                "insert into crawlerapp_directory (Catogory, Bussiness_name, Description, Number, Web_url) "
                "values (%s, %s, %s, %s, %s)",
                (item['Catogory'][0],
                 item['Bussiness_name'][0],
                 item['Description'][0],
                 item['Number'][0],
                 item['Web_url'][0],
                 )
                )
            # connection to the foreign key Adress.
            tx.execute(
                "insert into crawlerapp_adress (directory_id, adress_name) "
                "values (%s, %s)",
                (insert_id,
                 item['adress_name'][0]
                 )
                )
            # connection to the foreign key Photos.
            tx.execute(
                "insert into crawlerapp_photos (directory_id, Photo_path, Photo_name) "
                "values (%s, %s, %s)",
                (insert_id,
                 item['Photo_path'][0],
                 item['Photo_name'][0]
                 )
                )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
Please guide me on how to get the scraped data saved into my database.
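One thing worth checking before touching the pipeline code: Scrapy only runs pipelines that are enabled in the project settings, so if MySQLStorePipeline is not registered there, process_item is never called and nothing reaches MySQL. A minimal sketch of the settings.py entry, assuming the project package is named project2 (older Scrapy versions take a plain list here instead of a dict):

# settings.py -- register the pipeline so Scrapy actually calls it
ITEM_PIPELINES = {
    'project2.pipelines.MySQLStorePipeline': 300,
}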
Answer 0 (score: 0)
Try this, it works for me:
import MySQLdb
from scrapy import log

class MySpiderPipeline(object):

    def __init__(self):
        # host_name, user_name, password and db_name are placeholders --
        # fill in your own connection details here.
        self.conn = MySQLdb.connect(host_name,
                                    user_name,
                                    password,
                                    db_name,
                                    charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    def open_spider(self, spider):
        pass

    def close_spider(self, spider):
        self.conn.close()

    def process_item(self, item, spider):
        try:
            # single quotes are replaced by hand because the values are
            # interpolated straight into the SQL string
            sql = """INSERT INTO tutorial (`title`,`link`,`desc`,`last_updated`)
                     VALUES ('%s', '%s', '%s', '%s')""" % (item['title'][0].replace("'", "`"),
                                                           item['link'][0].replace("'", "`"),
                                                           item['desc'][0].replace("'", "`"),
                                                           item['last_updated'].replace("'", "`")
                                                           )
            self.cursor.execute(sql)
            self.conn.commit()
        except MySQLdb.Error, e:
            print '!!!!!!!!!!!!!!!!!!DB Write failure!!!!!!!!!!!!'
            print "Error %d: %s" % (e.args[0], e.args[1])
            log.msg("Error %d: %s" % (e.args[0], e.args[1]), level=log.CRITICAL)
        return item
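Because the values above are interpolated straight into the SQL string, the replace("'", "`") calls are needed to keep quotes from breaking the statement. A parameterized query is a safer variant: MySQLdb escapes each value itself, so no manual replacement is required. A sketch of the same insert under that assumption, reusing the table and column names from the answer:

# inside process_item, replacing the string-formatted version above
sql = """INSERT INTO tutorial (`title`, `link`, `desc`, `last_updated`)
         VALUES (%s, %s, %s, %s)"""
self.cursor.execute(sql, (item['title'][0],
                          item['link'][0],
                          item['desc'][0],
                          item['last_updated']))
self.conn.commit()

Here the %s markers are filled in by cursor.execute rather than by Python's % operator, so values containing quotes are stored as-is instead of being mangled.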