我是Scrapy框架的新手
我想使用Item Pipeline
在DB中存储一些项目Spider.py
class ExampleSpider(Spider):
name = "Spider1"
allowed_domains = ["example.com"]
start_urls = ["http://www.example.com.com/.../rss_1.xml"]
def parse(self, response):
sel = Selector(response)
Examples = sel.xpath('//item')
items = []
for Example in Examples:
item = ExampleItem()
item['link'] = Example.xpath('.//link/text()').extract()
item['title'] = Example.xpath('.//title/text()').extract()
links = item['link']
titles = item['title']
items.append(item)
return items
pipelines.py
class MySQLStorePipeline(object):
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod
def from_settings(cls, settings):
dbargs = dict(
host=settings['MYSQL_HOST'],
db=settings['MYSQL_DBNAME'],
user=settings['MYSQL_USER'],
passwd=settings['MYSQL_PASSWD'],
charset='utf8',
use_unicode=True,
)
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
return cls(dbpool)
def process_item(self, item, spider):
# run db query in the thread pool
query = self.dbpool.runInteraction(self._conditional_insert, item, spider)
query.addErrback(self._handle_error, item, spider)
# at the end return the item in case of success or failure
query.addBoth(lambda _: item)
# return the deferred instead the item. This makes the engine to
# process next item (according to CONCURRENT_ITEMS setting) after this
# operation (deferred) has finished.
return query
def _conditional_insert(self, tx, item, spider):
tx.execute("select * from AnnonceGratuit where link = %s", (item['link']))
result = tx.fetchone()
if result:
log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
else:
tx.execute("""
INSERT INTO AnnonceGratuit (link, title)
VALUES (%s, %s)
""", (item['link'], item['title'])
)
log.msg("Item stored in db: %s" % item, level=log.DEBUG)
def _handle_error(self, failure, item, spider):
"""Handle occurred on db interaction."""
# do nothing, just log
log.err(failure)
我成功废弃了链接和标题项
但是当我托盘存放它们时......我有这个错误
_mysql_exceptions.ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \')
NB
当我对一个项目使用相同的代码时,它可以工作并存储在DB中 但是对于两个或更多的项目,它不起作用!
提前感谢您的帮助。
答案 0 :(得分:0)
也许您应该检查您的商品['link']或商品['title']是列表还是字符串。 我有同样的错误,因为我尝试将列表存储到mysql中,并在将列表转换为字符串后,工作正常。