我想把 Scrapy 教程爬虫(tutorial spider)的输出保存到 MySQL,但一直弄不明白为什么项目的 pipeline 会报出以下错误:
ImportError: No module named MySQLdb
Exception AttributeError: "'QuotePipeline' object has no attribute 'dbpool'"
使用Ubuntu 16.04和Python 3.5.2。
如果有人能指出我哪里做错了,将不胜感激!
以下是相关代码:
/spiders/quotes.py
import scrapy
class QuotesSpider(scrapy.Spider):
    """Spider named ``quotes`` that starts from pages 1 and 2 of quotes.toscrape.com."""

    name = "quotes"
    start_urls = [
        'http://quotes.toscrape.com/page/1/',
        'http://quotes.toscrape.com/page/2/',
    ]

    def parse(self, response):
        """Yield one dict per ``div.quote`` element found in *response*.

        Each dict has the keys ``text`` and ``author`` (first matching text
        node of the respective selector) and ``tags`` (all matching text
        nodes of the tag selector).
        """
        for block in response.css('div.quote'):
            text = block.css('span.text::text').extract_first()
            author = block.css('span small::text').extract_first()
            tags = block.css('div.tags a.tag::text').extract()
            yield {'text': text, 'author': author, 'tags': tags}
/items.py
import scrapy
class QuoteItem(scrapy.Item):
    """Scrapy item declaring the ``text``, ``author`` and ``tags`` fields."""

    text = scrapy.Field()
    author = scrapy.Field()
    tags = scrapy.Field()
/pipelines.py
from twisted.enterprise import adbapi
from scrapy.utils.project import get_project_settings
settings = get_project_settings()
class QuotePipeline(object):
    """Item pipeline that inserts each scraped item into the ``quotes`` table.

    The connection pool is built from the ``DB_SERVER`` (DB-API module name)
    and ``DB_CONNECT`` (connection keyword arguments) project settings.
    """

    # The table your items.QuoteItem class maps to; here it is named quotes.
    # First placeholder: comma-separated column names.
    # Second placeholder: one ``%s`` parameter marker per column.
    insert_sql = """insert into quotes (%s) values ( %s )"""

    # Class-level fallback: if __init__ raises before the pool is created
    # (e.g. the DB-API module cannot be imported), cleanup code still finds
    # the attribute instead of failing with AttributeError.
    dbpool = None

    def __init__(self):
        dbargs = settings.get('DB_CONNECT')
        db_server = settings.get('DB_SERVER')
        self.dbpool = adbapi.ConnectionPool(db_server, **dbargs)

    def close_spider(self, spider):
        """Close the connection pool when Scrapy closes the spider."""
        self._close_pool()

    def __del__(self):
        # Fallback cleanup; must tolerate a half-constructed instance.
        self._close_pool()

    def _close_pool(self):
        """Close the pool at most once; do nothing if it was never created."""
        pool, self.dbpool = self.dbpool, None
        if pool is not None:
            pool.close()

    def process_item(self, item, spider):
        """Queue an insert for *item* and hand the item on unchanged.

        The insert runs asynchronously; a failure is logged through the
        spider's logger rather than interrupting the crawl.
        """
        deferred = self.insert_data(item, self.insert_sql)
        deferred.addErrback(self._log_insert_error, item, spider)
        return item

    def _log_insert_error(self, failure, item, spider):
        """Errback: report a failed insert together with the offending item."""
        spider.logger.error(
            "Failed to insert item %r: %s", item, failure.getErrorMessage()
        )

    def insert_data(self, item, insert):
        """Build the INSERT statement for *item* and run it on the pool.

        :param item: mapping of column name to value.
        :param insert: SQL template with two ``%s`` slots (column list and
            parameter markers), e.g. ``insert_sql``.
        :returns: whatever ``dbpool.runOperation`` returns.
        """
        # Materialise the keys once so names and values stay in the same order.
        keys = list(item.keys())
        fields = u','.join(keys)
        qm = u','.join([u'%s'] * len(keys))
        sql = insert % (fields, qm)
        data = [self._to_column_value(item[k]) for k in keys]
        return self.dbpool.runOperation(sql, data)

    @staticmethod
    def _to_column_value(value):
        """Flatten list/tuple values into one comma-separated string.

        A sequence cannot be bound to a single column parameter, so
        multi-valued fields such as ``tags`` are joined before insertion.
        """
        if isinstance(value, (list, tuple)):
            return u','.join(str(v) for v in value)
        return value
/settings.py
BOT_NAME = 'tutorial'

SPIDER_MODULES = ['tutorial.spiders']
NEWSPIDER_MODULE = 'tutorial.spiders'

# Custom settings read by QuotePipeline: DB_SERVER is passed as the first
# argument to the connection pool, DB_CONNECT as its keyword arguments.
DB_SERVER = 'MySQLdb'
DB_CONNECT = dict(
    db='scrapy',
    user='username',
    passwd='password',
    host='ip.of.the.server',
    charset='utf8',
    use_unicode=True,
)

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {'tutorial.pipelines.QuotePipeline': 500}
SQL架构
-- Database named in DB_CONNECT['db'].
CREATE DATABASE `scrapy` /*!40100 DEFAULT CHARACTER SET utf8mb4 */;

-- Target table of the pipeline's insert statement; one row per scraped quote.
CREATE TABLE `quotes` (
  `id` mediumint(6) NOT NULL AUTO_INCREMENT,
  `text` text NOT NULL,
  `author` varchar(255) NOT NULL,
  `tags` varchar(255) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
答案 0 :(得分:1)
问题解决了:
sudo apt-get install python-mysqldb