I want to use MySQL with Scrapy, so I created this class in my pipeline:
from datetime import datetime
from hashlib import md5

from twisted.enterprise import adbapi
from scrapy import log


class MySQLStorePipeline(object):

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)

    def process_item(self, item, spider):
        # run db query in the thread pool
        d = self.dbpool.runInteraction(self._do_upsert, item, spider)
        d.addErrback(self._handle_error, item, spider)
        # at the end return the item in case of success or failure
        d.addBoth(lambda _: item)
        # return the deferred instead of the item. This makes the engine
        # process the next item (according to the CONCURRENT_ITEMS setting)
        # after this operation (deferred) has finished.
        return d

    def _do_upsert(self, conn, item, spider):
        """Perform an insert or update."""
        guid = self._get_guid(item)
        now = datetime.utcnow().replace(microsecond=0).isoformat(' ')

        conn.execute("""SELECT EXISTS(
            SELECT 1 FROM table WHERE guid = %s
        )""", (guid, ))
        ret = conn.fetchone()[0]

        if ret:
            conn.execute("""
                UPDATE table
                SET topicName=%s, authorName=%s, content=%s, updated=%s
                WHERE guid=%s
            """, (item['topicName'], item['authorName'], item['content'], now, guid))
            spider.log("Item updated in db: %s %r" % (guid, item))
        else:
            conn.execute("""
                INSERT INTO table (guid, topicName, authorName, content, updated)
                VALUES (%s, %s, %s, %s, %s)
            """, (guid, item['topicName'], item['authorName'], item['content'], now))
            spider.log("Item stored in db: %s %r" % (guid, item))

    def _handle_error(self, failure, item, spider):
        """Handle errors that occurred during the db interaction."""
        # do nothing, just log
        log.err(failure)

    def _get_guid(self, item):
        """Generate a unique identifier for a given item."""
        # hash based solely on the content field
        return md5(item['content']).hexdigest()
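For reference, the pipeline reads its connection parameters from settings.py. A minimal sketch of that configuration (host, database name, and credentials are placeholder values; the pipeline path is inferred from the project layout in the traceback below), with ITEM_PIPELINES written as a dict, which is also what the deprecation warning in the log is asking for:

# settings.py (sketch; connection values are placeholders)
MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'myspider_db'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'secret'

# a dict of pipeline path -> order, instead of the deprecated list form
ITEM_PIPELINES = {
    'myspider.pipelines.MySQLStorePipeline': 300,
}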
But when I run the spider, I get this error:
2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats
2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
/Library/Python/2.7/site-packages/scrapy/contrib/pipeline/__init__.py:21: ScrapyDeprecationWarning: ITEM_PIPELINES defined as a list or a set is deprecated, switch to a dict
category=ScrapyDeprecationWarning, stacklevel=1)
Traceback (most recent call last):
File "/usr/local/bin/scrapy", line 11, in <module>
sys.exit(execute())
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 143, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 89, in _run_print_help
func(*a, **kw)
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 150, in _run_command
cmd.run(args, opts)
File "/Library/Python/2.7/site-packages/scrapy/commands/crawl.py", line 60, in run
self.crawler_process.start()
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 92, in start
if self.start_crawling():
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 124, in start_crawling
return self._start_crawler() is not None
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 139, in _start_crawler
crawler.configure()
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 47, in configure
self.engine = ExecutionEngine(self, self._spider_closed)
File "/Library/Python/2.7/site-packages/scrapy/core/engine.py", line 65, in __init__
self.scraper = Scraper(crawler)
File "/Library/Python/2.7/site-packages/scrapy/core/scraper.py", line 66, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 50, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 33, in from_settings
mw = mwcls.from_settings(settings)
File "/Users/tony/Development/@Scrapy/myspider/myspider/pipelines.py", line 42, in from_settings
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/enterprise/adbapi.py", line 203, in __init__
self.dbapi = reflect.namedModule(dbapiName)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/python/_reflectpy3.py", line 137, in namedModule
topLevel = __import__(name)
File "/Library/Python/2.7/site-packages/MySQLdb/__init__.py", line 19, in <module>
import _mysql
ImportError: dlopen(/Library/Python/2.7/site-packages/_mysql.so, 2): no suitable image found. Did find:
/Library/Python/2.7/site-packages/_mysql.so: mach-o, but wrong architecture
Is the MySQL binding for Python not installed correctly? How can I fix this?
Answer 0 (score: 0)
I'm on a Mac, and the problem was solved by doing this:
nano ~/.bash_profile
and add these lines:
export PATH=/usr/local/mysql/bin:${PATH}
export DYLD_LIBRARY_PATH=/usr/local/mysql/lib/
export VERSIONER_PYTHON_PREFER_64_BIT=yes
export VERSIONER_PYTHON_PREFER_32_BIT=yes
Then run source ~/.bash_profile and rebuild MySQL-python from its source directory:
python setup.py build
python setup.py install
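Once it is reinstalled, a quick way to check that the binding now loads with the right architecture (this is just a sanity check, not part of the fix) is to import it directly:

python -c "import MySQLdb; print MySQLdb.__version__"

If that prints a version instead of raising the dlopen/ImportError from the traceback, Scrapy's adbapi.ConnectionPool('MySQLdb', ...) should work as well.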