I want to use MySQL with Scrapy, so I created this class in my pipeline:
from datetime import datetime
from hashlib import md5

from twisted.enterprise import adbapi
from scrapy import log


class MySQLStorePipeline(object):

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)

    def process_item(self, item, spider):
        # run db query in the thread pool
        d = self.dbpool.runInteraction(self._do_upsert, item, spider)
        d.addErrback(self._handle_error, item, spider)
        # at the end return the item in case of success or failure
        d.addBoth(lambda _: item)
        # return the deferred instead of the item. This makes the engine
        # process the next item (according to the CONCURRENT_ITEMS setting)
        # after this operation (deferred) has finished.
        return d

    def _do_upsert(self, conn, item, spider):
        """Perform an insert or update."""
        guid = self._get_guid(item)
        now = datetime.utcnow().replace(microsecond=0).isoformat(' ')

        conn.execute("""SELECT EXISTS(
            SELECT 1 FROM table WHERE guid = %s
        )""", (guid, ))
        ret = conn.fetchone()[0]

        if ret:
            conn.execute("""
                UPDATE table
                SET topicName=%s, authorName=%s, content=%s, updated=%s
                WHERE guid=%s
            """, (item['topicName'], item['authorName'], item['content'], now, guid))
            spider.log("Item updated in db: %s %r" % (guid, item))
        else:
            conn.execute("""
                INSERT INTO table (guid, topicName, authorName, content, updated)
                VALUES (%s, %s, %s, %s, %s)
            """, (guid, item['topicName'], item['authorName'], item['content'], now))
            spider.log("Item stored in db: %s %r" % (guid, item))

    def _handle_error(self, failure, item, spider):
        """Handle errors that occurred during the db interaction."""
        # do nothing, just log
        log.err(failure)

    def _get_guid(self, item):
        """Generate a unique identifier for a given item."""
        # hash based solely on the content field
        return md5(item['content']).hexdigest()
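For reference, the pipeline reads its connection parameters from settings.py. A minimal sketch of that configuration (host, database name, and credentials are placeholder values; the pipeline path is inferred from the project layout in the traceback below), with ITEM_PIPELINES written as a dict, which is also what the deprecation warning in the log is asking for:

# settings.py (sketch; connection values are placeholders)
MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'myspider_db'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'secret'

# a dict of pipeline path -> order, instead of the deprecated list form
ITEM_PIPELINES = {
    'myspider.pipelines.MySQLStorePipeline': 300,
}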
But when I run the spider, I get this error:
2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats
2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
/Library/Python/2.7/site-packages/scrapy/contrib/pipeline/__init__.py:21: ScrapyDeprecationWarning: ITEM_PIPELINES defined as a list or a set is deprecated, switch to a dict
category=ScrapyDeprecationWarning, stacklevel=1)
Traceback (most recent call last):
File "/usr/local/bin/scrapy", line 11, in <module>
sys.exit(execute())
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 143, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 89, in _run_print_help
func(*a, **kw)
File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 150, in _run_command
cmd.run(args, opts)
File "/Library/Python/2.7/site-packages/scrapy/commands/crawl.py", line 60, in run
self.crawler_process.start()
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 92, in start
if self.start_crawling():
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 124, in start_crawling
return self._start_crawler() is not None
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 139, in _start_crawler
crawler.configure()
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 47, in configure
self.engine = ExecutionEngine(self, self._spider_closed)
File "/Library/Python/2.7/site-packages/scrapy/core/engine.py", line 65, in __init__
self.scraper = Scraper(crawler)
File "/Library/Python/2.7/site-packages/scrapy/core/scraper.py", line 66, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 50, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 33, in from_settings
mw = mwcls.from_settings(settings)
File "/Users/tony/Development/@Scrapy/myspider/myspider/pipelines.py", line 42, in from_settings
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/enterprise/adbapi.py", line 203, in __init__
self.dbapi = reflect.namedModule(dbapiName)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/python/_reflectpy3.py", line 137, in namedModule
topLevel = __import__(name)
File "/Library/Python/2.7/site-packages/MySQLdb/__init__.py", line 19, in <module>
import _mysql
ImportError: dlopen(/Library/Python/2.7/site-packages/_mysql.so, 2): no suitable image found. Did find:
/Library/Python/2.7/site-packages/_mysql.so: mach-o, but wrong architecture
Is the MySQL binding for Python not installed correctly? How can I fix this?
Answer 0 (score: 0)
I'm on a Mac, and the problem was solved by doing this:
nano ~/.bash_profile
and add these lines:
export PATH=/usr/local/mysql/bin:${PATH}
export DYLD_LIBRARY_PATH=/usr/local/mysql/lib/
export VERSIONER_PYTHON_PREFER_64_BIT=yes
export VERSIONER_PYTHON_PREFER_32_BIT=yes
Then run source ~/.bash_profile and rebuild MySQL-python from its source directory:
python setup.py build
python setup.py install
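Once it is reinstalled, a quick way to check that the binding now loads with the right architecture (this is just a sanity check, not part of the fix) is to import it directly:

python -c "import MySQLdb; print MySQLdb.__version__"

If that prints a version instead of raising the dlopen/ImportError from the traceback, Scrapy's adbapi.ConnectionPool('MySQLdb', ...) should work as well.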