我的问题是:信息已经被成功抓取,但没有写入数据库。
我的爬虫(spider)可以正常输出抓取到的信息,例如导出到 .json 文件中。
pipelines.py
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
# Item pipeline as posted in the question: it inserts each item's 'pen' and
# 'name' values into the MySQL table `test`.
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the lines are kept exactly as posted.
class MySQLStorePipeline(object):
def __init__(self):
# One connection and one cursor are created at construction time and kept
# on self for use by process_item.
self.conn = MySQLdb.connect(host="10.0.2.2", user='root', passwd='', db='mpmf', charset="utf8", use_unicode=True)
self.cursor = self.conn.cursor()
def process_item(self, item, stack):
try:
# This is the statement the reported traceback points at: at least one of
# item['pen'] / item['name'] is a list, so calling .encode() on it raises
# AttributeError while the parameters are being built.
self.cursor.execute("""INSERT INTO test (pen, name)
VALUES (%s, %s)""",
(item['pen'].encode('utf-8'), item['name'].encode('utf-8')))
self.conn.commit()
except MySQLdb.Error, e:
# Only MySQLdb errors are caught and printed here; per the traceback, the
# AttributeError above is not handled by this clause.
print "Error %d: %s" % (e.args[0], e.args[1])
return item
并在 settings.py 中添加了以下配置:
# Pipeline registration for settings.py: dotted class path -> integer value.
ITEM_PIPELINES = {'stack.pipelines.MySQLStorePipeline': 300}
我的日志中出现了下面这些错误;不过尽管有这些报错,仍然可以看到信息抓取本身是正常工作的。
File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 577, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/root/stack/stack/pipelines.py", line 14, in process_item
self.cursor.execute("""INSERT INTO test (pen, name) VALUES (%s, %s)""", (item['pen'].encode('utf-8'), item['name'].encode('utf-8')))
AttributeError: 'list' object has no attribute 'encode'
因此没有任何结果被导入数据库。
答案 0 :(得分:0)
已解决。问题出在缩进上;另外,列表(list)对象没有 encode 属性,需要先取出列表中的第一个元素:
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MySQLStorePipeline(object):
    """Item pipeline that inserts each item's 'pen' and 'name' into MySQL.

    One connection and one cursor are opened when the pipeline is created
    and reused for every item; ``close_spider`` releases them.
    """

    def __init__(self, host="10.0.2.2", user='root', passwd='', db='mpmf'):
        """Open the MySQL connection and the cursor used by ``process_item``.

        The defaults are the values that used to be hard-coded, so creating
        the pipeline without arguments connects exactly as before.

        Args:
            host: MySQL server host.
            user: MySQL user name.
            passwd: MySQL password.
            db: Database (schema) name.
        """
        self.conn = MySQLdb.connect(host=host, user=user, passwd=passwd,
                                    db=db, charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    @staticmethod
    def _first_value(value):
        """Return the first element of a list/tuple field, else *value* itself.

        The item fields arrive as lists (the reported failure was
        ``'list' object has no attribute 'encode'``), but a plain value is
        passed through unchanged. An empty sequence yields ``None`` instead
        of raising ``IndexError``.
        """
        if isinstance(value, (list, tuple)):
            return value[0] if value else None
        return value

    def process_item(self, item, stack):
        """Insert one item into the ``test`` table and return the item.

        Args:
            item: Mapping with 'pen' and 'name' entries (lists or scalars).
            stack: Second positional argument supplied by the caller
                (presumably the spider); unused here.

        Returns:
            The same ``item``, so that later pipeline stages can use it.

        Raises:
            KeyError: If 'pen' or 'name' is missing from ``item``.
        """
        # No manual .encode('utf-8'): the connection is opened with
        # charset="utf8" and use_unicode=True, so text parameters are encoded
        # by the driver, and None can be passed through as well.
        params = (self._first_value(item['pen']),
                  self._first_value(item['name']))
        try:
            self.cursor.execute(
                """INSERT INTO test (pen, name) VALUES (%s, %s)""", params)
            self.conn.commit()
        except MySQLdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
            # Discard the failed statement so the shared connection is clean
            # for the next item.
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Close the cursor and the connection opened in ``__init__``."""
        self.cursor.close()
        self.conn.close()