我想将抓取的数据存储在MongoDb中,但是我收到了错误。
文件“C:\ Pythom27 \ lib \ site-packages \ six.py”,第599行,in iteritems 返回d.iteritems(** kw) AttributeError:'list'对象没有属性'iteritem'。 我没有在程序中的任何地方使用属性iteritem 这是程序代码: ex.py
import scrapy
from example.items import ExampleItem
class ExampleSpider(scrapy.Spider):
    """Crawl in.bookmyshow.com movie listings and yield one item per movie.

    ``parse`` discovers movie-detail URLs on the listing page; ``parse_movie``
    scrapes name, language and info from each detail page.
    """

    name = 'aaa'
    allowed_domains = ["in.bookmyshow.com"]
    start_urls = ["https://in.bookmyshow.com/movies"]

    def parse(self, response):
        # Pull every anchor href matching the movie-detail URL pattern,
        # de-duplicate, and schedule one request per unique movie page.
        movie_links = response.xpath('//a/@href').re('movies/[^\/]+\/.*$')
        for href in set(movie_links):
            absolute_url = response.urljoin(href)
            yield scrapy.Request(absolute_url, callback=self.parse_movie)

    def parse_movie(self, response):
        # Local helper: extract the text nodes for an XPath expression and
        # strip surrounding whitespace from each (Python 2 ``unicode``).
        def stripped_texts(xpath_expr):
            return map(unicode.strip, response.xpath(xpath_expr).extract())

        item = {}
        item['Moviename'] = stripped_texts('.//h1[@id="eventTitle"]/text()')
        item['Language'] = stripped_texts('/html/body/div[1]/div[2]/div[1]/div[2]/div[1]/div[3]/span[1]/a/text()')
        item['Info'] = stripped_texts('/html/body/div[1]/div[2]/div[1]/div[2]/div[1]/div[3]/span[3]/a/text()')
        yield item
settings.py:
# Scrapy project settings (settings.py).
BOT_NAME = 'example'

SPIDER_MODULES = ['example.spiders']
NEWSPIDER_MODULE = 'example.spiders'

# ITEM_PIPELINES must be a DICT mapping pipeline path -> order (0-1000),
# not a list: Scrapy iterates it with iteritems(), so a list raises
# "AttributeError: 'list' object has no attribute 'iteritems'" -- the exact
# error reported above. The path must also name the class that actually
# exists in pipelines.py (ExamplePipeline), not MongoDBPipeline.
ITEM_PIPELINES = {'example.pipelines.ExamplePipeline': 300}

# MongoDB connection settings consumed by the pipeline.
MONGODB_SERVER = "localhost"
MONGODB_PORT = 27017
MONGODB_DB = "ticketbook"
MONGODB_COLLECTION = "movies"
pipelines.py:
import pymongo
from scrapy.conf import settings
from scrapy.exceptions import DropItem
from scrapy import log
class ExamplePipeline(object):
    """Item pipeline that stores every scraped item in MongoDB.

    Connection parameters come from settings.py. The original code looked up
    ``MONGODB_HOST`` and ``MONGODB_DATABASE``, which are never defined there
    (settings.py defines ``MONGODB_SERVER`` and ``MONGODB_DB``), so the
    connection was built from ``None`` values; the keys are now consistent
    with the settings file shown above.
    """

    def __init__(self):
        # NOTE(review): pymongo.Connection was removed in pymongo >= 3.0;
        # prefer pymongo.MongoClient if the installed version supports it.
        connection = pymongo.Connection(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
        db = connection[settings['MONGODB_DB']]
        self.collection = db[settings['MONGODB_COLLECTION']]

    def process_item(self, item, spider):
        """Insert the item as a plain dict, log it, and pass it downstream."""
        self.collection.insert(dict(item))
        log.msg("Item wrote to MongoDB database {}, collection {}, at host {}, port {}".format(
            settings['MONGODB_DB'],
            settings['MONGODB_COLLECTION'],
            settings['MONGODB_SERVER'],
            settings['MONGODB_PORT']))
        return item
我想知道我哪里出错了..
答案 0 :(得分:0)
在您的settings.py中,将ITEM_PIPELINES从列表更改为字典,如下所示:
ITEM_PIPELINES = { 'example.pipelines.MongoDBPipeline': 100 }
参见说明:http://doc.scrapy.org/en/latest/topics/item-pipeline.html#activating-an-item-pipeline-component