I have a Scrapy spider that scrapes images and content from some e-commerce sites. Now I want to download the images, and I wrote some code for that, but I get this error:
..
File "/usr/lib/python2.7/pprint.py", line 238, in format
return _safe_repr(object, context, maxlevels, level)
File "/usr/lib/python2.7/pprint.py", line 282, in _safe_repr
vrepr, vreadable, vrecur = saferepr(v, context, maxlevels, level)
File "/usr/lib/python2.7/pprint.py", line 323, in _safe_repr
rep = repr(object)
File "/usr/local/lib/python2.7/dist-packages/Scrapy-0.23.0-py2.7.egg/scrapy/item.py", line 77, in __repr__
return pformat(dict(self))
File "/usr/lib/python2.7/pprint.py", line 63, in pformat
return PrettyPrinter(indent=indent, width=width, depth=depth).pformat(object)
File "/usr/lib/python2.7/pprint.py", line 122, in pformat
self._format(object, sio, 0, 0, {}, 0)
File "/usr/lib/python2.7/pprint.py", line 140, in _format
rep = self._repr(object, context, level - 1)
File "/usr/lib/python2.7/pprint.py", line 226, in _repr
self._depth, level)
File "/usr/lib/python2.7/pprint.py", line 238, in format
return _safe_repr(object, context, maxlevels, level)
File "/usr/lib/python2.7/pprint.py", line 282, in _safe_repr
vrepr, vreadable, vrecur = saferepr(v, context, maxlevels, level)
File "/usr/lib/python2.7/pprint.py", line 323, in _safe_repr
rep = repr(object)
File "/usr/local/lib/python2.7/dist-packages/Scrapy-0.23.0-py2.7.egg/scrapy/item.py", line 77, in __repr__
return pformat(dict(self))
File "/usr/lib/python2.7/pprint.py", line 63, in pformat
return PrettyPrinter(indent=indent, width=width, depth=depth).pformat(object)
File "/usr/lib/python2.7/pprint.py", line 122, in pformat
self._format(object, sio, 0, 0, {}, 0)
File "/usr/lib/python2.7/pprint.py", line 140, in _format
rep = self._repr(object, context, level - 1)
File "/usr/lib/python2.7/pprint.py", line 226, in _repr
self._depth, level)
File "/usr/lib/python2.7/pprint.py", line 238, in format
return _safe_repr(object, context, maxlevels, level)
File "/usr/lib/python2.7/pprint.py", line 280, in _safe_repr
for k, v in _sorted(object.items()):
File "/usr/lib/python2.7/pprint.py", line 78, in _sorted
with warnings.catch_warnings():
exceptions.RuntimeError: maximum recursion depth exceeded
My spider:
from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.http import Request
from loom.items import LoomItem
import sys
from scrapy.contrib.loader import XPathItemLoader
from scrapy.utils.response import get_base_url
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class LoomSpider(CrawlSpider):
    name = "loom_org"
    allowed_domains = ["2loom.com"]
    start_urls = [
        "http://2loom.com",
        "http://2loom.com/collections/basic",
        "http://2loom.com/collections/design",
        "http://2loom.com/collections/tum-koleksiyon"
    ]

    rules = [
        Rule(SgmlLinkExtractor(allow='products'), callback='parse_items', follow=True),
        Rule(SgmlLinkExtractor(allow=()), follow=True),
    ]

    def parse_items(self, response):
        sys.setrecursionlimit(10000)
        item = LoomItem()
        items = []
        sel = Selector(response)

        name = sel.xpath('//h1[@itemprop="name"]/text()').extract()
        brand = "2loom"
        price_lower = sel.xpath('//h1[@class="product-price"]/text()').extract()
        price = "0"
        image = sel.xpath('//meta[@property="og:image"]/@content').extract()
        description = sel.xpath('//meta[@property="og:description"]/@content').extract()
        print image

        ## The image is downloaded here
        loader = XPathItemLoader(item, response=response)
        loader.add_xpath('image_urls', '//meta[@property="og:image"]/@content')

        ## The ID is split off the name (e.g. "10. Design | Siyah & beyaz kalpli")
        id = name[0].strip().split(". ")
        id = id[0]

        item['id'] = id
        item['name'] = name
        item['url'] = response.url
        item['image'] = loader.load_item()
        item['category'] = "Basic"
        item['description'] = description
        item["brand"] = "2Loom"
        item['price'] = price
        item['price_lower'] = price_lower
        print item

        items.append(item)
        return items
Items:
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
from scrapy.item import Item, Field
class LoomItem(Item):
    # define the fields for your item here like:
    # name = Field()
    id = Field()
    name = Field()
    brand = Field()
    image = Field()
    category = Field()
    description = Field()
    price_lower = Field()
    price = Field()
    url = Field()
    images = Field()
    image_urls = Field()
Pipeline:
from scrapy.contrib.pipeline.images import ImagesPipeline, ImageException
from scrapy.http import Request
from cStringIO import StringIO
import psycopg2
import hashlib
from scrapy.conf import settings
class MyImagePipeline(ImagesPipeline):
    def get_media_requests(self, item, info):
        return [Request(x) for x in item.get('image_urls', [])]

    def item_completed(self, results, item, info):
        item['images'] = [x for ok, x in results if ok]
        return item

    # Override the convert_image method to disable image conversion
    def convert_image(self, image, size=None):
        buf = StringIO()
        try:
            image.save(buf, image.format)
        except Exception, ex:
            raise ImageException("Cannot process image. Error: %s" % ex)
        return image, buf

    def image_key(self, url):
        image_guid = hashlib.sha1(url).hexdigest()
        return 'full/%s.jpg' % (image_guid)
Settings:
BOT_NAME = 'loom'
SPIDER_MODULES = ['loom.spiders']
NEWSPIDER_MODULE = 'loom.spiders'
DOWNLOAD_DELAY = 5
ITEM_PIPELINES = {'scrapy.contrib.pipeline.images.ImagesPipeline': 1}
IMAGES_STORE = '/root/loom/images/'
IMAGES_THUMBS = {
    'small': (90, 90),
    'big': (300, 300),
}
USER_AGENT = "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0"
IM_MODULE = 'loom.pipelines.MyImagePipeline'
ITEM_PIPELINES = ['loom.pipelines.MyImagePipeline']
LOG_LEVEL = 'INFO'
I don't know why this error occurs, so thanks for any help.
Answer 0 (score: 1)
Try changing the recursion limit that you set with sys.setrecursionlimit(10000) in the spider. My Python interpreter stopped with a "RuntimeError" too.
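A rough sketch of what this answer suggests, assuming the limit is raised once at module import instead of inside parse_items() on every response (the value 20000 is only an illustrative guess, not a recommendation from the answer):

import sys

from scrapy.contrib.spiders import CrawlSpider


# Raise the interpreter-wide recursion limit once, at import time, rather
# than calling sys.setrecursionlimit() inside parse_items() for each page.
# 20000 is an arbitrary example value.
sys.setrecursionlimit(20000)


class LoomSpider(CrawlSpider):
    name = "loom_org"
    allowed_domains = ["2loom.com"]
    # ... start_urls, rules and parse_items() stay exactly as in the question ...

Because sys.setrecursionlimit() is process-wide, calling it once when the module is imported has the same effect as calling it again on every response.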