引发SerializationError

时间:2019-02-22 14:49:01

标签: python-3.x elasticsearch scrapy

我正在使用Elasticsearch保存爬取到的数据,但是当我运行代码时,出现此错误:

引发SerializationError(data,e)

elasticsearch.exceptions.SerializationError:({{'real_estate_ID':[],

但其他字段都能正常工作,我只有以下字段有问题:real_estate_ID

from __future__ import absolute_import
import scrapy
from adds.items import AddsItem
import stomp
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, helpers

class addsSpider(scrapy.Spider):
    """Scrape real-estate listings from seloger.com.

    For each result page: extract listing fields into an ``AddsItem``,
    publish every listing URL to an ActiveMQ queue over STOMP, index the
    scraped fields in Elasticsearch, and follow the "next page" link.
    """

    name = "adds"

    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
            'scrapy_fake_useragent.middleware.RandomUserAgentMiddleware': 400,
        }
    }
    allowed_domains = ["www.seloger.com"]
    # NOTE(review): the original URL was line-wrapped by the paste; it is
    # reassembled here without the stray whitespace.
    start_urls = ['https://www.seloger.com/list.htm?tri=initial&idtypebien=2,1&idtt=2,5&naturebien=1,2,4&ci=750115']

    # Single shared client for the spider. The original also created a second
    # bare Elasticsearch() inside parse(), shadowing this one — removed.
    es = Elasticsearch('localhost:9200', use_ssl=False, verify_certs=True)

    def parse(self, response):
        """Parse one result page: emit an item, queue URLs, index, paginate.

        :param response: scrapy HTTP response for a listing page.
        :yields: the populated ``AddsItem`` and a ``scrapy.Request`` for the
                 next result page, if any.
        """
        # ignore=400 so an "index already exists" response is not an error.
        self.es.indices.create(index="first_index", ignore=400)

        conn = stomp.Connection()
        conn.start()
        conn.connect('admin', 'password', wait=True)

        # XPath class strings below were line-wrapped in the paste; they are
        # restored to single literals (including the trailing space in
        # "cartouche ", which is part of the page's class attribute).
        items = AddsItem()
        items['real_estate_ID'] = response.xpath(
            '//div[@class="c-pa-list c-pa-sl c-pa-gold cartouche "]//@id').extract()
        items['real_estate_URL'] = response.xpath(
            '//a[@class="c-pa-link link_AB"]//@href').extract()
        items['real_estate_sale_price'] = response.xpath(
            '//div[@class="h-fi-pulse annonce__detail__sauvegarde"]//@data-prix').extract()
        items['real_estate_category'] = response.xpath(
            '//a[@class="c-pa-link link_AB"]//@title').extract()

        for url in items['real_estate_URL']:
            conn.send(body=url,
                      destination='/queue/scrapy.seloger.ads.queue',
                      persistent='false')

        # FIX for the reported SerializationError: a scrapy.Item is not JSON
        # serializable by the elasticsearch client — convert it to a plain
        # dict first. (Unused `res = es.get(...)` readback removed.)
        # NOTE(review): id=1 means every page overwrites the same document;
        # consider a per-page id (e.g. response.url) if that is not intended.
        self.es.index(index="urls", doc_type="Ads_url", id=1, body=dict(items))

        yield items

        nextpageurl = response.xpath('//a[@class="pagination-next"]/@href')
        if nextpageurl:
            # Found a pagination link — follow it with the same callback.
            path = nextpageurl.extract_first()
            nextpage = response.urljoin(path)
            print("Found url: {}".format(nextpage))
            yield scrapy.Request(nextpage, callback=self.parse)

0 个答案:

没有答案