我正在使用Elasticsearch保存不完整的数据,但是当我运行代码时,出现此错误:
raise SerializationError(data, e)
elasticsearch.exceptions.SerializationError:({{'real_estate_ID':[],
但它对其他项目可以正常工作,我仅对以下项目有问题:real_estate_ID
from __future__ import absolute_import
import scrapy
from adds.items import AddsItem
import stomp
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, helpers
class addsSpider(scrapy.Spider):
    """Crawl seloger.com listing pages.

    For every listing page the spider:

    * extracts the ad ids, URLs, sale prices and categories into one
      ``AddsItem`` (each field holds the list returned by ``extract()``),
    * pushes every ad URL onto the ``/queue/scrapy.seloger.ads.queue``
      STOMP destination,
    * yields the item and, when a "next" link exists, a request for it,
    * stores the item in the ``urls`` Elasticsearch index.
    """

    name = "adds"
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
            'scrapy_fake_useragent.middleware.RandomUserAgentMiddleware': 400,
        }
    }
    allowed_domains = ["www.seloger.com"]
    start_urls = [
        'https://www.seloger.com/list.htm?'
        'tri=initial&idtypebien=2,1&idtt=2,5&naturebien=1,2,4&ci=750115'
    ]
    # One client shared by every parse() call instead of one per page.
    es = Elasticsearch('localhost:9200', use_ssl=False, verify_certs=True)

    def parse(self, response):
        """Extract the ads of one listing page and follow pagination.

        Args:
            response: the Scrapy response for a listing page.

        Yields:
            The populated ``AddsItem``, then a ``scrapy.Request`` for the
            next page when a ``pagination-next`` link is present.
        """
        # ignore=400 tolerates the "index already exists" reply on later pages.
        # NOTE(review): "first_index" is created here but documents are
        # written to "urls" below -- confirm which index is intended.
        self.es.indices.create(index="first_index", ignore=400)

        items = AddsItem()
        items['real_estate_ID'] = response.xpath(
            '//div[@class="c-pa-list c-pa-sl c-pa-gold cartouche "]//@id'
        ).extract()
        items['real_estate_URL'] = response.xpath(
            '//a[@class="c-pa-link link_AB"]//@href'
        ).extract()
        items['real_estate_sale_price'] = response.xpath(
            '//div[@class="h-fi-pulse annonce__detail__sauvegarde"]'
            '//@data-prix'
        ).extract()
        items['real_estate_category'] = response.xpath(
            '//a[@class="c-pa-link link_AB"]//@title'
        ).extract()

        # NOTE(review): credentials are hard-coded; consider moving them to
        # the project settings.
        conn = stomp.Connection()
        conn.start()
        conn.connect('admin', 'password', wait=True)
        try:
            for item in items['real_estate_URL']:
                conn.send(
                    body=item,
                    destination='/queue/scrapy.seloger.ads.queue',
                    persistent='false',
                )
        finally:
            # Release the broker connection; the original leaked one per page.
            conn.disconnect()

        yield items

        nextpageurl = response.xpath('//a[@class="pagination-next"]/@href')
        if nextpageurl:
            # If we've found a pattern which matches
            path = nextpageurl.extract_first()
            nextpage = response.urljoin(path)
            print("Found url: {}".format(nextpage))  # Write a debug statement
            yield scrapy.Request(nextpage, callback=self.parse)

        # A Scrapy Item is not JSON-serializable by the Elasticsearch client
        # (this was the cause of the SerializationError); convert it to a
        # plain dict first.
        # NOTE(review): id=1 is fixed, so every page overwrites the same
        # document -- confirm this is intended.
        self.es.index(index="urls", doc_type="Ads_url", id=1, body=dict(items))
        # Read the document back, as the original did; the result is unused.
        self.es.get(index="urls", doc_type="Ads_url", id=1)