在 Django 应用中索引新用户时,Elasticsearch 索引失败,并抛出键 'created' 的 KeyError
追溯:
import logging
import json
from elasticsearch_dsl import Index, DocType, Integer, Text, Date, Completion, GeoPoint, analyzer, Q
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
from django.conf import settings
from app.es.connection import conn
logger = logging.getLogger("app.es")
# Elasticsearch index that stores the user documents.
users_index = Index('users')
# Shard/replica layout for the index: 5 primary shards, no replicas.
users_index.settings(number_of_shards=5, number_of_replicas=0)
@users_index.doc_type
class User(DocType):
    """Elasticsearch document describing a user, registered on ``users_index``."""

    # Searchable text fields.
    email = Text()
    first_name = Text()
    last_name = Text()
    date_joined = Date()
    expertise = Text()
    institution = Text()
    position = Text()

    # Autocomplete field, analyzed with the "standard" analyzer.
    name_suggest = Completion(analyzer=analyzer("standard"))
def user_search(query):
    """Search the user index for documents matching ``query``.

    Builds a ``multi_match`` query of type ``phrase_prefix`` over the ``_all``
    field, logs the serialized query and the total hit count, and converts
    every returned hit into a plain dict.

    Args:
        query: The text to search for.

    Returns:
        A list of dicts with the keys ``name``, ``email``, ``position``,
        ``institution``, ``expertise`` (the hit's expertise entries joined
        with ", ") and ``id`` (the document's meta id).
    """
    s = User.search()
    # A single Q() is enough; wrapping an existing query object in another
    # Q() call adds nothing.
    s.query = Q(
        'multi_match',
        query=query,
        fields=["_all"],
        type="phrase_prefix",
    )
    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute()
    logger.info("Got {} hits.".format(results.hits.total))
    return [
        {
            "name": hit.name_suggest,
            "email": hit.email,
            "position": hit.position,
            "institution": hit.institution,
            # expertise may be empty/None; fall back to an empty list
            "expertise": ", ".join(hit.expertise or []),
            "id": hit.meta.id,
        }
        for hit in results.hits
    ]
用户ES索引:
class User(AbstractBaseUser, PermissionsMixin):
    """Custom Django Auth User. We can extend this to include any metadata we want on users."""

    email = models.EmailField(unique=True)
    first_name = models.CharField(max_length=30)
    last_name = models.CharField(max_length=30)
    date_joined = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)
    is_staff = models.BooleanField(default=False)
    is_superuser = models.BooleanField(default=False)

    # Profile information
    street_address = models.CharField(max_length=200, blank=True, null=True)
    city = models.CharField(max_length=100, blank=True, null=True)
    state = models.CharField(max_length=100, blank=True, null=True)
    zip_code = models.CharField(max_length=10)
    institution = models.CharField(max_length=100, blank=True, null=True)
    phone_number = models.CharField(max_length=100, blank=True, null=True)
    position = models.CharField(max_length=100)
    terms_of_service = models.BooleanField(default=False)
    expertise = models.ManyToManyField("Expertise")
    # Declared once, at the position of the declaration that previously won
    # (the field used to be declared twice with identical arguments).
    notification_preference = models.CharField(max_length=1, choices=NOTIPREF, null=True)
    backup_email_address = models.EmailField(unique=True, blank=True, null=True)

    USERNAME_FIELD = "email"

    objects = UserManager()

    class Meta:
        verbose_name = 'user'
        verbose_name_plural = 'users'

    @property
    def full_name(self):
        """Return "first last" (stripped), or the email when both names are empty."""
        full_name = '%s %s' % (self.first_name, self.last_name) if self.first_name or self.last_name else self.email
        return full_name.strip()

    @property
    def print_queue_size(self):
        """Return the number of PrintQueue rows that belong to this user."""
        return PrintQueue.objects.filter(user=self).count()

    @property
    def notifications(self):
        """Return all notifications of this user.

        NOTE(review): as written this property reads ``self.notifications``,
        i.e. itself, which would recurse unless another attribute of the same
        name replaces it on the class (presumably a reverse relation added
        elsewhere) -- confirm and rename or remove accordingly.
        """
        return self.notifications.all()

    @property
    def num_unread_notifications(self):
        """Return the number of unread notifications."""
        return len(self.notifications.unread())

    @property
    def expertise_str(self):
        """Return the string form of every related expertise entry, as a list."""
        return [str(t) for t in self.expertise.all()]

    def get_short_name(self):
        """Required by django admin"""
        return self.first_name

    def email_user(self, subject, message, from_email=None, **kwargs):
        """Sends an email to this User."""
        send_mail(subject, message, from_email, [self.email], **kwargs)

    def update(self, **kwargs):
        """Set every keyword argument as an attribute on this user, then save."""
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.save()

    def indexing(self):
        """Save this user to the "users" Elasticsearch index.

        Returns:
            The saved document as a dict including its meta information.
        """
        obj = UserDoc(
            meta={"id": self.id},
            email=self.email,
            first_name=self.first_name,
            last_name=self.last_name,
            date_joined=self.date_joined,
            expertise=self.expertise_str,
            institution=self.institution,
            position=self.position,
            name_suggest=self.full_name,
        )
        # NOTE(review): the document is written here, and the returned dict is
        # also passed to the bulk helper by the index command, so each user
        # appears to be indexed twice during a bulk run -- confirm intent.
        obj.save(index="users")
        return obj.to_dict(include_meta=True)
用户模型:
from django.core.management.base import BaseCommand
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from app.models import User, Resource
def bulk_indexing(model):
    """Send every instance of ``model`` to Elasticsearch via the bulk helper.

    Args:
        model: A model class whose instances provide an ``indexing()`` method
            returning the action to submit.
    """
    client = Elasticsearch()
    # Lazily produce one action per row so the queryset is streamed.
    actions = (instance.indexing() for instance in model.objects.all().iterator())
    bulk(client=client, actions=actions)
class Command(BaseCommand):
    """Management command that indexes all users."""

    help = 'Index all users'

    def handle(self, *args, **options):
        """Bulk-index every User, then write a success message to stdout."""
        bulk_indexing(User)
        success_message = self.style.SUCCESS("Indexed all users.")
        self.stdout.write(success_message)
索引命令:
{
"users" : {
"mappings" : {
"user" : {
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text"
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
}
}
}
}
问题似乎与迭代器有关。更奇怪的是,自动建议(autocomplete)功能在我的临时(staging)站点上可以正常工作,在生产站点上却不行。两个站点使用的是完全相同的代码。我完全不知所措。
生产映射:
"users" : {
"mappings" : {
"doc" : {
"_all" : {
"enabled" : true
},
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text"
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
},
"user" : {
"_all" : {
"enabled" : true
},
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
}
}
}
临时映射:
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
utility.execute()
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 330, in execute
output = self.handle(*args, **options)
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 17, in handle
bulk_indexing(User)
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in bulk_indexing
bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
for ok, item in streaming_bulk(client, actions, **kwargs):
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
client.transport.serializer):
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
for action, data in actions:
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in <genexpr>
bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
File "/home/ubuntu/sixnexus/app/models.py", line 137, in indexing
obj.save(index="users")
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 419, in save
**doc_meta
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 76, in _wrapped
return func(*args, params=params, **kwargs)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 300, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 314, in perform_request
status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 163, in perform_request
self._raise_error(response.status, raw_data)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 125, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'Rejecting mapping update to [users] as the final mapping would have more than 1 type: [doc, user]')
新错误:
rollapply
答案 0 :(得分:1)
找到原因了。这是一个已知的 bug:https://github.com/elastic/elasticsearch-dsl-py/issues/793 看起来您使用的库版本在 ES 6 及更高版本上会出错。
可以看到,他们已经把出问题的那一行改成了下面链接中的写法:
https://github.com/elastic/elasticsearch-dsl-py/blob/75ee4e36027cd64c128094b3ff279ec332e8a784/elasticsearch_dsl/document.py#L426
原来的写法是 return meta['created'],正是这一行引发了键 'created' 的 KeyError。
升级该库应该就能修复这个问题。
编辑:新错误是因为 ES 6 及更高版本不允许一个索引包含多个映射类型。请注意,从 5.x 迁移过来、已经带有多个映射类型的索引在 ES 6 中仍然可以工作。您可能需要把各个类型拆分到各自独立的索引中,或采用其他适合您需求的方案。这篇文档应该会有帮助:https://www.elastic.co/guide/en/elasticsearch/reference/6.x/removal-of-types.html