Elasticsearch 无法建立索引

时间:2018-03-12 17:49:58

标签: python django elasticsearch

在 Django 应用中尝试索引新用户时,Elasticsearch 无法建立索引……并返回键 ['created'] 的 KeyError(键错误)。

追溯:

import logging
import json

from elasticsearch_dsl import Index, DocType, Integer, Text, Date, Completion, GeoPoint, analyzer, Q
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
from django.conf import settings

from app.es.connection import conn

logger = logging.getLogger("app.es")


# Index object for the 'users' index; the User document class below is
# registered on it through the ``users_index.doc_type`` decorator.
users_index = Index('users')
# Index-level settings: five primary shards and zero replicas.
users_index.settings(
    number_of_shards=5,
    number_of_replicas=0
)

@users_index.doc_type
class User(DocType):
    """Elasticsearch document describing a user, registered on ``users_index``.

    Every regular attribute is mapped as a ``Text`` field except
    ``date_joined`` (``Date``); ``name_suggest`` is a ``Completion`` field.
    """

    email = Text()
    first_name = Text()
    last_name = Text()
    date_joined = Date()
    expertise = Text()
    institution = Text()
    position = Text()

    # Autocomplete field, analyzed with the "standard" analyzer.
    name_suggest = Completion(analyzer=analyzer("standard"))


def user_search(query):
    """Search the user documents for ``query`` and return plain dicts.

    Builds a ``multi_match`` query of type ``phrase_prefix`` against the
    ``_all`` field, logs the serialized query and the number of hits, and
    converts every hit into a dictionary.

    :param query: text to search for.
    :returns: list with one dict per hit, holding the keys ``name``,
        ``email``, ``position``, ``institution``, ``expertise`` and ``id``.
    """
    s = User.search()
    # Q() returns an already-built query object unchanged, so a single call
    # is equivalent to the previous nested Q(Q(...)).
    # NOTE(review): ``_all`` is disabled by default on indices created with
    # Elasticsearch 6.x -- confirm the target index still enables it.
    s.query = Q(
        'multi_match',
        query=query,
        fields=["_all"],
        type="phrase_prefix"
    )
    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute()
    logger.info("Got {} hits.".format(results.hits.total))
    return [
        {
            "name": hit.name_suggest,
            "email": hit.email,
            "position": hit.position,
            "institution": hit.institution,
            # ``expertise`` may be empty; fall back to an empty list so the
            # join yields an empty string.
            "expertise": ", ".join(hit.expertise or []),
            "id": hit.meta.id,
        } for hit in results.hits]

用户ES索引:

class User(AbstractBaseUser, PermissionsMixin):
    """Custom Django Auth User. We can extend this to include any metadata we want on users."""

    email = models.EmailField(unique=True)
    first_name = models.CharField(max_length=30)
    last_name = models.CharField(max_length=30)
    date_joined = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)
    is_staff = models.BooleanField(default=False)
    is_superuser = models.BooleanField(default=False)

    # Profile information
    street_address = models.CharField(max_length=200, blank=True, null=True)
    city = models.CharField(max_length=100, blank=True, null=True)
    state = models.CharField(max_length=100, blank=True, null=True)
    zip_code = models.CharField(max_length=10)
    institution = models.CharField(max_length=100, blank=True, null=True)
    phone_number = models.CharField(max_length=100, blank=True, null=True)
    position = models.CharField(max_length=100)
    terms_of_service = models.BooleanField(default=False)
    expertise = models.ManyToManyField("Expertise")
    # Declared exactly once, at the position of the later of the two
    # identical declarations this class used to carry, so the effective
    # field order is unchanged.
    notification_preference = models.CharField(max_length=1, choices=NOTIPREF, null=True)
    backup_email_address = models.EmailField(unique=True, blank=True, null=True)

    # The e-mail address is the login identifier.
    USERNAME_FIELD = "email"

    objects = UserManager()

    class Meta:
        verbose_name = 'user'
        verbose_name_plural = 'users'

    @property
    def full_name(self):
        """Return "first last" stripped of outer whitespace, or the e-mail when both names are empty."""
        full_name = '%s %s' % (self.first_name, self.last_name) if self.first_name or self.last_name else self.email
        return full_name.strip()

    @property
    def print_queue_size(self):
        """Return the number of ``PrintQueue`` rows that belong to this user."""
        return PrintQueue.objects.filter(user=self).count()

    # NOTE(review): this property reads ``self.notifications``, i.e. itself.
    # It can only work if something else (presumably a reverse relation named
    # ``notifications``) replaces this attribute on the class -- confirm.
    # The identical second copy of this property has been dropped.
    @property
    def notifications(self):
        """Return ``self.notifications.all()``."""
        return self.notifications.all()

    @property
    def num_unread_notifications(self):
        """Return the length of ``self.notifications.unread()``."""
        return len(self.notifications.unread())

    @property
    def expertise_str(self):
        """Return a list with the ``str()`` of every related expertise entry."""
        return [str(t) for t in self.expertise.all()]

    def get_short_name(self):
        """Required by django admin"""
        return self.first_name

    def email_user(self, subject, message, from_email=None, **kwargs):
        """Sends an email to this User."""
        send_mail(subject, message, from_email, [self.email], **kwargs)

    def update(self, **kwargs):
        """Set every keyword argument as an attribute on this user, then save."""
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.save()

    def indexing(self):
        """Save this user to the ``users`` index and return the document as a dict.

        Builds a ``UserDoc`` whose id is this row's id, saves it with
        ``index="users"`` and returns ``to_dict(include_meta=True)``.
        """
        obj = UserDoc(
            meta={"id": self.id},
            email=self.email,
            first_name=self.first_name,
            last_name=self.last_name,
            date_joined=self.date_joined,
            expertise=self.expertise_str,
            institution=self.institution,
            position=self.position,
            name_suggest=self.full_name,
        )
        obj.save(index="users")
        return obj.to_dict(include_meta=True)

用户模型:

from django.core.management.base import BaseCommand
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

from app.models import User, Resource


def bulk_indexing(model):
    """Send the ``indexing()`` result of every ``model`` row to the bulk helper.

    :param model: model class whose instances provide an ``indexing()`` method.
    """
    client = Elasticsearch()
    # Lazy generator: rows are pulled from the queryset iterator on demand.
    actions = (
        instance.indexing()
        for instance in model.objects.all().iterator()
    )
    bulk(client=client, actions=actions)


class Command(BaseCommand):
    """Management command that runs ``bulk_indexing`` for the ``User`` model."""

    help = 'Index all users'

    def handle(self, *args, **options):
        """Index every user, then write a success message to stdout."""
        bulk_indexing(User)
        success_message = self.style.SUCCESS("Indexed all users.")
        self.stdout.write(success_message)

索引命令:

{
  "users" : {
    "mappings" : {
      "user" : {
        "properties" : {
          "date_joined" : {
            "type" : "date"
          },
          "email" : {
            "type" : "text"
          },
          "expertise" : {
            "type" : "text"
          },
          "first_name" : {
            "type" : "text"
          },
          "institution" : {
            "type" : "text"
          },
          "last_name" : {
            "type" : "text"
          },
          "name_suggest" : {
            "type" : "completion",
            "analyzer" : "standard",
            "preserve_separators" : true,
            "preserve_position_increments" : true,
            "max_input_length" : 50
          },
          "position" : {
            "type" : "text"
          }
        }
      }
    }
  }
}

问题似乎与迭代器有关。更奇怪的是,自动补全建议(autosuggest)在我的预发布(staging)站点上可以正常工作,但在生产站点上不行。两个站点使用的是完全相同的代码。我完全无语了。

生产映射:

  "users" : {
    "mappings" : {
      "doc" : {
        "_all" : {
          "enabled" : true
        },
        "properties" : {
          "date_joined" : {
            "type" : "date"
          },
          "email" : {
            "type" : "text"
          },
          "expertise" : {
            "type" : "text"
          },
          "first_name" : {
            "type" : "text"
          },
          "institution" : {
            "type" : "text"
          },
          "last_name" : {
            "type" : "text"
          },
          "name_suggest" : {
            "type" : "completion",
            "analyzer" : "standard",
            "preserve_separators" : true,
            "preserve_position_increments" : true,
            "max_input_length" : 50
          },
          "position" : {
            "type" : "text"
          }
        }
      },
      "user" : {
        "_all" : {
          "enabled" : true
        },
        "properties" : {
          "date_joined" : {
            "type" : "date"
          },
          "email" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "expertise" : {
            "type" : "text"
          },
          "first_name" : {
            "type" : "text"
          },
          "institution" : {
            "type" : "text"
          },
          "last_name" : {
            "type" : "text"
          },
          "name_suggest" : {
            "type" : "completion",
            "analyzer" : "standard",
            "preserve_separators" : true,
            "preserve_position_increments" : true,
            "max_input_length" : 50
          },
          "position" : {
            "type" : "text"
          }
        }
      }
    }
  }

临时映射:

Traceback (most recent call last):
  File "manage.py", line 22, in <module>
    execute_from_command_line(sys.argv)
  File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
    utility.execute()
  File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 355, in execute
    self.fetch_command(subcommand).run_from_argv(self.argv)
  File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 283, in run_from_argv
    self.execute(*args, **cmd_options)
  File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 330, in execute
    output = self.handle(*args, **options)
  File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 17, in handle
    bulk_indexing(User)
  File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in bulk_indexing
    bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
    for ok, item in streaming_bulk(client, actions, **kwargs):
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
    client.transport.serializer):
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
    for action, data in actions:
  File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in <genexpr>
    bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
  File "/home/ubuntu/sixnexus/app/models.py", line 137, in indexing
    obj.save(index="users")
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 419, in save
    **doc_meta
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 76, in _wrapped
    return func(*args, params=params, **kwargs)
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 300, in index
    _make_path(index, doc_type, id), params=params, body=body)
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 314, in perform_request
    status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 163, in perform_request
    self._raise_error(response.status, raw_data)
  File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 125, in _raise_error
    raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'Rejecting mapping update to [users] as the final mapping would have more than 1 type: [doc, user]')

新错误:

rollapply

1 个答案:

答案 0 :(得分:1)

找到了。这是一个已知的 bug:https://github.com/elastic/elasticsearch-dsl-py/issues/793 看起来您使用的库版本在 ES6 及更高版本上会出错。

可以看到,他们已经把这一行改成了: https://github.com/elastic/elasticsearch-dsl-py/blob/75ee4e36027cd64c128094b3ff279ec332e8a784/elasticsearch_dsl/document.py#L426 而原来的写法是 return meta['created'],正是在这里出错的。升级该库应该可以修复这个问题。

编辑: 新的错误是因为 ES6 及更高版本不允许一个索引中存在多个映射类型。请注意,从 5.x 迁移过来的、已经包含多个映射类型的索引在 ES6 中仍然可以工作。您可能需要把各个类型拆分到各自独立的索引中,或者采用其他适合您需求的方式。这篇文档应该会有帮助:https://www.elastic.co/guide/en/elasticsearch/reference/6.x/removal-of-types.html