django haystack elasticsearch不会给出正确的结果

时间:2017-10-19 13:56:00

标签: python django elasticsearch django-haystack

我的django-haystack + elasticsearch搜索有问题。我要搜索的模型是Question(问题),其中一个字段study_level是取值1到13的整数。当我只搜索study_level在10-12范围内的问题时,结果中也会出现其他study_level的问题。看起来study_level这个过滤条件在搜索中根本没有起作用。

我有这个索引

class QuestionIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index describing how ``Question`` objects are indexed.

    Fields declared with ``model_attr`` read the named attribute of the model
    instance. Fields without it (``tags``, ``schools``, ``answers``,
    ``has_answer``, ``content``, ``content_short``) are presumably filled by
    ``prepare_*`` methods that are not part of this excerpt -- TODO confirm.
    """
    # Document field (document=True), built from a template (use_template=True).
    text = indexes.EdgeNgramField(document=True, use_template=True)
    id = indexes.IntegerField(model_attr='id')
    user_id = indexes.IntegerField(model_attr='user_id')
    # Declared with boost=1.15, unlike the other fields.
    question = indexes.EdgeNgramField(model_attr='question', boost=1.15)
    # Integer field that the search code filters on with ``study_level__in``.
    study_level = indexes.IntegerField(model_attr='study_level')
    is_answered = indexes.IntegerField(model_attr='is_answered')
    is_archived = indexes.BooleanField(model_attr='is_archived')
    created_at = indexes.DateTimeField(model_attr='created_at')
    tags = indexes.MultiValueField()
    schools = indexes.MultiValueField()
    # Declared with indexed=False.
    answers = indexes.IntegerField(indexed=False)
    has_answer = indexes.IntegerField(indexed=False)
    content = indexes.CharField(indexed=False)
    content_short = indexes.CharField(indexed=True)

    def get_model(self):
        """Return the model class this index covers."""
        return Question

    def prepare_study_level(self, obj):
        """Prepare the ``study_level`` value of ``obj`` for indexing."""
        # NOTE(review): as shown, this only binds a local and falls off the
        # end, so it returns None. The excerpt looks truncated -- confirm that
        # the real method returns the value.
        study_level = obj.study_level

索引类中还有其他的prepare_x方法(此处省略),但出问题的是study_level。

然后在此代码中使用它

class QuestionSearch(object):
# Plain old Python object that builds and runs a Haystack search for questions.
# NOTE(review): in this listing the class body is not indented under the
# class statement; the definitions that follow are presumably its members.
# Default for the ``limit`` argument of __init__.
MAX_RESULT_LENGTH = 12

def __init__(self, user, query='', limit=MAX_RESULT_LENGTH, subjects=None, study_level_min=None, study_level_max=None):
    """Set up a question search on behalf of ``user``.

    Parameters:
        user: The user performing the search.
        query (string): Free-text query; an empty string means no text filter.
        limit (int): Maximum number of results returned by ``search``.
        subjects (list): Subject names matched against the ``tags`` field.
            Defaults to no subjects.
        study_level_min (int): Optional lower study level bound.
        study_level_max (int): Optional upper study level bound.
    """
    self.user = user
    self.query = query
    self.limit = limit
    # Use a fresh list per instance. A ``[]`` default in the signature would
    # be one shared object aliased by every instance created without subjects.
    self.subjects = [] if subjects is None else subjects
    # Store the bounds in ascending order when both are given.
    if study_level_min and study_level_max and study_level_min > study_level_max:
        study_level_min, study_level_max = study_level_max, study_level_min
    self.study_level_min = study_level_min
    self.study_level_max = study_level_max
    self.schools = None  # Not fetched yet; filter_schools() fills it on first use.
    # Base queryset: non-archived questions, in a fixed sort order.
    self.search_qs = SearchQuerySet().filter(is_archived=0).models(Question).order_by(
        'is_answered', 'has_answer', 'created_at'
    )

def search(self, exclude_current_user=False):
    """Apply every filter to ``self.search_qs`` and return the limited result.

    Parameters:
        exclude_current_user (bool): When true, questions whose ``user_id``
            equals ``self.user.id`` are excluded first.
    Returns:
        ``self.search_qs`` sliced to at most ``self.limit`` entries.
    """
    if exclude_current_user:
        self.search_qs = self.search_qs.exclude(user_id=self.user.id)

    # Each step rebinds self.search_qs; the order below is the original one.
    steps = (
        self.filter_subjects,
        self.filter_schools,
        self.filter_query,
        self.filter_study_level,
    )
    for apply_step in steps:
        apply_step()

    return self.search_qs[:self.limit]

def filter_subjects(self):
    """Restrict ``self.search_qs`` to questions tagged with any subject.

    Does nothing when ``self.subjects`` is empty.
    """
    if len(self.subjects) == 0:
        return

    clauses = [SQ(tags__contains=name) for name in self.subjects]
    # Seed with the last clause, then OR the remaining ones onto it.
    combined = clauses.pop()
    for clause in clauses:
        combined |= clause
    self.search_qs = self.search_qs.filter(combined)

def filter_schools(self):
    """Restrict ``self.search_qs`` to the schools stored in ``self.schools``.

    ``self.schools`` is fetched with ``get_accessible_schools(self.user)``
    whenever it is still falsy, and kept on the instance afterwards.
    """
    cached = self.schools
    if not cached:
        cached = self.schools = get_accessible_schools(self.user)

    accessible_ids = [entry.id for entry in cached]
    self.search_qs = self.search_qs.filter(schools__in=accessible_ids)

def filter_query(self):
    """Restrict ``self.search_qs`` to questions matching ``self.query``.

    A match in either ``question`` or ``content_short`` is accepted. Does
    nothing when the query is the empty string.
    """
    if self.query == '':
        return

    in_question = SQ(question__contains=self.query)
    in_short_content = SQ(content_short__contains=self.query)
    self.search_qs = self.search_qs.filter(in_question | in_short_content)

def filter_study_level(self):
    """Restrict ``self.search_qs`` to the configured study level range.

    Logs the allowed levels and the query state before and after the filter
    is applied, at warning level.
    """
    from redbutton.helpers import init_logger
    log = init_logger()
    log.warning("--------------------------------")

    allowed_levels = self._get_study_level_range()
    log.warning(allowed_levels)

    log.warning("before filter study_level")
    log.warning(vars(self.search_qs.query))

    narrowed = self.search_qs.filter(study_level__in=allowed_levels)
    self.search_qs = narrowed

    log.warning("After filter study_level")
    log.warning(vars(narrowed.query))

def _get_study_level_range(self):
    """Return the inclusive range of study levels to search.

    A falsy ``self.study_level_min`` falls back to 1 and a falsy
    ``self.study_level_max`` falls back to 12.
    """
    lowest = self.study_level_min or 1
    highest = self.study_level_max or 12
    # range() excludes its stop value, hence the + 1.
    return range(lowest, highest + 1)

@classmethod
def get_filter_params(cls, homework_settings):
    """Build filter keyword arguments from a user's homework settings.

    Parameters:
        homework_settings: Object exposing ``user``, ``study_level_min``,
            ``study_level_max`` and a ``tags`` manager.
    Returns:
        dict that always contains 'schools__in', plus 'study_level__in' when
        both study level bounds are set and 'tags__name__in' when the
        settings have at least one tag.
    """
    kwargs = {'schools__in': get_accessible_schools(homework_settings.user)}

    level_min = homework_settings.study_level_min
    level_max = homework_settings.study_level_max
    if level_min and level_max:
        # range() excludes its stop value. Without the + 1 the maximum level
        # was dropped, and min == max produced an empty range. This matches
        # the inclusive range built by _get_study_level_range().
        kwargs['study_level__in'] = range(level_min, level_max + 1)

    tag_names = list(homework_settings.tags.all().values_list('name', flat=True))
    if tag_names:
        kwargs['tags__name__in'] = tag_names

    return kwargs

@classmethod
def get_study_level_string(cls, study_level):
    """
    Validate a study level received as text.

    Parameters:
        study_level (string): A string convertible to an int, for the study_level.
    Returns:
        The study level as an int when it is one of the keys of
        Question.STUDY_LEVEL_CHOICES, otherwise None.
    """
    try:
        level = int(study_level)
        return level if level in dict(Question.STUDY_LEVEL_CHOICES) else None
    except (TypeError, ValueError):
        # Not convertible to an int.
        return None
应用study_level过滤器之后,下面这条日志语句的输出如下:

logger.warning(vars(self.search_qs.query))

{'_hit_count': None, '_using': u'default', 'distance_point': {}, 'within': {}, 'query_filter': <SQ: AND (is_archived__content=0 AND NOT (user_id__content=4) AND tags__contains=mate AND schools__in=[231L, 231L, 231L, 231L, 4470L, 2029L, 4919L, 4920L] AND study_level__in=[10, 11, 12])>, '_raw_query': None, '_raw_query_params': {}, '_spelling_suggestion': <object object at 0x7ffbdc676ed0>, '_stats': None, 'backend': <search.search_backends.CustomElasticBackend object at 0x7ffbdb64e210>, 'stats': {}, 'order_by': [u'is_answered', u'has_answer', u'created_at'], 'result_class': <class 'haystack.models.SearchResult'>, 'spelling_query': None, 'narrow_queries': set([]), 'boost': {}, 'query_facets': [], 'models': set([<class 'homeworkhelp.models.Question'>]), 'date_facets': {}, 'start_offset': 0, 'end_offset': None, '_more_like_this': False, '_mlt_instance': None, 'fields': [], 'facets': {}, '_facet_counts': None, '_results': None, 'highlight': False, 'dwithin': {}}

这里的重要部分是 AND study_level__in = [10,11,12]

但问题在于,最终的搜索结果中包含了study_level值不在study_levels列表中的匹配项。我不明白为什么study_level不在study_levels中的问题会出现在结果里。

如果需要更多信息(例如Question模型的定义),请告诉我,我会尽力提供。

1 个答案:

答案 0 :(得分:0)

我对此并不满意,但它似乎有效。

if int(study_level_min) <= item.study_level <= int(study_level_max) and count_added_result <= limit:
        result_list.append(class_fields)
        count_added_result += 1
    if count_added_result > limit:
        break

现在它给出了正确的结果,但这只是在Python代码中对搜索结果再做一次过滤,并没有解决最初的问题,即haystack没有返回预期的结果集;目前只能先这样处理。