使用django / haystack / solr

时间:2015-08-10 19:26:34

标签: django solr django-haystack django-cms

您好,我正在为我的 django-cms 项目集成搜索功能。我制作了一个 wiki 应用程序,每个页面的内容都存储在 PlaceholderField 中。最初我可以通过 sudo ./manage.py rebuild_index 或 update_index 为 PlaceholderField 的内容编制索引,搜索效果非常好。问题是,当我修改 PlaceholderField 时,搜索索引没有更新,即使我在 settings.py 中设置了:

HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'

这是我的model.py

from django.db import models
from django.utils.text import slugify
from djangocms_text_ckeditor.fields import HTMLField
from cms.models.fields import PlaceholderField

def my_placeholder_slotname(instance):
    """Return the slot name used for a wiki page's placeholder.

    The same fixed name is returned for every call; ``instance`` is
    accepted but not used.
    """
    slot_name = 'content_placeholder'
    return slot_name


class WikiPage(models.Model):
    """A wiki page whose content lives in a django CMS placeholder.

    ``slug`` is the primary key and is regenerated from ``name`` on every
    save.
    """
    slug = models.SlugField(max_length=50,primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    section = models.ForeignKey('WikiSection', related_name='pages', db_index=True)
    # Placeholder whose slot name is supplied by my_placeholder_slotname.
    content_placeholder = PlaceholderField(my_placeholder_slotname)

    def __str__(self):
        """Return the page name."""
        return self.name

    def save(self, *args, **kwargs):
        """Derive ``slug`` from ``name``, then save as usual."""
        # NOTE(review): slug is the primary key, so renaming a page changes
        # its pk -- confirm that this is intended.
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the page URL in the form ``/wiki/page/<slug>``."""
        return '/wiki/page/%s' % self.slug


class WikiSection(models.Model):
    """A named wiki section.

    ``slug`` is the primary key and is regenerated from ``name`` on every
    save. ``WikiPage.section`` points here with ``related_name='pages'``.
    """
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def __str__(self):
        """Return the section name."""
        return self.name

    def save(self, *args, **kwargs):
        """Derive ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the section URL in the form ``/wiki/section/<slug>``."""
        return '/wiki/section/%s' % self.slug

这是我的search_indexes.py

import datetime
from haystack import indexes
from .models import WikiPage, WikiSection
from aldryn_search.helpers import get_cleaned_bits, get_request
from aldryn_search.utils import clean_join, get_index_base, strip_tags
from cms.models import CMSPlugin
from djangocms_text_ckeditor.cms_plugins import TextPlugin
from django.template import loader, Context


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``WikiPage``.

    Besides the page name and URL, the index stores text extracted from
    the plugins of the page's ``content_placeholder``.
    """
    # Primary document field (document=True); overwritten in prepare().
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    # NOTE(review): presumably the name of the search backend connection
    # this index belongs to -- confirm against the project settings.
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class this index covers."""
        return WikiPage

    def index_queryset(self, using=None):
        """Return all objects of the model ordered by name.

        Used when the entire index for model is updated.
        """
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the absolute URL of ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Render each plugin of the page's placeholder and join the text.

        Every plugin is rendered with ``render_plugin()``, passed through
        ``get_cleaned_bits`` and the resulting bits are combined with
        ``clean_join`` using a single-space separator.
        """
        # NOTE(review): `request` is assigned but never used below.
        request = get_request(obj)
        placeholders = obj.content_placeholder
        plugins = placeholders.get_plugins()
        text_bits = []
        for plugin in plugins:
            cleaned = get_cleaned_bits(plugin.render_plugin())
            text_bits.extend(cleaned)           

        return clean_join(' ', text_bits)

    def prepare(self, obj):
        """Prepare the index data and render the ``text`` field manually.

        The parent implementation runs first; ``text`` is then replaced by
        the rendered template, whose context contains the object and the
        extracted placeholder text under the key ``placeholder``.
        """
        data = super(WikiPageIndex, self).prepare(obj)

        template = loader.select_template(
            ("search/indexes/wiki_app/wikipage_text.txt", ),
        )
        # The placeholder text is computed again here for the template
        # context.
        data["text"] = template.render(Context({
            "object": obj,
            'placeholder': self.prepare_content_placeholder(obj),
        }))
        return data



class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``WikiSection`` (name and URL)."""
    # Primary document field (document=True), rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()

    def get_model(self):
        """Return the model class this index covers."""
        return WikiSection

    def index_queryset(self, using=None):
        """Return all objects of the model ordered by name.

        Used when the entire index for model is updated.
        """
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the absolute URL of ``obj``."""
        return obj.get_absolute_url()

    # NOTE(review): `model_name` holds the model class itself rather than a
    # string, and nothing in this snippet reads this Meta -- confirm that it
    # is needed at all.
    class Meta:
        model_name = WikiSection
        app_label = 'wiki'

我正在寻找有关更新方法的帮助,我不知道该如何编写。我查看了文档,看到有 update_object 和 update 方法可以扩展,但不知道应该返回什么。

修改

我一直在调整代码,现在使用的是:

class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``WikiPage`` (revised attempt).

    The placeholder text is collected per plugin through
    ``get_plugin_index_data`` instead of rendering the plugins directly.
    """
    # Primary document field (document=True), rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()
    # NOTE(review): this field declares model_attr while a
    # prepare_content_placeholder method also exists below -- confirm which
    # of the two is meant to supply the value.
    content_placeholder = indexes.CharField(model_attr='content_placeholder')
    # NOTE(review): presumably the name of the search backend connection
    # this index belongs to -- confirm against the project settings.
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class this index covers."""
        return WikiPage

    def index_queryset(self, using=None):
        """Return all objects of the model ordered by name.

        Used when the entire index for model is updated.
        """
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the absolute URL of ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the search text of all plugins in the page's placeholder.

        The text of each plugin is obtained from ``get_plugin_search_text``
        and the pieces are combined with ``clean_join`` using a
        single-space separator.
        """
        plugins = obj.content_placeholder.get_plugins()
        text_bits = []

        for plugin in plugins:
            # A new request object is requested for every plugin.
            plugin_text = self.get_plugin_search_text(plugin, get_request())
            text_bits.append(plugin_text)

        return clean_join(' ', text_bits)

    def get_plugin_search_text(self, base_plugin, request):
        """Return the search text of a single plugin as one string."""
        # NOTE(review): get_plugin_index_data is not among the imports shown
        # for this module -- confirm that it is imported.
        plugin_content_bits = get_plugin_index_data(base_plugin, request)
        return clean_join(' ', plugin_content_bits)

这是我的wikipage_text.txt

{{ object.name }} - Wiki Page (Section {{ object.section }})
Content: {{ object.get_placeholder_text }}

EDIT2

对于那些尝试下面答案中代码的人,这是我为使其完全正常工作所做的改动。代码与下面的答案相同,只是补充了一些内容。下面的解决方案会在占位符中的插件被添加(added)或删除(removed)时更新索引,但不会在插件被编辑(edited)时更新。就我而言,我需要在 djangocms-text-ckeditor 插件中的文本被修改时更新索引。所需要做的只是导入文本编辑器的模型(from djangocms_text_ckeditor.models import Text),并为它再连接一个信号:

from djangocms_text_ckeditor.models import Text

# Run update_wiki_page_index whenever a ckeditor Text plugin is saved, so
# that edits to existing text also reach the search index.
signals.post_save.connect(
    receiver=update_wiki_page_index,
    sender=Text,
    dispatch_uid='post_save_update_wiki_page_index',
)

这样做的问题是,网站中的所有页面都有占位符,而且都可能包含文本,这意味着此信号会频繁触发。为了避免 wiki.save(update_fields=['content_placeholder_data']) 不必要地访问数据库,我只在数据确实发生变化时才保存,如下所示:

def update_wiki_page_index(**kwargs):
    """Signal receiver: refresh the cached placeholder text of a wiki page.

    Looks up the placeholder of the plugin passed as ``instance`` and the
    ``WikiPage`` that owns that placeholder. The page is saved (limited to
    ``content_placeholder_data``) only when the freshly computed text
    differs from the stored value, so plugins that do not belong to a wiki
    page and saves that change nothing cause no write.

    Args:
        **kwargs: signal keyword arguments; only ``instance`` is read.
    """
    instance = kwargs['instance']
    if instance.pk is None:
        return

    placeholder = get_placeholder(plugin=instance)

    if placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=placeholder)
    except WikiPage.DoesNotExist:
        return

    # Compute the index text once: it is needed both for the comparison
    # and for the value that gets stored.
    placeholder_data = get_placeholder_index_data(placeholder)

    # Make sure data has changed
    if wiki.content_placeholder_data != placeholder_data:
        # DB based approach
        wiki.content_placeholder_data = placeholder_data

        # Saving here will trigger index update
        wiki.save(update_fields=['content_placeholder_data'])

1 个答案:

答案 0 :(得分:3)

当您向占位符添加/删除插件时,搜索索引不会更新,因为haystack信号只会侦听通过搜索明确注册的模型,这意味着您必须为每个插件注册搜索索引你想听的。

更好和更简单的方法是进行自己的信号处理,但特定于插件,django-cms在内部使用这些信号进行一些操作,因此这是一种常见的情况。

我贴出了三个文件:models 和 search_indexes 是在你的代码基础上做了一些修改,helpers 则是新增的文件。

我更改了占位符名称功能以返回更独特的值,并在那里添加了一些注释。

关于您的问题,有两种方法可以使用信号处理来解决它。

一种是在每次保存/删除插件时计算占位符数据,并把这些数据存放在模型的一个字段中;然后使用 update_fields 调用 save() 只更新我们想要的字段,haystack 的信号监听器就会被触发,从而更新索引。当 haystack 更新索引时,它只需要读取数据库中的数据,而不必重新计算插件数据。

第二种方法是简单地从信号处理程序手动触发更新,然后告诉haystack更新搜索引擎中的wiki对象,就好像你已经保存了wiki对象一样。

我在那里添加了两个解决方案,将它们分开,如果不断修改wiki占位符,那么我建议使用db方法并将一些异步处理插入haystack更新信号(需要celery)。 否则,您只需使用手动更新。

声明 我没有亲自测试过,只是根据以前的经验编写了它:)

models.py

from django.db import models
from django.db.models import signals
from django.utils.text import slugify

from djangocms_text_ckeditor.fields import HTMLField

from cms.models import CMSPlugin
from cms.models.fields import PlaceholderField
from cms.signals.plugins import get_placeholder

from .helpers import (
    get_index_from_model,
    get_placeholder_index_data,
)


def get_wiki_placeholder_name(instance):
    """Build the placeholder slot name for a wiki page.

    Slot names are not UNIQUE at the database level, but it is still good
    to keep them as unique as possible: embedding the page's primary key
    makes the placeholder of a given wiki easy to spot by its slot name.
    """
    wiki_pk = instance.pk
    return 'wiki_%s_placeholder' % wiki_pk


def update_wiki_page_index(**kwargs):
    """Signal receiver that re-indexes the wiki page owning a plugin.

    Returns without doing anything when the plugin has no primary key, has
    no placeholder, or its placeholder does not belong to a ``WikiPage``.

    Two update strategies are illustrated one after the other; as written,
    both of them run.

    Args:
        **kwargs: signal keyword arguments; only ``instance`` is read.
    """
    plugin = kwargs['instance']
    if plugin.pk is None:
        return

    plugin_placeholder = get_placeholder(plugin=plugin)
    if plugin_placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=plugin_placeholder)
    except WikiPage.DoesNotExist:
        return

    # Strategy 1 (DB based): cache the placeholder text on the model.
    wiki.content_placeholder_data = get_placeholder_index_data(
        wiki.content_placeholder
    )
    # Saving here will trigger index update
    wiki.save(update_fields=['content_placeholder_data'])

    # Strategy 2 (realtime): update the object in the search index directly.
    wiki.update_object_index()


class WikiPage(models.Model):
    """A wiki page whose content lives in a django CMS placeholder.

    ``slug`` is the primary key and is regenerated from ``name`` on every
    save. ``content_placeholder_data`` holds a text copy of the placeholder
    content for the search index.
    """
    slug = models.SlugField(max_length=50,primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    section = models.ForeignKey('WikiSection', related_name='pages', db_index=True)
    # Placeholder whose slot name is supplied by get_wiki_placeholder_name.
    content_placeholder = PlaceholderField(get_wiki_placeholder_name)
    # Text extracted from the placeholder's plugins; not editable in forms.
    content_placeholder_data = models.TextField(editable=False)

    def __str__(self):
        """Return the page name."""
        return self.name

    def save(self, *args, **kwargs):
        """Derive ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def update_object_index(self):
        """Update this page in its search index without saving the model.

        Does nothing when ``get_index_from_model`` finds no index for this
        model.
        """
        # By default will update all cores associated with object
        index = get_index_from_model(self._meta.model)

        if index:
            # update_object takes a using='' parameter
            # if on a multi-language setup, you'll need to make sure
            # using reflects the language core
            # Pass the model instance itself: the model defines no
            # `model_instance` attribute.
            index.update_object(instance=self)

    def get_absolute_url(self):
        """Return the page URL in the form ``/wiki/page/<slug>``."""
        return '/wiki/page/%s' % self.slug


class WikiSection(models.Model):
    """A named wiki section.

    ``slug`` is the primary key and is regenerated from ``name`` on every
    save. ``WikiPage.section`` points here with ``related_name='pages'``.
    """
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def __str__(self):
        """Return the section name."""
        return self.name

    def save(self, *args, **kwargs):
        """Derive ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the section URL in the form ``/wiki/section/<slug>``."""
        return '/wiki/section/%s' % self.slug


# Run update_wiki_page_index before a CMS plugin is deleted.
# NOTE(review): pre_delete fires before the plugin is removed, so the text
# recomputed by the receiver presumably still includes it -- confirm.
signals.pre_delete.connect(
    receiver=update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='pre_delete_update_wiki_page_index',
)


# Run update_wiki_page_index after a CMS plugin is saved.
signals.post_save.connect(
    receiver=update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='post_save_update_wiki_page_index',
)

search_indexes.py

from django.template import loader, Context

from haystack import indexes

from .helpers import get_placeholder_index_data
from .models import WikiPage, WikiSection


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``WikiPage``.

    Besides the page name and URL, the index stores the text of the page's
    placeholder, for which two alternative sources are illustrated in
    ``prepare_content_placeholder``.
    """
    # Primary document field (document=True); overwritten in prepare().
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    # NOTE(review): presumably the name of the search backend connection
    # this index belongs to -- confirm against the project settings.
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class this index covers."""
        return WikiPage

    def index_queryset(self, using=None):
        """Return all objects of the model ordered by name.

        Used when the entire index for model is updated.
        """
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the absolute URL of ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the placeholder text to index for ``obj``.

        Two alternatives are shown. As written both statements run and the
        realtime value is the one returned.
        """
        # DB approach: reuse the text stored on the model.
        data = obj.content_placeholder_data

        # OR

        # Realtime approach: recompute the text from the placeholder's
        # plugins.
        # NOTE(review): this assignment overwrites the DB-approach value
        # above -- keep only one of the two approaches.
        placeholder = obj.content_placeholder
        data = get_placeholder_index_data(placeholder)
        return data

    def prepare(self, obj):
        """Prepare the index data and render the ``text`` field manually.

        The parent implementation runs first; ``text`` is then replaced by
        the rendered template, whose context contains the object and the
        placeholder text under the key ``placeholder``.
        """
        data = super(WikiPageIndex, self).prepare(obj)

        template = loader.select_template(
            ("search/indexes/wiki_app/wikipage_text.txt", ),
        )
        data["text"] = template.render(Context({
            "object": obj,
            'placeholder': self.prepare_content_placeholder(obj),
        }))
        return data


class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``WikiSection`` (name and URL)."""
    # Primary document field (document=True), rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()

    def get_model(self):
        """Return the model class this index covers."""
        return WikiSection

    def index_queryset(self, using=None):
        """Return all objects of the model ordered by name.

        Used when the entire index for model is updated.
        """
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the absolute URL of ``obj``."""
        return obj.get_absolute_url()

helpers.py

from haystack import connections
from haystack.constants import DEFAULT_ALIAS
from haystack.exceptions import NotHandled

from aldryn_search.helpers import get_plugin_index_data, get_request
from aldryn_search.utils import clean_join


def get_plugin_search_text(base_plugin, request):
    """Return the search text of a single plugin as one string.

    The bits returned by ``get_plugin_index_data`` for ``base_plugin`` and
    ``request`` are handed to ``clean_join`` with a single-space separator.
    """
    return clean_join(' ', get_plugin_index_data(base_plugin, request))


def get_placeholder_index_data(placeholder):
    """Return the combined search text of every plugin in ``placeholder``.

    One request object is obtained from ``get_request`` and shared by all
    plugins; the per-plugin texts are combined with ``clean_join`` using a
    single-space separator.
    """
    request = get_request()
    plugin_texts = [
        get_plugin_search_text(plugin, request)
        for plugin in placeholder.get_plugins()
    ]
    return clean_join(' ', plugin_texts)


def get_index_from_model(model_class):
    """Return the search index registered for ``model_class``, or ``None``.

    The lookup explicitly uses the ``DEFAULT_ALIAS`` connection; on a
    multi-language setup the alias has to be derived from the current
    language instead. A found index gets its ``_backend_alias`` set to
    ``DEFAULT_ALIAS`` before it is returned.
    """
    default_connection = connections[DEFAULT_ALIAS]
    unified_index = default_connection.get_unified_index()

    try:
        found_index = unified_index.get_index(model_class)
    except NotHandled:
        # No index is registered for this model.
        return None

    found_index._backend_alias = DEFAULT_ALIAS
    return found_index